Coverage for oc_ocdm / graph / graph_set.py: 89%
227 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-28 18:52 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-28 18:52 +0000
1# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com>
2# SPDX-FileCopyrightText: 2023-2026 Arcangelo Massari <arcangelo.massari@unibo.it>
3#
4# SPDX-License-Identifier: ISC
6from __future__ import annotations
8from typing import TYPE_CHECKING, cast
10from io import BytesIO
12from oc_ocdm.abstract_set import AbstractSet
13from oc_ocdm.reader import Reader
14from oc_ocdm.support.support import get_count, get_prefix, get_short_name
15from sparqlite import SPARQLClient
17if TYPE_CHECKING:
18 from typing import Dict, ClassVar, Optional, List, Set
20from oc_ocdm.counter_handler.counter_handler import CounterHandler
21from oc_ocdm.counter_handler.filesystem_counter_handler import \
22 FilesystemCounterHandler
23from oc_ocdm.counter_handler.in_memory_counter_handler import \
24 InMemoryCounterHandler
25from oc_ocdm.graph.entities.bibliographic.agent_role import AgentRole
26from oc_ocdm.graph.entities.bibliographic.bibliographic_reference import \
27 BibliographicReference
28from oc_ocdm.graph.entities.bibliographic.bibliographic_resource import \
29 BibliographicResource
30from oc_ocdm.graph.entities.bibliographic.citation import Citation
31from oc_ocdm.graph.entities.bibliographic.discourse_element import \
32 DiscourseElement
33from oc_ocdm.graph.entities.bibliographic.pointer_list import PointerList
34from oc_ocdm.graph.entities.bibliographic.reference_annotation import \
35 ReferenceAnnotation
36from oc_ocdm.graph.entities.bibliographic.reference_pointer import \
37 ReferencePointer
38from oc_ocdm.graph.entities.bibliographic.resource_embodiment import \
39 ResourceEmbodiment
40from oc_ocdm.graph.entities.bibliographic.responsible_agent import \
41 ResponsibleAgent
42from oc_ocdm.graph.entities.identifier import Identifier
43from oc_ocdm.graph.graph_entity import GraphEntity
44from rdflib import Graph, Namespace, URIRef
class GraphSet(AbstractSet[GraphEntity]):
    """Factory and registry for the graph entities of a dataset.

    ``res_to_entity`` maps the URI of every known entity to the entity object.
    The ``add_*`` methods return the entity already stored for a given URI or
    build a new one; when no URI is supplied, a fresh identifier is drawn from
    the configured counter handler (see ``_add``).
    """

    # Labels
    labels: ClassVar[Dict[str, str]] = {
        "an": "annotation",
        "ar": "agent role",
        "be": "bibliographic entry",
        "br": "bibliographic resource",
        "ci": "citation",
        "de": "discourse element",
        "id": "identifier",
        "pl": "single location pointer list",
        "ra": "responsible agent",
        "re": "resource embodiment",
        "rp": "in-text reference pointer"
    }

    def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
                 wanted_label: bool = True, custom_counter_handler: CounterHandler | None = None) -> None:
        """Initialise an empty set and choose the counter handler.

        Args:
            base_iri: Prefix from which the per-type graph URLs are built
                (``base_iri + "an/"``, ``base_iri + "ar/"``, ...).
            info_dir: When non-empty (and no custom handler is given), counters
                are managed by ``FilesystemCounterHandler(info_dir, supplier_prefix)``.
            supplier_prefix: Prefix prepended to every newly generated count.
            wanted_label: When true, newly created entities receive a label.
            custom_counter_handler: Counter handler that takes precedence over
                both the filesystem and the in-memory handlers.
        """
        super().__init__()
        # The following variable maps a URIRef with the related graph entity
        self.res_to_entity: Dict[URIRef, GraphEntity] = {}
        self.base_iri: str = base_iri
        self.info_dir: str = info_dir
        self.supplier_prefix: str = supplier_prefix
        self.wanted_label: bool = wanted_label
        # Graphs
        # The following structure of URL is quite important for the other classes
        # developed and should not be changed. The only part that can change is the
        # value of the base_iri
        self.g_an: str = base_iri + "an/"
        self.g_ar: str = base_iri + "ar/"
        self.g_be: str = base_iri + "be/"
        self.g_br: str = base_iri + "br/"
        self.g_ci: str = base_iri + "ci/"
        self.g_de: str = base_iri + "de/"
        self.g_id: str = base_iri + "id/"
        self.g_pl: str = base_iri + "pl/"
        self.g_ra: str = base_iri + "ra/"
        self.g_re: str = base_iri + "re/"
        self.g_rp: str = base_iri + "rp/"

        # Handler precedence: explicit handler > filesystem > in-memory.
        if custom_counter_handler:
            self.counter_handler = custom_counter_handler
        elif info_dir is not None and info_dir != "":
            self.counter_handler = FilesystemCounterHandler(info_dir, supplier_prefix)
        else:
            self.counter_handler = InMemoryCounterHandler()

    def get_entity(self, res: URIRef) -> Optional[GraphEntity]:
        """Return the entity stored for ``res``, or ``None`` if it is unknown."""
        return self.res_to_entity.get(res)

    # Add resources related to bibliographic entities
    def add_an(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> ReferenceAnnotation:
        """Return the ReferenceAnnotation stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"an"``.
        """
        if res is not None and get_short_name(res) != "an":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ReferenceAnnotation entity.")
        if res is not None and res in self.res_to_entity:
            return cast(ReferenceAnnotation, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_an, "an", res)
        return ReferenceAnnotation(cur_g, self, GraphEntity.iri_note, res,
                                   resp_agent, source, count, label, "an",
                                   preexisting_graph)

    def add_ar(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> AgentRole:
        """Return the AgentRole stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"ar"``.
        """
        if res is not None and get_short_name(res) != "ar":
            raise ValueError(f"Given res: <{res}> is inappropriate for an AgentRole entity.")
        if res is not None and res in self.res_to_entity:
            return cast(AgentRole, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_ar, "ar", res)
        return AgentRole(cur_g, self, GraphEntity.iri_role_in_time, res,
                         resp_agent, source, count, label, "ar",
                         preexisting_graph)

    def add_be(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> BibliographicReference:
        """Return the BibliographicReference stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"be"``.
        """
        if res is not None and get_short_name(res) != "be":
            raise ValueError(f"Given res: <{res}> is inappropriate for a BibliographicReference entity.")
        if res is not None and res in self.res_to_entity:
            return cast(BibliographicReference, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_be, "be", res)
        return BibliographicReference(cur_g, self, GraphEntity.iri_bibliographic_reference, res,
                                      resp_agent, source, count, label, "be",
                                      preexisting_graph)

    def add_br(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> BibliographicResource:
        """Return the BibliographicResource stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"br"``.
        """
        if res is not None and get_short_name(res) != "br":
            raise ValueError(f"Given res: <{res}> is inappropriate for a BibliographicResource entity.")
        if res is not None and res in self.res_to_entity:
            return cast(BibliographicResource, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_br, "br", res)
        return BibliographicResource(cur_g, self, GraphEntity.iri_expression, res,
                                     resp_agent, source, count, label, "br",
                                     preexisting_graph)

    def add_ci(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> Citation:
        """Return the Citation stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"ci"``.
        """
        if res is not None and get_short_name(res) != "ci":
            raise ValueError(f"Given res: <{res}> is inappropriate for a Citation entity.")
        if res is not None and res in self.res_to_entity:
            return cast(Citation, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_ci, "ci", res)
        return Citation(cur_g, self, GraphEntity.iri_citation, res,
                        resp_agent, source, count, label, "ci",
                        preexisting_graph)

    def add_de(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> DiscourseElement:
        """Return the DiscourseElement stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"de"``.
        """
        if res is not None and get_short_name(res) != "de":
            raise ValueError(f"Given res: <{res}> is inappropriate for a DiscourseElement entity.")
        if res is not None and res in self.res_to_entity:
            return cast(DiscourseElement, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_de, "de", res)
        return DiscourseElement(cur_g, self, GraphEntity.iri_discourse_element, res,
                                resp_agent, source, count, label, "de",
                                preexisting_graph)

    def add_id(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> Identifier:
        """Return the Identifier stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"id"``.
        """
        if res is not None and get_short_name(res) != "id":
            raise ValueError(f"Given res: <{res}> is inappropriate for an Identifier entity.")
        if res is not None and res in self.res_to_entity:
            return cast(Identifier, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_id, "id", res)
        return Identifier(cur_g, self, GraphEntity.iri_identifier, res,
                          resp_agent, source, count, label, "id",
                          preexisting_graph)

    def add_pl(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> PointerList:
        """Return the PointerList stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"pl"``.
        """
        if res is not None and get_short_name(res) != "pl":
            raise ValueError(f"Given res: <{res}> is inappropriate for a PointerList entity.")
        if res is not None and res in self.res_to_entity:
            return cast(PointerList, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_pl, "pl", res)
        return PointerList(cur_g, self, GraphEntity.iri_singleloc_pointer_list, res,
                           resp_agent, source, count, label, "pl",
                           preexisting_graph)

    def add_rp(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> ReferencePointer:
        """Return the ReferencePointer stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"rp"``.
        """
        if res is not None and get_short_name(res) != "rp":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ReferencePointer entity.")
        if res is not None and res in self.res_to_entity:
            return cast(ReferencePointer, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_rp, "rp", res)
        return ReferencePointer(cur_g, self, GraphEntity.iri_intextref_pointer, res,
                                resp_agent, source, count, label, "rp",
                                preexisting_graph)

    def add_ra(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> ResponsibleAgent:
        """Return the ResponsibleAgent stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"ra"``.
        """
        if res is not None and get_short_name(res) != "ra":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ResponsibleAgent entity.")
        if res is not None and res in self.res_to_entity:
            return cast(ResponsibleAgent, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_ra, "ra", res)
        return ResponsibleAgent(cur_g, self, GraphEntity.iri_agent, res,
                                resp_agent, source, count, label, "ra",
                                preexisting_graph)

    def add_re(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
               preexisting_graph: Graph | None = None) -> ResourceEmbodiment:
        """Return the ResourceEmbodiment stored for ``res`` or create a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"re"``.
        """
        if res is not None and get_short_name(res) != "re":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ResourceEmbodiment entity.")
        if res is not None and res in self.res_to_entity:
            return cast(ResourceEmbodiment, self.res_to_entity[res])
        cur_g, count, label = self._add(self.g_re, "re", res)
        return ResourceEmbodiment(cur_g, self, GraphEntity.iri_manifestation, res,
                                  resp_agent, source, count, label, "re",
                                  preexisting_graph)

    def _add(self, graph_url: str, short_name: str, res: URIRef | None = None) -> tuple[Graph, str | None, str | None]:
        """Prepare the graph, count and label for an entity about to be built.

        Args:
            graph_url: Identifier given to the new ``Graph``.
            short_name: Two-letter entity type, used as counter name and label key.
            res: URI of an already-identified entity, or ``None`` to mint a new
                identifier.

        Returns:
            ``(graph, count, label)``. When ``res`` is given, ``count`` and
            ``label`` are both ``None`` and the stored counter is only raised
            if the number found in ``res`` exceeds it. Otherwise ``count`` is
            the supplier prefix followed by the incremented counter, and
            ``label`` is set only when ``wanted_label`` is true.
        """
        cur_g: Graph = Graph(identifier=graph_url)
        self._set_ns(cur_g)

        if res is not None:
            supplier_prefix = get_prefix(res)
            try:
                res_count = int(get_count(res))
            except ValueError:
                # Non-numeric count: fall back to -1 (presumably below any
                # stored counter, so the counter is left alone).
                res_count = -1
            # Keep the counter at least as high as the highest number seen.
            if res_count > self.counter_handler.read_counter(short_name, supplier_prefix=supplier_prefix):
                self.counter_handler.set_counter(res_count, short_name, supplier_prefix=supplier_prefix)
            return cur_g, None, None

        supplier_prefix = self.supplier_prefix
        count: str = supplier_prefix + str(
            self.counter_handler.increment_counter(short_name, supplier_prefix=supplier_prefix))

        label: Optional[str] = None
        if self.wanted_label:
            label = f"{self.labels[short_name]} {count} [{short_name}/{count}]"

        return cur_g, count, label

    def get_orphans(self) -> List[GraphEntity]:
        """Return the entities of this set that no entity of this set points to.

        An entity counts as referenced when its URI appears as the object of a
        triple whose subject is the URI of an entity in this set (looked up in
        that entity's own graph). The result follows the insertion order of
        ``res_to_entity``.
        """
        referenced_entities: Set[URIRef] = set()
        for res, entity in self.res_to_entity.items():
            referenced_entities.update(
                obj for obj in entity.g.objects(subject=res, predicate=None)
                if isinstance(obj, URIRef)
            )

        return [entity for res, entity in self.res_to_entity.items()
                if res not in referenced_entities]

    def remove_orphans_from_triplestore(self, ts_url: str, resp_agent: str) -> None:
        """Drop, from entities fetched at ``ts_url``, the links to deleted entities.

        For every entity flagged ``to_be_deleted``, the triplestore is asked
        for all triples of the subjects that reference it. Those subjects are
        imported into this set through ``Reader.import_entities_from_graph``,
        and the triples pointing at the deleted entity are removed from each
        imported entity's graph.

        Args:
            ts_url: Endpoint passed to ``SPARQLClient``.
            resp_agent: Responsible agent forwarded to the import.
        """
        with SPARQLClient(ts_url) as client:
            # Iterate over a snapshot: the import below receives this set and
            # may register new entities in res_to_entity, which would break a
            # live iteration over the dict.
            for entity_res, entity in list(self.res_to_entity.items()):
                if not entity.to_be_deleted:
                    continue
                query: str = f"CONSTRUCT {{?s ?p ?o}} WHERE {{?s ?p ?o ; ?p_1 <{entity_res}>}}"
                nt_bytes = client.construct(query)
                if not nt_bytes:
                    continue
                result: Graph = Graph()
                result.parse(BytesIO(nt_bytes), format='nt')
                imported_entities: List[GraphEntity] = Reader.import_entities_from_graph(self, result, resp_agent)
                for imported_entity in imported_entities:
                    imported_entity.g.remove((imported_entity.res, None, entity_res))

    def commit_changes(self) -> None:
        """Commit every entity and forget those still flagged for deletion.

        The loop runs over a snapshot of ``res_to_entity`` because entries are
        deleted from the dict inside it; deleting while iterating the live
        view raises ``RuntimeError``.
        """
        for res, entity in list(self.res_to_entity.items()):
            entity.commit_changes()
            # NOTE(review): the flag is read after the entity-level commit; if
            # that call resets it, nothing is ever removed here - confirm.
            if entity.to_be_deleted:
                del self.res_to_entity[res]

    def _set_ns(self, g: Graph) -> None:
        """Bind the per-type graph prefixes and the vocabulary prefixes on ``g``."""
        g.namespace_manager.bind("an", Namespace(self.g_an))
        g.namespace_manager.bind("ar", Namespace(self.g_ar))
        g.namespace_manager.bind("be", Namespace(self.g_be))
        g.namespace_manager.bind("ci", Namespace(self.g_ci))
        g.namespace_manager.bind("de", Namespace(self.g_de))
        g.namespace_manager.bind("br", Namespace(self.g_br))
        g.namespace_manager.bind("id", Namespace(self.g_id))
        g.namespace_manager.bind("pl", Namespace(self.g_pl))
        g.namespace_manager.bind("ra", Namespace(self.g_ra))
        g.namespace_manager.bind("re", Namespace(self.g_re))
        g.namespace_manager.bind("rp", Namespace(self.g_rp))
        g.namespace_manager.bind("biro", GraphEntity.BIRO)
        g.namespace_manager.bind("co", GraphEntity.CO)
        g.namespace_manager.bind("c4o", GraphEntity.C4O)
        g.namespace_manager.bind("cito", GraphEntity.CITO)
        g.namespace_manager.bind("datacite", GraphEntity.DATACITE)
        g.namespace_manager.bind("dcterms", GraphEntity.DCTERMS)
        g.namespace_manager.bind("deo", GraphEntity.DEO)
        g.namespace_manager.bind("doco", GraphEntity.DOCO)
        g.namespace_manager.bind("fabio", GraphEntity.FABIO)
        g.namespace_manager.bind("foaf", GraphEntity.FOAF)
        g.namespace_manager.bind("frbr", GraphEntity.FRBR)
        g.namespace_manager.bind("literal", GraphEntity.LITERAL)
        g.namespace_manager.bind("oa", GraphEntity.OA)
        g.namespace_manager.bind("oco", GraphEntity.OCO)
        g.namespace_manager.bind("prism", GraphEntity.PRISM)
        g.namespace_manager.bind("pro", GraphEntity.PRO)

    def get_an(self) -> tuple[ReferenceAnnotation, ...]:
        """Return every ReferenceAnnotation currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ReferenceAnnotation))

    def get_ar(self) -> tuple[AgentRole, ...]:
        """Return every AgentRole currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, AgentRole))

    def get_be(self) -> tuple[BibliographicReference, ...]:
        """Return every BibliographicReference currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, BibliographicReference))

    def get_br(self) -> tuple[BibliographicResource, ...]:
        """Return every BibliographicResource currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, BibliographicResource))

    def get_ci(self) -> tuple[Citation, ...]:
        """Return every Citation currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Citation))

    def get_de(self) -> tuple[DiscourseElement, ...]:
        """Return every DiscourseElement currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, DiscourseElement))

    def get_id(self) -> tuple[Identifier, ...]:
        """Return every Identifier currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Identifier))

    def get_pl(self) -> tuple[PointerList, ...]:
        """Return every PointerList currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, PointerList))

    def get_rp(self) -> tuple[ReferencePointer, ...]:
        """Return every ReferencePointer currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ReferencePointer))

    def get_ra(self) -> tuple[ResponsibleAgent, ...]:
        """Return every ResponsibleAgent currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ResponsibleAgent))

    def get_re(self) -> tuple[ResourceEmbodiment, ...]:
        """Return every ResourceEmbodiment currently in the set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ResourceEmbodiment))