Coverage for oc_ocdm / graph / graph_set.py: 87%
199 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-08 20:23 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-08 20:23 +0000
1# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com>
2# SPDX-FileCopyrightText: 2023-2026 Arcangelo Massari <arcangelo.massari@unibo.it>
3#
4# SPDX-License-Identifier: ISC
6from __future__ import annotations
8from io import BytesIO
9from typing import TYPE_CHECKING, cast
11from rdflib import Graph
12from triplelite import RDFTerm, SubgraphView, TripleLite, from_rdflib
14from oc_ocdm.abstract_set import AbstractSet
15from oc_ocdm.counter_handler.counter_handler import CounterHandler
16from oc_ocdm.counter_handler.filesystem_counter_handler import FilesystemCounterHandler
17from oc_ocdm.counter_handler.in_memory_counter_handler import InMemoryCounterHandler
18from oc_ocdm.graph.entities.bibliographic.agent_role import AgentRole
19from oc_ocdm.graph.entities.bibliographic.bibliographic_reference import BibliographicReference
20from oc_ocdm.graph.entities.bibliographic.bibliographic_resource import BibliographicResource
21from oc_ocdm.graph.entities.bibliographic.citation import Citation
22from oc_ocdm.graph.entities.bibliographic.discourse_element import DiscourseElement
23from oc_ocdm.graph.entities.bibliographic.pointer_list import PointerList
24from oc_ocdm.graph.entities.bibliographic.reference_annotation import ReferenceAnnotation
25from oc_ocdm.graph.entities.bibliographic.reference_pointer import ReferencePointer
26from oc_ocdm.graph.entities.bibliographic.resource_embodiment import ResourceEmbodiment
27from oc_ocdm.graph.entities.bibliographic.responsible_agent import ResponsibleAgent
28from oc_ocdm.graph.entities.identifier import Identifier
29from oc_ocdm.graph.graph_entity import GraphEntity
30from oc_ocdm.support.sparql import sparql_construct
31from oc_ocdm.support.support import get_count, get_prefix, get_short_name
33if TYPE_CHECKING:
34 from typing import ClassVar, Dict, List, Optional, Set
class GraphSet(AbstractSet[GraphEntity]):
    """Factory and registry for the graph entities of a dataset.

    The set keeps a mapping from resource IRIs to the entities it knows
    (``res_to_entity``), exposes one ``add_*`` method per entity kind and
    relies on a counter handler to number newly created entities.
    """

    # Human-readable names used to build entity labels, keyed by short name.
    labels: ClassVar[Dict[str, str]] = {
        "an": "annotation",
        "ar": "agent role",
        "be": "bibliographic entry",
        "br": "bibliographic resource",
        "ci": "citation",
        "de": "discourse element",
        "id": "identifier",
        "pl": "single location pointer list",
        "ra": "responsible agent",
        "re": "resource embodiment",
        "rp": "in-text reference pointer",
    }

    def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
                 wanted_label: bool = True, custom_counter_handler: CounterHandler | None = None) -> None:
        """Initialise an empty set.

        :param base_iri: prefix from which the per-kind graph IRIs
            (``<base_iri>an/``, ``<base_iri>ar/``, ...) are derived
        :param info_dir: when non-empty (and no custom handler is given) a
            ``FilesystemCounterHandler`` is built on it
        :param supplier_prefix: prefix prepended to the counter of every
            entity created without an explicit ``res``
        :param wanted_label: whether ``_add`` builds a label for new entities
        :param custom_counter_handler: counter handler to use instead of the
            filesystem / in-memory defaults
        """
        super().__init__()
        # Maps a resource IRI (as a string) to the related graph entity.
        self.res_to_entity: Dict[str, GraphEntity] = {}
        self.base_iri: str = base_iri
        self.info_dir: str = info_dir
        self.supplier_prefix: str = supplier_prefix
        self.wanted_label: bool = wanted_label
        # Graphs
        # The following structure of URL is quite important for the other classes
        # developed and should not be changed. The only part that can change is the
        # value of the base_iri
        self.g_an: str = base_iri + "an/"
        self.g_ar: str = base_iri + "ar/"
        self.g_be: str = base_iri + "be/"
        self.g_br: str = base_iri + "br/"
        self.g_ci: str = base_iri + "ci/"
        self.g_de: str = base_iri + "de/"
        self.g_id: str = base_iri + "id/"
        self.g_pl: str = base_iri + "pl/"
        self.g_ra: str = base_iri + "ra/"
        self.g_re: str = base_iri + "re/"
        self.g_rp: str = base_iri + "rp/"

        # Compare against None explicitly: a handler object that happens to be
        # falsy (e.g. it defines __len__ or __bool__) must still be honoured.
        if custom_counter_handler is not None:
            self.counter_handler = custom_counter_handler
        elif info_dir:
            self.counter_handler = FilesystemCounterHandler(info_dir, supplier_prefix)
        else:
            self.counter_handler = InMemoryCounterHandler()

    def get_entity(self, res: str) -> Optional[GraphEntity]:
        """Return the entity registered under ``res``, or ``None`` if unknown."""
        return self.res_to_entity.get(res)

    def _get_or_create_entity(self, entity_cls: type[GraphEntity], graph_url: str, short_name: str,
                              res_type: str, description: str, resp_agent: str | None,
                              source: str | None, res: str | None,
                              preexisting_graph: SubgraphView | None) -> GraphEntity:
        """Shared implementation of the public ``add_*`` methods.

        :param entity_cls: concrete entity class to instantiate
        :param graph_url: IRI of the graph the entity belongs to
        :param short_name: two-letter short name of the entity kind
        :param res_type: type IRI handed to the entity constructor
        :param description: entity name with its article (e.g. ``"an AgentRole"``),
            used verbatim in the error message
        :raises ValueError: if ``res`` is given but its short name is not ``short_name``
        :return: the entity already registered under ``res`` if there is one,
            otherwise a newly constructed entity
        """
        if res is not None:
            if get_short_name(res) != short_name:
                raise ValueError(f"Given res: <{res}> is inappropriate for {description} entity.")
            if res in self.res_to_entity:
                return self.res_to_entity[res]
        cur_g, count, label = self._add(graph_url, short_name, res)
        return entity_cls(cur_g, self, res_type, res,
                          resp_agent, source, count, label, short_name,
                          preexisting_graph)

    # Add resources related to bibliographic entities
    def add_an(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> ReferenceAnnotation:
        """Return the ``ReferenceAnnotation`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not an ``an`` resource
        """
        return cast(ReferenceAnnotation, self._get_or_create_entity(
            ReferenceAnnotation, self.g_an, "an", GraphEntity.iri_note, "a ReferenceAnnotation",
            resp_agent, source, res, preexisting_graph))

    def add_ar(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> AgentRole:
        """Return the ``AgentRole`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not an ``ar`` resource
        """
        return cast(AgentRole, self._get_or_create_entity(
            AgentRole, self.g_ar, "ar", GraphEntity.iri_role_in_time, "an AgentRole",
            resp_agent, source, res, preexisting_graph))

    def add_be(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> BibliographicReference:
        """Return the ``BibliographicReference`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not a ``be`` resource
        """
        return cast(BibliographicReference, self._get_or_create_entity(
            BibliographicReference, self.g_be, "be", GraphEntity.iri_bibliographic_reference,
            "a BibliographicReference", resp_agent, source, res, preexisting_graph))

    def add_br(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> BibliographicResource:
        """Return the ``BibliographicResource`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not a ``br`` resource
        """
        return cast(BibliographicResource, self._get_or_create_entity(
            BibliographicResource, self.g_br, "br", GraphEntity.iri_expression,
            "a BibliographicResource", resp_agent, source, res, preexisting_graph))

    def add_ci(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> Citation:
        """Return the ``Citation`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not a ``ci`` resource
        """
        return cast(Citation, self._get_or_create_entity(
            Citation, self.g_ci, "ci", GraphEntity.iri_citation, "a Citation",
            resp_agent, source, res, preexisting_graph))

    def add_de(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> DiscourseElement:
        """Return the ``DiscourseElement`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not a ``de`` resource
        """
        return cast(DiscourseElement, self._get_or_create_entity(
            DiscourseElement, self.g_de, "de", GraphEntity.iri_discourse_element, "a DiscourseElement",
            resp_agent, source, res, preexisting_graph))

    def add_id(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> Identifier:
        """Return the ``Identifier`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not an ``id`` resource
        """
        return cast(Identifier, self._get_or_create_entity(
            Identifier, self.g_id, "id", GraphEntity.iri_identifier, "an Identifier",
            resp_agent, source, res, preexisting_graph))

    def add_pl(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> PointerList:
        """Return the ``PointerList`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not a ``pl`` resource
        """
        return cast(PointerList, self._get_or_create_entity(
            PointerList, self.g_pl, "pl", GraphEntity.iri_singleloc_pointer_list, "a PointerList",
            resp_agent, source, res, preexisting_graph))

    def add_rp(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> ReferencePointer:
        """Return the ``ReferencePointer`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not an ``rp`` resource
        """
        return cast(ReferencePointer, self._get_or_create_entity(
            ReferencePointer, self.g_rp, "rp", GraphEntity.iri_intextref_pointer, "a ReferencePointer",
            resp_agent, source, res, preexisting_graph))

    def add_ra(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> ResponsibleAgent:
        """Return the ``ResponsibleAgent`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not an ``ra`` resource
        """
        return cast(ResponsibleAgent, self._get_or_create_entity(
            ResponsibleAgent, self.g_ra, "ra", GraphEntity.iri_agent, "a ResponsibleAgent",
            resp_agent, source, res, preexisting_graph))

    def add_re(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> ResourceEmbodiment:
        """Return the ``ResourceEmbodiment`` for ``res``, creating it if needed.

        :raises ValueError: if ``res`` is given and is not an ``re`` resource
        """
        return cast(ResourceEmbodiment, self._get_or_create_entity(
            ResourceEmbodiment, self.g_re, "re", GraphEntity.iri_manifestation, "a ResourceEmbodiment",
            resp_agent, source, res, preexisting_graph))

    def _add(self, graph_url: str, short_name: str, res: str | None = None) -> tuple[TripleLite, str | None, str | None]:
        """Create the graph for an entity and work out its counter and label.

        When ``res`` is given no new number is generated: the stored counter
        for ``short_name`` is only raised to the number found in ``res`` if
        that number is greater, and both ``count`` and ``label`` are ``None``.
        Otherwise the counter is incremented and the new count (prefixed with
        the supplier prefix) is returned, together with a label if
        ``wanted_label`` is set.

        :return: ``(graph, count, label)``
        """
        cur_g = TripleLite(identifier=graph_url)

        if res is not None:
            supplier_prefix = get_prefix(res)
            res_count: int
            try:
                res_count = int(get_count(res))
            except ValueError:
                # A non-numeric counter can never exceed the stored one.
                res_count = -1
            if res_count > self.counter_handler.read_counter(short_name, supplier_prefix=supplier_prefix):
                self.counter_handler.set_counter(res_count, short_name, supplier_prefix=supplier_prefix)
            return cur_g, None, None

        supplier_prefix = self.supplier_prefix
        next_number = self.counter_handler.increment_counter(short_name, supplier_prefix=supplier_prefix)
        count: str = supplier_prefix + str(next_number)

        label: Optional[str] = None
        if self.wanted_label:
            label = f"{self.labels[short_name]} {count} [{short_name}/{count}]"

        return cur_g, count, label

    def get_orphans(self) -> List[GraphEntity]:
        """Return the entities of this set that no entity of the set points to.

        An entity counts as referenced when its IRI appears as a ``"uri"``
        object in the triples whose subject is any registered entity
        (including itself). The result follows the insertion order of
        ``res_to_entity``.
        """
        referenced_entities: Set[str] = set()
        for res, entity in self.res_to_entity.items():
            for obj in entity.g.objects(subject=res, predicate=None):
                if obj.type == "uri":
                    referenced_entities.add(obj.value)

        return [entity for res, entity in self.res_to_entity.items()
                if res not in referenced_entities]

    def remove_orphans_from_triplestore(self, ts_url: str, resp_agent: str) -> None:
        """Drop, from entities stored in the triplestore, links to deleted entities.

        For every entity of this set flagged ``to_be_deleted`` the triplestore
        at ``ts_url`` is queried for the subjects pointing to it; those
        subjects are imported into this set and the triples whose object is
        the deleted entity are removed from their graphs.

        :param ts_url: URL of the SPARQL endpoint to query
        :param resp_agent: responsible agent passed on to the importer
        """
        # Iterate over a snapshot: the import below receives this set and
        # NOTE(review): presumably registers the imported entities in
        # ``res_to_entity``, which would break iteration over the live dict.
        for entity_res, entity in list(self.res_to_entity.items()):
            if not entity.to_be_deleted:
                continue
            query: str = f"CONSTRUCT {{?s ?p ?o}} WHERE {{?s ?p ?o ; ?p_1 <{entity_res}>}}"
            nt_bytes = sparql_construct(ts_url, query)
            if not nt_bytes:
                continue
            from oc_ocdm.reader import Reader
            rdflib_g = Graph()
            rdflib_g.parse(BytesIO(nt_bytes), format='nt')
            graphs = from_rdflib(rdflib_g)
            imported_entities: List[GraphEntity] = Reader.import_entities_from_graph(self, graphs[0], resp_agent)
            for imported_entity in imported_entities:
                imported_entity.g.remove((imported_entity.res, None, RDFTerm("uri", str(entity_res))))

    def commit_changes(self) -> None:
        """Commit every entity and forget those flagged ``to_be_deleted``."""
        # Iterate over a snapshot: deleting from a dict while iterating over
        # its live view raises RuntimeError.
        for res, entity in list(self.res_to_entity.items()):
            entity.commit_changes()
            if entity.to_be_deleted:
                del self.res_to_entity[res]

    def get_an(self) -> tuple[ReferenceAnnotation, ...]:
        """Return every ``ReferenceAnnotation`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ReferenceAnnotation))

    def get_ar(self) -> tuple[AgentRole, ...]:
        """Return every ``AgentRole`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, AgentRole))

    def get_be(self) -> tuple[BibliographicReference, ...]:
        """Return every ``BibliographicReference`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, BibliographicReference))

    def get_br(self) -> tuple[BibliographicResource, ...]:
        """Return every ``BibliographicResource`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, BibliographicResource))

    def get_ci(self) -> tuple[Citation, ...]:
        """Return every ``Citation`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Citation))

    def get_de(self) -> tuple[DiscourseElement, ...]:
        """Return every ``DiscourseElement`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, DiscourseElement))

    def get_id(self) -> tuple[Identifier, ...]:
        """Return every ``Identifier`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Identifier))

    def get_pl(self) -> tuple[PointerList, ...]:
        """Return every ``PointerList`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, PointerList))

    def get_rp(self) -> tuple[ReferencePointer, ...]:
        """Return every ``ReferencePointer`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ReferencePointer))

    def get_ra(self) -> tuple[ResponsibleAgent, ...]:
        """Return every ``ResponsibleAgent`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ResponsibleAgent))

    def get_re(self) -> tuple[ResourceEmbodiment, ...]:
        """Return every ``ResourceEmbodiment`` registered in this set."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ResourceEmbodiment))