Coverage for oc_ocdm/graph/graph_set.py: 69%
285 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-05-30 22:05 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-05-30 22:05 +0000
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com>
4#
5# Permission to use, copy, modify, and/or distribute this software for any purpose
6# with or without fee is hereby granted, provided that the above copyright notice
7# and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
15# SOFTWARE.
16from __future__ import annotations
18from typing import TYPE_CHECKING
20from oc_ocdm.abstract_set import AbstractSet
21from oc_ocdm.reader import Reader
22from oc_ocdm.support.support import get_count, get_prefix, get_short_name
23from SPARQLWrapper import RDFXML, SPARQLWrapper
25if TYPE_CHECKING:
26 from typing import Dict, ClassVar, Tuple, Optional, List, Set
27 from rdflib import ConjunctiveGraph
29from oc_ocdm.counter_handler.counter_handler import CounterHandler
30from oc_ocdm.counter_handler.filesystem_counter_handler import \
31 FilesystemCounterHandler
32from oc_ocdm.counter_handler.in_memory_counter_handler import \
33 InMemoryCounterHandler
34from oc_ocdm.graph.entities.bibliographic.agent_role import AgentRole
35from oc_ocdm.graph.entities.bibliographic.bibliographic_reference import \
36 BibliographicReference
37from oc_ocdm.graph.entities.bibliographic.bibliographic_resource import \
38 BibliographicResource
39from oc_ocdm.graph.entities.bibliographic.citation import Citation
40from oc_ocdm.graph.entities.bibliographic.discourse_element import \
41 DiscourseElement
42from oc_ocdm.graph.entities.bibliographic.pointer_list import PointerList
43from oc_ocdm.graph.entities.bibliographic.reference_annotation import \
44 ReferenceAnnotation
45from oc_ocdm.graph.entities.bibliographic.reference_pointer import \
46 ReferencePointer
47from oc_ocdm.graph.entities.bibliographic.resource_embodiment import \
48 ResourceEmbodiment
49from oc_ocdm.graph.entities.bibliographic.responsible_agent import \
50 ResponsibleAgent
51from oc_ocdm.graph.entities.identifier import Identifier
52from oc_ocdm.graph.graph_entity import GraphEntity
53from rdflib import Graph, Namespace, URIRef
56class GraphSet(AbstractSet):
57 # Labels
# Human-readable name of each entity kind, keyed by its two-letter short name.
labels: ClassVar[Dict[str, str]] = dict(
    an="annotation",
    ar="agent role",
    be="bibliographic entry",
    br="bibliographic resource",
    ci="citation",
    de="discourse element",
    id="identifier",
    pl="single location pointer list",
    ra="responsible agent",
    re="resource embodiment",
    rp="in-text reference pointer",
)
def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
             wanted_label: bool = True, custom_counter_handler: CounterHandler = None) -> None:
    """Create an empty graph set rooted at ``base_iri``.

    :param base_iri: prefix from which the URL of every per-type graph is built
    :param info_dir: when neither ``None`` nor empty, passed (with
        ``supplier_prefix``) to a ``FilesystemCounterHandler``
    :param supplier_prefix: prefix used by ``_add`` for newly generated counts
    :param wanted_label: whether ``_add`` builds a label for new entities
    :param custom_counter_handler: counter handler used in place of the
        filesystem/in-memory ones; ignored when falsy
    """
    super(GraphSet, self).__init__()

    self.base_iri: str = base_iri
    self.info_dir: str = info_dir
    self.supplier_prefix: str = supplier_prefix
    self.wanted_label: bool = wanted_label

    # Maps the URIRef of each resource to its graph entity.
    self.res_to_entity: Dict[URIRef, GraphEntity] = {}

    # Graph URLs, one per entity kind. Other classes rely on this exact
    # "<base_iri><short name>/" layout: only base_iri is allowed to vary.
    self.g_an: str = base_iri + "an/"
    self.g_ar: str = base_iri + "ar/"
    self.g_be: str = base_iri + "be/"
    self.g_br: str = base_iri + "br/"
    self.g_ci: str = base_iri + "ci/"
    self.g_de: str = base_iri + "de/"
    self.g_id: str = base_iri + "id/"
    self.g_pl: str = base_iri + "pl/"
    self.g_ra: str = base_iri + "ra/"
    self.g_re: str = base_iri + "re/"
    self.g_rp: str = base_iri + "rp/"

    # Counter handler precedence: explicit handler, then filesystem (if a
    # directory was given), then in-memory.
    if custom_counter_handler:
        handler = custom_counter_handler
    elif info_dir is None or info_dir == "":
        handler = InMemoryCounterHandler()
    else:
        handler = FilesystemCounterHandler(info_dir, supplier_prefix)
    self.counter_handler = handler
def get_entity(self, res: URIRef) -> Optional[GraphEntity]:
    """Return the entity stored under ``res`` in ``res_to_entity``, or ``None`` if absent."""
    return self.res_to_entity.get(res)
108 # Add resources related to bibliographic entities
def add_an(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> ReferenceAnnotation:
    """Return the ReferenceAnnotation for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"an"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "an":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ReferenceAnnotation entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_an, "an", res)
    return ReferenceAnnotation(
        graph, self, res, GraphEntity.iri_note,
        resp_agent, source, count, label, "an",
        preexisting_graph,
    )
def add_ar(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> AgentRole:
    """Return the AgentRole for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"ar"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "ar":
            raise ValueError(f"Given res: <{res}> is inappropriate for an AgentRole entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_ar, "ar", res)
    return AgentRole(
        graph, self, res, GraphEntity.iri_role_in_time,
        resp_agent, source, count, label, "ar",
        preexisting_graph,
    )
def add_be(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> BibliographicReference:
    """Return the BibliographicReference for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"be"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "be":
            raise ValueError(f"Given res: <{res}> is inappropriate for a BibliographicReference entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_be, "be", res)
    return BibliographicReference(
        graph, self, res, GraphEntity.iri_bibliographic_reference,
        resp_agent, source, count, label, "be",
        preexisting_graph,
    )
def add_br(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> BibliographicResource:
    """Return the BibliographicResource for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"br"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "br":
            raise ValueError(f"Given res: <{res}> is inappropriate for a BibliographicResource entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_br, "br", res)
    return BibliographicResource(
        graph, self, res, GraphEntity.iri_expression,
        resp_agent, source, count, label, "br",
        preexisting_graph,
    )
def add_ci(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> Citation:
    """Return the Citation for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"ci"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "ci":
            raise ValueError(f"Given res: <{res}> is inappropriate for a Citation entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_ci, "ci", res)
    return Citation(
        graph, self, res, GraphEntity.iri_citation,
        resp_agent, source, count, label, "ci",
        preexisting_graph,
    )
def add_de(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> DiscourseElement:
    """Return the DiscourseElement for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"de"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "de":
            raise ValueError(f"Given res: <{res}> is inappropriate for a DiscourseElement entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_de, "de", res)
    return DiscourseElement(
        graph, self, res, GraphEntity.iri_discourse_element,
        resp_agent, source, count, label, "de",
        preexisting_graph,
    )
def add_id(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> Identifier:
    """Return the Identifier for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"id"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "id":
            raise ValueError(f"Given res: <{res}> is inappropriate for an Identifier entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_id, "id", res)
    return Identifier(
        graph, self, res, GraphEntity.iri_identifier,
        resp_agent, source, count, label, "id",
        preexisting_graph,
    )
def add_pl(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> PointerList:
    """Return the PointerList for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"pl"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "pl":
            raise ValueError(f"Given res: <{res}> is inappropriate for a PointerList entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_pl, "pl", res)
    return PointerList(
        graph, self, res, GraphEntity.iri_singleloc_pointer_list,
        resp_agent, source, count, label, "pl",
        preexisting_graph,
    )
def add_rp(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> ReferencePointer:
    """Return the ReferencePointer for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"rp"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "rp":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ReferencePointer entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_rp, "rp", res)
    return ReferencePointer(
        graph, self, res, GraphEntity.iri_intextref_pointer,
        resp_agent, source, count, label, "rp",
        preexisting_graph,
    )
def add_ra(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> ResponsibleAgent:
    """Return the ResponsibleAgent for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"ra"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "ra":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ResponsibleAgent entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_ra, "ra", res)
    return ResponsibleAgent(
        graph, self, res, GraphEntity.iri_agent,
        resp_agent, source, count, label, "ra",
        preexisting_graph,
    )
def add_re(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> ResourceEmbodiment:
    """Return the ResourceEmbodiment for ``res``, building it when not yet known.

    :param resp_agent: forwarded to the entity constructor
    :param source: forwarded to the entity constructor
    :param res: URI of the entity; when ``None`` a new count is drawn by ``_add``
    :param preexisting_graph: forwarded to the entity constructor
    :raises ValueError: if ``res`` is given and its short name is not ``"re"``
    :return: the entity already stored under ``res``, otherwise a new one
    """
    if res is not None:
        if get_short_name(res) != "re":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ResourceEmbodiment entity.")
        if res in self.res_to_entity:
            return self.res_to_entity[res]

    graph, count, label = self._add(self.g_re, "re", res)
    return ResourceEmbodiment(
        graph, self, res, GraphEntity.iri_manifestation,
        resp_agent, source, count, label, "re",
        preexisting_graph,
    )
def _add(self, graph_url: str, short_name: str, res: URIRef = None) -> Tuple[Graph, Optional[str], Optional[str]]:
    """Create the graph for a new entity and keep the counters in sync.

    :param graph_url: identifier given to the new ``Graph``
    :param short_name: two-letter entity kind, used as counter name and label key
    :param res: URI of an already-identified entity, or ``None`` to draw a new count
    :return: ``(graph, count, label)``; ``count`` and ``label`` are both ``None``
        when ``res`` is given, and ``label`` is ``None`` when labels are not wanted
    """
    graph: Graph = Graph(identifier=graph_url)
    self._set_ns(graph)

    if res is not None:
        # The entity already has a URI: no new count is drawn, but the stored
        # counter is raised if the number found in the URI is ahead of it.
        prefix = get_prefix(res)
        try:
            found_count: int = int(get_count(res))
        except ValueError:
            found_count = -1  # never greater than a stored counter, per the comparison below
        if found_count > self.counter_handler.read_counter(short_name, supplier_prefix=prefix):
            self.counter_handler.set_counter(found_count, short_name, supplier_prefix=prefix)
        return graph, None, None

    prefix = self.supplier_prefix
    next_number = self.counter_handler.increment_counter(short_name, supplier_prefix=prefix)
    count: Optional[str] = prefix + str(next_number)

    label: Optional[str] = None
    if self.wanted_label:
        label = "%s %s [%s/%s]" % (self.labels[short_name], count, short_name, count)
    return graph, count, label
def get_orphans(self) -> List[GraphEntity]:
    """Return the entities of this set that no entity of the set points to.

    For every entity, the objects of the triples having that entity's URI as
    subject are read from the entity's own graph ``g``; any URIRef found
    there counts as referenced. An entity is an orphan when its URI is not
    among the referenced ones.

    :return: the orphan entities, in the iteration order of ``res_to_entity``
    """
    referenced: Set[URIRef] = set()
    for res, entity in self.res_to_entity.items():
        referenced.update(
            obj
            for obj in entity.g.objects(subject=res, predicate=None)
            if isinstance(obj, URIRef)
        )

    # Filtering the mapping directly yields a stable order and avoids looking
    # every orphan up a second time.
    return [entity for res, entity in self.res_to_entity.items() if res not in referenced]
def remove_orphans_from_triplestore(self, ts_url: str, resp_agent: str) -> None:
    """Drop references to entities marked for deletion from triplestore subjects.

    For each entity of this set whose ``to_be_deleted`` flag is set, the
    triplestore is asked for all triples of the subjects having that entity
    as object of some predicate. The result is imported into this set through
    ``Reader.import_entities_from_graph`` and, in each imported entity's graph,
    the triples pointing to the entity being deleted are removed.

    :param ts_url: URL of the SPARQL endpoint to query
    :param resp_agent: responsible agent passed to the import routine
    """
    sparql: SPARQLWrapper = SPARQLWrapper(ts_url)
    # These settings are the same for every query, so set them once.
    sparql.setMethod('GET')
    sparql.setReturnFormat(RDFXML)

    # Iterate over a snapshot: the import below receives this very set and
    # may add entities to ``res_to_entity``, which would invalidate a live
    # dictionary iterator (NOTE(review): confirm against Reader).
    for entity_res, entity in list(self.res_to_entity.items()):
        if not entity.to_be_deleted:
            continue

        sparql.setQuery(f"CONSTRUCT {{?s ?p ?o}} WHERE {{?s ?p ?o ; ?p_1 <{entity_res}>}}")
        result: ConjunctiveGraph = sparql.query().convert()
        if result is None:
            continue

        imported_entities: List[GraphEntity] = Reader.import_entities_from_graph(self, result, resp_agent)
        for imported_entity in imported_entities:
            imported_entity.g.remove((imported_entity.res, None, entity_res))
def commit_changes(self) -> None:
    """Commit every entity and forget those marked for deletion.

    ``commit_changes()`` is called on each entity of ``res_to_entity``;
    entities whose ``to_be_deleted`` flag is set are then removed from the
    mapping.
    """
    # Iterate over a snapshot: deleting from a dictionary while iterating
    # over it raises "RuntimeError: dictionary changed size during iteration".
    for res, entity in list(self.res_to_entity.items()):
        entity.commit_changes()
        if entity.to_be_deleted:
            del self.res_to_entity[res]
def _set_ns(self, g: Graph) -> None:
    """Bind on ``g`` the prefixes of the per-type graphs and of the vocabularies.

    :param g: graph whose namespace manager receives the bindings
    """
    # (prefix, namespace) pairs, bound in exactly this order.
    bindings = (
        ("an", Namespace(self.g_an)),
        ("ar", Namespace(self.g_ar)),
        ("be", Namespace(self.g_be)),
        ("ci", Namespace(self.g_ci)),
        ("de", Namespace(self.g_de)),
        ("br", Namespace(self.g_br)),
        ("id", Namespace(self.g_id)),
        ("pl", Namespace(self.g_pl)),
        ("ra", Namespace(self.g_ra)),
        ("re", Namespace(self.g_re)),
        ("rp", Namespace(self.g_rp)),
        ("biro", GraphEntity.BIRO),
        ("co", GraphEntity.CO),
        ("c4o", GraphEntity.C4O),
        ("cito", GraphEntity.CITO),
        ("datacite", GraphEntity.DATACITE),
        ("dcterms", GraphEntity.DCTERMS),
        ("deo", GraphEntity.DEO),
        ("doco", GraphEntity.DOCO),
        ("fabio", GraphEntity.FABIO),
        ("foaf", GraphEntity.FOAF),
        ("frbr", GraphEntity.FRBR),
        ("literal", GraphEntity.LITERAL),
        ("oa", GraphEntity.OA),
        ("oco", GraphEntity.OCO),
        ("prism", GraphEntity.PRISM),
        ("pro", GraphEntity.PRO),
    )
    for prefix, namespace in bindings:
        g.namespace_manager.bind(prefix, namespace)
def get_an(self) -> Tuple[ReferenceAnnotation, ...]:
    """Return every ReferenceAnnotation held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, ReferenceAnnotation)
    )
def get_ar(self) -> Tuple[AgentRole, ...]:
    """Return every AgentRole held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, AgentRole)
    )
def get_be(self) -> Tuple[BibliographicReference, ...]:
    """Return every BibliographicReference held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, BibliographicReference)
    )
def get_br(self) -> Tuple[BibliographicResource, ...]:
    """Return every BibliographicResource held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, BibliographicResource)
    )
def get_ci(self) -> Tuple[Citation, ...]:
    """Return every Citation held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, Citation)
    )
def get_de(self) -> Tuple[DiscourseElement, ...]:
    """Return every DiscourseElement held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, DiscourseElement)
    )
def get_id(self) -> Tuple[Identifier, ...]:
    """Return every Identifier held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, Identifier)
    )
def get_pl(self) -> Tuple[PointerList, ...]:
    """Return every PointerList held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, PointerList)
    )
def get_rp(self) -> Tuple[ReferencePointer, ...]:
    """Return every ReferencePointer held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, ReferencePointer)
    )
def get_ra(self) -> Tuple[ResponsibleAgent, ...]:
    """Return every ResponsibleAgent held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, ResponsibleAgent)
    )
def get_re(self) -> Tuple[ResourceEmbodiment, ...]:
    """Return every ResourceEmbodiment held by this set.

    :return: the matching values of ``res_to_entity``, in the mapping's
        iteration order (empty tuple when there are none)
    """
    # Single pass; avoids re-copying a growing tuple on every match.
    return tuple(
        entity
        for entity in self.res_to_entity.values()
        if isinstance(entity, ResourceEmbodiment)
    )