Coverage for oc_ocdm/graph/graph_set.py: 89%
229 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-05 23:58 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-05 23:58 +0000
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com>
4#
5# Permission to use, copy, modify, and/or distribute this software for any purpose
6# with or without fee is hereby granted, provided that the above copyright notice
7# and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
15# SOFTWARE.
16from __future__ import annotations
18from typing import TYPE_CHECKING
20from io import BytesIO
22from oc_ocdm.abstract_set import AbstractSet
23from oc_ocdm.reader import Reader
24from oc_ocdm.support.support import get_count, get_prefix, get_short_name
25from sparqlite import SPARQLClient
27if TYPE_CHECKING:
28 from typing import Dict, ClassVar, Tuple, Optional, List, Set
30from oc_ocdm.counter_handler.counter_handler import CounterHandler
31from oc_ocdm.counter_handler.filesystem_counter_handler import \
32 FilesystemCounterHandler
33from oc_ocdm.counter_handler.in_memory_counter_handler import \
34 InMemoryCounterHandler
35from oc_ocdm.graph.entities.bibliographic.agent_role import AgentRole
36from oc_ocdm.graph.entities.bibliographic.bibliographic_reference import \
37 BibliographicReference
38from oc_ocdm.graph.entities.bibliographic.bibliographic_resource import \
39 BibliographicResource
40from oc_ocdm.graph.entities.bibliographic.citation import Citation
41from oc_ocdm.graph.entities.bibliographic.discourse_element import \
42 DiscourseElement
43from oc_ocdm.graph.entities.bibliographic.pointer_list import PointerList
44from oc_ocdm.graph.entities.bibliographic.reference_annotation import \
45 ReferenceAnnotation
46from oc_ocdm.graph.entities.bibliographic.reference_pointer import \
47 ReferencePointer
48from oc_ocdm.graph.entities.bibliographic.resource_embodiment import \
49 ResourceEmbodiment
50from oc_ocdm.graph.entities.bibliographic.responsible_agent import \
51 ResponsibleAgent
52from oc_ocdm.graph.entities.identifier import Identifier
53from oc_ocdm.graph.graph_entity import GraphEntity
54from rdflib import Graph, Namespace, URIRef
class GraphSet(AbstractSet):
    """In-memory collection of graph entities that share one base IRI.

    The set hands out new entities through the ``add_*`` factory methods,
    keeps track of them in ``res_to_entity`` and uses a counter handler to
    generate progressive identifiers for entities created without an
    explicit resource IRI.
    """

    # Labels
    # Human-readable name of each entity kind, keyed by its short name.
    labels: ClassVar[Dict[str, str]] = {
        "an": "annotation",
        "ar": "agent role",
        "be": "bibliographic entry",
        "br": "bibliographic resource",
        "ci": "citation",
        "de": "discourse element",
        "id": "identifier",
        "pl": "single location pointer list",
        "ra": "responsible agent",
        "re": "resource embodiment",
        "rp": "in-text reference pointer"
    }

    def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
                 wanted_label: bool = True, custom_counter_handler: Optional[CounterHandler] = None) -> None:
        """Create an empty set of graph entities.

        :param base_iri: prefix prepended to every per-kind graph IRI
            (``base_iri + "br/"`` and so on)
        :param info_dir: when non-empty (and no custom handler is given),
            counters are handled by a ``FilesystemCounterHandler`` built
            from this value
        :param supplier_prefix: prefix prepended to generated counters
        :param wanted_label: whether ``_add`` should build a label for
            newly created entities
        :param custom_counter_handler: counter handler to use instead of
            the default ones
        """
        super(GraphSet, self).__init__()
        # The following variable maps a URIRef with the related graph entity.
        # NOTE(review): nothing in this class inserts into it; presumably the
        # entity constructors register themselves here -- confirm.
        self.res_to_entity: Dict[URIRef, GraphEntity] = {}
        self.base_iri: str = base_iri
        self.info_dir: str = info_dir
        self.supplier_prefix: str = supplier_prefix
        self.wanted_label: bool = wanted_label
        # Graphs
        # The following structure of URL is quite important for the other classes
        # developed and should not be changed. The only part that can change is the
        # value of the base_iri
        self.g_an: str = base_iri + "an/"
        self.g_ar: str = base_iri + "ar/"
        self.g_be: str = base_iri + "be/"
        self.g_br: str = base_iri + "br/"
        self.g_ci: str = base_iri + "ci/"
        self.g_de: str = base_iri + "de/"
        self.g_id: str = base_iri + "id/"
        self.g_pl: str = base_iri + "pl/"
        self.g_ra: str = base_iri + "ra/"
        self.g_re: str = base_iri + "re/"
        self.g_rp: str = base_iri + "rp/"

        # Counter handler precedence: explicit handler, then filesystem
        # (when an info_dir is supplied), then in-memory.
        if custom_counter_handler:
            self.counter_handler = custom_counter_handler
        elif info_dir is not None and info_dir != "":
            self.counter_handler = FilesystemCounterHandler(info_dir, supplier_prefix)
        else:
            self.counter_handler = InMemoryCounterHandler()

    def get_entity(self, res: URIRef) -> Optional[GraphEntity]:
        """Return the entity registered for ``res``, or ``None`` if unknown."""
        return self.res_to_entity.get(res)

    def _get_or_add(self, entity_cls: type, graph_url: str, short_name: str, res_type: URIRef,
                    description: str, resp_agent: str, source: Optional[str],
                    res: Optional[URIRef], preexisting_graph: Optional[Graph]) -> GraphEntity:
        """Shared implementation of the public ``add_*`` factory methods.

        :param entity_cls: entity class to instantiate
        :param graph_url: IRI of the graph the entity belongs to
        :param short_name: two-letter entity kind (``"br"``, ``"id"``, ...)
        :param res_type: IRI passed to the entity constructor as its type
        :param description: class name with its article (``"an AgentRole"``),
            used verbatim in the error message
        :raises ValueError: if ``res`` is given but its short name is not
            ``short_name``
        :return: the entity already registered for ``res`` if any, otherwise
            a newly built ``entity_cls`` instance
        """
        if res is not None:
            if get_short_name(res) != short_name:
                raise ValueError(f"Given res: <{res}> is inappropriate for {description} entity.")
            if res in self.res_to_entity:
                return self.res_to_entity[res]
        cur_g, count, label = self._add(graph_url, short_name, res)
        return entity_cls(cur_g, self, res, res_type,
                          resp_agent, source, count, label, short_name,
                          preexisting_graph)

    # Add resources related to bibliographic entities
    def add_an(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ReferenceAnnotation:
        """Return the ``ReferenceAnnotation`` for ``res``, creating it if needed."""
        return self._get_or_add(ReferenceAnnotation, self.g_an, "an", GraphEntity.iri_note,
                                "a ReferenceAnnotation", resp_agent, source, res, preexisting_graph)

    def add_ar(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> AgentRole:
        """Return the ``AgentRole`` for ``res``, creating it if needed."""
        return self._get_or_add(AgentRole, self.g_ar, "ar", GraphEntity.iri_role_in_time,
                                "an AgentRole", resp_agent, source, res, preexisting_graph)

    def add_be(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> BibliographicReference:
        """Return the ``BibliographicReference`` for ``res``, creating it if needed."""
        return self._get_or_add(BibliographicReference, self.g_be, "be",
                                GraphEntity.iri_bibliographic_reference,
                                "a BibliographicReference", resp_agent, source, res, preexisting_graph)

    def add_br(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> BibliographicResource:
        """Return the ``BibliographicResource`` for ``res``, creating it if needed."""
        return self._get_or_add(BibliographicResource, self.g_br, "br", GraphEntity.iri_expression,
                                "a BibliographicResource", resp_agent, source, res, preexisting_graph)

    def add_ci(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> Citation:
        """Return the ``Citation`` for ``res``, creating it if needed."""
        return self._get_or_add(Citation, self.g_ci, "ci", GraphEntity.iri_citation,
                                "a Citation", resp_agent, source, res, preexisting_graph)

    def add_de(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> DiscourseElement:
        """Return the ``DiscourseElement`` for ``res``, creating it if needed."""
        return self._get_or_add(DiscourseElement, self.g_de, "de", GraphEntity.iri_discourse_element,
                                "a DiscourseElement", resp_agent, source, res, preexisting_graph)

    def add_id(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> Identifier:
        """Return the ``Identifier`` for ``res``, creating it if needed."""
        return self._get_or_add(Identifier, self.g_id, "id", GraphEntity.iri_identifier,
                                "an Identifier", resp_agent, source, res, preexisting_graph)

    def add_pl(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> PointerList:
        """Return the ``PointerList`` for ``res``, creating it if needed."""
        return self._get_or_add(PointerList, self.g_pl, "pl", GraphEntity.iri_singleloc_pointer_list,
                                "a PointerList", resp_agent, source, res, preexisting_graph)

    def add_rp(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ReferencePointer:
        """Return the ``ReferencePointer`` for ``res``, creating it if needed."""
        return self._get_or_add(ReferencePointer, self.g_rp, "rp", GraphEntity.iri_intextref_pointer,
                                "a ReferencePointer", resp_agent, source, res, preexisting_graph)

    def add_ra(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ResponsibleAgent:
        """Return the ``ResponsibleAgent`` for ``res``, creating it if needed."""
        return self._get_or_add(ResponsibleAgent, self.g_ra, "ra", GraphEntity.iri_agent,
                                "a ResponsibleAgent", resp_agent, source, res, preexisting_graph)

    def add_re(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ResourceEmbodiment:
        """Return the ``ResourceEmbodiment`` for ``res``, creating it if needed."""
        return self._get_or_add(ResourceEmbodiment, self.g_re, "re", GraphEntity.iri_manifestation,
                                "a ResourceEmbodiment", resp_agent, source, res, preexisting_graph)

    def _add(self, graph_url: str, short_name: str,
             res: Optional[URIRef] = None) -> Tuple[Graph, Optional[str], Optional[str]]:
        """Prepare the graph, counter and label for a new entity.

        When ``res`` is given no new counter is generated: the stored counter
        is only raised to the number found in ``res`` if that number is
        higher, and both ``count`` and ``label`` are returned as ``None``.
        Otherwise the counter is incremented and prefixed with this set's
        supplier prefix.

        :return: ``(graph, count, label)``; ``label`` is ``None`` unless
            ``wanted_label`` is set and a new counter was generated
        """
        cur_g: Graph = Graph(identifier=graph_url)
        self._set_ns(cur_g)

        if res is not None:
            supplier_prefix = get_prefix(res)
            try:
                res_count = int(get_count(res))
            except ValueError:
                # A non-numeric counter never exceeds the stored counter.
                res_count = -1
            if res_count > self.counter_handler.read_counter(short_name, supplier_prefix=supplier_prefix):
                self.counter_handler.set_counter(res_count, short_name, supplier_prefix=supplier_prefix)
            return cur_g, None, None

        supplier_prefix = self.supplier_prefix
        count: str = supplier_prefix + str(
            self.counter_handler.increment_counter(short_name, supplier_prefix=supplier_prefix))

        label: Optional[str] = None
        if self.wanted_label:
            label = f"{self.labels[short_name]} {count} [{short_name}/{count}]"

        return cur_g, count, label

    def get_orphans(self) -> List[GraphEntity]:
        """Return the entities of this set that no entity of the set points to.

        An entity counts as referenced when its resource appears as the
        ``URIRef`` object of a triple whose subject is the resource of an
        entity in this set, looked up in that entity's own graph.
        """
        referenced_entities: Set[URIRef] = set()
        for res, entity in self.res_to_entity.items():
            for obj in entity.g.objects(subject=res, predicate=None):
                if isinstance(obj, URIRef):
                    referenced_entities.add(obj)

        return [entity for res, entity in self.res_to_entity.items()
                if res not in referenced_entities]

    def remove_orphans_from_triplestore(self, ts_url: str, resp_agent: str) -> None:
        """Drop, from entities found in the triplestore, links to entities marked for deletion.

        For every entity of this set flagged ``to_be_deleted``, the triplestore
        at ``ts_url`` is asked for all triples of the subjects that reference
        it; those subjects are imported into this set through
        ``Reader.import_entities_from_graph`` and the triples pointing to the
        deleted resource are removed from their graphs.

        :param ts_url: URL of the SPARQL endpoint
        :param resp_agent: responsible agent passed to the importer
        """
        with SPARQLClient(ts_url) as client:
            # Snapshot: importing entities is expected to register them in
            # res_to_entity, which must not happen while iterating the dict.
            for entity_res, entity in list(self.res_to_entity.items()):
                if not entity.to_be_deleted:
                    continue
                query: str = f"CONSTRUCT {{?s ?p ?o}} WHERE {{?s ?p ?o ; ?p_1 <{entity_res}>}}"
                nt_bytes = client.construct(query)
                if not nt_bytes:
                    continue
                result: Graph = Graph()
                result.parse(BytesIO(nt_bytes), format='nt')
                imported_entities: List[GraphEntity] = Reader.import_entities_from_graph(self, result, resp_agent)
                for imported_entity in imported_entities:
                    imported_entity.g.remove((imported_entity.res, None, entity_res))

    def commit_changes(self):
        """Commit every entity and forget those marked for deletion."""
        # Iterate over a snapshot: deleting keys while iterating the live
        # dict view raises "dictionary changed size during iteration".
        for res, entity in list(self.res_to_entity.items()):
            entity.commit_changes()
            if entity.to_be_deleted:
                del self.res_to_entity[res]

    def _set_ns(self, g: Graph) -> None:
        """Bind the per-kind graph namespaces and the ontology prefixes on ``g``."""
        bind = g.namespace_manager.bind
        # Namespaces derived from the base IRI (same order as always bound).
        for prefix, graph_url in (
            ("an", self.g_an),
            ("ar", self.g_ar),
            ("be", self.g_be),
            ("ci", self.g_ci),
            ("de", self.g_de),
            ("br", self.g_br),
            ("id", self.g_id),
            ("pl", self.g_pl),
            ("ra", self.g_ra),
            ("re", self.g_re),
            ("rp", self.g_rp),
        ):
            bind(prefix, Namespace(graph_url))
        # Namespaces declared on GraphEntity.
        for prefix, namespace in (
            ("biro", GraphEntity.BIRO),
            ("co", GraphEntity.CO),
            ("c4o", GraphEntity.C4O),
            ("cito", GraphEntity.CITO),
            ("datacite", GraphEntity.DATACITE),
            ("dcterms", GraphEntity.DCTERMS),
            ("deo", GraphEntity.DEO),
            ("doco", GraphEntity.DOCO),
            ("fabio", GraphEntity.FABIO),
            ("foaf", GraphEntity.FOAF),
            ("frbr", GraphEntity.FRBR),
            ("literal", GraphEntity.LITERAL),
            ("oa", GraphEntity.OA),
            ("oco", GraphEntity.OCO),
            ("prism", GraphEntity.PRISM),
            ("pro", GraphEntity.PRO),
        ):
            bind(prefix, namespace)

    def _entities_of_type(self, entity_cls: type) -> Tuple[GraphEntity, ...]:
        """Return every registered entity that is an instance of ``entity_cls``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, entity_cls))

    def get_an(self) -> Tuple[ReferenceAnnotation, ...]:
        """Return all ``ReferenceAnnotation`` entities of this set."""
        return self._entities_of_type(ReferenceAnnotation)

    def get_ar(self) -> Tuple[AgentRole, ...]:
        """Return all ``AgentRole`` entities of this set."""
        return self._entities_of_type(AgentRole)

    def get_be(self) -> Tuple[BibliographicReference, ...]:
        """Return all ``BibliographicReference`` entities of this set."""
        return self._entities_of_type(BibliographicReference)

    def get_br(self) -> Tuple[BibliographicResource, ...]:
        """Return all ``BibliographicResource`` entities of this set."""
        return self._entities_of_type(BibliographicResource)

    def get_ci(self) -> Tuple[Citation, ...]:
        """Return all ``Citation`` entities of this set."""
        return self._entities_of_type(Citation)

    def get_de(self) -> Tuple[DiscourseElement, ...]:
        """Return all ``DiscourseElement`` entities of this set."""
        return self._entities_of_type(DiscourseElement)

    def get_id(self) -> Tuple[Identifier, ...]:
        """Return all ``Identifier`` entities of this set."""
        return self._entities_of_type(Identifier)

    def get_pl(self) -> Tuple[PointerList, ...]:
        """Return all ``PointerList`` entities of this set."""
        return self._entities_of_type(PointerList)

    def get_rp(self) -> Tuple[ReferencePointer, ...]:
        """Return all ``ReferencePointer`` entities of this set."""
        return self._entities_of_type(ReferencePointer)

    def get_ra(self) -> Tuple[ResponsibleAgent, ...]:
        """Return all ``ResponsibleAgent`` entities of this set."""
        return self._entities_of_type(ResponsibleAgent)

    def get_re(self) -> Tuple[ResourceEmbodiment, ...]:
        """Return all ``ResourceEmbodiment`` entities of this set."""
        return self._entities_of_type(ResourceEmbodiment)