Coverage for heritrace / editor.py: 99%
159 statements
« prev ^ index » next — coverage.py v7.13.4, created at 2026-03-21 12:56 +0000
1# SPDX-FileCopyrightText: 2024-2025 Arcangelo Massari <arcangelo.massari@unibo.it>
2#
3# SPDX-License-Identifier: ISC
5import traceback
6from datetime import datetime
7from typing import List, Set
9from heritrace.extensions import SPARQLWrapperWithRetry
10from rdflib import Graph, Literal, URIRef
11from rdflib_ocdm.counter_handler.counter_handler import CounterHandler
12from rdflib_ocdm.ocdm_graph import OCDMDataset, OCDMGraph
13from rdflib_ocdm.reader import Reader
14from rdflib_ocdm.storer import Storer
15from SPARQLWrapper import JSON
class Editor:
    """High-level editor for an OCDM-backed RDF dataset.

    Wraps an ``OCDMDataset`` (quadstore) or ``OCDMGraph`` (plain triplestore)
    and records provenance for every create/update/delete/merge operation
    before persisting the changes to the dataset and provenance SPARQL
    endpoints.
    """

    def __init__(
        self,
        dataset_endpoint: str,
        provenance_endpoint: str,
        counter_handler: CounterHandler,
        resp_agent: URIRef,
        source: URIRef | None = None,
        c_time: datetime | None = None,
        dataset_is_quadstore: bool = True,
    ):
        """Initialize the editor.

        Args:
            dataset_endpoint: SPARQL endpoint of the dataset triplestore.
            provenance_endpoint: SPARQL endpoint where provenance is stored.
            counter_handler: Counter handler used by the OCDM graph set.
            resp_agent: URI of the agent responsible for the changes.
            source: Optional primary-source URI attached to new statements.
            c_time: Optional creation time (stored as a POSIX timestamp).
            dataset_is_quadstore: Whether the dataset supports named graphs.
        """
        self.dataset_endpoint = dataset_endpoint
        self.provenance_endpoint = provenance_endpoint
        self.counter_handler = counter_handler
        self.resp_agent = resp_agent
        self.source = source
        self.c_time = self.to_posix_timestamp(c_time)
        self.dataset_is_quadstore = dataset_is_quadstore
        self.g_set = (
            OCDMDataset(self.counter_handler)
            if self.dataset_is_quadstore
            else OCDMGraph(self.counter_handler)
        )

    def _normalize_params(
        self, subject, predicate=None, graph=None
    ) -> tuple[URIRef, URIRef | None, URIRef | None]:
        """Normalize the common parameters of graph operations.

        Coerces ``subject`` and ``predicate`` to ``URIRef`` and reduces
        ``graph`` (a ``Graph``, string, or ``URIRef``) to its identifier
        ``URIRef``. ``predicate`` and ``graph`` may be ``None``.
        """
        # Coerce the subject to a URIRef
        if not isinstance(subject, URIRef):
            subject = URIRef(subject)
        # Coerce the predicate, when provided
        if predicate is not None and not isinstance(predicate, URIRef):
            predicate = URIRef(predicate)
        # Reduce the graph to its identifier, when provided
        if graph is not None:
            if isinstance(graph, Graph):
                graph = graph.identifier
            elif isinstance(graph, str):
                graph = URIRef(graph)
        return subject, predicate, graph

    def create(
        self,
        subject: URIRef,
        predicate: URIRef,
        value: Literal | URIRef,
        graph: URIRef | Graph | str | None = None,
    ) -> None:
        """Add a new triple (or quad, when a graph is given on a quadstore).

        The responsible agent and primary source configured on this editor
        are attached to the new statement for provenance.
        """
        subject, predicate, graph = self._normalize_params(subject, predicate, graph)
        if self.dataset_is_quadstore and graph:
            self.g_set.add(
                (subject, predicate, value, graph),
                resp_agent=self.resp_agent,
                primary_source=self.source,
            )
        else:
            self.g_set.add(
                (subject, predicate, value),
                resp_agent=self.resp_agent,
                primary_source=self.source,
            )

    def update(
        self,
        subject: URIRef,
        predicate: URIRef,
        old_value: Literal | URIRef,
        new_value: Literal | URIRef,
        graph: URIRef | Graph | str | None = None,
    ) -> None:
        """Replace ``old_value`` with ``new_value`` for the given triple.

        Raises:
            Exception: If the triple/quad to update does not exist in the
                local graph set.
        """
        subject, predicate, graph = self._normalize_params(subject, predicate, graph)
        # Verify the statement exists before removing and re-adding it
        if self.dataset_is_quadstore and graph:
            if (subject, predicate, old_value, graph) not in self.g_set:
                raise Exception(
                    f"Triple ({subject}, {predicate}, {old_value}, {graph}) does not exist"
                )
            self.g_set.remove((subject, predicate, old_value, graph))
            self.g_set.add(
                (subject, predicate, new_value, graph),
                resp_agent=self.resp_agent,
                primary_source=self.source,
            )
        else:
            if (subject, predicate, old_value) not in self.g_set:
                raise Exception(
                    f"Triple ({subject}, {predicate}, {old_value}) does not exist"
                )
            self.g_set.remove((subject, predicate, old_value))
            self.g_set.add(
                (subject, predicate, new_value),
                resp_agent=self.resp_agent,
                primary_source=self.source,
            )

    def delete(
        self,
        subject: URIRef,
        predicate: URIRef | None = None,
        value=None,
        graph: URIRef | Graph | str | None = None,
    ) -> None:
        """Delete statements about ``subject``.

        Behavior depends on the arguments:
          * no predicate: delete the whole entity (including incoming
            references) and mark it as deleted;
          * predicate and value: delete that single triple/quad;
          * predicate only: delete every triple with that predicate.

        Raises:
            Exception: If the targeted entity/triple(s) do not exist.
        """
        subject, predicate, graph = self._normalize_params(subject, predicate, graph)

        if predicate is None:
            # Delete the entire entity, verifying that it exists first
            if self.dataset_is_quadstore:
                quads = list(self.g_set.quads((subject, None, None, None)))
                if not quads:
                    raise Exception(f"Entity {subject} does not exist")
                for quad in quads:
                    self.g_set.remove(quad)
                # Also remove any quads where this entity is the object
                object_quads = list(self.g_set.quads((None, None, subject, None)))
                for quad in object_quads:
                    self.g_set.remove(quad)
            else:
                triples = list(self.g_set.triples((subject, None, None)))
                if not triples:
                    raise Exception(f"Entity {subject} does not exist")
                for triple in triples:
                    self.g_set.remove(triple)
                # Also remove any triples where this entity is the object
                object_triples = list(self.g_set.triples((None, None, subject)))
                for triple in object_triples:
                    self.g_set.remove(triple)
            self.g_set.mark_as_deleted(subject)
        else:
            # BUGFIX: test for None explicitly. A falsy Literal such as
            # Literal("") previously fell through to the "delete every
            # triple with this predicate" branch.
            if value is not None:
                # Check that the specific triple/quad exists before removing it
                if self.dataset_is_quadstore and graph:
                    if (subject, predicate, value, graph) not in self.g_set:
                        raise Exception(
                            f"Triple ({subject}, {predicate}, {value}, {graph}) does not exist"
                        )
                    self.g_set.remove((subject, predicate, value, graph))
                else:
                    if (subject, predicate, value) not in self.g_set:
                        raise Exception(
                            f"Triple ({subject}, {predicate}, {value}) does not exist"
                        )
                    self.g_set.remove((subject, predicate, value))
            else:
                # Delete every statement with the given subject and predicate
                if self.dataset_is_quadstore and graph:
                    quads = list(self.g_set.quads((subject, predicate, None, graph)))
                    if not quads:
                        raise Exception(
                            f"No triples found with subject {subject} and predicate {predicate} in graph {graph}"
                        )
                    for quad in quads:
                        self.g_set.remove(quad)
                else:
                    triples = list(self.g_set.triples((subject, predicate, None)))
                    if not triples:
                        raise Exception(
                            f"No triples found with subject {subject} and predicate {predicate}"
                        )
                    for triple in triples:
                        self.g_set.remove(triple)

            # If the entity is now empty, mark it as deleted
            from heritrace.utils.sparql_utils import get_triples_from_graph
            if len(list(get_triples_from_graph(self.g_set, (subject, None, None)))) == 0:
                self.g_set.mark_as_deleted(subject)

    def import_entity(self, subject):
        """Load the current state of ``subject`` from the dataset endpoint
        into the local graph set."""
        Reader.import_entities_from_triplestore(
            self.g_set, self.dataset_endpoint, [subject]
        )

    def merge(self, keep_entity_uri: str, delete_entity_uri: str) -> None:
        """
        Merges one entity into another within the dataset.

        The delete_entity_uri will be removed, and its properties and
        incoming references will be transferred to keep_entity_uri.
        All operations are performed within the local graph set managed by
        this Editor instance and then saved, ensuring provenance capture.

        Args:
            keep_entity_uri: The URI of the entity to keep.
            delete_entity_uri: The URI of the entity to delete and merge from.

        Raises:
            ValueError: If keep_entity_uri and delete_entity_uri are the same.
            Exception: If errors occur during SPARQL queries or graph operations.
        """
        keep_uri, _, _ = self._normalize_params(keep_entity_uri)
        delete_uri, _, _ = self._normalize_params(delete_entity_uri)

        if keep_uri == delete_uri:
            raise ValueError("Cannot merge an entity with itself.")

        sparql = SPARQLWrapperWithRetry(self.dataset_endpoint)
        entities_to_import: Set[URIRef] = {keep_uri, delete_uri}
        incoming_triples_to_update: List[tuple[URIRef, URIRef]] = []
        outgoing_triples_to_move: List[tuple[URIRef, Literal | URIRef]] = []

        try:
            # 1. Find incoming references to delete_uri
            # We fetch subjects and predicates pointing to the entity to be deleted.
            query_incoming = f"SELECT DISTINCT ?s ?p WHERE {{ ?s ?p <{delete_uri}> . FILTER (?s != <{keep_uri}>) }}"
            sparql.setQuery(query_incoming)
            sparql.setReturnFormat(JSON)
            results_incoming = sparql.query().convert()
            for result in results_incoming["results"]["bindings"]:
                s_uri = URIRef(result["s"]["value"])
                p_uri = URIRef(result["p"]["value"])
                incoming_triples_to_update.append((s_uri, p_uri))
                entities_to_import.add(s_uri)  # Ensure referencing entities are loaded

            # 2. Find outgoing properties from delete_uri (excluding rdf:type)
            # We fetch predicates and objects of the entity to be deleted.
            query_outgoing = f"""
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            SELECT DISTINCT ?p ?o WHERE {{
                <{delete_uri}> ?p ?o .
                FILTER (?p != rdf:type)
            }}
            """
            sparql.setQuery(query_outgoing)
            sparql.setReturnFormat(JSON)
            results_outgoing = sparql.query().convert()
            for result in results_outgoing["results"]["bindings"]:
                p_uri = URIRef(result["p"]["value"])
                o_node = result["o"]
                o_val: Literal | URIRef | None = None
                if o_node["type"] == "uri":
                    o_val = URIRef(o_node["value"])
                    entities_to_import.add(o_val)  # Ensure referenced entities are loaded
                elif o_node["type"] in {"literal", "typed-literal"}:
                    o_val = Literal(o_node["value"], lang=o_node.get("xml:lang"), datatype=URIRef(o_node["datatype"]) if o_node.get("datatype") else None)
                else:  # bnode? Skip for now or handle if necessary
                    print(f"Warning: Skipping non-URI/Literal object type '{o_node['type']}' from {delete_uri} via {p_uri}")
                    continue
                # BUGFIX: compare against None — a falsy Literal such as
                # Literal("") was previously dropped instead of being moved.
                if o_val is not None:
                    outgoing_triples_to_move.append((p_uri, o_val))

            # 3. Import all involved entities into the local graph set
            # This brings the current state of these entities from the triplestore
            # into the Editor's context for modification.
            if entities_to_import:
                Reader.import_entities_from_triplestore(
                    self.g_set, self.dataset_endpoint, list(entities_to_import)
                )
            # Mark the start of modifications if using preexisting_finished pattern
            self.g_set.preexisting_finished(self.resp_agent, self.source, self.c_time)

            # 4. Perform the merge using the built-in function
            # This function handles moving triples and updating the internal
            # merge_index and entity_index for provenance generation.
            self.g_set.merge(keep_uri, delete_uri)

            # 5. Save changes and provenance
            # This uploads the modified local graph and the generated provenance graph.
            self.save()

        except Exception as e:
            print(f"Error during merge operation for {keep_uri} and {delete_uri}: {e}")
            print(traceback.format_exc())
            # Avoid committing partial changes by not calling save()
            raise  # Re-raise the exception to signal failure

    def preexisting_finished(self):
        """Snapshot the pre-existing state of the graph set so that later
        edits are tracked against it for provenance."""
        self.g_set.preexisting_finished(self.resp_agent, self.source, self.c_time)

    def save(self):
        """Generate provenance, upload dataset and provenance graphs to their
        endpoints, and commit the local changes."""
        self.g_set.generate_provenance()
        dataset_storer = Storer(self.g_set)
        prov_storer = Storer(self.g_set.provenance)
        dataset_storer.upload_all(self.dataset_endpoint)
        prov_storer.upload_all(self.provenance_endpoint)
        self.g_set.commit_changes()

    def to_posix_timestamp(self, value: str | datetime | None) -> float | None:
        """Convert a datetime or ISO-format string to a POSIX timestamp.

        Returns None for None input. An ISO string is parsed with
        datetime.fromisoformat (ValueError propagates for malformed input).
        """
        if value is None:
            return None
        elif isinstance(value, datetime):
            return value.timestamp()
        elif isinstance(value, str):
            dt = datetime.fromisoformat(value)
            return dt.timestamp()
        # Unsupported types yield None (made explicit; original fell through)
        return None

    def set_primary_source(self, source: str | URIRef) -> None:
        """
        Set the primary source for this editor instance.

        This will affect all future operations performed by this editor.

        Args:
            source: The primary source URI to use
        """
        if source:
            if not isinstance(source, URIRef):
                source = URIRef(source)
            self.source = source