Coverage for heritrace / utils / sparql_utils.py: 94%
436 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-21 12:56 +0000
1# SPDX-FileCopyrightText: 2024-2026 Arcangelo Massari <arcangelo.massari@unibo.it>
2#
3# SPDX-License-Identifier: ISC
5import os
6from collections import defaultdict
7from concurrent.futures import ProcessPoolExecutor, as_completed
8from typing import List
10from rdflib import RDF, Dataset, Graph, Literal, URIRef
11from rdflib.term import Node
12from rdflib.plugins.sparql.algebra import translateUpdate
13from rdflib.plugins.sparql.parser import parseUpdate
14from rdflib.util import from_n3
15from SPARQLWrapper import JSON
16from time_agnostic_library.agnostic_entity import AgnosticEntity
18from heritrace.editor import Editor
19from heritrace.extensions import (get_change_tracking_config,
20 get_classes_with_multiple_shapes,
21 get_custom_filter, get_dataset_is_quadstore,
22 get_display_rules, get_provenance_sparql,
23 get_shacl_graph, get_sparql)
24from heritrace.utils.converters import convert_to_datetime
25from heritrace.utils.display_rules_utils import (find_matching_rule,
26 get_highest_priority_class,
27 get_sortable_properties,
28 is_entity_type_visible)
29from heritrace.utils.shacl_utils import (determine_shape_for_classes,
30 determine_shape_for_entity_triples)
31from heritrace.utils.virtuoso_utils import (VIRTUOSO_EXCLUDED_GRAPHS,
32 is_virtuoso)
# Module-level cache for get_available_classes(). Populated at startup by
# precompute_available_classes_cache(); get_available_classes() drops it again
# when the total count is below COUNT_LIMIT so small datasets stay accurate.
_AVAILABLE_CLASSES_CACHE = None
def _parse_n3(value: str) -> Node:
    """Parse an N3-encoded term string into an rdflib Node.

    Raises:
        ValueError: if from_n3 yields something that is not a Node.
    """
    term = from_n3(value)
    if isinstance(term, Node):
        return term
    raise ValueError(f"Cannot parse N3 value: {value}")
def n3_set_to_graph(n3_set: set[tuple[str, ...]], is_quadstore: bool) -> Graph | Dataset:
    """Materialize a set of N3-encoded tuples into a Graph (triples) or Dataset (quads)."""
    if not is_quadstore:
        graph = Graph()
        for triple in n3_set:
            graph.add((_parse_n3(triple[0]), _parse_n3(triple[1]), _parse_n3(triple[2])))
        return graph
    # Quadstore: the fourth element is the named-graph context.
    dataset = Dataset(default_union=True)
    for quad in n3_set:
        parsed = tuple(_parse_n3(part) for part in quad[:4])
        dataset.add(parsed)  # type: ignore[arg-type]
    return dataset
def convert_to_rdflib_graphs(snapshots: dict, is_quadstore: bool) -> dict:
    """Convert nested {entity_uri: {timestamp: n3_set}} snapshots into rdflib graphs."""
    return {
        entity_uri: {
            ts: n3_set_to_graph(n3_set, is_quadstore)
            for ts, n3_set in timestamps.items()
        }
        for entity_uri, timestamps in snapshots.items()
    }
def get_triples_from_graph(graph_or_dataset, pattern):
    """
    Yield (s, p, o) triples matching *pattern* from a Graph or Dataset.

    For a Dataset (quadstore) the quads are projected down to triples by
    dropping the graph component; a plain Graph is iterated directly.

    Args:
        graph_or_dataset: Graph or Dataset instance
        pattern: Triple pattern tuple (s, p, o) where each can be None

    Returns:
        Generator of triples (s, p, o)
    """
    if not isinstance(graph_or_dataset, Dataset):
        yield from graph_or_dataset.triples(pattern)
        return
    for quad in graph_or_dataset.quads(pattern):
        yield quad[:3]
# Upper bound for per-class instance counting; counts above this are displayed
# as "N+". Overridable via the COUNT_LIMIT environment variable (default 10000).
COUNT_LIMIT = int(os.getenv("COUNT_LIMIT", "10000"))
def precompute_available_classes_cache():
    """Pre-compute and memoize the available-classes listing at application startup."""
    global _AVAILABLE_CLASSES_CACHE
    cache = get_available_classes()
    _AVAILABLE_CLASSES_CACHE = cache
    return cache
def _wrap_virtuoso_graph_pattern(pattern: str) -> str:
    """Wrap *pattern* in a GRAPH clause that excludes Virtuoso system graphs.

    Returns the pattern unchanged when the endpoint is not Virtuoso.
    """
    if not is_virtuoso():
        return pattern
    excluded = '>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)
    return f"""
    GRAPH ?g {{
        {pattern}
    }}
    FILTER(?g NOT IN (<{excluded}>))
    """
108def _build_count_query_with_limit(class_uri: str, limit: int) -> str:
109 """Build a COUNT query with LIMIT for a specific class."""
111 return f"""
112 SELECT (COUNT(?subject) as ?count)
113 WHERE {{
114 {{
115 SELECT DISTINCT ?subject
116 WHERE {{
117 ?subject a <{class_uri}> .
118 }}
119 LIMIT {limit}
120 }}
121 }}
122 """
def _count_class_instances(class_uri: str, limit: int = COUNT_LIMIT) -> tuple:
    """
    Count instances of *class_uri*, capping the scan at *limit*.

    Queries with limit + 1 so that exactly-at-limit counts are distinguishable
    from truncated ones.

    Returns:
        tuple: (display_count, numeric_count) where display_count may be "LIMIT+"
    """
    endpoint = get_sparql()
    endpoint.setQuery(_build_count_query_with_limit(class_uri, limit + 1))
    endpoint.setReturnFormat(JSON)
    bindings = endpoint.query().convert()["results"]["bindings"]
    count = int(bindings[0]["count"]["value"])
    if count <= limit:
        return str(count), count
    return f"{limit}+", limit
def _get_entities_with_enhanced_shape_detection(class_uri: str, classes_with_multiple_shapes: set, limit: int = COUNT_LIMIT):
    """
    Get entities for a class using enhanced shape detection for classes with multiple shapes.
    Uses LIMIT to avoid loading all entities.

    Runs two queries: one to collect up to *limit* subject URIs, then a second
    fetching all triples for just those subjects so the shape of each entity
    can be determined from its actual data.

    Returns:
        defaultdict(list): shape URI -> list of {"uri", "class", "shape"} dicts,
        containing only entities whose (class, shape) key is visible.
    """
    # Early exit if no classes have multiple shapes
    if not classes_with_multiple_shapes or class_uri not in classes_with_multiple_shapes:
        return defaultdict(list)

    sparql = get_sparql()

    # Stage 1: bounded list of subjects of this class.
    subjects_query = f"""
    SELECT DISTINCT ?subject
    WHERE {{
        ?subject a <{class_uri}> .
    }}
    LIMIT {limit}
    """

    sparql.setQuery(subjects_query)
    sparql.setReturnFormat(JSON)
    subjects_results = sparql.query().convert()

    subjects = [r["subject"]["value"] for r in subjects_results["results"]["bindings"]]

    if not subjects:
        return defaultdict(list)

    # Fetch triples only for these specific subjects
    # (VALUES pins ?subject to the URIs gathered above).
    subjects_filter = " ".join([f"(<{s}>)" for s in subjects])
    pattern_with_filter = f"?subject a <{class_uri}> . ?subject ?p ?o . VALUES (?subject) {{ {subjects_filter} }}"

    triples_query = f"""
    SELECT ?subject ?p ?o
    WHERE {{
        {pattern_with_filter}
    }}
    """

    sparql.setQuery(triples_query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    # Group the flat result rows back into per-subject triple lists.
    entities_triples = defaultdict(list)
    for binding in results["results"]["bindings"]:
        subject = binding["subject"]["value"]
        predicate = binding["p"]["value"]
        obj = binding["o"]["value"]
        entities_triples[subject].append((subject, predicate, obj))

    # Stage 2: classify each entity by the shape its triples match, keeping
    # only combinations the display configuration marks as visible.
    shape_to_entities = defaultdict(list)
    for subject_uri, triples in entities_triples.items():
        shape_uri = determine_shape_for_entity_triples(triples)
        if shape_uri:
            entity_key = (class_uri, shape_uri)
            if is_entity_type_visible(entity_key):
                shape_to_entities[shape_uri].append({
                    "uri": subject_uri,
                    "class": class_uri,
                    "shape": shape_uri
                })

    return shape_to_entities
def get_classes_from_shacl_or_display_rules():
    """Extract class URIs from SHACL shapes, falling back to display_rules targets."""
    target_class_pred = URIRef("http://www.w3.org/ns/shacl#targetClass")
    found = set()

    shacl_graph = get_shacl_graph()
    if shacl_graph:
        for shape in shacl_graph.subjects(target_class_pred, None, unique=True):
            found.update(
                str(target)
                for target in shacl_graph.objects(shape, target_class_pred, unique=True)
            )

    if found:
        return list(found)

    # No SHACL targets: fall back to explicit class targets in display rules.
    display_rules = get_display_rules()
    if display_rules:
        for rule in display_rules:
            if "target" in rule and "class" in rule["target"]:
                found.add(rule["target"]["class"])

    return list(found)
def get_available_classes():
    """
    Fetch and format all available entity classes.
    Returns cached result if available (computed at startup).
    For small datasets (< COUNT_LIMIT), cache is invalidated to keep counts accurate.

    Each returned dict carries: uri, label, count (display string), count_numeric,
    and shape. The list is sorted alphabetically by label.
    """
    global _AVAILABLE_CLASSES_CACHE

    # Invalidate the startup cache when the dataset is small enough that
    # recomputing is cheap and exact counts matter.
    if _AVAILABLE_CLASSES_CACHE is not None:
        total_count = sum(cls.get('count_numeric', 0) for cls in _AVAILABLE_CLASSES_CACHE)
        if total_count < COUNT_LIMIT:
            _AVAILABLE_CLASSES_CACHE = None

    if _AVAILABLE_CLASSES_CACHE is not None:
        return _AVAILABLE_CLASSES_CACHE

    custom_filter = get_custom_filter()
    classes_from_config = get_classes_from_shacl_or_display_rules()

    if classes_from_config:
        class_uris = classes_from_config
    else:
        # No configured classes: discover them by querying the dataset.
        sparql = get_sparql()
        pattern = "?subject a ?class ."
        wrapped_pattern = _wrap_virtuoso_graph_pattern(pattern)

        query = f"""
        SELECT DISTINCT ?class
        WHERE {{
            {wrapped_pattern}
        }}
        """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()
        class_uris = [r["class"]["value"] for r in results["results"]["bindings"]]

    # Count instances for each class
    classes_with_counts = []
    for class_uri in class_uris:
        display_count, numeric_count = _count_class_instances(class_uri)
        classes_with_counts.append({
            "uri": class_uri,
            "display_count": display_count,
            "numeric_count": numeric_count
        })

    # Sort by count descending
    classes_with_counts.sort(key=lambda x: x["numeric_count"], reverse=True)

    available_classes = []
    classes_with_multiple_shapes = get_classes_with_multiple_shapes()

    for class_data in classes_with_counts:
        class_uri = class_data["uri"]

        if classes_with_multiple_shapes and class_uri in classes_with_multiple_shapes:
            # A class with several shapes becomes one entry per shape, each with
            # its own (shape-filtered) entity count.
            shape_to_entities = _get_entities_with_enhanced_shape_detection(
                class_uri, classes_with_multiple_shapes, limit=COUNT_LIMIT
            )

            for shape_uri, entities in shape_to_entities.items():
                if entities:
                    entity_key = (class_uri, shape_uri)
                    available_classes.append({
                        "uri": class_uri,
                        "label": custom_filter.human_readable_class(entity_key),
                        "count": f"{len(entities)}+" if len(entities) >= COUNT_LIMIT else str(len(entities)),
                        "count_numeric": len(entities),
                        "shape": shape_uri
                    })
        else:
            # Single-shape class: use the cheap per-class count from above.
            shape_uri = determine_shape_for_classes([class_uri])
            entity_key = (class_uri, shape_uri)

            if is_entity_type_visible(entity_key):
                available_classes.append({
                    "uri": class_uri,
                    "label": custom_filter.human_readable_class(entity_key),
                    "count": class_data["display_count"],
                    "count_numeric": class_data["numeric_count"],
                    "shape": shape_uri
                })

    available_classes.sort(key=lambda x: x["label"].lower())
    return available_classes
def build_sort_clause(sort_property: str, entity_type: str, shape_uri: str = None) -> str:
    """
    Build a SPARQL sort clause based on the sortableBy configuration.

    Args:
        sort_property: The property to sort by
        entity_type: The entity type URI
        shape_uri: Optional shape URI for more specific sorting rules

    Returns:
        SPARQL sort clause, or "" when the property is not configured as sortable.
    """
    if not (sort_property and entity_type):
        return ""

    rule = find_matching_rule(entity_type, shape_uri)
    if not rule or "sortableBy" not in rule:
        return ""

    # Only emit a clause when the requested property appears in sortableBy.
    for config in rule["sortableBy"]:
        if config.get("property") == sort_property:
            return f"OPTIONAL {{ ?subject <{sort_property}> ?sortValue }}"

    return ""
def get_entities_for_class(
    selected_class, page, per_page, sort_property=None, sort_direction="ASC", selected_shape=None
):
    """
    Retrieve entities for a specific class with pagination and sorting.

    Args:
        selected_class (str): URI of the class to retrieve entities for
        page (int): Page number (1-indexed)
        per_page (int): Number of entities per page
        sort_property (str, optional): Property URI to sort by. Defaults to None.
        sort_direction (str, optional): Sort direction ("ASC" or "DESC"). Defaults to "ASC".
        selected_shape (str, optional): Shape URI for filtering entities. Defaults to None.

    Returns:
        tuple: (list of entities, total count)

    Performance Notes:
        - If sort_property is None, NO ORDER BY clause is applied to the SPARQL query.
          This significantly improves performance for large datasets by avoiding expensive
          sorting operations on URIs.
        - Without explicit ordering, the triplestore returns results in its natural order,
          which is deterministic within a session but may vary after database reloads.
        - For optimal performance with large datasets, configure display_rules.yaml without
          sortableBy properties to prevent users from triggering expensive sort operations.
    """
    sparql = get_sparql()
    custom_filter = get_custom_filter()
    classes_with_multiple_shapes = get_classes_with_multiple_shapes()

    # Shape filtering is only meaningful when the class actually has multiple shapes.
    use_shape_filtering = (selected_shape and selected_class in classes_with_multiple_shapes)

    if use_shape_filtering:
        # For shape filtering, we need to fetch entities and check their shape
        # Use a larger LIMIT to ensure we get enough entities after filtering
        offset = (page - 1) * per_page
        fetch_limit = per_page * 5  # Safety margin for filtering

        subjects_query = f"""
        SELECT DISTINCT ?subject
        WHERE {{
            ?subject a <{selected_class}> .
        }}
        LIMIT {fetch_limit}
        OFFSET {offset}
        """

        sparql.setQuery(subjects_query)
        sparql.setReturnFormat(JSON)
        subjects_results = sparql.query().convert()

        subjects = [r["subject"]["value"] for r in subjects_results["results"]["bindings"]]

        if not subjects:
            return [], 0

        # Now fetch triples for these specific subjects
        subjects_filter = " ".join([f"(<{s}>)" for s in subjects])

        triples_query = f"""
        SELECT ?subject ?p ?o
        WHERE {{
            ?subject a <{selected_class}> . ?subject ?p ?o . VALUES (?subject) {{ {subjects_filter} }}
        }}
        """

        sparql.setQuery(triples_query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        # Group flat rows into per-subject triple lists for shape detection.
        entities_triples = defaultdict(list)
        for binding in results["results"]["bindings"]:
            subject = binding["subject"]["value"]
            predicate = binding["p"]["value"]
            obj = binding["o"]["value"]
            entities_triples[subject].append((subject, predicate, obj))

        # Keep only entities whose detected shape matches the requested one.
        filtered_entities = []
        for subject_uri, triples in entities_triples.items():
            entity_shape = determine_shape_for_entity_triples(list(triples))
            if entity_shape == selected_shape:
                entity_label = custom_filter.human_readable_entity(
                    subject_uri, (selected_class, selected_shape), None
                )
                filtered_entities.append({"uri": subject_uri, "label": entity_label})

        # In-memory label sort (SPARQL-side sorting is not applied on this path).
        if sort_property and sort_direction:
            reverse_sort = sort_direction.upper() == "DESC"
            filtered_entities.sort(key=lambda x: x["label"].lower(), reverse=reverse_sort)

        # For shape-filtered results, we can't accurately determine total_count without scanning all entities
        # Return the number of filtered entities as an approximation
        total_count = len(filtered_entities)
        return filtered_entities[:per_page], total_count

    # Standard pagination path
    offset = (page - 1) * per_page
    sort_clause = ""
    order_clause = ""

    if sort_property:
        sort_clause = build_sort_clause(sort_property, selected_class, selected_shape)
        if sort_clause:
            order_clause = f"ORDER BY {sort_direction}(?sortValue)"

    entities_query = f"""
    SELECT ?subject {f"?sortValue" if sort_property else ""}
    WHERE {{
        ?subject a <{selected_class}> . {sort_clause}
    }}
    {order_clause}
    LIMIT {per_page}
    OFFSET {offset}
    """

    # Total count comes from the precomputed class listing, not a COUNT query.
    available_classes = get_available_classes()

    class_info = next(
        (c for c in available_classes
         if c["uri"] == selected_class and c.get("shape") == selected_shape),
        None
    )
    total_count = class_info.get("count_numeric", 0) if class_info else 0

    sparql.setQuery(entities_query)
    sparql.setReturnFormat(JSON)
    entities_results = sparql.query().convert()

    entities = []
    shape = selected_shape if selected_shape else determine_shape_for_classes([selected_class])

    for result in entities_results["results"]["bindings"]:
        subject_uri = result["subject"]["value"]
        entity_label = custom_filter.human_readable_entity(
            subject_uri, (selected_class, shape), None
        )
        entities.append({"uri": subject_uri, "label": entity_label})

    return entities, total_count
def get_catalog_data(
    selected_class: str,
    page: int,
    per_page: int,
    sort_property: str = None,
    sort_direction: str = "ASC",
    selected_shape: str = None
) -> dict:
    """
    Get catalog data with pagination and sorting.

    Args:
        selected_class (str): Selected class URI
        page (int): Current page number
        per_page (int): Items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC')
        selected_shape (str, optional): URI of the shape to use for sorting rules

    Returns:
        dict: Catalog data including entities, pagination info, and sort settings
    """
    entities: list = []
    total_count = 0
    sortable_properties: list = []

    if selected_class:
        sortable_properties = get_sortable_properties((selected_class, selected_shape))

        # Default to the first configured sortable property when none was requested.
        if sortable_properties and not sort_property:
            sort_property = sortable_properties[0]["property"]

        entities, total_count = get_entities_for_class(
            selected_class, page, per_page, sort_property, sort_direction, selected_shape
        )

    # Ceiling division; zero pages when there are no results.
    total_pages = (total_count + per_page - 1) // per_page if total_count > 0 else 0

    return {
        "entities": entities,
        "total_pages": total_pages,
        "current_page": page,
        "per_page": per_page,
        "total_count": total_count,
        "sort_property": sort_property,
        "sort_direction": sort_direction,
        "sortable_properties": sortable_properties,
        "selected_class": selected_class,
        "selected_shape": selected_shape,
    }
def fetch_data_graph_for_subject(subject: str) -> Graph | Dataset:
    """
    Fetch all triples/quads associated with a subject from the dataset.
    Handles both triplestore and quadstore cases appropriately.

    Args:
        subject (str): The URI of the subject to fetch data for

    Returns:
        Graph|Dataset: A graph containing all triples/quads for the subject
    """
    g = Dataset() if get_dataset_is_quadstore() else Graph()
    sparql = get_sparql()

    if is_virtuoso():
        # For virtuoso we need to explicitly query the graph
        # (and exclude Virtuoso's internal system graphs).
        query = f"""
        SELECT ?predicate ?object ?g WHERE {{
            GRAPH ?g {{
                <{subject}> ?predicate ?object.
            }}
            FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
        }}
        """
    else:
        if get_dataset_is_quadstore():
            # For non-virtuoso quadstore, we need to query all graphs
            query = f"""
            SELECT ?predicate ?object ?g WHERE {{
                GRAPH ?g {{
                    <{subject}> ?predicate ?object.
                }}
            }}
            """
        else:
            # For regular triplestore
            query = f"""
            SELECT ?predicate ?object WHERE {{
                <{subject}> ?predicate ?object.
            }}
            """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    query_results = sparql.query().convert()
    results = query_results.get("results", {}).get("bindings", [])

    for result in results:
        # Create the appropriate value (Literal or URIRef)
        obj_data = result["object"]
        if obj_data["type"] in {"literal", "typed-literal"}:
            if "datatype" in obj_data:
                value = Literal(
                    obj_data["value"], datatype=URIRef(obj_data["datatype"])
                )
            else:
                # Create literal without explicit datatype to match Reader.import_entities_from_triplestore
                value = Literal(obj_data["value"])
        else:
            # Covers "uri" bindings; bnodes would also land here — TODO confirm
            # whether blank-node objects can occur in this dataset.
            value = URIRef(obj_data["value"])

        # Add triple/quad based on store type
        if get_dataset_is_quadstore():
            graph_uri = URIRef(result["g"]["value"])
            g.add(
                (
                    URIRef(subject),
                    URIRef(result["predicate"]["value"]),
                    value,
                    graph_uri,
                )
            )
        else:
            g.add((URIRef(subject), URIRef(result["predicate"]["value"]), value))

    return g
def parse_sparql_update(query) -> dict:
    """Parse a SPARQL UPDATE string into a dict of triple modifications.

    Returns a dict with optional "Deletions" and "Additions" keys, each a list
    of (s, p, o) triples extracted from DELETE DATA / INSERT DATA operations.
    """
    algebra = translateUpdate(parseUpdate(query)).algebra
    modifications: dict = {}

    def flatten_quads(quads):
        # Drop the graph component: every quad becomes a bare (s, p, o) triple.
        return [
            (triple[0], triple[1], triple[2])
            for triples in quads.values()
            for triple in triples
        ]

    op_labels = {"DeleteData": "Deletions", "InsertData": "Additions"}
    for operation in algebra:
        label = op_labels.get(operation.name)
        if label is None:
            continue
        if getattr(operation, "quads", None):
            extracted = flatten_quads(operation.quads)
        else:
            extracted = operation.triples
        if extracted:
            modifications.setdefault(label, list()).extend(extracted)

    return modifications
def fetch_current_state_with_related_entities(
    provenance: dict,
) -> Graph | Dataset:
    """
    Fetch the current state of an entity and all its related entities known from provenance.

    Args:
        provenance (dict): Dictionary containing provenance metadata for main entity and related entities

    Returns:
        Dataset: A graph containing the current state of all entities
    """
    combined = Dataset() if get_dataset_is_quadstore() else Graph()

    # Merge the current graph of every entity mentioned in the provenance dict.
    for entity_uri in provenance:
        entity_graph = fetch_data_graph_for_subject(entity_uri)
        if get_dataset_is_quadstore():
            for quad in entity_graph.quads():
                combined.add(quad)
        else:
            for triple in entity_graph:
                combined.add(triple)

    return combined
def get_deleted_entities_with_filtering(
    page=1,
    per_page=50,
    sort_property="deletionTime",
    sort_direction="DESC",
    selected_class=None,
    selected_shape=None,
):
    """
    Fetch and process deleted entities from the provenance graph, with filtering and sorting.

    Returns:
        tuple: (paginated_entities, available_classes, selected_class,
        selected_shape, sortable_properties, total_count). Returns
        ([], [], None, None, [], 0) when no deletions exist.
    """
    sortable_properties = [
        {"property": "deletionTime", "displayName": "Deletion Time", "sortType": "date"}
    ]
    provenance_sparql = get_provenance_sparql()
    custom_filter = get_custom_filter()

    # A "deleted" entity is one whose latest snapshot is invalidated and has no
    # later snapshot derived from it.
    prov_query = """
    SELECT DISTINCT ?entity ?lastSnapshot ?deletionTime ?agent ?lastValidSnapshotTime
    WHERE {
        ?lastSnapshot a <http://www.w3.org/ns/prov#Entity> ;
                    <http://www.w3.org/ns/prov#specializationOf> ?entity ;
                    <http://www.w3.org/ns/prov#generatedAtTime> ?deletionTime ;
                    <http://www.w3.org/ns/prov#invalidatedAtTime> ?invalidationTime ;
                    <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastValidSnapshot.

        ?lastValidSnapshot <http://www.w3.org/ns/prov#generatedAtTime> ?lastValidSnapshotTime .

        OPTIONAL { ?lastSnapshot <http://www.w3.org/ns/prov#wasAttributedTo> ?agent . }

        FILTER NOT EXISTS {
            ?laterSnapshot <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastSnapshot .
        }
    }
    """
    provenance_sparql.setQuery(prov_query)
    provenance_sparql.setReturnFormat(JSON)
    prov_results = provenance_sparql.query().convert()

    results_bindings = prov_results["results"]["bindings"]
    if not results_bindings:
        return [], [], None, None, [], 0

    # Each entity requires reconstructing its last valid state, which is
    # CPU-heavy — fan out across processes.
    deleted_entities = []
    max_workers = max(1, min(os.cpu_count() or 4, len(results_bindings)))
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        future_to_entity = {
            executor.submit(process_deleted_entity, result, sortable_properties): result
            for result in results_bindings
        }
        for future in as_completed(future_to_entity):
            entity_info = future.result()
            if entity_info is not None:
                deleted_entities.append(entity_info)

    # Build per-class counts across ALL deleted entities (before class filter).
    class_counts = {}
    for entity in deleted_entities:
        for type_uri in entity["entity_types"]:
            class_counts[type_uri] = class_counts.get(type_uri, 0) + 1

    available_classes = [
        {
            "uri": class_uri,
            "label": custom_filter.human_readable_class((class_uri, determine_shape_for_classes([class_uri]))),
            "count": count,
        }
        for class_uri, count in class_counts.items()
    ]

    reverse_sort = sort_direction.upper() == "DESC"
    if sort_property == "deletionTime":
        deleted_entities.sort(key=lambda e: e["deletionTime"], reverse=reverse_sort)
    else:
        # Non-date properties sort case-insensitively on precomputed sort_values.
        deleted_entities.sort(
            key=lambda e: e["sort_values"].get(sort_property, "").lower(),
            reverse=reverse_sort,
        )

    available_classes.sort(key=lambda x: x["label"].lower())
    # Default to the alphabetically-first class when none was selected.
    if not selected_class and available_classes:
        selected_class = available_classes[0]["uri"]

    if selected_class:
        if selected_shape is None:
            selected_shape = determine_shape_for_classes([selected_class])
        entity_key = (selected_class, selected_shape)
        sortable_properties.extend(
            get_sortable_properties(entity_key)
        )

    if selected_class:
        filtered_entities = [
            entity
            for entity in deleted_entities
            if selected_class in entity["entity_types"]
        ]
    else:
        filtered_entities = deleted_entities

    total_count = len(filtered_entities)
    offset = (page - 1) * per_page
    paginated_entities = filtered_entities[offset : offset + per_page]

    return paginated_entities, available_classes, selected_class, selected_shape, sortable_properties, total_count
def process_deleted_entity(result: dict, sortable_properties: list) -> dict | None:
    """
    Process a single deleted entity, filtering by visible classes.

    Reconstructs the entity's last valid state via time-agnostic queries and
    extracts display metadata from it. Runs in a worker process (see
    get_deleted_entities_with_filtering).

    Args:
        result: One SPARQL binding row with "entity", "deletionTime",
            "lastValidSnapshotTime" and optionally "agent" keys.
        sortable_properties: Property configs whose values should be
            precomputed for later in-memory sorting.

    Returns:
        dict | None: Display/metadata dict, or None when the state could not
        be reconstructed or none of the entity's types is visible.
    """
    change_tracking_config = get_change_tracking_config()
    custom_filter = get_custom_filter()

    entity_uri = result["entity"]["value"]
    last_valid_snapshot_time = result["lastValidSnapshotTime"]["value"]

    agnostic_entity = AgnosticEntity(
        res=entity_uri, config=change_tracking_config, include_related_objects=True, include_merged_entities=True, include_reverse_relations=True
    )
    # A (t, t) interval asks for the state exactly at the last valid snapshot.
    state, _, _ = agnostic_entity.get_state_at_time(
        (last_valid_snapshot_time, last_valid_snapshot_time)
    )
    state = convert_to_rdflib_graphs(state, get_dataset_is_quadstore())

    if entity_uri not in state:
        return None

    last_valid_time = convert_to_datetime(last_valid_snapshot_time, stringify=True)
    last_valid_state: Graph | Dataset = state[entity_uri][last_valid_time]

    entity_types = [
        str(o)
        for s, p, o in get_triples_from_graph(last_valid_state, (URIRef(entity_uri), RDF.type, None))
    ]
    highest_priority_type = get_highest_priority_class(entity_types)
    shape = determine_shape_for_classes([highest_priority_type])
    # Drop entities whose every type is hidden by the display configuration.
    visible_types = [t for t in entity_types if is_entity_type_visible((t, determine_shape_for_classes([t])))]
    if not visible_types:
        return None

    # Precompute one value per sortable property (first match wins, "" if none).
    sort_values = {}
    for prop in sortable_properties:
        prop_uri = prop["property"]
        values = [
            str(o)
            for s, p, o in get_triples_from_graph(
                last_valid_state, (URIRef(entity_uri), URIRef(prop_uri), None)
            )
        ]
        sort_values[prop_uri] = values[0] if values else ""

    return {
        "uri": entity_uri,
        "deletionTime": result["deletionTime"]["value"],
        "deletedBy": custom_filter.format_agent_reference(
            result.get("agent", {}).get("value", "")
        ),
        "lastValidSnapshotTime": last_valid_snapshot_time,
        "type": custom_filter.human_readable_predicate(
            highest_priority_type, (highest_priority_type, shape)
        ),
        "label": custom_filter.human_readable_entity(
            entity_uri, (highest_priority_type, shape), last_valid_state
        ),
        "entity_types": visible_types,
        "sort_values": sort_values,
    }
def find_orphaned_entities(subject, entity_type, predicate=None, object_value=None):
    """
    Find entities that would become orphaned after deleting a triple or an entire entity,
    including intermediate relation entities.

    An entity is considered orphaned if:
    1. It has no incoming references from other entities (except from the entity being deleted)
    2. It does not reference any entities that are subjects of other triples

    For intermediate relations, an entity is also considered orphaned if:
    1. It connects to the entity being deleted
    2. It has no other valid connections after the deletion
    3. It is directly involved in the deletion operation (if predicate and object_value are specified)

    Args:
        subject (str): The URI of the subject being deleted
        entity_type (str): The type of the entity being deleted
        predicate (str, optional): The predicate being deleted
        object_value (str, optional): The object value being deleted

    Returns:
        tuple: Lists of (orphaned_entities, intermediate_orphans)
    """
    sparql = get_sparql()
    # NOTE(review): get_display_rules() may return None/empty elsewhere in this
    # module (get_classes_from_shacl_or_display_rules guards it) — confirm the
    # unguarded iteration below is safe for all deployments.
    display_rules = get_display_rules()

    # Collect the classes configured as intermediate relations for this type.
    intermediate_classes = set()

    for rule in display_rules:
        if "target" in rule and "class" in rule["target"] and rule["target"]["class"] == entity_type:
            for prop in rule.get("displayProperties", []):
                if "intermediateRelation" in prop:
                    intermediate_classes.add(prop["intermediateRelation"]["class"])

    # NOTE(review): when intermediate_classes is empty the join below yields
    # "NOT IN (<>)" / "IN (<>)", which is not a valid IRI — verify this case
    # cannot occur, or that the endpoint tolerates it.
    orphan_query = f"""
    SELECT DISTINCT ?entity ?type
    WHERE {{
        {f"<{subject}> <{predicate}> ?entity ." if predicate and object_value else ""}
        {f"FILTER(?entity = <{object_value}>)" if predicate and object_value else ""}

        # If no specific predicate, get all connected entities
        {f"<{subject}> ?p ?entity ." if not predicate else ""}

        FILTER(isIRI(?entity))
        ?entity a ?type .

        # No incoming references from other entities
        FILTER NOT EXISTS {{
            ?other ?anyPredicate ?entity .
            FILTER(?other != <{subject}>)
        }}

        # No outgoing references to active entities
        FILTER NOT EXISTS {{
            ?entity ?outgoingPredicate ?connectedEntity .
            ?connectedEntity ?furtherPredicate ?furtherObject .
            {f"FILTER(?connectedEntity != <{subject}>)" if not predicate else ""}
        }}

        # Exclude intermediate relation entities
        FILTER(?type NOT IN (<{f">, <".join(intermediate_classes)}>))
    }}
    """

    # Query to find orphaned intermediate relations
    if predicate and object_value:
        intermediate_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            <{object_value}> a ?type .
            FILTER(?type IN (<{f">, <".join(intermediate_classes)}>))
            BIND(<{object_value}> AS ?entity)
        }}
        """
    else:
        # If we are deleting the whole entity, find all connected intermediate entities
        intermediate_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            # Find intermediate relations connected to the entity being deleted
            {{
                <{subject}> ?p ?entity .
                ?entity a ?type .
                FILTER(?type IN (<{f">, <".join(intermediate_classes)}>))
            }} UNION {{
                ?entity ?p <{subject}> .
                ?entity a ?type .
                FILTER(?type IN (<{f">, <".join(intermediate_classes)}>))
            }}
        }}
        """

    orphaned = []
    intermediate_orphans = []

    # Execute queries and process results
    for query, result_list in [
        (orphan_query, orphaned),
        (intermediate_query, intermediate_orphans),
    ]:
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            result_list.append(
                {"uri": result["entity"]["value"], "type": result["type"]["value"]}
            )

    return orphaned, intermediate_orphans
def import_entity_graph(editor: Editor, subject: str, max_depth: int = 5, include_referencing_entities: bool = False):
    """
    Recursively import the main subject and its connected entity graph up to a specified depth.

    This function imports the specified subject and all entities connected to it,
    directly or indirectly, up to the maximum depth specified. It traverses the
    graph of connected entities, importing each one into the editor.

    Args:
        editor (Editor): The Editor instance to use for importing.
        subject (str): The URI of the subject to start the import from.
        max_depth (int): The maximum depth of recursion (default is 5).
        include_referencing_entities (bool): Whether to include entities that have the subject as their object (default False).
            Useful when deleting an entity to ensure all references are properly removed.

    Returns:
        Editor: The updated Editor instance with all imported entities.
    """
    # Shared across the referencing pass and the recursive walk so each entity
    # is imported at most once.
    imported_subjects = set()

    # First import referencing entities if needed
    if include_referencing_entities:
        sparql = get_sparql()

        # Build query based on database type
        # (rdf:type links are excluded: class URIs are not entities to import).
        if editor.dataset_is_quadstore:
            query = f"""
            SELECT DISTINCT ?s
            WHERE {{
                GRAPH ?g {{
                    ?s ?p <{subject}> .
                }}
                FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
            }}
            """
        else:
            query = f"""
            SELECT DISTINCT ?s
            WHERE {{
                ?s ?p <{subject}> .
                FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
            }}
            """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        # Import each referencing entity
        for result in results["results"]["bindings"]:
            referencing_subject = result["s"]["value"]
            if referencing_subject != subject and referencing_subject not in imported_subjects:
                imported_subjects.add(referencing_subject)
                editor.import_entity(URIRef(referencing_subject))

    def recursive_import(current_subject: str, current_depth: int):
        # Depth-first walk over outgoing IRI links, bounded by max_depth.
        if current_depth > max_depth or current_subject in imported_subjects:
            return

        imported_subjects.add(current_subject)
        editor.import_entity(URIRef(current_subject))

        query = f"""
        SELECT ?p ?o
        WHERE {{
            <{current_subject}> ?p ?o .
            FILTER(isIRI(?o))
            FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
        }}
        """

        sparql = get_sparql()
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            object_entity = result["o"]["value"]
            recursive_import(object_entity, current_depth + 1)

    recursive_import(subject, 1)
    return editor
def get_entity_types(subject_uri: str) -> List[str]:
    """
    Return every rdf:type asserted for the given entity.

    Args:
        subject_uri: URI of the entity whose types are requested.

    Returns:
        List of type URIs (empty if the entity declares no type).
    """
    client = get_sparql()

    query = f"""
    SELECT ?type WHERE {{
        <{subject_uri}> a ?type .
    }}
    """

    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query().convert()

    bindings = response["results"]["bindings"]
    return [binding["type"]["value"] for binding in bindings]
def collect_referenced_entities(data, existing_entities=None):
    """
    Recursively collect all URIs of existing entities referenced in the structured data.

    Walks the structured data looking for explicit references to existing entities
    that must be imported into the editor before preexisting_finished() is called.

    Args:
        data: The structured data (can be dict, list, or string)
        existing_entities: Set to collect URIs (created if None)

    Returns:
        Set of URIs (strings) of existing entities that should be imported
    """
    collected = set() if existing_entities is None else existing_entities

    if isinstance(data, list):
        for element in data:
            collect_referenced_entities(element, collected)
    elif isinstance(data, dict):
        if data.get("is_existing_entity") is True and "entity_uri" in data:
            # Explicit reference to an already-existing entity: record it and
            # do not descend any further into this dict.
            collected.add(data["entity_uri"])
        elif "entity_type" in data:
            # A new entity being created: only its property values can hold references.
            for values in data.get("properties", {}).values():
                collect_referenced_entities(values, collected)
        else:
            # Plain container dict: scan every value.
            for nested in data.values():
                collect_referenced_entities(nested, collected)

    return collected
def import_referenced_entities(editor, structured_data):
    """
    Import all existing entities referenced in structured data into the editor.

    Call this before editor.preexisting_finished() so that every existing entity
    about to be linked has its snapshot created. Failures to import a single
    entity are reported and skipped rather than aborting the whole batch.

    Args:
        editor: The Editor instance
        structured_data: The structured data containing entity references
    """
    for entity_uri in collect_referenced_entities(structured_data):
        try:
            editor.import_entity(entity_uri)
        except Exception as e:
            # Best-effort: warn and move on to the remaining entities.
            print(f"Warning: Could not import entity {entity_uri}: {e}")