Coverage for heritrace/utils/sparql_utils.py: 94%
412 statements
coverage.py v7.6.12, created at 2025-11-26 11:33 +0000

import os
from collections import defaultdict
from concurrent.futures import ProcessPoolExecutor, as_completed
from typing import List

from rdflib import RDF, Dataset, Graph, Literal, URIRef
from rdflib.plugins.sparql.algebra import translateUpdate
from rdflib.plugins.sparql.parser import parseUpdate
from SPARQLWrapper import JSON
from time_agnostic_library.agnostic_entity import AgnosticEntity

from heritrace.editor import Editor
from heritrace.extensions import (get_change_tracking_config,
                                  get_classes_with_multiple_shapes,
                                  get_custom_filter, get_dataset_is_quadstore,
                                  get_display_rules, get_provenance_sparql,
                                  get_shacl_graph, get_sparql)
from heritrace.utils.converters import convert_to_datetime
from heritrace.utils.display_rules_utils import (find_matching_rule,
                                                 get_highest_priority_class,
                                                 get_sortable_properties,
                                                 is_entity_type_visible)
from heritrace.utils.shacl_utils import (determine_shape_for_classes,
                                         determine_shape_for_entity_triples)
from heritrace.utils.virtuoso_utils import (VIRTUOSO_EXCLUDED_GRAPHS,
                                            is_virtuoso)

_AVAILABLE_CLASSES_CACHE = None


def get_triples_from_graph(graph_or_dataset, pattern):
    """
    Get triples from a Graph or Dataset, handling both cases correctly.

    For Dataset (quadstore), converts quads to triples by extracting (s, p, o).
    For Graph (triplestore), uses triples() directly.

    Args:
        graph_or_dataset: Graph or Dataset instance
        pattern: Triple pattern tuple (s, p, o) where each can be None

    Returns:
        Generator of triples (s, p, o)
    """
    if isinstance(graph_or_dataset, Dataset):
        # For Dataset, use quads() and extract only (s, p, o)
        for s, p, o, g in graph_or_dataset.quads(pattern):
            yield (s, p, o)
    else:
        # For Graph, use triples() directly
        yield from graph_or_dataset.triples(pattern)
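
# Illustrative usage sketch (not part of the module): it only needs rdflib, so it
# can be run in isolation; the URIs are invented for the example.
#
#     from rdflib import Dataset, RDF, URIRef
#     ds = Dataset()
#     ds.graph(URIRef("http://example.org/g1")).add(
#         (URIRef("http://example.org/s"), RDF.type, URIRef("http://example.org/C"))
#     )
#     list(get_triples_from_graph(ds, (None, RDF.type, None)))
#     # -> one (s, p, o) triple, with the graph component dropped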


COUNT_LIMIT = int(os.getenv("COUNT_LIMIT", "10000"))


def precompute_available_classes_cache():
    """Pre-compute available classes cache at application startup."""
    global _AVAILABLE_CLASSES_CACHE
    _AVAILABLE_CLASSES_CACHE = get_available_classes()
    return _AVAILABLE_CLASSES_CACHE


def _wrap_virtuoso_graph_pattern(pattern: str) -> str:
    """Wrap a SPARQL pattern with Virtuoso GRAPH clause if needed."""
    if is_virtuoso():
        return f"""
        GRAPH ?g {{
            {pattern}
        }}
        FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
        """
    return pattern
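
# Illustrative note: on a Virtuoso backend a pattern such as "?subject a ?class ."
# comes back wrapped so that only user graphs are matched, roughly:
#
#     GRAPH ?g {
#         ?subject a ?class .
#     }
#     FILTER(?g NOT IN (<...VIRTUOSO_EXCLUDED_GRAPHS...>))
#
# On any other backend the pattern is returned unchanged.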


def _build_count_query_with_limit(class_uri: str, limit: int) -> str:
    """Build a COUNT query with LIMIT for a specific class."""
    return f"""
    SELECT (COUNT(?subject) as ?count)
    WHERE {{
        {{
            SELECT DISTINCT ?subject
            WHERE {{
                ?subject a <{class_uri}> .
            }}
            LIMIT {limit}
        }}
    }}
    """


def _count_class_instances(class_uri: str, limit: int = COUNT_LIMIT) -> tuple:
    """
    Count instances of a class up to a limit.

    Returns:
        tuple: (display_count, numeric_count) where display_count may be "LIMIT+"
    """
    sparql = get_sparql()
    query = _build_count_query_with_limit(class_uri, limit + 1)

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query().convert()

    count = int(result["results"]["bindings"][0]["count"]["value"])

    if count > limit:
        return f"{limit}+", limit
    return str(count), count
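
# Illustrative sketch of the counting trick (needs a configured SPARQL endpoint, so
# shown as a comment; the class URI is invented): the inner query is limited to
# limit + 1 subjects, which is just enough to distinguish "exactly limit" from
# "more than limit" without counting the whole class.
#
#     _count_class_instances("http://example.org/Article", limit=10000)
#     # -> ("10000+", 10000)  if more than 10000 instances exist
#     # -> ("8421", 8421)     if exactly 8421 instances exist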


def _get_entities_with_enhanced_shape_detection(class_uri: str, classes_with_multiple_shapes: set, limit: int = COUNT_LIMIT):
    """
    Get entities for a class using enhanced shape detection for classes with multiple shapes.
    Uses LIMIT to avoid loading all entities.
    """
    # Early exit if no classes have multiple shapes
    if not classes_with_multiple_shapes or class_uri not in classes_with_multiple_shapes:
        return defaultdict(list)

    sparql = get_sparql()

    subjects_query = f"""
    SELECT DISTINCT ?subject
    WHERE {{
        ?subject a <{class_uri}> .
    }}
    LIMIT {limit}
    """

    sparql.setQuery(subjects_query)
    sparql.setReturnFormat(JSON)
    subjects_results = sparql.query().convert()

    subjects = [r["subject"]["value"] for r in subjects_results["results"]["bindings"]]

    if not subjects:
        return defaultdict(list)

    # Fetch triples only for these specific subjects
    subjects_filter = " ".join([f"(<{s}>)" for s in subjects])
    pattern_with_filter = f"?subject a <{class_uri}> . ?subject ?p ?o . VALUES (?subject) {{ {subjects_filter} }}"

    triples_query = f"""
    SELECT ?subject ?p ?o
    WHERE {{
        {pattern_with_filter}
    }}
    """

    sparql.setQuery(triples_query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    entities_triples = defaultdict(list)
    for binding in results["results"]["bindings"]:
        subject = binding["subject"]["value"]
        predicate = binding["p"]["value"]
        obj = binding["o"]["value"]
        entities_triples[subject].append((subject, predicate, obj))

    shape_to_entities = defaultdict(list)
    for subject_uri, triples in entities_triples.items():
        shape_uri = determine_shape_for_entity_triples(triples)
        if shape_uri:
            entity_key = (class_uri, shape_uri)
            if is_entity_type_visible(entity_key):
                shape_to_entities[shape_uri].append({
                    "uri": subject_uri,
                    "class": class_uri,
                    "shape": shape_uri
                })

    return shape_to_entities


def get_classes_from_shacl_or_display_rules():
    """Extract classes from SHACL shapes or display_rules configuration."""
    SH_TARGET_CLASS = URIRef("http://www.w3.org/ns/shacl#targetClass")
    classes = set()

    shacl_graph = get_shacl_graph()
    if shacl_graph:
        for shape in shacl_graph.subjects(SH_TARGET_CLASS, None, unique=True):
            for target_class in shacl_graph.objects(shape, SH_TARGET_CLASS, unique=True):
                classes.add(str(target_class))

    if not classes:
        display_rules = get_display_rules()
        if display_rules:
            for rule in display_rules:
                if "target" in rule and "class" in rule["target"]:
                    classes.add(rule["target"]["class"])

    return list(classes)


def get_available_classes():
    """
    Fetch and format all available entity classes.
    Returns cached result if available (computed at startup).
    For small datasets (< COUNT_LIMIT), the cache is invalidated to keep counts accurate.
    """
    global _AVAILABLE_CLASSES_CACHE

    if _AVAILABLE_CLASSES_CACHE is not None:
        total_count = sum(cls.get('count_numeric', 0) for cls in _AVAILABLE_CLASSES_CACHE)
        if total_count < COUNT_LIMIT:
            _AVAILABLE_CLASSES_CACHE = None

    if _AVAILABLE_CLASSES_CACHE is not None:
        return _AVAILABLE_CLASSES_CACHE

    custom_filter = get_custom_filter()
    classes_from_config = get_classes_from_shacl_or_display_rules()

    if classes_from_config:
        class_uris = classes_from_config
    else:
        sparql = get_sparql()
        pattern = "?subject a ?class ."
        wrapped_pattern = _wrap_virtuoso_graph_pattern(pattern)

        query = f"""
        SELECT DISTINCT ?class
        WHERE {{
            {wrapped_pattern}
        }}
        """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()
        class_uris = [r["class"]["value"] for r in results["results"]["bindings"]]

    # Count instances for each class
    classes_with_counts = []
    for class_uri in class_uris:
        display_count, numeric_count = _count_class_instances(class_uri)
        classes_with_counts.append({
            "uri": class_uri,
            "display_count": display_count,
            "numeric_count": numeric_count
        })

    # Sort by count descending
    classes_with_counts.sort(key=lambda x: x["numeric_count"], reverse=True)

    available_classes = []
    classes_with_multiple_shapes = get_classes_with_multiple_shapes()

    for class_data in classes_with_counts:
        class_uri = class_data["uri"]

        if classes_with_multiple_shapes and class_uri in classes_with_multiple_shapes:
            shape_to_entities = _get_entities_with_enhanced_shape_detection(
                class_uri, classes_with_multiple_shapes, limit=COUNT_LIMIT
            )

            for shape_uri, entities in shape_to_entities.items():
                if entities:
                    entity_key = (class_uri, shape_uri)
                    available_classes.append({
                        "uri": class_uri,
                        "label": custom_filter.human_readable_class(entity_key),
                        "count": f"{len(entities)}+" if len(entities) >= COUNT_LIMIT else str(len(entities)),
                        "count_numeric": len(entities),
                        "shape": shape_uri
                    })
        else:
            shape_uri = determine_shape_for_classes([class_uri])
            entity_key = (class_uri, shape_uri)

            if is_entity_type_visible(entity_key):
                available_classes.append({
                    "uri": class_uri,
                    "label": custom_filter.human_readable_class(entity_key),
                    "count": class_data["display_count"],
                    "count_numeric": class_data["numeric_count"],
                    "shape": shape_uri
                })

    available_classes.sort(key=lambda x: x["label"].lower())
    return available_classes
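
# Illustrative sketch of the returned structure (URIs, labels and counts invented):
#
#     get_available_classes()
#     # -> [
#     #     {"uri": "http://example.org/Article", "label": "Article",
#     #      "count": "10000+", "count_numeric": 10000,
#     #      "shape": "http://example.org/shapes/ArticleShape"},
#     #     ...
#     # ]
#
# One entry is produced per (class, shape) pair, hidden entity types are skipped,
# and the final list is sorted alphabetically by label.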


def build_sort_clause(sort_property: str, entity_type: str, shape_uri: str = None) -> str:
    """
    Build a SPARQL sort clause based on the sortableBy configuration.

    Args:
        sort_property: The property to sort by
        entity_type: The entity type URI
        shape_uri: Optional shape URI for more specific sorting rules

    Returns:
        SPARQL sort clause or empty string
    """
    if not sort_property or not entity_type:
        return ""

    rule = find_matching_rule(entity_type, shape_uri)

    if not rule or "sortableBy" not in rule:
        return ""

    sort_config = next(
        (s for s in rule["sortableBy"] if s.get("property") == sort_property),
        None
    )

    if not sort_config:
        return ""

    return f"OPTIONAL {{ ?subject <{sort_property}> ?sortValue }}"


def get_entities_for_class(
    selected_class, page, per_page, sort_property=None, sort_direction="ASC", selected_shape=None
):
    """
    Retrieve entities for a specific class with pagination and sorting.

    Args:
        selected_class (str): URI of the class to retrieve entities for
        page (int): Page number (1-indexed)
        per_page (int): Number of entities per page
        sort_property (str, optional): Property URI to sort by. Defaults to None.
        sort_direction (str, optional): Sort direction ("ASC" or "DESC"). Defaults to "ASC".
        selected_shape (str, optional): Shape URI for filtering entities. Defaults to None.

    Returns:
        tuple: (list of entities, total count)

    Performance Notes:
        - If sort_property is None, NO ORDER BY clause is applied to the SPARQL query.
          This significantly improves performance for large datasets by avoiding expensive
          sorting operations on URIs.
        - Without explicit ordering, the triplestore returns results in its natural order,
          which is deterministic within a session but may vary after database reloads.
        - For optimal performance with large datasets, configure display_rules.yaml without
          sortableBy properties to prevent users from triggering expensive sort operations.
    """
    sparql = get_sparql()
    custom_filter = get_custom_filter()
    classes_with_multiple_shapes = get_classes_with_multiple_shapes()

    use_shape_filtering = (selected_shape and selected_class in classes_with_multiple_shapes)

    if use_shape_filtering:
        # For shape filtering, we need to fetch entities and check their shape
        # Use a larger LIMIT to ensure we get enough entities after filtering
        offset = (page - 1) * per_page
        fetch_limit = per_page * 5  # Safety margin for filtering

        subjects_query = f"""
        SELECT DISTINCT ?subject
        WHERE {{
            ?subject a <{selected_class}> .
        }}
        LIMIT {fetch_limit}
        OFFSET {offset}
        """

        sparql.setQuery(subjects_query)
        sparql.setReturnFormat(JSON)
        subjects_results = sparql.query().convert()

        subjects = [r["subject"]["value"] for r in subjects_results["results"]["bindings"]]

        if not subjects:
            return [], 0

        # Now fetch triples for these specific subjects
        subjects_filter = " ".join([f"(<{s}>)" for s in subjects])

        triples_query = f"""
        SELECT ?subject ?p ?o
        WHERE {{
            ?subject a <{selected_class}> . ?subject ?p ?o . VALUES (?subject) {{ {subjects_filter} }}
        }}
        """

        sparql.setQuery(triples_query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        entities_triples = defaultdict(list)
        for binding in results["results"]["bindings"]:
            subject = binding["subject"]["value"]
            predicate = binding["p"]["value"]
            obj = binding["o"]["value"]
            entities_triples[subject].append((subject, predicate, obj))

        filtered_entities = []
        for subject_uri, triples in entities_triples.items():
            entity_shape = determine_shape_for_entity_triples(list(triples))
            if entity_shape == selected_shape:
                entity_label = custom_filter.human_readable_entity(
                    subject_uri, (selected_class, selected_shape), None
                )
                filtered_entities.append({"uri": subject_uri, "label": entity_label})

        if sort_property and sort_direction:
            reverse_sort = sort_direction.upper() == "DESC"
            filtered_entities.sort(key=lambda x: x["label"].lower(), reverse=reverse_sort)

        # For shape-filtered results, we can't accurately determine total_count without scanning all entities
        # Return the number of filtered entities as an approximation
        total_count = len(filtered_entities)
        return filtered_entities[:per_page], total_count

    # Standard pagination path
    offset = (page - 1) * per_page
    sort_clause = ""
    order_clause = ""

    if sort_property:
        sort_clause = build_sort_clause(sort_property, selected_class, selected_shape)
        if sort_clause:
            order_clause = f"ORDER BY {sort_direction}(?sortValue)"

    entities_query = f"""
    SELECT ?subject {f"?sortValue" if sort_property else ""}
    WHERE {{
        ?subject a <{selected_class}> . {sort_clause}
    }}
    {order_clause}
    LIMIT {per_page}
    OFFSET {offset}
    """

    available_classes = get_available_classes()

    class_info = next(
        (c for c in available_classes
         if c["uri"] == selected_class and c.get("shape") == selected_shape),
        None
    )
    total_count = class_info.get("count_numeric", 0) if class_info else 0

    sparql.setQuery(entities_query)
    sparql.setReturnFormat(JSON)
    entities_results = sparql.query().convert()

    entities = []
    shape = selected_shape if selected_shape else determine_shape_for_classes([selected_class])

    for result in entities_results["results"]["bindings"]:
        subject_uri = result["subject"]["value"]
        entity_label = custom_filter.human_readable_entity(
            subject_uri, (selected_class, shape), None
        )
        entities.append({"uri": subject_uri, "label": entity_label})

    return entities, total_count


def get_catalog_data(
    selected_class: str,
    page: int,
    per_page: int,
    sort_property: str = None,
    sort_direction: str = "ASC",
    selected_shape: str = None
) -> dict:
    """
    Get catalog data with pagination and sorting.

    Args:
        selected_class (str): Selected class URI
        page (int): Current page number
        per_page (int): Items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC')
        selected_shape (str, optional): URI of the shape to use for sorting rules

    Returns:
        dict: Catalog data including entities, pagination info, and sort settings
    """
    entities = []
    total_count = 0
    sortable_properties = []

    if selected_class:
        sortable_properties = get_sortable_properties(
            (selected_class, selected_shape)
        )

        if not sort_property and sortable_properties:
            sort_property = sortable_properties[0]["property"]

        entities, total_count = get_entities_for_class(
            selected_class, page, per_page, sort_property, sort_direction, selected_shape
        )

    return {
        "entities": entities,
        "total_pages": (
            (total_count + per_page - 1) // per_page if total_count > 0 else 0
        ),
        "current_page": page,
        "per_page": per_page,
        "total_count": total_count,
        "sort_property": sort_property,
        "sort_direction": sort_direction,
        "sortable_properties": sortable_properties,
        "selected_class": selected_class,
        "selected_shape": selected_shape,
    }
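
# Illustrative note on the pagination arithmetic (values invented): the ceiling
# division (total_count + per_page - 1) // per_page turns total_count=25 with
# per_page=10 into 3 pages, while total_count=0 yields 0 pages.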


def fetch_data_graph_for_subject(subject: str) -> Graph | Dataset:
    """
    Fetch all triples/quads associated with a subject from the dataset.
    Handles both triplestore and quadstore cases appropriately.

    Args:
        subject (str): The URI of the subject to fetch data for

    Returns:
        Graph|Dataset: A graph containing all triples/quads for the subject
    """
    g = Dataset() if get_dataset_is_quadstore() else Graph()
    sparql = get_sparql()

    if is_virtuoso():
        # For Virtuoso we need to explicitly query the graph
        query = f"""
        SELECT ?predicate ?object ?g WHERE {{
            GRAPH ?g {{
                <{subject}> ?predicate ?object.
            }}
            FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
        }}
        """
    else:
        if get_dataset_is_quadstore():
            # For non-Virtuoso quadstores, we need to query all graphs
            query = f"""
            SELECT ?predicate ?object ?g WHERE {{
                GRAPH ?g {{
                    <{subject}> ?predicate ?object.
                }}
            }}
            """
        else:
            # For regular triplestores
            query = f"""
            SELECT ?predicate ?object WHERE {{
                <{subject}> ?predicate ?object.
            }}
            """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    query_results = sparql.query().convert()
    results = query_results.get("results", {}).get("bindings", [])

    for result in results:
        # Create the appropriate value (Literal or URIRef)
        obj_data = result["object"]
        if obj_data["type"] in {"literal", "typed-literal"}:
            if "datatype" in obj_data:
                value = Literal(
                    obj_data["value"], datatype=URIRef(obj_data["datatype"])
                )
            else:
                # Create literal without explicit datatype to match Reader.import_entities_from_triplestore
                value = Literal(obj_data["value"])
        else:
            value = URIRef(obj_data["value"])

        # Add triple/quad based on store type
        if get_dataset_is_quadstore():
            graph_uri = URIRef(result["g"]["value"])
            g.add(
                (
                    URIRef(subject),
                    URIRef(result["predicate"]["value"]),
                    value,
                    graph_uri,
                )
            )
        else:
            g.add((URIRef(subject), URIRef(result["predicate"]["value"]), value))

    return g


def parse_sparql_update(query) -> dict:
    """Parse a SPARQL update query and return the inserted/deleted triples,
    grouped as {"Additions": [...], "Deletions": [...]}."""
    parsed = parseUpdate(query)
    translated = translateUpdate(parsed).algebra
    modifications = {}

    def extract_quads(quads):
        result = []
        for graph, triples in quads.items():
            for triple in triples:
                result.append((triple[0], triple[1], triple[2]))
        return result

    for operation in translated:
        if operation.name == "DeleteData":
            if hasattr(operation, "quads") and operation.quads:
                deletions = extract_quads(operation.quads)
            else:
                deletions = operation.triples
            if deletions:
                modifications.setdefault("Deletions", list()).extend(deletions)
        elif operation.name == "InsertData":
            if hasattr(operation, "quads") and operation.quads:
                additions = extract_quads(operation.quads)
            else:
                additions = operation.triples
            if additions:
                modifications.setdefault("Additions", list()).extend(additions)

    return modifications
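
# Illustrative sketch (only rdflib is required; the graph and URIs are invented):
#
#     update = """
#     INSERT DATA {
#         GRAPH <http://example.org/g> {
#             <http://example.org/s> <http://example.org/p> "value" .
#         }
#     }
#     """
#     parse_sparql_update(update)
#     # -> {"Additions": [(URIRef('http://example.org/s'),
#     #                    URIRef('http://example.org/p'),
#     #                    Literal('value'))]}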


def fetch_current_state_with_related_entities(
    provenance: dict,
) -> Graph | Dataset:
    """
    Fetch the current state of an entity and all its related entities known from provenance.

    Args:
        provenance (dict): Dictionary containing provenance metadata for the main entity and related entities

    Returns:
        Graph|Dataset: A graph containing the current state of all entities
    """
    combined_graph = Dataset() if get_dataset_is_quadstore() else Graph()

    # Fetch state for all entities mentioned in provenance
    for entity_uri in provenance.keys():
        current_graph = fetch_data_graph_for_subject(entity_uri)

        if get_dataset_is_quadstore():
            for quad in current_graph.quads():
                combined_graph.add(quad)
        else:
            for triple in current_graph:
                combined_graph.add(triple)

    return combined_graph


def get_deleted_entities_with_filtering(
    page=1,
    per_page=50,
    sort_property="deletionTime",
    sort_direction="DESC",
    selected_class=None,
    selected_shape=None,
):
    """
    Fetch and process deleted entities from the provenance graph, with filtering and sorting.
    """
    sortable_properties = [
        {"property": "deletionTime", "displayName": "Deletion Time", "sortType": "date"}
    ]
    provenance_sparql = get_provenance_sparql()
    custom_filter = get_custom_filter()

    prov_query = """
    SELECT DISTINCT ?entity ?lastSnapshot ?deletionTime ?agent ?lastValidSnapshotTime
    WHERE {
        ?lastSnapshot a <http://www.w3.org/ns/prov#Entity> ;
                      <http://www.w3.org/ns/prov#specializationOf> ?entity ;
                      <http://www.w3.org/ns/prov#generatedAtTime> ?deletionTime ;
                      <http://www.w3.org/ns/prov#invalidatedAtTime> ?invalidationTime ;
                      <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastValidSnapshot.

        ?lastValidSnapshot <http://www.w3.org/ns/prov#generatedAtTime> ?lastValidSnapshotTime .

        OPTIONAL { ?lastSnapshot <http://www.w3.org/ns/prov#wasAttributedTo> ?agent . }

        FILTER NOT EXISTS {
            ?laterSnapshot <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastSnapshot .
        }
    }
    """
    provenance_sparql.setQuery(prov_query)
    provenance_sparql.setReturnFormat(JSON)
    prov_results = provenance_sparql.query().convert()

    results_bindings = prov_results["results"]["bindings"]
    if not results_bindings:
        return [], [], None, None, [], 0

    deleted_entities = []
    max_workers = max(1, min(os.cpu_count() or 4, len(results_bindings)))
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        future_to_entity = {
            executor.submit(process_deleted_entity, result, sortable_properties): result
            for result in results_bindings
        }
        for future in as_completed(future_to_entity):
            entity_info = future.result()
            if entity_info is not None:
                deleted_entities.append(entity_info)

    class_counts = {}
    for entity in deleted_entities:
        for type_uri in entity["entity_types"]:
            class_counts[type_uri] = class_counts.get(type_uri, 0) + 1

    available_classes = [
        {
            "uri": class_uri,
            "label": custom_filter.human_readable_class((class_uri, determine_shape_for_classes([class_uri]))),
            "count": count,
        }
        for class_uri, count in class_counts.items()
    ]

    reverse_sort = sort_direction.upper() == "DESC"
    if sort_property == "deletionTime":
        deleted_entities.sort(key=lambda e: e["deletionTime"], reverse=reverse_sort)
    else:
        deleted_entities.sort(
            key=lambda e: e["sort_values"].get(sort_property, "").lower(),
            reverse=reverse_sort,
        )

    available_classes.sort(key=lambda x: x["label"].lower())
    if not selected_class and available_classes:
        selected_class = available_classes[0]["uri"]

    if selected_class:
        if selected_shape is None:
            selected_shape = determine_shape_for_classes([selected_class])
        entity_key = (selected_class, selected_shape)
        sortable_properties.extend(
            get_sortable_properties(entity_key)
        )

    if selected_class:
        filtered_entities = [
            entity
            for entity in deleted_entities
            if selected_class in entity["entity_types"]
        ]
    else:
        filtered_entities = deleted_entities

    total_count = len(filtered_entities)
    offset = (page - 1) * per_page
    paginated_entities = filtered_entities[offset : offset + per_page]

    return paginated_entities, available_classes, selected_class, selected_shape, sortable_properties, total_count


def process_deleted_entity(result: dict, sortable_properties: list) -> dict | None:
    """
    Process a single deleted entity, filtering by visible classes.
    """
    change_tracking_config = get_change_tracking_config()
    custom_filter = get_custom_filter()

    entity_uri = result["entity"]["value"]
    last_valid_snapshot_time = result["lastValidSnapshotTime"]["value"]

    agnostic_entity = AgnosticEntity(
        res=entity_uri,
        config=change_tracking_config,
        include_related_objects=True,
        include_merged_entities=True,
        include_reverse_relations=True,
    )
    state, _, _ = agnostic_entity.get_state_at_time(
        (last_valid_snapshot_time, last_valid_snapshot_time)
    )

    if entity_uri not in state:
        return None

    last_valid_time = convert_to_datetime(last_valid_snapshot_time, stringify=True)
    last_valid_state: Dataset = state[entity_uri][last_valid_time]

    entity_types = [
        str(o)
        for s, p, o in get_triples_from_graph(last_valid_state, (URIRef(entity_uri), RDF.type, None))
    ]
    highest_priority_type = get_highest_priority_class(entity_types)
    shape = determine_shape_for_classes([highest_priority_type])
    visible_types = [t for t in entity_types if is_entity_type_visible((t, determine_shape_for_classes([t])))]
    if not visible_types:
        return None

    sort_values = {}
    for prop in sortable_properties:
        prop_uri = prop["property"]
        values = [
            str(o)
            for s, p, o in get_triples_from_graph(
                last_valid_state, (URIRef(entity_uri), URIRef(prop_uri), None)
            )
        ]
        sort_values[prop_uri] = values[0] if values else ""

    return {
        "uri": entity_uri,
        "deletionTime": result["deletionTime"]["value"],
        "deletedBy": custom_filter.format_agent_reference(
            result.get("agent", {}).get("value", "")
        ),
        "lastValidSnapshotTime": last_valid_snapshot_time,
        "type": custom_filter.human_readable_predicate(
            highest_priority_type, (highest_priority_type, shape)
        ),
        "label": custom_filter.human_readable_entity(
            entity_uri, (highest_priority_type, shape), last_valid_state
        ),
        "entity_types": visible_types,
        "sort_values": sort_values,
    }


def find_orphaned_entities(subject, entity_type, predicate=None, object_value=None):
    """
    Find entities that would become orphaned after deleting a triple or an entire entity,
    including intermediate relation entities.

    An entity is considered orphaned if:
    1. It has no incoming references from other entities (except from the entity being deleted)
    2. It does not reference any entities that are subjects of other triples

    For intermediate relations, an entity is also considered orphaned if:
    1. It connects to the entity being deleted
    2. It has no other valid connections after the deletion
    3. It is directly involved in the deletion operation (if predicate and object_value are specified)

    Args:
        subject (str): The URI of the subject being deleted
        entity_type (str): The type of the entity being deleted
        predicate (str, optional): The predicate being deleted
        object_value (str, optional): The object value being deleted

    Returns:
        tuple: Lists of (orphaned_entities, intermediate_orphans)
    """
    sparql = get_sparql()
    display_rules = get_display_rules()

    intermediate_classes = set()

    for rule in display_rules:
        if "target" in rule and "class" in rule["target"] and rule["target"]["class"] == entity_type:
            for prop in rule.get("displayProperties", []):
                if "intermediateRelation" in prop:
                    intermediate_classes.add(prop["intermediateRelation"]["class"])

    orphan_query = f"""
    SELECT DISTINCT ?entity ?type
    WHERE {{
        {f"<{subject}> <{predicate}> ?entity ." if predicate and object_value else ""}
        {f"FILTER(?entity = <{object_value}>)" if predicate and object_value else ""}

        # If no specific predicate, get all connected entities
        {f"<{subject}> ?p ?entity ." if not predicate else ""}

        FILTER(isIRI(?entity))
        ?entity a ?type .

        # No incoming references from other entities
        FILTER NOT EXISTS {{
            ?other ?anyPredicate ?entity .
            FILTER(?other != <{subject}>)
        }}

        # No outgoing references to active entities
        FILTER NOT EXISTS {{
            ?entity ?outgoingPredicate ?connectedEntity .
            ?connectedEntity ?furtherPredicate ?furtherObject .
            {f"FILTER(?connectedEntity != <{subject}>)" if not predicate else ""}
        }}

        # Exclude intermediate relation entities
        FILTER(?type NOT IN (<{f">, <".join(intermediate_classes)}>))
    }}
    """

    # Query to find orphaned intermediate relations
    if predicate and object_value:
        intermediate_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            <{object_value}> a ?type .
            FILTER(?type IN (<{f">, <".join(intermediate_classes)}>))
            BIND(<{object_value}> AS ?entity)
        }}
        """
    else:
        # If the entire entity is being deleted, find all connected intermediate entities
        intermediate_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            # Find intermediate relations connected to the entity being deleted
            {{
                <{subject}> ?p ?entity .
                ?entity a ?type .
                FILTER(?type IN (<{f">, <".join(intermediate_classes)}>))
            }} UNION {{
                ?entity ?p <{subject}> .
                ?entity a ?type .
                FILTER(?type IN (<{f">, <".join(intermediate_classes)}>))
            }}
        }}
        """

    orphaned = []
    intermediate_orphans = []

    # Execute queries and process results
    for query, result_list in [
        (orphan_query, orphaned),
        (intermediate_query, intermediate_orphans),
    ]:
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            result_list.append(
                {"uri": result["entity"]["value"], "type": result["type"]["value"]}
            )

    return orphaned, intermediate_orphans


def import_entity_graph(editor: Editor, subject: str, max_depth: int = 5, include_referencing_entities: bool = False):
    """
    Recursively import the main subject and its connected entity graph up to a specified depth.

    This function imports the specified subject and all entities connected to it,
    directly or indirectly, up to the maximum depth specified. It traverses the
    graph of connected entities, importing each one into the editor.

    Args:
        editor (Editor): The Editor instance to use for importing.
        subject (str): The URI of the subject to start the import from.
        max_depth (int): The maximum depth of recursion (default is 5).
        include_referencing_entities (bool): Whether to include entities that have the subject as their object (default False).
            Useful when deleting an entity to ensure all references are properly removed.

    Returns:
        Editor: The updated Editor instance with all imported entities.
    """
    imported_subjects = set()

    # First import referencing entities if needed
    if include_referencing_entities:
        sparql = get_sparql()

        # Build query based on database type
        if editor.dataset_is_quadstore:
            query = f"""
            SELECT DISTINCT ?s
            WHERE {{
                GRAPH ?g {{
                    ?s ?p <{subject}> .
                }}
                FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
            }}
            """
        else:
            query = f"""
            SELECT DISTINCT ?s
            WHERE {{
                ?s ?p <{subject}> .
                FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
            }}
            """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        # Import each referencing entity
        for result in results["results"]["bindings"]:
            referencing_subject = result["s"]["value"]
            if referencing_subject != subject and referencing_subject not in imported_subjects:
                imported_subjects.add(referencing_subject)
                editor.import_entity(URIRef(referencing_subject))

    def recursive_import(current_subject: str, current_depth: int):
        if current_depth > max_depth or current_subject in imported_subjects:
            return

        imported_subjects.add(current_subject)
        editor.import_entity(URIRef(current_subject))

        query = f"""
        SELECT ?p ?o
        WHERE {{
            <{current_subject}> ?p ?o .
            FILTER(isIRI(?o))
            FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
        }}
        """

        sparql = get_sparql()
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            object_entity = result["o"]["value"]
            recursive_import(object_entity, current_depth + 1)

    recursive_import(subject, 1)
    return editor
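
# Illustrative usage sketch (comment only, since it needs a live triplestore and an
# initialized Editor; the URI is invented):
#
#     import_entity_graph(editor, "http://example.org/entity/1",
#                         max_depth=2, include_referencing_entities=True)
#
# This imports entity/1, everything it points to up to two hops away, and every
# entity that points at it, so a later deletion can clean up all references.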


def get_entity_types(subject_uri: str) -> List[str]:
    """
    Get all RDF types for an entity.

    Args:
        subject_uri: URI of the entity

    Returns:
        List of type URIs
    """
    sparql = get_sparql()

    query = f"""
    SELECT ?type WHERE {{
        <{subject_uri}> a ?type .
    }}
    """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    return [result["type"]["value"] for result in results["results"]["bindings"]]


def collect_referenced_entities(data, existing_entities=None):
    """
    Recursively collect all URIs of existing entities referenced in the structured data.

    This function traverses the structured data to find explicit references to existing entities
    that need to be imported into the editor before calling preexisting_finished().

    Args:
        data: The structured data (can be dict, list, or string)
        existing_entities: Set to collect URIs (created if None)

    Returns:
        Set of URIs (strings) of existing entities that should be imported
    """
    if existing_entities is None:
        existing_entities = set()

    if isinstance(data, dict):
        if data.get("is_existing_entity") is True and "entity_uri" in data:
            existing_entities.add(data["entity_uri"])
        # If it's an entity with entity_type, it's a new entity being created
        elif "entity_type" in data:
            properties = data.get("properties", {})
            for prop_values in properties.values():
                collect_referenced_entities(prop_values, existing_entities)
        else:
            for value in data.values():
                collect_referenced_entities(value, existing_entities)

    elif isinstance(data, list):
        for item in data:
            collect_referenced_entities(item, existing_entities)

    return existing_entities
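
# Illustrative sketch (pure Python, runnable in isolation; the keys follow the
# structured-data conventions used above, the values are invented):
#
#     data = {
#         "entity_type": "http://example.org/Article",
#         "properties": {
#             "http://purl.org/dc/terms/creator": [
#                 {"is_existing_entity": True,
#                  "entity_uri": "http://example.org/person/42"},
#             ],
#         },
#     }
#     collect_referenced_entities(data)
#     # -> {"http://example.org/person/42"}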


def import_referenced_entities(editor, structured_data):
    """
    Import all existing entities referenced in structured data into the editor.

    This function should be called before editor.preexisting_finished() to ensure
    that all existing entities that will be linked have their snapshots created.

    Args:
        editor: The Editor instance
        structured_data: The structured data containing entity references
    """
    referenced_entities = collect_referenced_entities(structured_data)
    for entity_uri in referenced_entities:
        try:
            editor.import_entity(entity_uri)
        except Exception as e:
            print(f"Warning: Could not import entity {entity_uri}: {e}")
            continue