Coverage for heritrace/utils/sparql_utils.py: 95%
403 statements
import os
from collections import defaultdict
from concurrent.futures import ProcessPoolExecutor, as_completed
from typing import List

from rdflib import RDF, ConjunctiveGraph, Graph, Literal, URIRef
from rdflib.plugins.sparql.algebra import translateUpdate
from rdflib.plugins.sparql.parser import parseUpdate
from SPARQLWrapper import JSON
from time_agnostic_library.agnostic_entity import AgnosticEntity

from heritrace.editor import Editor
from heritrace.extensions import (get_change_tracking_config,
                                  get_classes_with_multiple_shapes,
                                  get_custom_filter, get_dataset_is_quadstore,
                                  get_display_rules, get_provenance_sparql,
                                  get_shacl_graph, get_sparql)
from heritrace.utils.converters import convert_to_datetime
from heritrace.utils.display_rules_utils import (find_matching_rule,
                                                 get_highest_priority_class,
                                                 get_sortable_properties,
                                                 is_entity_type_visible)
from heritrace.utils.shacl_utils import (determine_shape_for_classes,
                                         determine_shape_for_entity_triples)
from heritrace.utils.virtuoso_utils import (VIRTUOSO_EXCLUDED_GRAPHS,
                                            is_virtuoso)

_AVAILABLE_CLASSES_CACHE = None
COUNT_LIMIT = int(os.getenv("COUNT_LIMIT", "10000"))


def precompute_available_classes_cache():
    """Pre-compute the available classes cache at application startup."""
    global _AVAILABLE_CLASSES_CACHE
    _AVAILABLE_CLASSES_CACHE = get_available_classes()
    return _AVAILABLE_CLASSES_CACHE


def _wrap_virtuoso_graph_pattern(pattern: str) -> str:
    """Wrap a SPARQL pattern in a Virtuoso GRAPH clause if needed."""
    if is_virtuoso():
        return f"""
            GRAPH ?g {{
                {pattern}
            }}
            FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
        """
    return pattern
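
# Illustrative note (not part of the original module): on a Virtuoso backend,
# a pattern such as "?subject a ?class ." is wrapped roughly as
#
#   GRAPH ?g {
#       ?subject a ?class .
#   }
#   FILTER(?g NOT IN (<http://.../graph1>, <http://.../graph2>))
#
# where the excluded graph list comes from VIRTUOSO_EXCLUDED_GRAPHS (the URIs
# shown are placeholders). On non-Virtuoso backends the pattern is returned
# unchanged.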


def _build_count_query_with_limit(class_uri: str, limit: int) -> str:
    """Build a COUNT query with a LIMIT for a specific class."""
    return f"""
        SELECT (COUNT(?subject) as ?count)
        WHERE {{
            {{
                SELECT DISTINCT ?subject
                WHERE {{
                    ?subject a <{class_uri}> .
                }}
                LIMIT {limit}
            }}
        }}
    """


def _count_class_instances(class_uri: str, limit: int = COUNT_LIMIT) -> tuple:
    """
    Count instances of a class up to a limit.

    Returns:
        tuple: (display_count, numeric_count) where display_count may be "LIMIT+"
    """
    sparql = get_sparql()
    query = _build_count_query_with_limit(class_uri, limit + 1)

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query().convert()

    count = int(result["results"]["bindings"][0]["count"]["value"])

    if count > limit:
        return f"{limit}+", limit
    return str(count), count
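
# Illustrative note (not part of the original module): with the default
# COUNT_LIMIT of 10000, a class with 42 instances yields ("42", 42), while a
# class with more than 10000 instances yields ("10000+", 10000). The inner
# query is limited to limit + 1 so the overflow case can be detected without
# counting every instance.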


def _get_entities_with_enhanced_shape_detection(class_uri: str, classes_with_multiple_shapes: set, limit: int = COUNT_LIMIT):
    """
    Get entities for a class using enhanced shape detection for classes with multiple shapes.
    Uses LIMIT to avoid loading all entities.
    """
    # Early exit if no classes have multiple shapes
    if not classes_with_multiple_shapes or class_uri not in classes_with_multiple_shapes:
        return defaultdict(list)

    sparql = get_sparql()

    subjects_query = f"""
        SELECT DISTINCT ?subject
        WHERE {{
            ?subject a <{class_uri}> .
        }}
        LIMIT {limit}
    """

    sparql.setQuery(subjects_query)
    sparql.setReturnFormat(JSON)
    subjects_results = sparql.query().convert()

    subjects = [r["subject"]["value"] for r in subjects_results["results"]["bindings"]]

    if not subjects:
        return defaultdict(list)

    # Fetch triples only for these specific subjects
    subjects_filter = " ".join([f"(<{s}>)" for s in subjects])
    pattern_with_filter = f"?subject a <{class_uri}> . ?subject ?p ?o . VALUES (?subject) {{ {subjects_filter} }}"

    triples_query = f"""
        SELECT ?subject ?p ?o
        WHERE {{
            {pattern_with_filter}
        }}
    """

    sparql.setQuery(triples_query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    entities_triples = defaultdict(list)
    for binding in results["results"]["bindings"]:
        subject = binding["subject"]["value"]
        predicate = binding["p"]["value"]
        obj = binding["o"]["value"]
        entities_triples[subject].append((subject, predicate, obj))

    shape_to_entities = defaultdict(list)
    for subject_uri, triples in entities_triples.items():
        shape_uri = determine_shape_for_entity_triples(triples)
        if shape_uri:
            entity_key = (class_uri, shape_uri)
            if is_entity_type_visible(entity_key):
                shape_to_entities[shape_uri].append({
                    "uri": subject_uri,
                    "class": class_uri,
                    "shape": shape_uri
                })

    return shape_to_entities


def get_classes_from_shacl_or_display_rules():
    """Extract classes from SHACL shapes or the display_rules configuration."""
    SH_TARGET_CLASS = URIRef("http://www.w3.org/ns/shacl#targetClass")
    classes = set()

    shacl_graph = get_shacl_graph()
    if shacl_graph:
        for shape in shacl_graph.subjects(SH_TARGET_CLASS, None, unique=True):
            for target_class in shacl_graph.objects(shape, SH_TARGET_CLASS, unique=True):
                classes.add(str(target_class))

    if not classes:
        display_rules = get_display_rules()
        if display_rules:
            for rule in display_rules:
                if "target" in rule and "class" in rule["target"]:
                    classes.add(rule["target"]["class"])

    return list(classes)


def get_available_classes():
    """
    Fetch and format all available entity classes.
    Returns the cached result if available (computed at startup).
    """
    global _AVAILABLE_CLASSES_CACHE

    if _AVAILABLE_CLASSES_CACHE is not None:
        return _AVAILABLE_CLASSES_CACHE

    custom_filter = get_custom_filter()
    classes_from_config = get_classes_from_shacl_or_display_rules()

    if classes_from_config:
        class_uris = classes_from_config
    else:
        sparql = get_sparql()
        pattern = "?subject a ?class ."
        wrapped_pattern = _wrap_virtuoso_graph_pattern(pattern)

        query = f"""
            SELECT DISTINCT ?class
            WHERE {{
                {wrapped_pattern}
            }}
        """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()
        class_uris = [r["class"]["value"] for r in results["results"]["bindings"]]

    # Count instances for each class
    classes_with_counts = []
    for class_uri in class_uris:
        display_count, numeric_count = _count_class_instances(class_uri)
        classes_with_counts.append({
            "uri": class_uri,
            "display_count": display_count,
            "numeric_count": numeric_count
        })

    # Sort by count descending
    classes_with_counts.sort(key=lambda x: x["numeric_count"], reverse=True)

    available_classes = []
    classes_with_multiple_shapes = get_classes_with_multiple_shapes()

    for class_data in classes_with_counts:
        class_uri = class_data["uri"]

        if classes_with_multiple_shapes and class_uri in classes_with_multiple_shapes:
            shape_to_entities = _get_entities_with_enhanced_shape_detection(
                class_uri, classes_with_multiple_shapes, limit=COUNT_LIMIT
            )

            for shape_uri, entities in shape_to_entities.items():
                if entities:
                    entity_key = (class_uri, shape_uri)
                    available_classes.append({
                        "uri": class_uri,
                        "label": custom_filter.human_readable_class(entity_key),
                        "count": f"{len(entities)}+" if len(entities) >= COUNT_LIMIT else str(len(entities)),
                        "count_numeric": len(entities),
                        "shape": shape_uri
                    })
        else:
            shape_uri = determine_shape_for_classes([class_uri])
            entity_key = (class_uri, shape_uri)

            if is_entity_type_visible(entity_key):
                available_classes.append({
                    "uri": class_uri,
                    "label": custom_filter.human_readable_class(entity_key),
                    "count": class_data["display_count"],
                    "count_numeric": class_data["numeric_count"],
                    "shape": shape_uri
                })

    available_classes.sort(key=lambda x: x["label"].lower())
    return available_classes


def build_sort_clause(sort_property: str, entity_type: str, shape_uri: str = None) -> str:
    """
    Build a SPARQL sort clause based on the sortableBy configuration.

    Args:
        sort_property: The property to sort by
        entity_type: The entity type URI
        shape_uri: Optional shape URI for more specific sorting rules

    Returns:
        SPARQL sort clause or an empty string
    """
    if not sort_property or not entity_type:
        return ""

    rule = find_matching_rule(entity_type, shape_uri)

    if not rule or "sortableBy" not in rule:
        return ""

    sort_config = next(
        (s for s in rule["sortableBy"] if s.get("property") == sort_property),
        None
    )

    if not sort_config:
        return ""

    return f"OPTIONAL {{ ?subject <{sort_property}> ?sortValue }}"


def get_entities_for_class(
    selected_class, page, per_page, sort_property=None, sort_direction="ASC", selected_shape=None
):
    """
    Retrieve entities for a specific class with pagination and sorting.

    Args:
        selected_class (str): URI of the class to retrieve entities for
        page (int): Page number (1-indexed)
        per_page (int): Number of entities per page
        sort_property (str, optional): Property URI to sort by. Defaults to None.
        sort_direction (str, optional): Sort direction ("ASC" or "DESC"). Defaults to "ASC".
        selected_shape (str, optional): Shape URI for filtering entities. Defaults to None.

    Returns:
        tuple: (list of entities, total count)

    Performance Notes:
        - If sort_property is None, NO ORDER BY clause is applied to the SPARQL query.
          This significantly improves performance for large datasets by avoiding expensive
          sorting operations on URIs.
        - Without explicit ordering, the triplestore returns results in its natural order,
          which is deterministic within a session but may vary after database reloads.
        - For optimal performance with large datasets, configure display_rules.yaml without
          sortableBy properties to prevent users from triggering expensive sort operations.
    """
    sparql = get_sparql()
    custom_filter = get_custom_filter()
    classes_with_multiple_shapes = get_classes_with_multiple_shapes()

    use_shape_filtering = (selected_shape and selected_class in classes_with_multiple_shapes)

    if use_shape_filtering:
        # For shape filtering, we need to fetch entities and check their shape.
        # Use a larger LIMIT to ensure we get enough entities after filtering.
        offset = (page - 1) * per_page
        fetch_limit = per_page * 5  # Safety margin for filtering

        subjects_query = f"""
            SELECT DISTINCT ?subject
            WHERE {{
                ?subject a <{selected_class}> .
            }}
            LIMIT {fetch_limit}
            OFFSET {offset}
        """

        sparql.setQuery(subjects_query)
        sparql.setReturnFormat(JSON)
        subjects_results = sparql.query().convert()

        subjects = [r["subject"]["value"] for r in subjects_results["results"]["bindings"]]

        if not subjects:
            return [], 0

        # Now fetch triples for these specific subjects
        subjects_filter = " ".join([f"(<{s}>)" for s in subjects])

        triples_query = f"""
            SELECT ?subject ?p ?o
            WHERE {{
                ?subject a <{selected_class}> . ?subject ?p ?o . VALUES (?subject) {{ {subjects_filter} }}
            }}
        """

        sparql.setQuery(triples_query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        entities_triples = defaultdict(list)
        for binding in results["results"]["bindings"]:
            subject = binding["subject"]["value"]
            predicate = binding["p"]["value"]
            obj = binding["o"]["value"]
            entities_triples[subject].append((subject, predicate, obj))

        filtered_entities = []
        for subject_uri, triples in entities_triples.items():
            entity_shape = determine_shape_for_entity_triples(list(triples))
            if entity_shape == selected_shape:
                entity_label = custom_filter.human_readable_entity(
                    subject_uri, (selected_class, selected_shape), None
                )
                filtered_entities.append({"uri": subject_uri, "label": entity_label})

        if sort_property and sort_direction:
            reverse_sort = sort_direction.upper() == "DESC"
            filtered_entities.sort(key=lambda x: x["label"].lower(), reverse=reverse_sort)

        # For shape-filtered results, we can't accurately determine total_count
        # without scanning all entities, so return the number of filtered
        # entities as an approximation.
        total_count = len(filtered_entities)
        return filtered_entities[:per_page], total_count

    # Standard pagination path
    offset = (page - 1) * per_page
    sort_clause = ""
    order_clause = ""

    if sort_property:
        sort_clause = build_sort_clause(sort_property, selected_class, selected_shape)
        if sort_clause:
            order_clause = f"ORDER BY {sort_direction}(?sortValue)"

    entities_query = f"""
        SELECT ?subject {"?sortValue" if sort_property else ""}
        WHERE {{
            ?subject a <{selected_class}> . {sort_clause}
        }}
        {order_clause}
        LIMIT {per_page}
        OFFSET {offset}
    """

    available_classes = get_available_classes()

    class_info = next(
        (c for c in available_classes
         if c["uri"] == selected_class and c.get("shape") == selected_shape),
        None
    )
    total_count = class_info.get("count_numeric", 0) if class_info else 0

    sparql.setQuery(entities_query)
    sparql.setReturnFormat(JSON)
    entities_results = sparql.query().convert()

    entities = []
    shape = selected_shape if selected_shape else determine_shape_for_classes([selected_class])

    for result in entities_results["results"]["bindings"]:
        subject_uri = result["subject"]["value"]
        entity_label = custom_filter.human_readable_entity(
            subject_uri, (selected_class, shape), None
        )
        entities.append({"uri": subject_uri, "label": entity_label})

    return entities, total_count


def get_catalog_data(
    selected_class: str,
    page: int,
    per_page: int,
    sort_property: str = None,
    sort_direction: str = "ASC",
    selected_shape: str = None
) -> dict:
    """
    Get catalog data with pagination and sorting.

    Args:
        selected_class (str): Selected class URI
        page (int): Current page number
        per_page (int): Items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC')
        selected_shape (str, optional): URI of the shape to use for sorting rules

    Returns:
        dict: Catalog data including entities, pagination info, and sort settings
    """
    entities = []
    total_count = 0
    sortable_properties = []

    if selected_class:
        sortable_properties = get_sortable_properties(
            (selected_class, selected_shape)
        )

        if not sort_property and sortable_properties:
            sort_property = sortable_properties[0]["property"]

        entities, total_count = get_entities_for_class(
            selected_class, page, per_page, sort_property, sort_direction, selected_shape
        )

    return {
        "entities": entities,
        "total_pages": (
            (total_count + per_page - 1) // per_page if total_count > 0 else 0
        ),
        "current_page": page,
        "per_page": per_page,
        "total_count": total_count,
        "sort_property": sort_property,
        "sort_direction": sort_direction,
        "sortable_properties": sortable_properties,
        "selected_class": selected_class,
        "selected_shape": selected_shape,
    }
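
# Illustrative sketch (not part of the original module): a route handler might
# call this as below; the class URI is an assumption. With total_count = 101
# and per_page = 50, total_pages is (101 + 50 - 1) // 50 = 3.
#
#   catalog = get_catalog_data(
#       "http://purl.org/spar/fabio/JournalArticle", page=1, per_page=50
#   )
#   entities = catalog["entities"]
#   total_pages = catalog["total_pages"]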


def fetch_data_graph_for_subject(subject: str) -> Graph | ConjunctiveGraph:
    """
    Fetch all triples/quads associated with a subject from the dataset.
    Handles both triplestore and quadstore cases appropriately.

    Args:
        subject (str): The URI of the subject to fetch data for

    Returns:
        Graph | ConjunctiveGraph: A graph containing all triples/quads for the subject
    """
    g = ConjunctiveGraph() if get_dataset_is_quadstore() else Graph()
    sparql = get_sparql()

    if is_virtuoso():
        # For Virtuoso we need to query the named graphs explicitly
        query = f"""
            SELECT ?predicate ?object ?g WHERE {{
                GRAPH ?g {{
                    <{subject}> ?predicate ?object.
                }}
                FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
            }}
        """
    else:
        if get_dataset_is_quadstore():
            # For a non-Virtuoso quadstore, we need to query all graphs
            query = f"""
                SELECT ?predicate ?object ?g WHERE {{
                    GRAPH ?g {{
                        <{subject}> ?predicate ?object.
                    }}
                }}
            """
        else:
            # For a regular triplestore
            query = f"""
                SELECT ?predicate ?object WHERE {{
                    <{subject}> ?predicate ?object.
                }}
            """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    query_results = sparql.query().convert()
    results = query_results.get("results", {}).get("bindings", [])

    for result in results:
        # Create the appropriate value (Literal or URIRef)
        obj_data = result["object"]
        if obj_data["type"] in {"literal", "typed-literal"}:
            if "datatype" in obj_data:
                value = Literal(
                    obj_data["value"], datatype=URIRef(obj_data["datatype"])
                )
            else:
                # Create a literal without an explicit datatype to match
                # Reader.import_entities_from_triplestore
                value = Literal(obj_data["value"])
        else:
            value = URIRef(obj_data["value"])

        # Add the triple/quad based on the store type
        if get_dataset_is_quadstore():
            graph_uri = URIRef(result["g"]["value"])
            g.add(
                (
                    URIRef(subject),
                    URIRef(result["predicate"]["value"]),
                    value,
                    graph_uri,
                )
            )
        else:
            g.add((URIRef(subject), URIRef(result["predicate"]["value"]), value))

    return g
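
# Illustrative note (not part of the original module): callers receive an
# rdflib Graph (triplestore) or ConjunctiveGraph (quadstore) and can iterate it
# directly; the subject URI below is an assumption.
#
#   g = fetch_data_graph_for_subject("https://example.org/resource/1")
#   for s, p, o in g.triples((None, None, None)):
#       ...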


def parse_sparql_update(query) -> dict:
    """
    Parse a SPARQL UPDATE string and extract the triples added or removed by
    INSERT DATA / DELETE DATA operations.

    Returns:
        dict: A mapping with optional "Additions" and "Deletions" keys, each
        holding a list of (subject, predicate, object) terms.
    """
    parsed = parseUpdate(query)
    translated = translateUpdate(parsed).algebra
    modifications = {}

    def extract_quads(quads):
        result = []
        for graph, triples in quads.items():
            for triple in triples:
                result.append((triple[0], triple[1], triple[2]))
        return result

    for operation in translated:
        if operation.name == "DeleteData":
            if hasattr(operation, "quads") and operation.quads:
                deletions = extract_quads(operation.quads)
            else:
                deletions = operation.triples
            if deletions:
                modifications.setdefault("Deletions", list()).extend(deletions)
        elif operation.name == "InsertData":
            if hasattr(operation, "quads") and operation.quads:
                additions = extract_quads(operation.quads)
            else:
                additions = operation.triples
            if additions:
                modifications.setdefault("Additions", list()).extend(additions)

    return modifications
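
# Illustrative sketch (not part of the original module): for an update such as
#
#   INSERT DATA { <https://example.org/s> <https://example.org/p> "value" }
#
# parse_sparql_update returns roughly
#
#   {"Additions": [(<https://example.org/s>, <https://example.org/p>, "value")]}
#
# with the tuple elements as rdflib terms. The URIs are assumptions.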


def fetch_current_state_with_related_entities(
    provenance: dict,
) -> Graph | ConjunctiveGraph:
    """
    Fetch the current state of an entity and all its related entities known from provenance.

    Args:
        provenance (dict): Dictionary containing provenance metadata for the main entity and related entities

    Returns:
        Graph | ConjunctiveGraph: A graph containing the current state of all entities
    """
    combined_graph = ConjunctiveGraph() if get_dataset_is_quadstore() else Graph()

    # Fetch the state of every entity mentioned in the provenance
    for entity_uri in provenance.keys():
        current_graph = fetch_data_graph_for_subject(entity_uri)

        if get_dataset_is_quadstore():
            for quad in current_graph.quads():
                combined_graph.add(quad)
        else:
            for triple in current_graph:
                combined_graph.add(triple)

    return combined_graph


def get_deleted_entities_with_filtering(
    page=1,
    per_page=50,
    sort_property="deletionTime",
    sort_direction="DESC",
    selected_class=None,
    selected_shape=None,
):
    """
    Fetch and process deleted entities from the provenance graph, with filtering and sorting.
    """
    sortable_properties = [
        {"property": "deletionTime", "displayName": "Deletion Time", "sortType": "date"}
    ]
    provenance_sparql = get_provenance_sparql()
    custom_filter = get_custom_filter()

    prov_query = """
        SELECT DISTINCT ?entity ?lastSnapshot ?deletionTime ?agent ?lastValidSnapshotTime
        WHERE {
            ?lastSnapshot a <http://www.w3.org/ns/prov#Entity> ;
                <http://www.w3.org/ns/prov#specializationOf> ?entity ;
                <http://www.w3.org/ns/prov#generatedAtTime> ?deletionTime ;
                <http://www.w3.org/ns/prov#invalidatedAtTime> ?invalidationTime ;
                <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastValidSnapshot.

            ?lastValidSnapshot <http://www.w3.org/ns/prov#generatedAtTime> ?lastValidSnapshotTime .

            OPTIONAL { ?lastSnapshot <http://www.w3.org/ns/prov#wasAttributedTo> ?agent . }

            FILTER NOT EXISTS {
                ?laterSnapshot <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastSnapshot .
            }
        }
    """
    provenance_sparql.setQuery(prov_query)
    provenance_sparql.setReturnFormat(JSON)
    prov_results = provenance_sparql.query().convert()

    results_bindings = prov_results["results"]["bindings"]
    if not results_bindings:
        return [], [], None, None, [], 0

    deleted_entities = []
    max_workers = max(1, min(os.cpu_count() or 4, len(results_bindings)))
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        future_to_entity = {
            executor.submit(process_deleted_entity, result, sortable_properties): result
            for result in results_bindings
        }
        for future in as_completed(future_to_entity):
            entity_info = future.result()
            if entity_info is not None:
                deleted_entities.append(entity_info)

    class_counts = {}
    for entity in deleted_entities:
        for type_uri in entity["entity_types"]:
            class_counts[type_uri] = class_counts.get(type_uri, 0) + 1

    available_classes = [
        {
            "uri": class_uri,
            "label": custom_filter.human_readable_class((class_uri, determine_shape_for_classes([class_uri]))),
            "count": count,
        }
        for class_uri, count in class_counts.items()
    ]

    reverse_sort = sort_direction.upper() == "DESC"
    if sort_property == "deletionTime":
        deleted_entities.sort(key=lambda e: e["deletionTime"], reverse=reverse_sort)
    else:
        deleted_entities.sort(
            key=lambda e: e["sort_values"].get(sort_property, "").lower(),
            reverse=reverse_sort,
        )

    available_classes.sort(key=lambda x: x["label"].lower())
    if not selected_class and available_classes:
        selected_class = available_classes[0]["uri"]

    if selected_class:
        if selected_shape is None:
            selected_shape = determine_shape_for_classes([selected_class])
        entity_key = (selected_class, selected_shape)
        sortable_properties.extend(
            get_sortable_properties(entity_key)
        )

    if selected_class:
        filtered_entities = [
            entity
            for entity in deleted_entities
            if selected_class in entity["entity_types"]
        ]
    else:
        filtered_entities = deleted_entities

    total_count = len(filtered_entities)
    offset = (page - 1) * per_page
    paginated_entities = filtered_entities[offset : offset + per_page]

    return paginated_entities, available_classes, selected_class, selected_shape, sortable_properties, total_count
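
# Illustrative note (not part of the original module): the six-element return
# tuple is typically unpacked as
#
#   (entities, classes, selected_class, selected_shape,
#    sortable_props, total) = get_deleted_entities_with_filtering(
#       page=1, per_page=50)
#
# and an empty provenance result yields ([], [], None, None, [], 0).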


def process_deleted_entity(result: dict, sortable_properties: list) -> dict | None:
    """
    Process a single deleted entity, filtering by visible classes.
    """
    change_tracking_config = get_change_tracking_config()
    custom_filter = get_custom_filter()

    entity_uri = result["entity"]["value"]
    last_valid_snapshot_time = result["lastValidSnapshotTime"]["value"]

    agnostic_entity = AgnosticEntity(
        res=entity_uri, config=change_tracking_config, include_related_objects=True, include_merged_entities=True, include_reverse_relations=True
    )
    state, _, _ = agnostic_entity.get_state_at_time(
        (last_valid_snapshot_time, last_valid_snapshot_time)
    )

    if entity_uri not in state:
        return None

    last_valid_time = convert_to_datetime(last_valid_snapshot_time, stringify=True)
    last_valid_state: ConjunctiveGraph = state[entity_uri][last_valid_time]

    entity_types = [
        str(o)
        for s, p, o in last_valid_state.triples((URIRef(entity_uri), RDF.type, None))
    ]
    highest_priority_type = get_highest_priority_class(entity_types)
    shape = determine_shape_for_classes([highest_priority_type])
    visible_types = [t for t in entity_types if is_entity_type_visible((t, determine_shape_for_classes([t])))]
    if not visible_types:
        return None

    sort_values = {}
    for prop in sortable_properties:
        prop_uri = prop["property"]
        values = [
            str(o)
            for s, p, o in last_valid_state.triples(
                (URIRef(entity_uri), URIRef(prop_uri), None)
            )
        ]
        sort_values[prop_uri] = values[0] if values else ""

    return {
        "uri": entity_uri,
        "deletionTime": result["deletionTime"]["value"],
        "deletedBy": custom_filter.format_agent_reference(
            result.get("agent", {}).get("value", "")
        ),
        "lastValidSnapshotTime": last_valid_snapshot_time,
        "type": custom_filter.human_readable_predicate(
            highest_priority_type, (highest_priority_type, shape)
        ),
        "label": custom_filter.human_readable_entity(
            entity_uri, (highest_priority_type, shape), last_valid_state
        ),
        "entity_types": visible_types,
        "sort_values": sort_values,
    }


def find_orphaned_entities(subject, entity_type, predicate=None, object_value=None):
    """
    Find entities that would become orphaned after deleting a triple or an entire entity,
    including intermediate relation entities.

    An entity is considered orphaned if:
    1. It has no incoming references from other entities (except from the entity being deleted)
    2. It does not reference any entities that are subjects of other triples

    For intermediate relations, an entity is also considered orphaned if:
    1. It connects to the entity being deleted
    2. It has no other valid connections after the deletion
    3. It is directly involved in the deletion operation (if predicate and object_value are specified)

    Args:
        subject (str): The URI of the subject being deleted
        entity_type (str): The type of the entity being deleted
        predicate (str, optional): The predicate being deleted
        object_value (str, optional): The object value being deleted

    Returns:
        tuple: Lists of (orphaned_entities, intermediate_orphans)
    """
    sparql = get_sparql()
    display_rules = get_display_rules()

    intermediate_classes = set()

    for rule in display_rules:
        if "target" in rule and "class" in rule["target"] and rule["target"]["class"] == entity_type:
            for prop in rule.get("displayProperties", []):
                if "intermediateRelation" in prop:
                    intermediate_classes.add(prop["intermediateRelation"]["class"])

    orphan_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            {f"<{subject}> <{predicate}> ?entity ." if predicate and object_value else ""}
            {f"FILTER(?entity = <{object_value}>)" if predicate and object_value else ""}

            # If no specific predicate, get all connected entities
            {f"<{subject}> ?p ?entity ." if not predicate else ""}

            FILTER(isIRI(?entity))
            ?entity a ?type .

            # No incoming references from other entities
            FILTER NOT EXISTS {{
                ?other ?anyPredicate ?entity .
                FILTER(?other != <{subject}>)
            }}

            # No outgoing references to active entities
            FILTER NOT EXISTS {{
                ?entity ?outgoingPredicate ?connectedEntity .
                ?connectedEntity ?furtherPredicate ?furtherObject .
                {f"FILTER(?connectedEntity != <{subject}>)" if not predicate else ""}
            }}

            # Exclude intermediate relation entities
            FILTER(?type NOT IN (<{f">, <".join(intermediate_classes)}>))
        }}
    """

    # Query to find orphaned intermediate relations
    if predicate and object_value:
        intermediate_query = f"""
            SELECT DISTINCT ?entity ?type
            WHERE {{
                <{object_value}> a ?type .
                FILTER(?type IN (<{f">, <".join(intermediate_classes)}>))
                BIND(<{object_value}> AS ?entity)
            }}
        """
    else:
        # If we are deleting the whole entity, find all connected intermediate entities
        intermediate_query = f"""
            SELECT DISTINCT ?entity ?type
            WHERE {{
                # Find intermediate relations connected to the entity being deleted
                {{
                    <{subject}> ?p ?entity .
                    ?entity a ?type .
                    FILTER(?type IN (<{f">, <".join(intermediate_classes)}>))
                }} UNION {{
                    ?entity ?p <{subject}> .
                    ?entity a ?type .
                    FILTER(?type IN (<{f">, <".join(intermediate_classes)}>))
                }}
            }}
        """

    orphaned = []
    intermediate_orphans = []

    # Execute queries and process results
    for query, result_list in [
        (orphan_query, orphaned),
        (intermediate_query, intermediate_orphans),
    ]:
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            result_list.append(
                {"uri": result["entity"]["value"], "type": result["type"]["value"]}
            )

    return orphaned, intermediate_orphans
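
# Illustrative sketch (not part of the original module): checking for orphans
# before deleting a single triple; all URIs below are assumptions.
#
#   orphaned, intermediate_orphans = find_orphaned_entities(
#       subject="https://example.org/article/1",
#       entity_type="http://purl.org/spar/fabio/JournalArticle",
#       predicate="http://purl.org/vocab/frbr/core#embodiment",
#       object_value="https://example.org/manifestation/1",
#   )
#
# Each returned list contains dicts of the form {"uri": ..., "type": ...}.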


def import_entity_graph(editor: Editor, subject: str, max_depth: int = 5, include_referencing_entities: bool = False):
    """
    Recursively import the main subject and its connected entity graph up to a specified depth.

    This function imports the specified subject and all entities connected to it,
    directly or indirectly, up to the maximum depth specified. It traverses the
    graph of connected entities, importing each one into the editor.

    Args:
        editor (Editor): The Editor instance to use for importing.
        subject (str): The URI of the subject to start the import from.
        max_depth (int): The maximum depth of recursion (default is 5).
        include_referencing_entities (bool): Whether to include entities that have the subject as their object (default False).
            Useful when deleting an entity to ensure all references are properly removed.

    Returns:
        Editor: The updated Editor instance with all imported entities.
    """
    imported_subjects = set()

    # First import referencing entities if needed
    if include_referencing_entities:
        sparql = get_sparql()

        # Build query based on database type
        if editor.dataset_is_quadstore:
            query = f"""
                SELECT DISTINCT ?s
                WHERE {{
                    GRAPH ?g {{
                        ?s ?p <{subject}> .
                    }}
                    FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
                }}
            """
        else:
            query = f"""
                SELECT DISTINCT ?s
                WHERE {{
                    ?s ?p <{subject}> .
                    FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
                }}
            """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        # Import each referencing entity
        for result in results["results"]["bindings"]:
            referencing_subject = result["s"]["value"]
            if referencing_subject != subject and referencing_subject not in imported_subjects:
                imported_subjects.add(referencing_subject)
                editor.import_entity(URIRef(referencing_subject))

    def recursive_import(current_subject: str, current_depth: int):
        if current_depth > max_depth or current_subject in imported_subjects:
            return

        imported_subjects.add(current_subject)
        editor.import_entity(URIRef(current_subject))

        query = f"""
            SELECT ?p ?o
            WHERE {{
                <{current_subject}> ?p ?o .
                FILTER(isIRI(?o))
                FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
            }}
        """

        sparql = get_sparql()
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            object_entity = result["o"]["value"]
            recursive_import(object_entity, current_depth + 1)

    recursive_import(subject, 1)
    return editor
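
# Illustrative note (not part of the original module): before deleting an
# entity, callers typically import it together with everything that points at
# it, so the deletion can also clean up incoming references; the URI is an
# assumption.
#
#   editor = import_entity_graph(
#       editor, "https://example.org/resource/1",
#       max_depth=3, include_referencing_entities=True,
#   )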


def get_entity_types(subject_uri: str) -> List[str]:
    """
    Get all RDF types for an entity.

    Args:
        subject_uri: URI of the entity

    Returns:
        List of type URIs
    """
    sparql = get_sparql()

    query = f"""
        SELECT ?type WHERE {{
            <{subject_uri}> a ?type .
        }}
    """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    return [result["type"]["value"] for result in results["results"]["bindings"]]
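
# Illustrative note (not part of the original module): the result is a plain
# list of type URIs as strings, e.g. for an assumed subject
#
#   get_entity_types("https://example.org/resource/1")
#   # -> ["http://purl.org/spar/fabio/JournalArticle", ...]
#
# which can be passed to get_highest_priority_class to pick the display class.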


def collect_referenced_entities(data, existing_entities=None):
    """
    Recursively collect all URIs of existing entities referenced in the structured data.

    This function traverses the structured data to find explicit references to existing entities
    that need to be imported into the editor before calling preexisting_finished().

    Args:
        data: The structured data (can be dict, list, or string)
        existing_entities: Set to collect URIs (created if None)

    Returns:
        Set of URIs (strings) of existing entities that should be imported
    """
    if existing_entities is None:
        existing_entities = set()

    if isinstance(data, dict):
        if data.get("is_existing_entity") is True and "entity_uri" in data:
            existing_entities.add(data["entity_uri"])

        # If it has an entity_type, it is a new entity being created
        elif "entity_type" in data:
            properties = data.get("properties", {})
            for prop_values in properties.values():
                collect_referenced_entities(prop_values, existing_entities)
        else:
            for value in data.values():
                collect_referenced_entities(value, existing_entities)

    elif isinstance(data, list):
        for item in data:
            collect_referenced_entities(item, existing_entities)

    return existing_entities
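
# Illustrative sketch (not part of the original module): a structured payload
# mixing a new entity with a reference to an existing one; the field values
# are assumptions, while the keys are the ones this function inspects.
#
#   data = {
#       "entity_type": "http://purl.org/spar/fabio/JournalArticle",
#       "properties": {
#           "http://purl.org/dc/terms/publisher": [
#               {"is_existing_entity": True,
#                "entity_uri": "https://example.org/publisher/1"}
#           ]
#       },
#   }
#   collect_referenced_entities(data)
#   # -> {"https://example.org/publisher/1"}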


def import_referenced_entities(editor, structured_data):
    """
    Import all existing entities referenced in structured data into the editor.

    This function should be called before editor.preexisting_finished() to ensure
    that all existing entities that will be linked have their snapshots created.

    Args:
        editor: The Editor instance
        structured_data: The structured data containing entity references
    """
    referenced_entities = collect_referenced_entities(structured_data)
    for entity_uri in referenced_entities:
        try:
            editor.import_entity(entity_uri)
        except Exception as e:
            print(f"Warning: Could not import entity {entity_uri}: {e}")
            continue
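
# Illustrative note (not part of the original module): per the docstrings
# above, a typical creation flow imports referenced entities before marking
# the pre-existing state as finished (the data variable is an assumption).
#
#   import_referenced_entities(editor, data)
#   editor.preexisting_finished()
#   # ... then apply the new triples through the Editor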