Coverage for heritrace/utils/sparql_utils.py: 99%
346 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-08-01 22:12 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-08-01 22:12 +0000
1import os
2from collections import defaultdict
3from concurrent.futures import ProcessPoolExecutor, as_completed
4from typing import List
6from heritrace.editor import Editor
7from heritrace.extensions import (get_change_tracking_config,
8 get_classes_with_multiple_shapes,
9 get_custom_filter, get_dataset_is_quadstore,
10 get_display_rules, get_provenance_sparql,
11 get_sparql)
12from heritrace.utils.converters import convert_to_datetime
13from heritrace.utils.display_rules_utils import (find_matching_rule,
14 get_highest_priority_class,
15 get_sortable_properties,
16 is_entity_type_visible)
17from heritrace.utils.shacl_utils import (determine_shape_for_classes,
18 determine_shape_for_entity_triples)
19from heritrace.utils.virtuoso_utils import (VIRTUOSO_EXCLUDED_GRAPHS,
20 is_virtuoso)
21from rdflib import RDF, XSD, ConjunctiveGraph, Graph, Literal, URIRef
22from rdflib.plugins.sparql.algebra import translateUpdate
23from rdflib.plugins.sparql.parser import parseUpdate
24from SPARQLWrapper import JSON
25from time_agnostic_library.agnostic_entity import AgnosticEntity
def _get_entities_with_enhanced_shape_detection(class_uri: str, classes_with_multiple_shapes: set):
    """
    Group the visible entities of a class by the shape detected from their triples.

    All triples of every instance of ``class_uri`` are fetched with a single
    query and bucketed per subject. Each subject is then assigned the shape
    returned by ``determine_shape_for_entity_triples``; subjects without a
    detected shape, or whose (class, shape) pair is not visible, are skipped.

    Args:
        class_uri: The class URI to get entities for
        classes_with_multiple_shapes: Set of classes that have multiple shapes
            (part of the signature, but not read inside this function)

    Returns:
        defaultdict: Maps each shape URI to a list of entity info dicts with
        the keys "uri", "class" and "shape"
    """
    endpoint = get_sparql()

    if is_virtuoso():
        # On Virtuoso the internal/system graphs must be filtered out explicitly.
        excluded_graphs = '>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)
        query = f"""
        SELECT DISTINCT ?subject ?p ?o
        WHERE {{
            GRAPH ?g {{
                ?subject a <{class_uri}> .
                ?subject ?p ?o .
            }}
            FILTER(?g NOT IN (<{excluded_graphs}>))
        }}
        """
    else:
        query = f"""
        SELECT DISTINCT ?subject ?p ?o
        WHERE {{
            ?subject a <{class_uri}> .
            ?subject ?p ?o .
        }}
        """

    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    # Bucket the flat result rows into one triple list per subject.
    triples_by_subject = {}
    for row in response["results"]["bindings"]:
        subject_uri = row["subject"]["value"]
        triple = (subject_uri, row["p"]["value"], row["o"]["value"])
        triples_by_subject.setdefault(subject_uri, []).append(triple)

    grouped = defaultdict(list)
    for subject_uri, subject_triples in triples_by_subject.items():
        detected_shape = determine_shape_for_entity_triples(subject_triples)
        if not detected_shape:
            continue
        if not is_entity_type_visible((class_uri, detected_shape)):
            continue
        grouped[detected_shape].append(
            {"uri": subject_uri, "class": class_uri, "shape": detected_shape}
        )

    return grouped
def get_available_classes():
    """
    List the entity classes present in the triplestore, with labels and counts.

    Classes configured with multiple shapes yield one entry per detected
    shape (counting only the entities matched to that shape); every other
    class yields at most one entry, included only when its (class, shape)
    pair is visible.

    Returns:
        list: Dictionaries with the keys "uri", "label", "count" and "shape",
        sorted case-insensitively by label
    """
    endpoint = get_sparql()
    custom_filter = get_custom_filter()

    if is_virtuoso():
        excluded_graphs = '>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)
        classes_query = f"""
        SELECT DISTINCT ?class (COUNT(DISTINCT ?subject) as ?count)
        WHERE {{
            GRAPH ?g {{
                ?subject a ?class .
            }}
            FILTER(?g NOT IN (<{excluded_graphs}>))
        }}
        GROUP BY ?class
        ORDER BY DESC(?count)
        """
    else:
        classes_query = """
        SELECT DISTINCT ?class (COUNT(DISTINCT ?subject) as ?count)
        WHERE {
            ?subject a ?class .
        }
        GROUP BY ?class
        ORDER BY DESC(?count)
        """

    endpoint.setQuery(classes_query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    multi_shape_classes = get_classes_with_multiple_shapes()

    collected = []
    for row in response["results"]["bindings"]:
        class_uri = row["class"]["value"]
        instance_count = int(row["count"]["value"])

        if class_uri not in multi_shape_classes:
            # Single-shape class: one entry, using the count from the query.
            shape_uri = determine_shape_for_classes([class_uri])
            key = (class_uri, shape_uri)
            if is_entity_type_visible(key):
                collected.append({
                    "uri": class_uri,
                    "label": custom_filter.human_readable_class(key),
                    "count": instance_count,
                    "shape": shape_uri,
                })
            continue

        # Multi-shape class: one entry per shape that actually has entities.
        per_shape = _get_entities_with_enhanced_shape_detection(
            class_uri, multi_shape_classes
        )
        for shape_uri, members in per_shape.items():
            if not members:
                continue
            collected.append({
                "uri": class_uri,
                "label": custom_filter.human_readable_class((class_uri, shape_uri)),
                "count": len(members),
                "shape": shape_uri,
            })

    return sorted(collected, key=lambda entry: entry["label"].lower())
def build_sort_clause(sort_property: str, entity_type: str, shape_uri: str = None) -> str:
    """
    Produce the SPARQL pattern that binds ``?sortValue`` for a sortable property.

    The pattern is only emitted when the display rule matching the entity
    type (and shape) lists ``sort_property`` among its "sortableBy" entries.

    Args:
        sort_property: The property to sort by
        entity_type: The entity type URI
        shape_uri: Optional shape URI for more specific sorting rules

    Returns:
        An ``OPTIONAL { ?subject <property> ?sortValue }`` pattern, or an
        empty string when either argument is missing or the property is not
        configured as sortable
    """
    if not (sort_property and entity_type):
        return ""

    matching_rule = find_matching_rule(entity_type, shape_uri)
    if not matching_rule or "sortableBy" not in matching_rule:
        return ""

    for sortable in matching_rule["sortableBy"]:
        if sortable.get("property") == sort_property:
            if not sortable:
                return ""
            return f"OPTIONAL {{ ?subject <{sort_property}> ?sortValue }}"

    return ""
def get_entities_for_class(
    selected_class, page, per_page, sort_property=None, sort_direction="ASC", selected_shape=None
):
    """
    Retrieve entities for a specific class with pagination and sorting.

    Two strategies are used:

    * When ``selected_shape`` is given and the class has multiple shapes, all
      triples of all instances are fetched, each entity's shape is detected
      in Python, and filtering, label-based sorting and pagination happen in
      memory.
    * Otherwise pagination and ordering are delegated to the triplestore via
      LIMIT/OFFSET and an optional ``?sortValue`` binding.

    Args:
        selected_class (str): URI of the class to fetch entities for
        page (int): Current page number (1-based; values below 1 are treated as 1)
        per_page (int): Number of items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC',
            case-insensitive). Any other value falls back to 'ASC'
        selected_shape (str, optional): URI of the shape to filter by

    Returns:
        tuple: (list of ``{"uri", "label"}`` dicts, total count)
    """
    sparql = get_sparql()
    custom_filter = get_custom_filter()
    classes_with_multiple_shapes = get_classes_with_multiple_shapes()

    # The direction is interpolated into the query text below, so only the two
    # SPARQL order modifiers are let through; anything else becomes ASC.
    direction = str(sort_direction).upper() if sort_direction else "ASC"
    if direction not in ("ASC", "DESC"):
        direction = "ASC"

    # A page below 1 would otherwise yield a negative OFFSET / slice start.
    offset = max(page - 1, 0) * per_page

    use_shape_filtering = (selected_shape and
                           selected_class in classes_with_multiple_shapes)

    if use_shape_filtering:
        if is_virtuoso():
            query = f"""
            SELECT DISTINCT ?subject ?p ?o
            WHERE {{
                GRAPH ?g {{
                    ?subject a <{selected_class}> .
                    ?subject ?p ?o .
                }}
                FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
            }}
            """
        else:
            query = f"""
            SELECT DISTINCT ?subject ?p ?o
            WHERE {{
                ?subject a <{selected_class}> .
                ?subject ?p ?o .
            }}
            """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        # Bucket the flat rows into one triple list per subject.
        entities_triples = defaultdict(list)
        for binding in results["results"]["bindings"]:
            subject = binding["subject"]["value"]
            predicate = binding["p"]["value"]
            obj = binding["o"]["value"]
            entities_triples[subject].append((subject, predicate, obj))

        # Keep only the entities whose detected shape is the requested one.
        filtered_entities = []
        for subject_uri, triples in entities_triples.items():
            entity_shape = determine_shape_for_entity_triples(list(triples))
            if entity_shape == selected_shape:
                entity_label = custom_filter.human_readable_entity(
                    subject_uri, (selected_class, selected_shape), None
                )
                filtered_entities.append({"uri": subject_uri, "label": entity_label})

        # In this branch sorting is always by label; sort_property only
        # switches sorting on.
        if sort_property and sort_direction:
            filtered_entities.sort(
                key=lambda x: x["label"].lower(), reverse=direction == "DESC"
            )

        total_count = len(filtered_entities)
        paginated_entities = filtered_entities[offset:offset + per_page]

        return paginated_entities, total_count

    sort_clause = ""
    order_clause = "ORDER BY ?subject"
    if sort_property:
        sort_clause = build_sort_clause(sort_property, selected_class, selected_shape)
        if sort_clause:
            order_clause = f"ORDER BY {direction}(?sortValue)"

    if is_virtuoso():
        entities_query = f"""
        SELECT DISTINCT ?subject {f"?sortValue" if sort_property else ""}
        WHERE {{
            GRAPH ?g {{
                ?subject a <{selected_class}> .
                {sort_clause}
            }}
            FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
        }}
        {order_clause}
        LIMIT {per_page}
        OFFSET {offset}
        """

        count_query = f"""
        SELECT (COUNT(DISTINCT ?subject) as ?count)
        WHERE {{
            GRAPH ?g {{
                ?subject a <{selected_class}> .
            }}
            FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
        }}
        """
    else:
        entities_query = f"""
        SELECT DISTINCT ?subject {f"?sortValue" if sort_property else ""}
        WHERE {{
            ?subject a <{selected_class}> .
            {sort_clause}
        }}
        {order_clause}
        LIMIT {per_page}
        OFFSET {offset}
        """

        count_query = f"""
        SELECT (COUNT(DISTINCT ?subject) as ?count)
        WHERE {{
            ?subject a <{selected_class}> .
        }}
        """

    sparql.setQuery(count_query)
    sparql.setReturnFormat(JSON)
    count_results = sparql.query().convert()
    total_count = int(count_results["results"]["bindings"][0]["count"]["value"])
    sparql.setQuery(entities_query)
    entities_results = sparql.query().convert()

    # The shape depends only on the arguments, so resolve it once instead of
    # once per result row.
    shape = selected_shape if selected_shape else determine_shape_for_classes([selected_class])

    entities = []
    for result in entities_results["results"]["bindings"]:
        subject_uri = result["subject"]["value"]
        entity_label = custom_filter.human_readable_entity(
            subject_uri, (selected_class, shape), None
        )
        entities.append({"uri": subject_uri, "label": entity_label})

    return entities, total_count
def get_catalog_data(
    selected_class: str,
    page: int,
    per_page: int,
    sort_property: str = None,
    sort_direction: str = "ASC",
    selected_shape: str = None
) -> dict:
    """
    Assemble everything the catalogue view needs for one page of entities.

    When no class is selected, no lookup is performed and an empty catalogue
    is returned. When no sort property is supplied, the first sortable
    property configured for the (class, shape) pair is used.

    Args:
        selected_class (str): Selected class URI
        page (int): Current page number
        per_page (int): Items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC')
        selected_shape (str, optional): URI of the shape to use for sorting rules

    Returns:
        dict: Catalog data including entities, pagination info, and sort settings
    """
    entities, total_count, sortable_properties = [], 0, []

    if selected_class:
        sortable_properties = get_sortable_properties((selected_class, selected_shape))

        # Fall back to the first configured sortable property.
        if sortable_properties and not sort_property:
            sort_property = sortable_properties[0]["property"]

        entities, total_count = get_entities_for_class(
            selected_class, page, per_page, sort_property, sort_direction, selected_shape
        )

    if total_count > 0:
        # Ceiling division without floats.
        total_pages = (total_count + per_page - 1) // per_page
    else:
        total_pages = 0

    return {
        "entities": entities,
        "total_pages": total_pages,
        "current_page": page,
        "per_page": per_page,
        "total_count": total_count,
        "sort_property": sort_property,
        "sort_direction": sort_direction,
        "sortable_properties": sortable_properties,
        "selected_class": selected_class,
        "selected_shape": selected_shape,
    }
def fetch_data_graph_for_subject(subject: str) -> Graph | ConjunctiveGraph:
    """
    Fetch all triples/quads associated with a subject from the dataset.
    Handles both triplestore and quadstore cases appropriately.

    Literals returned without a datatype are stored with an explicit
    ``xsd:string`` datatype; every non-literal object is stored as a URIRef.

    Args:
        subject (str): The URI of the subject to fetch data for

    Returns:
        Graph|ConjunctiveGraph: A graph containing all triples/quads for the
        subject (a ConjunctiveGraph when the dataset is a quadstore)
    """
    # The store kind does not change during the call, so look it up once
    # rather than once per result row.
    is_quadstore = get_dataset_is_quadstore()
    g = ConjunctiveGraph() if is_quadstore else Graph()
    sparql = get_sparql()

    if is_virtuoso():
        # For virtuoso we need to explicitly query the graph
        query = f"""
        SELECT ?predicate ?object ?g WHERE {{
            GRAPH ?g {{
                <{subject}> ?predicate ?object.
            }}
            FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
        }}
        """
    elif is_quadstore:
        # For non-virtuoso quadstore, we need to query all graphs
        query = f"""
        SELECT ?predicate ?object ?g WHERE {{
            GRAPH ?g {{
                <{subject}> ?predicate ?object.
            }}
        }}
        """
    else:
        # For regular triplestore
        query = f"""
        SELECT ?predicate ?object WHERE {{
            <{subject}> ?predicate ?object.
        }}
        """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    query_results = sparql.query().convert()
    results = query_results.get("results", {}).get("bindings", [])

    subject_ref = URIRef(subject)

    for result in results:
        # Create the appropriate value (Literal or URIRef)
        obj_data = result["object"]
        if obj_data["type"] in {"literal", "typed-literal"}:
            if "datatype" in obj_data:
                value = Literal(
                    obj_data["value"], datatype=URIRef(obj_data["datatype"])
                )
            else:
                # Add explicit string datatype to match time-agnostic library behavior
                value = Literal(obj_data["value"], datatype=XSD.string)
        else:
            # NOTE(review): any non-literal binding (including a "bnode" one,
            # if the endpoint returns them) ends up as a URIRef - confirm
            # that blank nodes cannot occur here.
            value = URIRef(obj_data["value"])

        predicate_ref = URIRef(result["predicate"]["value"])

        # Add triple/quad based on store type
        if is_quadstore:
            g.add((subject_ref, predicate_ref, value, URIRef(result["g"]["value"])))
        else:
            g.add((subject_ref, predicate_ref, value))

    return g
def parse_sparql_update(query) -> dict:
    """
    Extract the triples added and removed by a SPARQL UPDATE request.

    Only ``INSERT DATA`` and ``DELETE DATA`` operations are considered; any
    other operation in the request is ignored. Graph information carried by
    quads is dropped, so every statement is reported as a 3-tuple.

    Args:
        query: The SPARQL UPDATE text to parse

    Returns:
        dict: May contain the keys "Additions" and/or "Deletions", each
        holding a list of triples; a key is absent when the request has no
        statements of that kind
    """
    algebra = translateUpdate(parseUpdate(query)).algebra
    bucket_for_operation = {"DeleteData": "Deletions", "InsertData": "Additions"}

    def flatten_quads(quads):
        # The graph identifiers (the mapping keys) are intentionally discarded.
        return [
            (triple[0], triple[1], triple[2])
            for graph_triples in quads.values()
            for triple in graph_triples
        ]

    changes = {}
    for operation in algebra:
        bucket = bucket_for_operation.get(operation.name)
        if bucket is None:
            continue

        # Prefer quads when the operation carries any; otherwise use its triples.
        if hasattr(operation, "quads") and operation.quads:
            statements = flatten_quads(operation.quads)
        else:
            statements = operation.triples

        if statements:
            changes.setdefault(bucket, []).extend(statements)

    return changes
def fetch_current_state_with_related_entities(
    provenance: dict,
) -> Graph | ConjunctiveGraph:
    """
    Merge the current data of every entity listed in a provenance mapping.

    Each key of ``provenance`` is treated as an entity URI; its current
    triples/quads are fetched and copied into a single graph.

    Args:
        provenance (dict): Dictionary containing provenance metadata for main entity and related entities

    Returns:
        Graph|ConjunctiveGraph: A graph containing the current state of all
        entities (a ConjunctiveGraph when the dataset is a quadstore)
    """
    merged = ConjunctiveGraph() if get_dataset_is_quadstore() else Graph()

    for entity_uri in provenance:
        entity_graph = fetch_data_graph_for_subject(entity_uri)

        # Quadstores are copied quad by quad so the graph context is kept.
        if get_dataset_is_quadstore():
            statements = entity_graph.quads()
        else:
            statements = entity_graph

        for statement in statements:
            merged.add(statement)

    return merged
def get_deleted_entities_with_filtering(
    page=1,
    per_page=50,
    sort_property="deletionTime",
    sort_direction="DESC",
    selected_class=None,
    selected_shape=None,
):
    """
    Fetch and process deleted entities from the provenance graph, with filtering and sorting.

    Deletion snapshots are read from the provenance endpoint, each one is
    processed in a worker process, and the surviving entities are sorted,
    filtered by class and paginated in memory.

    Args:
        page (int): Page number, 1-based
        per_page (int): Number of entities per page
        sort_property (str): "deletionTime" or a property URI
        sort_direction (str): 'ASC' or 'DESC'
        selected_class (str, optional): Class URI to filter by; defaults to the
            first available class (by label) when omitted
        selected_shape (str, optional): Shape URI; derived from the selected
            class when omitted

    Returns:
        tuple: (paginated entities, available classes, selected class,
        selected shape, sortable properties, total count). When the
        provenance query yields nothing this is ``([], [], None, None, [], 0)``.
    """
    sortable_properties = [
        {"property": "deletionTime", "displayName": "Deletion Time", "sortType": "date"}
    ]
    provenance_sparql = get_provenance_sparql()
    custom_filter = get_custom_filter()

    prov_query = """
    SELECT DISTINCT ?entity ?lastSnapshot ?deletionTime ?agent ?lastValidSnapshotTime
    WHERE {
        ?lastSnapshot a <http://www.w3.org/ns/prov#Entity> ;
                     <http://www.w3.org/ns/prov#specializationOf> ?entity ;
                     <http://www.w3.org/ns/prov#generatedAtTime> ?deletionTime ;
                     <http://www.w3.org/ns/prov#invalidatedAtTime> ?invalidationTime ;
                     <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastValidSnapshot.

        ?lastValidSnapshot <http://www.w3.org/ns/prov#generatedAtTime> ?lastValidSnapshotTime .

        OPTIONAL { ?lastSnapshot <http://www.w3.org/ns/prov#wasAttributedTo> ?agent . }

        FILTER NOT EXISTS {
            ?laterSnapshot <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastSnapshot .
        }
    }
    """
    provenance_sparql.setQuery(prov_query)
    provenance_sparql.setReturnFormat(JSON)
    snapshot_rows = provenance_sparql.query().convert()["results"]["bindings"]

    if not snapshot_rows:
        return [], [], None, None, [], 0

    # One task per snapshot row; never more workers than rows or CPUs.
    # NOTE(review): at this point sortable_properties holds only the
    # deletionTime entry (class-specific properties are appended further
    # down), so the workers' "sort_values" cannot contain class-specific
    # keys - confirm whether sorting by those properties is expected to work.
    worker_count = max(1, min(os.cpu_count() or 4, len(snapshot_rows)))
    deleted_entities = []
    with ProcessPoolExecutor(max_workers=worker_count) as pool:
        pending = [
            pool.submit(process_deleted_entity, row, sortable_properties)
            for row in snapshot_rows
        ]
        for finished in as_completed(pending):
            info = finished.result()
            if info is not None:
                deleted_entities.append(info)

    # Count how many deleted entities carry each (visible) type.
    class_counts = defaultdict(int)
    for entity in deleted_entities:
        for type_uri in entity["entity_types"]:
            class_counts[type_uri] += 1

    available_classes = []
    for class_uri, count in class_counts.items():
        class_key = (class_uri, determine_shape_for_classes([class_uri]))
        available_classes.append({
            "uri": class_uri,
            "label": custom_filter.human_readable_class(class_key),
            "count": count,
        })

    descending = sort_direction.upper() == "DESC"
    if sort_property == "deletionTime":
        sort_key = lambda e: e["deletionTime"]
    else:
        sort_key = lambda e: e["sort_values"].get(sort_property, "").lower()
    deleted_entities.sort(key=sort_key, reverse=descending)

    available_classes.sort(key=lambda x: x["label"].lower())
    if not selected_class and available_classes:
        selected_class = available_classes[0]["uri"]

    if selected_class:
        if selected_shape is None:
            selected_shape = determine_shape_for_classes([selected_class])
        sortable_properties.extend(
            get_sortable_properties((selected_class, selected_shape))
        )
        filtered_entities = [
            entity
            for entity in deleted_entities
            if selected_class in entity["entity_types"]
        ]
    else:
        filtered_entities = deleted_entities

    total_count = len(filtered_entities)
    start = (page - 1) * per_page
    page_slice = filtered_entities[start:start + per_page]

    return page_slice, available_classes, selected_class, selected_shape, sortable_properties, total_count
def process_deleted_entity(result: dict, sortable_properties: list) -> dict | None:
    """
    Process a single deleted entity, filtering by visible classes.

    The entity's state at its last valid snapshot is reconstructed and used
    to derive its types, label and sort values.

    Args:
        result: One binding row of the deleted-entities provenance query; the
            keys "entity", "lastValidSnapshotTime" and "deletionTime" are
            read, and "agent" is optional
        sortable_properties: Dicts whose "property" value names a predicate to
            extract a sort value for

    Returns:
        A dict describing the deleted entity, or None when the entity is
        missing from the reconstructed state or none of its types is visible
    """
    change_tracking_config = get_change_tracking_config()
    custom_filter = get_custom_filter()

    entity_uri = result["entity"]["value"]
    snapshot_time = result["lastValidSnapshotTime"]["value"]
    entity_ref = URIRef(entity_uri)

    agnostic_entity = AgnosticEntity(
        res=entity_uri,
        config=change_tracking_config,
        include_related_objects=True,
        include_merged_entities=True,
        include_reverse_relations=True,
    )
    # The same instant is used as both ends of the requested interval.
    state, _, _ = agnostic_entity.get_state_at_time((snapshot_time, snapshot_time))

    if entity_uri not in state:
        return None

    state_key = convert_to_datetime(snapshot_time, stringify=True)
    last_valid_state: ConjunctiveGraph = state[entity_uri][state_key]

    entity_types = [
        str(type_node)
        for _, _, type_node in last_valid_state.triples((entity_ref, RDF.type, None))
    ]
    highest_priority_type = get_highest_priority_class(entity_types)
    shape = determine_shape_for_classes([highest_priority_type])

    visible_types = []
    for type_uri in entity_types:
        if is_entity_type_visible((type_uri, determine_shape_for_classes([type_uri]))):
            visible_types.append(type_uri)
    if not visible_types:
        return None

    # First value found for each sortable property, or "" when there is none.
    sort_values = {}
    for prop in sortable_properties:
        prop_uri = prop["property"]
        found = [
            str(value)
            for _, _, value in last_valid_state.triples(
                (entity_ref, URIRef(prop_uri), None)
            )
        ]
        sort_values[prop_uri] = found[0] if found else ""

    display_key = (highest_priority_type, shape)
    agent_uri = result.get("agent", {}).get("value", "")

    return {
        "uri": entity_uri,
        "deletionTime": result["deletionTime"]["value"],
        "deletedBy": custom_filter.format_agent_reference(agent_uri),
        "lastValidSnapshotTime": snapshot_time,
        "type": custom_filter.human_readable_predicate(
            highest_priority_type, display_key
        ),
        "label": custom_filter.human_readable_entity(
            entity_uri, display_key, last_valid_state
        ),
        "entity_types": visible_types,
        "sort_values": sort_values,
    }
def find_orphaned_entities(subject, entity_type, predicate=None, object_value=None):
    """
    Find entities that would become orphaned after deleting a triple or an entire entity,
    including intermediate relation entities.

    An entity is considered orphaned if:
    1. It has no incoming references from other entities (except from the entity being deleted)
    2. It does not reference any entities that are subjects of other triples

    For intermediate relations, an entity is also considered orphaned if:
    1. It connects to the entity being deleted
    2. It has no other valid connections after the deletion
    3. It is directly involved in the deletion operation (if predicate and object_value are specified)

    The intermediate relation classes are taken from the "intermediateRelation"
    entries of the display rules whose target class equals ``entity_type``.

    Args:
        subject (str): The URI of the subject being deleted
        entity_type (str): The type of the entity being deleted
        predicate (str, optional): The predicate being deleted
        object_value (str, optional): The object value being deleted

    Returns:
        tuple: Lists of (orphaned_entities, intermediate_orphans); each item
        is a dict with the keys "uri" and "type"
    """
    sparql = get_sparql()
    display_rules = get_display_rules()

    intermediate_classes = set()
    for rule in display_rules:
        if "target" not in rule:
            continue
        target = rule["target"]
        if "class" not in target or target["class"] != entity_type:
            continue
        for prop in rule.get("displayProperties", []):
            if "intermediateRelation" in prop:
                intermediate_classes.add(prop["intermediateRelation"]["class"])

    # Query fragments that depend on whether a single triple or the whole
    # entity is being deleted.
    single_triple = predicate and object_value
    direct_pattern = f"<{subject}> <{predicate}> ?entity ." if single_triple else ""
    object_filter = f"FILTER(?entity = <{object_value}>)" if single_triple else ""
    any_predicate_pattern = f"<{subject}> ?p ?entity ." if not predicate else ""
    subject_exclusion = f"FILTER(?connectedEntity != <{subject}>)" if not predicate else ""
    intermediate_iris = ">, <".join(intermediate_classes)

    orphan_query = f"""
    SELECT DISTINCT ?entity ?type
    WHERE {{
        {direct_pattern}
        {object_filter}

        # If no specific predicate, get all connected entities
        {any_predicate_pattern}

        FILTER(isIRI(?entity))
        ?entity a ?type .

        # No incoming references from other entities
        FILTER NOT EXISTS {{
            ?other ?anyPredicate ?entity .
            FILTER(?other != <{subject}>)
        }}

        # No outgoing references to active entities
        FILTER NOT EXISTS {{
            ?entity ?outgoingPredicate ?connectedEntity .
            ?connectedEntity ?furtherPredicate ?furtherObject .
            {subject_exclusion}
        }}

        # Exclude intermediate relation entities
        FILTER(?type NOT IN (<{intermediate_iris}>))
    }}
    """

    # Query to find orphaned intermediate relations
    if single_triple:
        intermediate_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            <{object_value}> a ?type .
            FILTER(?type IN (<{intermediate_iris}>))
            BIND(<{object_value}> AS ?entity)
        }}
        """
    else:
        # If the whole entity is being deleted, find every intermediate
        # entity connected to it.
        intermediate_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            # Find intermediate relations connected to the entity being deleted
            {{
                <{subject}> ?p ?entity .
                ?entity a ?type .
                FILTER(?type IN (<{intermediate_iris}>))
            }} UNION {{
                ?entity ?p <{subject}> .
                ?entity a ?type .
                FILTER(?type IN (<{intermediate_iris}>))
            }}
        }}
        """

    def run(query_text):
        # Execute one SELECT and reduce each row to its entity URI and type.
        sparql.setQuery(query_text)
        sparql.setReturnFormat(JSON)
        rows = sparql.query().convert()["results"]["bindings"]
        return [
            {"uri": row["entity"]["value"], "type": row["type"]["value"]}
            for row in rows
        ]

    orphaned = run(orphan_query)
    intermediate_orphans = run(intermediate_query)

    return orphaned, intermediate_orphans
def import_entity_graph(editor: Editor, subject: str, max_depth: int = 5, include_referencing_entities: bool = False):
    """
    Import the main subject and its connected entity graph up to a specified depth.

    Starting from ``subject`` (depth 1), every IRI object reachable through
    predicates other than ``rdf:type`` is imported into the editor, visiting
    entities depth-first and never importing the same subject twice.

    Args:
        editor (Editor): The Editor instance to use for importing.
        subject (str): The URI of the subject to start the import from.
        max_depth (int): The maximum traversal depth (default is 5).
        include_referencing_entities (bool): Whether to include entities that have the subject as their object (default False).
            Useful when deleting an entity to ensure all references are properly removed.

    Returns:
        Editor: The updated Editor instance with all imported entities.
    """
    rdf_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
    imported_subjects = set()

    # First import referencing entities if needed
    if include_referencing_entities:
        sparql = get_sparql()

        # Build query based on database type
        if editor.dataset_is_quadstore:
            query = f"""
            SELECT DISTINCT ?s
            WHERE {{
                GRAPH ?g {{
                    ?s ?p <{subject}> .
                }}
                FILTER(?p != <{rdf_type}>)
            }}
            """
        else:
            query = f"""
            SELECT DISTINCT ?s
            WHERE {{
                ?s ?p <{subject}> .
                FILTER(?p != <{rdf_type}>)
            }}
            """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        referencing_rows = sparql.query().convert()["results"]["bindings"]

        # Import each referencing entity
        for row in referencing_rows:
            referencing_subject = row["s"]["value"]
            if referencing_subject == subject or referencing_subject in imported_subjects:
                continue
            imported_subjects.add(referencing_subject)
            editor.import_entity(URIRef(referencing_subject))

    # Depth-first, pre-order walk with an explicit stack. Children are pushed
    # in reverse so they are popped (and visited) in result order.
    pending = [(subject, 1)]
    while pending:
        current_subject, current_depth = pending.pop()
        if current_depth > max_depth or current_subject in imported_subjects:
            continue

        imported_subjects.add(current_subject)
        editor.import_entity(URIRef(current_subject))

        query = f"""
        SELECT ?p ?o
        WHERE {{
            <{current_subject}> ?p ?o .
            FILTER(isIRI(?o))
            FILTER(?p != <{rdf_type}>)
        }}
        """

        sparql = get_sparql()
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        rows = sparql.query().convert()["results"]["bindings"]

        linked_entities = [row["o"]["value"] for row in rows]
        pending.extend(
            (linked, current_depth + 1) for linked in reversed(linked_entities)
        )

    return editor
def get_entity_types(subject_uri: str) -> List[str]:
    """
    Look up the ``rdf:type`` values asserted for an entity.

    Args:
        subject_uri: URI of the entity

    Returns:
        List of type URIs, in the order the endpoint returns them
    """
    endpoint = get_sparql()
    endpoint.setQuery(f"""
    SELECT ?type WHERE {{
        <{subject_uri}> a ?type .
    }}
    """)
    endpoint.setReturnFormat(JSON)
    rows = endpoint.query().convert()["results"]["bindings"]

    type_uris = []
    for row in rows:
        type_uris.append(row["type"]["value"])
    return type_uris
def collect_referenced_entities(data, existing_entities=None):
    """
    Walk structured data and gather the URIs of existing entities it refers to.

    A dict flagged with ``"is_existing_entity": True`` that also carries an
    "entity_uri" contributes that URI and is not descended into. A dict with
    an "entity_type" key describes a new entity, so only the values of its
    "properties" are searched. Any other dict has all of its values searched,
    lists have all of their items searched, and everything else is ignored.

    Args:
        data: The structured data (can be dict, list, or string)
        existing_entities: Set to collect URIs (created if None); it is
            updated in place

    Returns:
        Set of URIs (strings) of existing entities that should be imported
    """
    collected = set() if existing_entities is None else existing_entities

    if isinstance(data, dict):
        if data.get("is_existing_entity") is True and "entity_uri" in data:
            collected.add(data["entity_uri"])
            return collected

        if "entity_type" in data:
            # New entity being created: only its property values can hold references.
            children = data.get("properties", {}).values()
        else:
            children = data.values()

        for child in children:
            collect_referenced_entities(child, collected)
        return collected

    if isinstance(data, list):
        for element in data:
            collect_referenced_entities(element, collected)

    return collected
def import_referenced_entities(editor, structured_data):
    """
    Import every existing entity referenced by the structured data into the editor.

    Call this before editor.preexisting_finished() so that all existing
    entities that will be linked have their snapshots created. The import is
    best-effort: a failure on one entity prints a warning and the remaining
    entities are still processed.

    Args:
        editor: The Editor instance
        structured_data: The structured data containing entity references
    """
    for entity_uri in collect_referenced_entities(structured_data):
        try:
            editor.import_entity(entity_uri)
        except Exception as e:
            # Deliberately broad: one failed import must not abort the rest.
            print(f"Warning: Could not import entity {entity_uri}: {e}")