Coverage for heritrace/utils/sparql_utils.py: 80%
322 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-06-24 11:39 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-06-24 11:39 +0000
1import os
2from collections import defaultdict
3from concurrent.futures import ProcessPoolExecutor, as_completed
4from typing import List
6from heritrace.editor import Editor
7from heritrace.extensions import (get_change_tracking_config,
8 get_classes_with_multiple_shapes,
9 get_custom_filter, get_dataset_is_quadstore,
10 get_display_rules, get_provenance_sparql,
11 get_sparql)
12from heritrace.utils.converters import convert_to_datetime
13from heritrace.utils.display_rules_utils import (find_matching_rule,
14 get_highest_priority_class,
15 get_sortable_properties,
16 is_entity_type_visible)
17from heritrace.utils.shacl_utils import (determine_shape_for_classes,
18 determine_shape_for_entity_triples)
19from heritrace.utils.virtuoso_utils import (VIRTUOSO_EXCLUDED_GRAPHS,
20 is_virtuoso)
21from rdflib import RDF, ConjunctiveGraph, Graph, Literal, URIRef
22from rdflib.plugins.sparql.algebra import translateUpdate
23from rdflib.plugins.sparql.parser import parseUpdate
24from SPARQLWrapper import JSON
25from time_agnostic_library.agnostic_entity import AgnosticEntity
def _get_entities_with_enhanced_shape_detection(class_uri: str, classes_with_multiple_shapes: set):
    """
    Group the visible instances of a class by the shape detected from their triples.

    All triples of every instance of ``class_uri`` are fetched with a single
    query and grouped per subject. Each subject is then assigned the shape
    returned by ``determine_shape_for_entity_triples``; subjects for which no
    shape is detected, or whose (class, shape) pair is not visible, are skipped.

    Args:
        class_uri: The class URI to get entities for
        classes_with_multiple_shapes: Set of classes that have multiple shapes
            (kept for interface compatibility; this function does not read it)

    Returns:
        Dict[str, List]: Dictionary mapping shape URIs to lists of entity info
        dicts with the keys "uri", "class" and "shape"
    """
    sparql = get_sparql()

    if is_virtuoso():
        # On Virtuoso the query is scoped to named graphs so that the excluded
        # graphs can be filtered out.
        excluded_graphs = '>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)
        query = f"""
            SELECT DISTINCT ?subject ?p ?o
            WHERE {{
                GRAPH ?g {{
                    ?subject a <{class_uri}> .
                    ?subject ?p ?o .
                }}
                FILTER(?g NOT IN (<{excluded_graphs}>))
            }}
        """
    else:
        query = f"""
            SELECT DISTINCT ?subject ?p ?o
            WHERE {{
                ?subject a <{class_uri}> .
                ?subject ?p ?o .
            }}
        """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    # Collect the (s, p, o) value tuples of each subject.
    triples_by_subject = defaultdict(list)
    for row in results["results"]["bindings"]:
        subject_uri = row["subject"]["value"]
        triples_by_subject[subject_uri].append(
            (subject_uri, row["p"]["value"], row["o"]["value"])
        )

    entities_by_shape = defaultdict(list)
    for subject_uri, subject_triples in triples_by_subject.items():
        detected_shape = determine_shape_for_entity_triples(subject_triples)
        if not detected_shape:
            continue
        if not is_entity_type_visible((class_uri, detected_shape)):
            continue
        entities_by_shape[detected_shape].append(
            {"uri": subject_uri, "class": class_uri, "shape": detected_shape}
        )

    return entities_by_shape
def get_available_classes():
    """
    List the entity classes present in the triplestore, with instance counts.

    Classes that have multiple shapes produce one entry per detected shape,
    counted from the entities actually matching that shape; every other class
    produces a single entry (when visible) carrying the count returned by the
    aggregate query.

    Returns:
        list: Dictionaries with the keys "uri", "label", "count" and "shape",
        sorted case-insensitively by label
    """
    sparql = get_sparql()
    custom_filter = get_custom_filter()

    if is_virtuoso():
        excluded_graphs = '>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)
        classes_query = f"""
            SELECT DISTINCT ?class (COUNT(DISTINCT ?subject) as ?count)
            WHERE {{
                GRAPH ?g {{
                    ?subject a ?class .
                }}
                FILTER(?g NOT IN (<{excluded_graphs}>))
            }}
            GROUP BY ?class
            ORDER BY DESC(?count)
        """
    else:
        classes_query = """
            SELECT DISTINCT ?class (COUNT(DISTINCT ?subject) as ?count)
            WHERE {
                ?subject a ?class .
            }
            GROUP BY ?class
            ORDER BY DESC(?count)
        """

    sparql.setQuery(classes_query)
    sparql.setReturnFormat(JSON)
    classes_results = sparql.query().convert()

    classes_with_multiple_shapes = get_classes_with_multiple_shapes()

    def _entry(uri, shape, count):
        # The label is resolved from the (class, shape) pair.
        return {
            "uri": uri,
            "label": custom_filter.human_readable_class((uri, shape)),
            "count": count,
            "shape": shape,
        }

    collected = []
    for row in classes_results["results"]["bindings"]:
        class_uri = row["class"]["value"]
        total_count = int(row["count"]["value"])

        if class_uri not in classes_with_multiple_shapes:
            shape_uri = determine_shape_for_classes([class_uri])
            if is_entity_type_visible((class_uri, shape_uri)):
                collected.append(_entry(class_uri, shape_uri, total_count))
            continue

        # The helper already drops entities whose (class, shape) is not visible.
        shape_to_entities = _get_entities_with_enhanced_shape_detection(
            class_uri, classes_with_multiple_shapes
        )
        for shape_uri, shape_entities in shape_to_entities.items():
            if shape_entities:
                collected.append(_entry(class_uri, shape_uri, len(shape_entities)))

    return sorted(collected, key=lambda entry: entry["label"].lower())
def build_sort_clause(sort_property: str, entity_type: str, shape_uri: str = None) -> str:
    """
    Produce the SPARQL pattern that binds ``?sortValue`` for a sortable property.

    The pattern is only produced when the display rule matching the entity
    type (and shape) lists ``sort_property`` among its "sortableBy" entries.

    Args:
        sort_property: The property to sort by
        entity_type: The entity type URI
        shape_uri: Optional shape URI for more specific sorting rules

    Returns:
        An ``OPTIONAL { ... }`` pattern binding ``?sortValue``, or an empty
        string when sorting by that property is not configured
    """
    if not (sort_property and entity_type):
        return ""

    rule = find_matching_rule(entity_type, shape_uri)
    if not rule or "sortableBy" not in rule:
        return ""

    is_sortable = any(
        entry.get("property") == sort_property for entry in rule["sortableBy"]
    )
    if is_sortable:
        return f"OPTIONAL {{ ?subject <{sort_property}> ?sortValue }}"
    return ""
def get_entities_for_class(
    selected_class, page, per_page, sort_property=None, sort_direction="ASC", selected_shape=None
):
    """
    Retrieve entities for a specific class with pagination and sorting.

    Two strategies are used:

    * When ``selected_shape`` is given and ``selected_class`` has multiple
      shapes, all triples of the class instances are fetched, each entity's
      shape is detected in Python, and filtering, sorting and pagination are
      done in memory. In this mode sorting is always by the entity label;
      ``sort_property`` only switches sorting on.
    * Otherwise sorting and pagination are delegated to the triplestore via
      ORDER BY / LIMIT / OFFSET, with a separate COUNT query for the total.

    Args:
        selected_class (str): URI of the class to fetch entities for
        page (int): Current page number (1-based)
        per_page (int): Number of items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC',
            case-insensitive). Any other value is treated as 'ASC' when the
            ORDER BY clause is built.
        selected_shape (str, optional): URI of the shape to filter by

    Returns:
        tuple: (list of {"uri", "label"} dicts for the page, total count)
    """
    sparql = get_sparql()
    custom_filter = get_custom_filter()
    classes_with_multiple_shapes = get_classes_with_multiple_shapes()
    offset = (page - 1) * per_page

    use_shape_filtering = (selected_shape and
                           selected_class in classes_with_multiple_shapes)

    if use_shape_filtering:
        if is_virtuoso():
            excluded_graphs = '>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)
            query = f"""
                SELECT DISTINCT ?subject ?p ?o
                WHERE {{
                    GRAPH ?g {{
                        ?subject a <{selected_class}> .
                        ?subject ?p ?o .
                    }}
                    FILTER(?g NOT IN (<{excluded_graphs}>))
                }}
            """
        else:
            query = f"""
                SELECT DISTINCT ?subject ?p ?o
                WHERE {{
                    ?subject a <{selected_class}> .
                    ?subject ?p ?o .
                }}
            """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        # Group the triples per subject so the shape can be detected per entity.
        entities_triples = defaultdict(list)
        for binding in results["results"]["bindings"]:
            subject = binding["subject"]["value"]
            predicate = binding["p"]["value"]
            obj = binding["o"]["value"]
            entities_triples[subject].append((subject, predicate, obj))

        filtered_entities = []
        for subject_uri, triples in entities_triples.items():
            entity_shape = determine_shape_for_entity_triples(list(triples))
            if entity_shape == selected_shape:
                entity_label = custom_filter.human_readable_entity(
                    subject_uri, (selected_class, selected_shape), None
                )
                filtered_entities.append({"uri": subject_uri, "label": entity_label})

        if sort_property and sort_direction:
            reverse_sort = sort_direction.upper() == "DESC"
            filtered_entities.sort(key=lambda x: x["label"].lower(), reverse=reverse_sort)

        total_count = len(filtered_entities)
        return filtered_entities[offset:offset + per_page], total_count

    # The direction is interpolated verbatim into the query text, so only the
    # two SPARQL order modifiers are allowed through; anything else would give
    # a malformed (or injected) ORDER BY and falls back to ascending order.
    order_keyword = str(sort_direction or "").upper()
    if order_keyword not in ("ASC", "DESC"):
        order_keyword = "ASC"

    sort_clause = ""
    order_clause = "ORDER BY ?subject"
    if sort_property:
        sort_clause = build_sort_clause(sort_property, selected_class, selected_shape)
        if sort_clause:
            order_clause = f"ORDER BY {order_keyword}(?sortValue)"

    # ?sortValue is projected whenever a sort property was requested, even if
    # no sort clause could be built for it (it is then simply unbound).
    sort_projection = "?sortValue" if sort_property else ""

    if is_virtuoso():
        excluded_graphs = '>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)
        entities_query = f"""
            SELECT DISTINCT ?subject {sort_projection}
            WHERE {{
                GRAPH ?g {{
                    ?subject a <{selected_class}> .
                    {sort_clause}
                }}
                FILTER(?g NOT IN (<{excluded_graphs}>))
            }}
            {order_clause}
            LIMIT {per_page}
            OFFSET {offset}
        """

        count_query = f"""
            SELECT (COUNT(DISTINCT ?subject) as ?count)
            WHERE {{
                GRAPH ?g {{
                    ?subject a <{selected_class}> .
                }}
                FILTER(?g NOT IN (<{excluded_graphs}>))
            }}
        """
    else:
        entities_query = f"""
            SELECT DISTINCT ?subject {sort_projection}
            WHERE {{
                ?subject a <{selected_class}> .
                {sort_clause}
            }}
            {order_clause}
            LIMIT {per_page}
            OFFSET {offset}
        """

        count_query = f"""
            SELECT (COUNT(DISTINCT ?subject) as ?count)
            WHERE {{
                ?subject a <{selected_class}> .
            }}
        """

    sparql.setQuery(count_query)
    sparql.setReturnFormat(JSON)
    count_results = sparql.query().convert()
    total_count = int(count_results["results"]["bindings"][0]["count"]["value"])

    sparql.setQuery(entities_query)
    entities_results = sparql.query().convert()

    bindings = entities_results["results"]["bindings"]
    if not bindings:
        return [], total_count

    # The shape does not depend on the individual row, so resolve it once.
    shape = selected_shape if selected_shape else determine_shape_for_classes([selected_class])

    entities = []
    for result in bindings:
        subject_uri = result["subject"]["value"]
        entity_label = custom_filter.human_readable_entity(
            subject_uri, (selected_class, shape), None
        )
        entities.append({"uri": subject_uri, "label": entity_label})

    return entities, total_count
def get_catalog_data(
    selected_class: str,
    page: int,
    per_page: int,
    sort_property: str = None,
    sort_direction: str = "ASC",
    selected_shape: str = None
) -> dict:
    """
    Assemble the catalog view data for one class: entities, paging and sorting.

    When no class is selected nothing is queried and an empty catalog is
    returned. When no sort property is given, the first sortable property
    configured for the (class, shape) pair is used, if there is one.

    Args:
        selected_class (str): Selected class URI
        page (int): Current page number
        per_page (int): Items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC')
        selected_shape (str, optional): URI of the shape to use for sorting rules

    Returns:
        dict: Catalog data including entities, pagination info, and sort settings
    """
    entities, total_count, sortable_properties = [], 0, []

    if selected_class:
        sortable_properties = get_sortable_properties((selected_class, selected_shape))

        if sortable_properties and not sort_property:
            sort_property = sortable_properties[0]["property"]

        entities, total_count = get_entities_for_class(
            selected_class, page, per_page, sort_property, sort_direction, selected_shape
        )

    # Ceiling division; an empty catalog has zero pages.
    if total_count > 0:
        total_pages = (total_count + per_page - 1) // per_page
    else:
        total_pages = 0

    return {
        "entities": entities,
        "total_pages": total_pages,
        "current_page": page,
        "per_page": per_page,
        "total_count": total_count,
        "sort_property": sort_property,
        "sort_direction": sort_direction,
        "sortable_properties": sortable_properties,
        "selected_class": selected_class,
        "selected_shape": selected_shape,
    }
def fetch_data_graph_for_subject(subject: str) -> Graph | ConjunctiveGraph:
    """
    Fetch all triples/quads associated with a subject from the dataset.
    Handles both triplestore and quadstore cases appropriately.

    Objects are rebuilt from the SPARQL JSON bindings: literals keep their
    datatype or, when no datatype is reported, their language tag
    ("xml:lang"); every non-literal binding is turned into a URIRef.

    Args:
        subject (str): The URI of the subject to fetch data for

    Returns:
        Graph|ConjunctiveGraph: A graph containing all triples/quads for the
        subject (ConjunctiveGraph when the dataset is a quadstore)
    """
    # Read the store kind once; it decides both the container type and whether
    # each row is added as a quad.
    is_quadstore = get_dataset_is_quadstore()
    g = ConjunctiveGraph() if is_quadstore else Graph()
    sparql = get_sparql()

    if is_virtuoso():
        # For virtuoso we need to explicitly query the graph
        excluded_graphs = '>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)
        query = f"""
            SELECT ?predicate ?object ?g WHERE {{
                GRAPH ?g {{
                    <{subject}> ?predicate ?object.
                }}
                FILTER(?g NOT IN (<{excluded_graphs}>))
            }}
        """
    elif is_quadstore:
        # For non-virtuoso quadstore, we need to query all graphs
        query = f"""
            SELECT ?predicate ?object ?g WHERE {{
                GRAPH ?g {{
                    <{subject}> ?predicate ?object.
                }}
            }}
        """
    else:
        # For regular triplestore
        query = f"""
            SELECT ?predicate ?object WHERE {{
                <{subject}> ?predicate ?object.
            }}
        """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    query_results = sparql.query().convert()
    results = query_results.get("results", {}).get("bindings", [])

    subject_ref = URIRef(subject)
    for result in results:
        # Create the appropriate value (Literal or URIRef)
        obj_data = result["object"]
        if obj_data["type"] in {"literal", "typed-literal"}:
            if "datatype" in obj_data:
                value = Literal(
                    obj_data["value"], datatype=URIRef(obj_data["datatype"])
                )
            elif "xml:lang" in obj_data:
                # Keep the language tag: a plain Literal("x") is a different
                # RDF term from "x"@en and would not match the stored triple.
                value = Literal(obj_data["value"], lang=obj_data["xml:lang"])
            else:
                value = Literal(obj_data["value"])
        else:
            value = URIRef(obj_data["value"])

        predicate_ref = URIRef(result["predicate"]["value"])

        # Add triple/quad based on store type
        if is_quadstore:
            graph_uri = URIRef(result["g"]["value"])
            g.add((subject_ref, predicate_ref, value, graph_uri))
        else:
            g.add((subject_ref, predicate_ref, value))

    return g
def parse_sparql_update(query) -> dict:
    """
    Extract the triples added and removed by a SPARQL update string.

    Only ``DeleteData`` and ``InsertData`` operations are inspected; any other
    operation in the update is ignored. Quads are flattened to
    (subject, predicate, object) tuples, dropping the graph.

    Args:
        query: The SPARQL update text to parse

    Returns:
        dict: May contain the keys "Deletions" and/or "Additions", each mapping
        to a list of triples; a key is absent when nothing was collected for it
    """
    algebra = translateUpdate(parseUpdate(query)).algebra
    bucket_by_operation = {"DeleteData": "Deletions", "InsertData": "Additions"}

    def flatten_quads(quads):
        # The graph key is dropped; only the s/p/o of each statement is kept.
        return [
            (statement[0], statement[1], statement[2])
            for graph_statements in quads.values()
            for statement in graph_statements
        ]

    modifications = {}
    for operation in algebra:
        bucket = bucket_by_operation.get(operation.name)
        if bucket is None:
            continue

        if hasattr(operation, "quads") and operation.quads:
            changed = flatten_quads(operation.quads)
        else:
            changed = operation.triples

        if changed:
            modifications.setdefault(bucket, []).extend(changed)

    return modifications
def fetch_current_state_with_related_entities(
    provenance: dict,
) -> Graph | ConjunctiveGraph:
    """
    Build one graph holding the current data of every entity listed in provenance.

    Each key of ``provenance`` is treated as an entity URI whose current
    triples/quads are fetched and merged into a single graph.

    Args:
        provenance (dict): Dictionary containing provenance metadata for main entity and related entities

    Returns:
        Graph|ConjunctiveGraph: A graph containing the current state of all
        entities (ConjunctiveGraph when the dataset is a quadstore)
    """
    merged = ConjunctiveGraph() if get_dataset_is_quadstore() else Graph()

    for entity_uri in provenance:
        entity_graph = fetch_data_graph_for_subject(entity_uri)

        # Quadstores are copied quad by quad; plain graphs triple by triple.
        statements = entity_graph.quads() if get_dataset_is_quadstore() else entity_graph
        for statement in statements:
            merged.add(statement)

    return merged
def get_deleted_entities_with_filtering(
    page=1,
    per_page=50,
    sort_property="deletionTime",
    sort_direction="DESC",
    selected_class=None,
    selected_shape=None,
):
    """
    Fetch and process deleted entities from the provenance graph, with filtering and sorting.

    A deleted entity is identified by a snapshot that has both a generation
    and an invalidation time, is derived from a previous snapshot, and has no
    later snapshot derived from it. Each match is processed in a worker
    process (see ``process_deleted_entity``) to rebuild its last valid state.

    When ``selected_class`` is supplied by the caller, the sortable properties
    configured for it are resolved *before* the workers run, so that their
    values are extracted and sorting by them is effective. When the class is
    only chosen by default afterwards (first available class), values are
    extracted for "deletionTime" only.

    Args:
        page (int): Current page number (1-based)
        per_page (int): Number of items per page
        sort_property (str): "deletionTime" or the URI of a sortable property
        sort_direction (str): 'ASC' or 'DESC' (case-insensitive)
        selected_class (str, optional): Class URI to filter the entities by;
            defaults to the first available class
        selected_shape (str, optional): Shape URI paired with the class;
            determined from the class when omitted

    Returns:
        tuple: (paginated_entities, available_classes, selected_class,
        selected_shape, sortable_properties, total_count). When provenance
        reports no deleted entity the result is ``([], [], None, None, [], 0)``.
    """
    sortable_properties = [
        {"property": "deletionTime", "displayName": "Deletion Time", "sortType": "date"}
    ]
    provenance_sparql = get_provenance_sparql()
    custom_filter = get_custom_filter()

    prov_query = """
        SELECT DISTINCT ?entity ?lastSnapshot ?deletionTime ?agent ?lastValidSnapshotTime
        WHERE {
            ?lastSnapshot a <http://www.w3.org/ns/prov#Entity> ;
                <http://www.w3.org/ns/prov#specializationOf> ?entity ;
                <http://www.w3.org/ns/prov#generatedAtTime> ?deletionTime ;
                <http://www.w3.org/ns/prov#invalidatedAtTime> ?invalidationTime ;
                <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastValidSnapshot.

            ?lastValidSnapshot <http://www.w3.org/ns/prov#generatedAtTime> ?lastValidSnapshotTime .

            OPTIONAL { ?lastSnapshot <http://www.w3.org/ns/prov#wasAttributedTo> ?agent . }

            FILTER NOT EXISTS {
                ?laterSnapshot <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastSnapshot .
            }
        }
    """
    provenance_sparql.setQuery(prov_query)
    provenance_sparql.setReturnFormat(JSON)
    prov_results = provenance_sparql.query().convert()

    results_bindings = prov_results["results"]["bindings"]
    if not results_bindings:
        return [], [], None, None, [], 0

    # The workers only extract sort values for the properties they are handed,
    # so a caller-selected class must contribute its properties before dispatch.
    class_properties_resolved = False
    if selected_class:
        selected_shape, class_properties = _resolve_shape_and_sortable_properties(
            selected_class, selected_shape
        )
        sortable_properties.extend(class_properties)
        class_properties_resolved = True

    deleted_entities = []
    max_workers = max(1, min(os.cpu_count() or 4, len(results_bindings)))
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        future_to_entity = {
            executor.submit(process_deleted_entity, result, sortable_properties): result
            for result in results_bindings
        }
        for future in as_completed(future_to_entity):
            entity_info = future.result()
            if entity_info is not None:
                deleted_entities.append(entity_info)

    # Count how many deleted entities carry each (visible) type.
    class_counts = {}
    for entity in deleted_entities:
        for type_uri in entity["entity_types"]:
            class_counts[type_uri] = class_counts.get(type_uri, 0) + 1

    available_classes = [
        {
            "uri": class_uri,
            "label": custom_filter.human_readable_class((class_uri, determine_shape_for_classes([class_uri]))),
            "count": count,
        }
        for class_uri, count in class_counts.items()
    ]

    reverse_sort = sort_direction.upper() == "DESC"
    if sort_property == "deletionTime":
        deleted_entities.sort(key=lambda e: e["deletionTime"], reverse=reverse_sort)
    else:
        deleted_entities.sort(
            key=lambda e: e["sort_values"].get(sort_property, "").lower(),
            reverse=reverse_sort,
        )

    available_classes.sort(key=lambda x: x["label"].lower())
    if not selected_class and available_classes:
        selected_class = available_classes[0]["uri"]

    if selected_class and not class_properties_resolved:
        selected_shape, class_properties = _resolve_shape_and_sortable_properties(
            selected_class, selected_shape
        )
        sortable_properties.extend(class_properties)

    if selected_class:
        filtered_entities = [
            entity
            for entity in deleted_entities
            if selected_class in entity["entity_types"]
        ]
    else:
        filtered_entities = deleted_entities

    total_count = len(filtered_entities)
    offset = (page - 1) * per_page
    paginated_entities = filtered_entities[offset : offset + per_page]

    return paginated_entities, available_classes, selected_class, selected_shape, sortable_properties, total_count


def _resolve_shape_and_sortable_properties(selected_class, selected_shape):
    """Return the shape to pair with a class and the sortable properties configured for that pair."""
    if selected_shape is None:
        selected_shape = determine_shape_for_classes([selected_class])
    return selected_shape, get_sortable_properties((selected_class, selected_shape))
def process_deleted_entity(result: dict, sortable_properties: list) -> dict | None:
    """
    Build the display record of one deleted entity from its last valid state.

    The entity state is reconstructed at the time of its last valid snapshot.
    Entities that are missing from the reconstructed state, or that have no
    visible type, are discarded.

    Args:
        result: One binding row of the deleted-entities provenance query; the
            keys "entity", "lastValidSnapshotTime" and "deletionTime" are
            read, plus the optional "agent"
        sortable_properties: Property descriptors (dicts with a "property"
            key) whose first value is extracted for sorting

    Returns:
        dict | None: The entity record, or None when the entity is discarded
    """
    change_tracking_config = get_change_tracking_config()
    custom_filter = get_custom_filter()

    entity_uri = result["entity"]["value"]
    snapshot_time = result["lastValidSnapshotTime"]["value"]

    history = AgnosticEntity(
        res=entity_uri,
        config=change_tracking_config,
        include_related_objects=True,
        include_merged_entities=True,
        include_reverse_relations=True,
    )
    state, _, _ = history.get_state_at_time((snapshot_time, snapshot_time))
    if entity_uri not in state:
        return None

    # The per-entity state is keyed by the stringified snapshot time.
    snapshot_key = convert_to_datetime(snapshot_time, stringify=True)
    last_valid_state: ConjunctiveGraph = state[entity_uri][snapshot_key]
    entity_ref = URIRef(entity_uri)

    entity_types = [
        str(rdf_type)
        for _, _, rdf_type in last_valid_state.triples((entity_ref, RDF.type, None))
    ]
    highest_priority_type = get_highest_priority_class(entity_types)
    shape = determine_shape_for_classes([highest_priority_type])

    visible_types = [
        type_uri
        for type_uri in entity_types
        if is_entity_type_visible((type_uri, determine_shape_for_classes([type_uri])))
    ]
    if not visible_types:
        return None

    # Keep the first value found for each sortable property ("" when absent).
    sort_values = {}
    for prop in sortable_properties:
        prop_uri = prop["property"]
        matches = last_valid_state.triples((entity_ref, URIRef(prop_uri), None))
        sort_values[prop_uri] = next((str(obj) for _, _, obj in matches), "")

    entity_key = (highest_priority_type, shape)
    return {
        "uri": entity_uri,
        "deletionTime": result["deletionTime"]["value"],
        "deletedBy": custom_filter.format_agent_reference(
            result.get("agent", {}).get("value", "")
        ),
        "lastValidSnapshotTime": snapshot_time,
        "type": custom_filter.human_readable_predicate(
            highest_priority_type, entity_key
        ),
        "label": custom_filter.human_readable_entity(
            entity_uri, entity_key, last_valid_state
        ),
        "entity_types": visible_types,
        "sort_values": sort_values,
    }
def find_orphaned_entities(subject, entity_type, predicate=None, object_value=None):
    """
    Find entities that would become orphaned after deleting a triple or an entire entity,
    including intermediate relation entities.

    An entity is considered orphaned if:
    1. It has no incoming references from other entities (except from the entity being deleted)
    2. It does not reference any entities that are subjects of other triples

    For intermediate relations, an entity is also considered orphaned if:
    1. It connects to the entity being deleted
    2. It has no other valid connections after the deletion
    3. It is directly involved in the deletion operation (if predicate and object_value are specified)

    Candidate entities are always anchored to ``subject``: through
    ``predicate`` when one is given (further restricted to ``object_value``
    when that is given too), through any predicate otherwise.

    Args:
        subject (str): The URI of the subject being deleted
        entity_type (str): The type of the entity being deleted
        predicate (str, optional): The predicate being deleted
        object_value (str, optional): The object value being deleted

    Returns:
        tuple: Lists of (orphaned_entities, intermediate_orphans); each item
        is a dict with the keys "uri" and "type"
    """
    sparql = get_sparql()
    display_rules = get_display_rules()

    # Intermediate relation classes declared by the display rules of this type.
    intermediate_classes = set()
    for rule in display_rules:
        if "target" in rule and "class" in rule["target"] and rule["target"]["class"] == entity_type:
            for prop in rule.get("displayProperties", []):
                if "intermediateRelation" in prop:
                    intermediate_classes.add(prop["intermediateRelation"]["class"])

    # Rendered once and reused in every IN / NOT IN list below. With no
    # intermediate class configured this renders as the single term "<>".
    intermediate_list = ">, <".join(intermediate_classes)

    if predicate:
        # Always bind ?entity through the deleted predicate. Without this
        # pattern a predicate-only call would leave ?entity unrelated to the
        # subject and match orphan-looking entities across the whole dataset.
        link_pattern = f"<{subject}> <{predicate}> ?entity ."
        object_filter = f"FILTER(?entity = <{object_value}>)" if object_value else ""
        self_reference_filter = ""
    else:
        link_pattern = f"<{subject}> ?p ?entity ."
        object_filter = ""
        self_reference_filter = f"FILTER(?connectedEntity != <{subject}>)"

    orphan_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            # Entities linked from the subject being deleted
            {link_pattern}
            {object_filter}

            FILTER(isIRI(?entity))
            ?entity a ?type .

            # No incoming references from other entities
            FILTER NOT EXISTS {{
                ?other ?anyPredicate ?entity .
                FILTER(?other != <{subject}>)
            }}

            # No outgoing references to active entities
            FILTER NOT EXISTS {{
                ?entity ?outgoingPredicate ?connectedEntity .
                ?connectedEntity ?furtherPredicate ?furtherObject .
                {self_reference_filter}
            }}

            # Exclude intermediate relation entities
            FILTER(?type NOT IN (<{intermediate_list}>))
        }}
    """

    # Query to find orphaned intermediate relations
    if predicate and object_value:
        intermediate_query = f"""
            SELECT DISTINCT ?entity ?type
            WHERE {{
                <{object_value}> a ?type .
                FILTER(?type IN (<{intermediate_list}>))
                BIND(<{object_value}> AS ?entity)
            }}
        """
    else:
        # When the whole entity is being deleted, find every intermediate
        # entity connected to it (in either direction).
        intermediate_query = f"""
            SELECT DISTINCT ?entity ?type
            WHERE {{
                # Find intermediate relations connected to the entity being deleted
                {{
                    <{subject}> ?p ?entity .
                    ?entity a ?type .
                    FILTER(?type IN (<{intermediate_list}>))
                }} UNION {{
                    ?entity ?p <{subject}> .
                    ?entity a ?type .
                    FILTER(?type IN (<{intermediate_list}>))
                }}
            }}
        """

    orphaned = []
    intermediate_orphans = []

    # Execute queries and process results
    for query, result_list in [
        (orphan_query, orphaned),
        (intermediate_query, intermediate_orphans),
    ]:
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            result_list.append(
                {"uri": result["entity"]["value"], "type": result["type"]["value"]}
            )

    return orphaned, intermediate_orphans
def import_entity_graph(editor: Editor, subject: str, max_depth: int = 5, include_referencing_entities: bool = False):
    """
    Import the main subject and its connected entity graph up to a specified depth.

    This function imports the specified subject and all entities connected to it,
    directly or indirectly, up to the maximum depth specified. The graph is
    traversed breadth-first, so every entity is expanded at its shortest
    distance from ``subject``; an entity first met at the depth limit through
    a long path can therefore not hide its own neighbours that are still
    within the limit through a shorter path.

    Args:
        editor (Editor): The Editor instance to use for importing.
        subject (str): The URI of the subject to start the import from.
        max_depth (int): The maximum depth of traversal (default is 5); the
            subject itself is at depth 1, so a value below 1 imports nothing
            through the traversal.
        include_referencing_entities (bool): Whether to include entities that have the subject as their object (default False).
            Useful when deleting an entity to ensure all references are properly removed.
            rdf:type references are ignored. These entities are imported but not expanded.

    Returns:
        Editor: The updated Editor instance with all imported entities.
    """
    imported_subjects = set()

    # First import referencing entities if needed
    if include_referencing_entities:
        sparql = get_sparql()

        # Build query based on database type
        if editor.dataset_is_quadstore:
            query = f"""
                SELECT DISTINCT ?s
                WHERE {{
                    GRAPH ?g {{
                        ?s ?p <{subject}> .
                    }}
                    FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
                }}
            """
        else:
            query = f"""
                SELECT DISTINCT ?s
                WHERE {{
                    ?s ?p <{subject}> .
                    FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
                }}
            """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        # Import each referencing entity
        for result in results["results"]["bindings"]:
            referencing_subject = result["s"]["value"]
            if referencing_subject != subject and referencing_subject not in imported_subjects:
                imported_subjects.add(referencing_subject)
                editor.import_entity(URIRef(referencing_subject))

    # Level-by-level traversal: `frontier` holds the entities at `depth`.
    frontier = [subject]
    depth = 1
    while frontier and depth <= max_depth:
        next_frontier = []
        for current_subject in frontier:
            if current_subject in imported_subjects:
                continue

            imported_subjects.add(current_subject)
            editor.import_entity(URIRef(current_subject))

            if depth == max_depth:
                # Its objects would lie beyond max_depth: no need to query them.
                continue

            query = f"""
                SELECT ?p ?o
                WHERE {{
                    <{current_subject}> ?p ?o .
                    FILTER(isIRI(?o))
                }}
            """

            sparql = get_sparql()
            sparql.setQuery(query)
            sparql.setReturnFormat(JSON)
            results = sparql.query().convert()

            next_frontier.extend(
                result["o"]["value"] for result in results["results"]["bindings"]
            )

        frontier = next_frontier
        depth += 1

    return editor
def get_entity_types(subject_uri: str) -> List[str]:
    """
    Return every rdf:type asserted for an entity.

    Args:
        subject_uri: URI of the entity

    Returns:
        List of type URIs, in the order returned by the endpoint
    """
    sparql = get_sparql()
    sparql.setQuery(f"""
        SELECT ?type WHERE {{
            <{subject_uri}> a ?type .
        }}
    """)
    sparql.setReturnFormat(JSON)

    bindings = sparql.query().convert()["results"]["bindings"]
    return [row["type"]["value"] for row in bindings]