Coverage for heritrace/utils/sparql_utils.py: 75%

252 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-04-18 11:10 +0000

1import os 

2from concurrent.futures import ProcessPoolExecutor, as_completed 

3from typing import List 

4 

5from heritrace.editor import Editor 

6from heritrace.extensions import (display_rules, form_fields_cache, 

7 get_change_tracking_config, 

8 get_custom_filter, get_dataset_is_quadstore, 

9 get_display_rules, get_provenance_sparql, 

10 get_sparql) 

11from heritrace.utils.converters import convert_to_datetime 

12from heritrace.utils.display_rules_utils import (get_highest_priority_class, 

13 get_sortable_properties, 

14 is_entity_type_visible) 

15from heritrace.utils.virtuoso_utils import (VIRTUOSO_EXCLUDED_GRAPHS, 

16 is_virtuoso) 

17from rdflib import RDF, ConjunctiveGraph, Graph, Literal, URIRef 

18from rdflib.plugins.sparql.algebra import translateUpdate 

19from rdflib.plugins.sparql.parser import parseUpdate 

20from SPARQLWrapper import JSON, SPARQLWrapper 

21from time_agnostic_library.agnostic_entity import AgnosticEntity 

22 

23 

def get_available_classes():
    """
    Query the triplestore for every entity class and its number of subjects.

    Classes for which ``is_entity_type_visible`` returns a falsy value are
    skipped. The remaining entries are ordered case-insensitively by label.

    Returns:
        list: Dictionaries with the keys "uri", "label" and "count"
    """
    sparql = get_sparql()
    custom_filter = get_custom_filter()

    if is_virtuoso():
        # On Virtuoso the pattern is wrapped in GRAPH ?g so that the graphs
        # listed in VIRTUOSO_EXCLUDED_GRAPHS can be filtered out.
        excluded_graphs = '>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)
        classes_query = f"""
            SELECT DISTINCT ?class (COUNT(DISTINCT ?subject) as ?count)
            WHERE {{
                GRAPH ?g {{
                    ?subject a ?class .
                }}
                FILTER(?g NOT IN (<{excluded_graphs}>))
            }}
            GROUP BY ?class
            ORDER BY DESC(?count)
        """
    else:
        classes_query = """
            SELECT DISTINCT ?class (COUNT(DISTINCT ?subject) as ?count)
            WHERE {
                ?subject a ?class .
            }
            GROUP BY ?class
            ORDER BY DESC(?count)
        """

    sparql.setQuery(classes_query)
    sparql.setReturnFormat(JSON)
    bindings = sparql.query().convert()["results"]["bindings"]

    visible_classes = []
    for binding in bindings:
        class_uri = binding["class"]["value"]
        if not is_entity_type_visible(class_uri):
            continue
        visible_classes.append(
            {
                "uri": class_uri,
                "label": custom_filter.human_readable_predicate(
                    class_uri, [class_uri]
                ),
                "count": int(binding["count"]["value"]),
            }
        )

    # The query orders by count; the returned list is ordered by label instead.
    return sorted(visible_classes, key=lambda entry: entry["label"].lower())

75 

76 

def build_sort_clause(sort_property: str, entity_type: str, display_rules) -> str:
    """
    Build the SPARQL pattern that binds ?sortValue, based on the sortableBy configuration.

    Only the first rule whose "class" equals ``entity_type`` and that has a
    "sortableBy" key is consulted.

    Args:
        sort_property: The property to sort by
        entity_type: The type of the entity
        display_rules: Iterable of rule dictionaries, each with a "class" key
            and optionally a "sortableBy" list of {"property": ...} entries

    Returns:
        An OPTIONAL clause binding ?sortValue, or an empty string when the
        property is not declared sortable for the entity type
    """
    if not (display_rules and sort_property):
        return ""

    # Locate the rule that carries the sort configuration for this type.
    matching_rule = next(
        (
            rule
            for rule in display_rules
            if rule["class"] == entity_type and "sortableBy" in rule
        ),
        None,
    )
    if matching_rule is None:
        return ""

    declared_sortable = any(
        entry["property"] == sort_property for entry in matching_rule["sortableBy"]
    )
    if declared_sortable:
        return f"OPTIONAL {{ ?subject <{sort_property}> ?sortValue }}"
    return ""

104 

105 

def get_entities_for_class(
    selected_class, page, per_page, sort_property=None, sort_direction="ASC"
):
    """
    Retrieve one page of entities of a class, optionally sorted by a property.

    Sorting by ``sort_property`` is applied only when ``build_sort_clause``
    yields a clause for it; otherwise the results are ordered by subject URI
    so that pagination stays deterministic.

    Args:
        selected_class (str): URI of the class to fetch entities for
        page (int): Current page number (values below 1 are treated as 1)
        per_page (int): Number of items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC');
            any other value falls back to 'ASC'

    Returns:
        tuple: (list of {"uri", "label"} dictionaries, total count)
    """
    sparql = get_sparql()
    custom_filter = get_custom_filter()

    # Clamp so that a page below 1 cannot produce a negative OFFSET.
    offset = max(page - 1, 0) * per_page

    # The direction is interpolated into the query text, so only the two
    # SPARQL order modifiers are accepted.
    direction = str(sort_direction).upper()
    if direction not in ("ASC", "DESC"):
        direction = "ASC"

    sort_clause = ""
    if sort_property:
        sort_clause = build_sort_clause(sort_property, selected_class, display_rules)

    if sort_clause:
        projection = "?subject ?sortValue"
        # ?subject breaks ties (and orders unbound values) across pages.
        order_clause = f"ORDER BY {direction}(?sortValue) ?subject"
    else:
        # No usable sort configuration: never order by an unbound variable.
        projection = "?subject"
        order_clause = "ORDER BY ?subject"

    class_pattern = f"?subject a <{selected_class}> ."

    # Build the WHERE bodies based on database type
    if is_virtuoso():
        excluded_graphs = ">, <".join(VIRTUOSO_EXCLUDED_GRAPHS)
        graph_filter = f"FILTER(?g NOT IN (<{excluded_graphs}>))"
        entities_where = f"""
                GRAPH ?g {{
                    {class_pattern}
                    {sort_clause}
                }}
                {graph_filter}
        """
        count_where = f"""
                GRAPH ?g {{
                    {class_pattern}
                }}
                {graph_filter}
        """
    else:
        entities_where = f"""
                {class_pattern}
                {sort_clause}
        """
        count_where = class_pattern

    entities_query = f"""
        SELECT DISTINCT {projection}
        WHERE {{
            {entities_where}
        }}
        {order_clause}
        LIMIT {per_page}
        OFFSET {offset}
    """

    # Count query for total number of entities
    count_query = f"""
        SELECT (COUNT(DISTINCT ?subject) as ?count)
        WHERE {{
            {count_where}
        }}
    """

    # Execute count query
    sparql.setQuery(count_query)
    sparql.setReturnFormat(JSON)
    count_results = sparql.query().convert()
    total_count = int(count_results["results"]["bindings"][0]["count"]["value"])

    # Execute entities query
    sparql.setQuery(entities_query)
    entities_results = sparql.query().convert()

    entities = []
    for result in entities_results["results"]["bindings"]:
        subject_uri = result["subject"]["value"]
        entity_label = custom_filter.human_readable_entity(
            subject_uri, [selected_class]
        )
        entities.append({"uri": subject_uri, "label": entity_label})

    return entities, total_count

199 

200 

def get_catalog_data(
    selected_class, page, per_page, sort_property=None, sort_direction="ASC"
):
    """
    Get catalog data with pagination and sorting.

    When no ``sort_property`` is given and the class has sortable properties,
    the first of them is used. It is resolved before the entities are
    fetched, so the reported sort property matches the ordering applied.

    Args:
        selected_class (str): Selected class URI
        page (int): Current page number
        per_page (int): Items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC')

    Returns:
        dict: Catalog data including entities, pagination info, and sort settings
    """
    entities = []
    total_count = 0
    sortable_properties = []

    if selected_class:
        # Get sortable properties for the class
        sortable_properties = get_sortable_properties(
            selected_class, display_rules, form_fields_cache
        )

        # Pick the default before querying; otherwise the returned
        # "sort_property" would name an ordering that was never applied.
        if not sort_property and sortable_properties:
            sort_property = sortable_properties[0]["property"]

        entities, total_count = get_entities_for_class(
            selected_class, page, per_page, sort_property, sort_direction
        )

    # Ceiling division; guarded so a non-positive per_page cannot raise.
    if total_count > 0 and per_page > 0:
        total_pages = (total_count + per_page - 1) // per_page
    else:
        total_pages = 0

    return {
        "entities": entities,
        "total_pages": total_pages,
        "current_page": page,
        "per_page": per_page,
        "total_count": total_count,
        "sort_property": sort_property,
        "sort_direction": sort_direction,
        "sortable_properties": sortable_properties,
        "selected_class": selected_class,
    }

247 

248 

def fetch_data_graph_for_subject(subject: str) -> Graph | ConjunctiveGraph:
    """
    Fetch all triples/quads associated with a subject from the dataset.
    Handles both triplestore and quadstore cases appropriately.

    Literal objects keep their datatype or, when the result binding carries
    an "xml:lang" entry, their language tag.

    Args:
        subject (str): The URI of the subject to fetch data for

    Returns:
        Graph|ConjunctiveGraph: A graph containing all triples/quads for the subject
    """
    # Read the store type once; it decides the graph class, the query shape
    # and how each result row is added.
    is_quadstore = get_dataset_is_quadstore()
    g = ConjunctiveGraph() if is_quadstore else Graph()
    sparql = get_sparql()

    if is_virtuoso():
        # For virtuoso we need to explicitly query the graph
        query = f"""
            SELECT ?predicate ?object ?g WHERE {{
                GRAPH ?g {{
                    <{subject}> ?predicate ?object.
                }}
                FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
            }}
        """
    elif is_quadstore:
        # For non-virtuoso quadstore, we need to query all graphs
        query = f"""
            SELECT ?predicate ?object ?g WHERE {{
                GRAPH ?g {{
                    <{subject}> ?predicate ?object.
                }}
            }}
        """
    else:
        # For regular triplestore
        query = f"""
            SELECT ?predicate ?object WHERE {{
                <{subject}> ?predicate ?object.
            }}
        """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert().get("results", {}).get("bindings", [])

    subject_ref = URIRef(subject)
    for result in results:
        # Create the appropriate value (Literal or URIRef)
        obj_data = result["object"]
        if obj_data["type"] in {"literal", "typed-literal"}:
            if "datatype" in obj_data:
                value = Literal(
                    obj_data["value"], datatype=URIRef(obj_data["datatype"])
                )
            elif "xml:lang" in obj_data:
                # Keep the language tag: "x"@en and "x" are different terms.
                value = Literal(obj_data["value"], lang=obj_data["xml:lang"])
            else:
                value = Literal(obj_data["value"])
        else:
            # NOTE(review): "bnode" bindings also end up here as URIRefs —
            # confirm whether blank-node objects can occur in this dataset.
            value = URIRef(obj_data["value"])

        predicate_ref = URIRef(result["predicate"]["value"])

        # Add triple/quad based on store type
        if is_quadstore:
            graph_uri = URIRef(result["g"]["value"])
            g.add((subject_ref, predicate_ref, value, graph_uri))
        else:
            g.add((subject_ref, predicate_ref, value))

    return g

323 

324 

def _collect_update_triples(operation) -> list:
    """Return the triples of an update operation, from the default graph and from GRAPH blocks."""
    collected = list(getattr(operation, "triples", None) or [])
    quads = getattr(operation, "quads", None) or {}
    for graph_triples in quads.values():
        # The graph name is dropped: callers receive plain (s, p, o) tuples.
        collected.extend((t[0], t[1], t[2]) for t in graph_triples)
    return collected


def parse_sparql_update(query) -> dict:
    """
    Extract the triples inserted and deleted by a SPARQL update string.

    Only INSERT DATA and DELETE DATA operations are inspected. Triples
    written directly in the operation and triples written inside GRAPH
    blocks are both reported; the graph name itself is not.

    Args:
        query: The SPARQL update text to parse

    Returns:
        dict: May contain the keys "Additions" and "Deletions", each mapping
        to a list of (subject, predicate, object) tuples. A key is absent
        when no triple of that kind was found.
    """
    parsed = parseUpdate(query)
    translated = translateUpdate(parsed).algebra
    modifications = {}

    section_by_operation = {"DeleteData": "Deletions", "InsertData": "Additions"}

    for operation in translated:
        section = section_by_operation.get(operation.name)
        if section is None:
            continue
        changed = _collect_update_triples(operation)
        if changed:
            modifications.setdefault(section, []).extend(changed)

    return modifications

354 

355 

def fetch_current_state_with_related_entities(
    provenance: dict,
) -> Graph | ConjunctiveGraph:
    """
    Merge the current data of every entity listed in the provenance mapping.

    Args:
        provenance (dict): Mapping whose keys are the URIs of the entities to fetch

    Returns:
        Graph|ConjunctiveGraph: One graph holding the statements fetched for
        all keys; a ConjunctiveGraph when the dataset is a quadstore
    """
    merged = ConjunctiveGraph() if get_dataset_is_quadstore() else Graph()

    for entity_uri in provenance:
        entity_graph = fetch_data_graph_for_subject(entity_uri)

        # Quadstores are copied quad by quad so the graph name is kept.
        if get_dataset_is_quadstore():
            statements = entity_graph.quads()
        else:
            statements = entity_graph

        for statement in statements:
            merged.add(statement)

    return merged

382 

383 

def get_deleted_entities_with_filtering(
    page=1,
    per_page=50,
    sort_property="deletionTime",
    sort_direction="DESC",
    selected_class=None,
):
    """
    List deleted entities found in the provenance store, filtered, sorted and paginated.

    Candidate snapshots are those that have both a generation and an
    invalidation time, derive from an earlier snapshot, and have no later
    snapshot derived from them. Each candidate is handed to
    ``process_deleted_entity`` in a process pool; candidates for which it
    returns None are discarded.

    Args:
        page (int): Page number, starting at 1
        per_page (int): Number of entities per page
        sort_property (str): "deletionTime" or a property URI present in the
            entities' "sort_values"
        sort_direction (str): "DESC" (case-insensitive) for descending order,
            anything else for ascending
        selected_class (str, optional): Class URI to filter by; defaults to
            the first available class by label

    Returns:
        tuple: (entities of the page, available classes, selected class,
        sortable properties, number of entities of the selected class).
        When the provenance query yields nothing: ([], [], None, [], 0).
    """
    sortable_properties = [
        {"property": "deletionTime", "displayName": "Deletion Time", "sortType": "date"}
    ]
    provenance_sparql = get_provenance_sparql()
    custom_filter = get_custom_filter()

    if selected_class:
        sortable_properties += get_sortable_properties(
            selected_class, display_rules, form_fields_cache
        )

    prov_query = """
        SELECT DISTINCT ?entity ?lastSnapshot ?deletionTime ?agent ?lastValidSnapshotTime
        WHERE {
            ?lastSnapshot a <http://www.w3.org/ns/prov#Entity> ;
                         <http://www.w3.org/ns/prov#specializationOf> ?entity ;
                         <http://www.w3.org/ns/prov#generatedAtTime> ?deletionTime ;
                         <http://www.w3.org/ns/prov#invalidatedAtTime> ?invalidationTime ;
                         <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastValidSnapshot.

            ?lastValidSnapshot <http://www.w3.org/ns/prov#generatedAtTime> ?lastValidSnapshotTime .

            OPTIONAL { ?lastSnapshot <http://www.w3.org/ns/prov#wasAttributedTo> ?agent . }

            FILTER NOT EXISTS {
                ?laterSnapshot <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastSnapshot .
            }
        }
    """
    provenance_sparql.setQuery(prov_query)
    provenance_sparql.setReturnFormat(JSON)
    results_bindings = provenance_sparql.query().convert()["results"]["bindings"]

    if not results_bindings:
        return [], [], None, [], 0

    # One worker per binding, capped at the CPU count (4 if it is unknown).
    worker_count = max(1, min(os.cpu_count() or 4, len(results_bindings)))
    deleted_entities = []
    with ProcessPoolExecutor(max_workers=worker_count) as executor:
        pending = [
            executor.submit(process_deleted_entity, binding, sortable_properties)
            for binding in results_bindings
        ]
        for finished in as_completed(pending):
            entity_info = finished.result()
            if entity_info is not None:
                deleted_entities.append(entity_info)

    # Count, per class, the entities that survived processing.
    class_counts = {}
    for entity in deleted_entities:
        for type_uri in entity["entity_types"]:
            class_counts.setdefault(type_uri, 0)
            class_counts[type_uri] += 1

    available_classes = []
    for class_uri, count in class_counts.items():
        label = custom_filter.human_readable_predicate(class_uri, [class_uri])
        available_classes.append({"uri": class_uri, "label": label, "count": count})

    def by_deletion_time(entity):
        return entity["deletionTime"]

    def by_property_value(entity):
        return entity["sort_values"].get(sort_property, "").lower()

    descending = sort_direction.upper() == "DESC"
    sort_key = by_deletion_time if sort_property == "deletionTime" else by_property_value
    deleted_entities.sort(key=sort_key, reverse=descending)

    available_classes.sort(key=lambda entry: entry["label"].lower())
    if not selected_class and available_classes:
        selected_class = available_classes[0]["uri"]

    # Filter by class first, then paginate what is left.
    if selected_class:
        filtered_entities = [
            entity
            for entity in deleted_entities
            if selected_class in entity["entity_types"]
        ]
    else:
        filtered_entities = deleted_entities

    total_count = len(filtered_entities)
    start = (page - 1) * per_page
    page_entities = filtered_entities[start : start + per_page]

    return (
        page_entities,
        available_classes,
        selected_class,
        sortable_properties,
        total_count,
    )

491 

492 

def process_deleted_entity(result, sortable_properties):
    """
    Build the summary of one deleted entity from a provenance query binding.

    Args:
        result (dict): Binding with "entity", "lastValidSnapshotTime" and
            "deletionTime" entries and, optionally, "agent"
        sortable_properties (list): Dictionaries whose "property" value is
            looked up on the entity to fill "sort_values"

    Returns:
        dict | None: The entity summary, or None when the entity is missing
        from the reconstructed state, has no visible type, or no highest
        priority class can be determined
    """
    change_tracking_config = get_change_tracking_config()
    custom_filter = get_custom_filter()

    entity_uri = result["entity"]["value"]
    snapshot_time = result["lastValidSnapshotTime"]["value"]

    # Reconstruct the entity as it was at its last valid snapshot.
    entity_history = AgnosticEntity(
        res=entity_uri, config=change_tracking_config, related_entities_history=True
    )
    state, _unused_a, _unused_b = entity_history.get_state_at_time(
        (snapshot_time, snapshot_time)
    )

    if entity_uri not in state:
        return None

    state_key = convert_to_datetime(snapshot_time, stringify=True)
    last_valid_state: ConjunctiveGraph = state[entity_uri][state_key]
    entity_ref = URIRef(entity_uri)

    # Keep only the types that are visible.
    visible_types = []
    for triple in last_valid_state.triples((entity_ref, RDF.type, None)):
        type_uri = str(triple[2])
        if is_entity_type_visible(type_uri):
            visible_types.append(type_uri)

    if not visible_types:
        return None

    highest_priority_type = get_highest_priority_class(visible_types)
    if not highest_priority_type:
        return None

    # For each sortable property keep the first value found, or "".
    sort_values = {}
    for prop in sortable_properties:
        prop_uri = prop["property"]
        first_match = next(
            iter(last_valid_state.triples((entity_ref, URIRef(prop_uri), None))),
            None,
        )
        sort_values[prop_uri] = "" if first_match is None else str(first_match[2])

    agent_uri = result.get("agent", {}).get("value", "")

    return {
        "uri": entity_uri,
        "deletionTime": result["deletionTime"]["value"],
        "deletedBy": custom_filter.format_agent_reference(agent_uri),
        "lastValidSnapshotTime": snapshot_time,
        "type": custom_filter.human_readable_predicate(
            highest_priority_type, [highest_priority_type]
        ),
        "label": custom_filter.human_readable_entity(
            entity_uri, [highest_priority_type], last_valid_state
        ),
        "entity_types": visible_types,
        "sort_values": sort_values,
    }

560 

561 

def find_orphaned_entities(subject, entity_type, predicate=None, object_value=None):
    """
    Find entities that would become orphaned after deleting a triple or an entire entity,
    including intermediate relation entities.

    An entity is considered orphaned if:
    1. It has no incoming references from other entities (except from the entity being deleted)
    2. It does not reference any entities that are subjects of other triples

    Intermediate relation classes are read from the "intermediateRelation"
    entries of the display rule matching ``entity_type``. Entities of those
    classes are reported separately:
    - with ``predicate`` and ``object_value``: only ``object_value`` itself,
      if it is of an intermediate class
    - with ``predicate`` only: the objects of that predicate
    - with neither: every intermediate entity linked to or from the subject

    Args:
        subject (str): The URI of the subject being deleted
        entity_type (str): The type of the entity being deleted
        predicate (str, optional): The predicate being deleted
        object_value (str, optional): The object value being deleted

    Returns:
        tuple: Lists of (orphaned_entities, intermediate_orphans), each a
        list of {"uri", "type"} dictionaries
    """
    sparql = get_sparql()
    display_rules = get_display_rules() or []

    # Extract intermediate relation classes from display rules
    intermediate_classes = set()
    for rule in display_rules:
        if rule["class"] == entity_type:
            for prop in rule.get("displayProperties", []):
                if "intermediateRelation" in prop:
                    intermediate_classes.add(prop["intermediateRelation"]["class"])

    # Sorted so the generated query text does not depend on set ordering.
    class_list = ", ".join(f"<{class_uri}>" for class_uri in sorted(intermediate_classes))

    # Pattern tying ?entity to what is being deleted. It must never be empty:
    # without it ?entity would range over the whole dataset.
    if predicate and object_value:
        target_pattern = (
            f"<{subject}> <{predicate}> ?entity .\n"
            f"            FILTER(?entity = <{object_value}>)"
        )
    elif predicate:
        target_pattern = f"<{subject}> <{predicate}> ?entity ."
    else:
        # If no specific predicate, get all connected entities
        target_pattern = f"<{subject}> ?p ?entity ."

    outgoing_guard = (
        f"FILTER(?connectedEntity != <{subject}>)" if not predicate else ""
    )
    # With no intermediate classes there is nothing to exclude.
    intermediate_exclusion = (
        f"FILTER(?type NOT IN ({class_list}))" if class_list else ""
    )

    # Query to find regular orphans
    orphan_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            {target_pattern}

            FILTER(isIRI(?entity))
            ?entity a ?type .

            # No incoming references from other entities
            FILTER NOT EXISTS {{
                ?other ?anyPredicate ?entity .
                FILTER(?other != <{subject}>)
            }}

            # No outgoing references to active entities
            FILTER NOT EXISTS {{
                ?entity ?outgoingPredicate ?connectedEntity .
                ?connectedEntity ?furtherPredicate ?furtherObject .
                {outgoing_guard}
            }}

            # Exclude intermediate relation entities
            {intermediate_exclusion}
        }}
    """

    # Query to find orphaned intermediate relations
    if not class_list:
        # No intermediate class is configured, so no entity can match.
        intermediate_query = None
    elif predicate and object_value:
        # A specific triple is being deleted: check whether object_value is
        # an intermediate entity
        intermediate_query = f"""
            SELECT DISTINCT ?entity ?type
            WHERE {{
                <{object_value}> a ?type .
                FILTER(?type IN ({class_list}))
                BIND(<{object_value}> AS ?entity)
            }}
        """
    elif predicate:
        # All values of one predicate are being deleted: only intermediate
        # entities reached through that predicate are affected
        intermediate_query = f"""
            SELECT DISTINCT ?entity ?type
            WHERE {{
                <{subject}> <{predicate}> ?entity .
                ?entity a ?type .
                FILTER(?type IN ({class_list}))
            }}
        """
    else:
        # The whole entity is being deleted: find all connected intermediate
        # entities
        intermediate_query = f"""
            SELECT DISTINCT ?entity ?type
            WHERE {{
                # Find intermediate relations connected to the entity being deleted
                {{
                    <{subject}> ?p ?entity .
                    ?entity a ?type .
                    FILTER(?type IN ({class_list}))
                }} UNION {{
                    ?entity ?p <{subject}> .
                    ?entity a ?type .
                    FILTER(?type IN ({class_list}))
                }}
            }}
        """

    orphaned = []
    intermediate_orphans = []

    # Execute queries and process results
    for query, result_list in [
        (orphan_query, orphaned),
        (intermediate_query, intermediate_orphans),
    ]:
        if query is None:
            continue
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            result_list.append(
                {"uri": result["entity"]["value"], "type": result["type"]["value"]}
            )

    return orphaned, intermediate_orphans

676 

677 

def import_entity_graph(editor: Editor, subject: str, max_depth: int = 5, include_referencing_entities: bool = False):
    """
    Import the subject and the entities reachable from it into the editor.

    Starting from ``subject`` (depth 1), every IRI object of an imported
    entity is visited depth-first, in the order the endpoint returns them,
    until ``max_depth`` is exceeded. Each entity is imported at most once.

    Args:
        editor (Editor): The Editor instance to use for importing.
        subject (str): The URI of the subject to start the import from.
        max_depth (int): The maximum depth of the traversal (default is 5).
        include_referencing_entities (bool): Whether to first import the entities that
            have the subject as object of a predicate other than rdf:type (default False).
            These entities are imported on their own; their outgoing links are not followed.

    Returns:
        Editor: The same Editor instance, after all imports.
    """
    imported_subjects = set()

    # Referencing entities, when requested, are imported before the traversal.
    if include_referencing_entities:
        sparql = SPARQLWrapper(editor.dataset_endpoint)

        # Build query based on database type
        if editor.dataset_is_quadstore:
            query = f"""
                SELECT DISTINCT ?s
                WHERE {{
                    GRAPH ?g {{
                        ?s ?p <{subject}> .
                    }}
                    FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
                }}
            """
        else:
            query = f"""
                SELECT DISTINCT ?s
                WHERE {{
                    ?s ?p <{subject}> .
                    FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
                }}
            """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        referencing_rows = sparql.query().convert()["results"]["bindings"]

        for row in referencing_rows:
            referencing_subject = row["s"]["value"]
            if referencing_subject == subject or referencing_subject in imported_subjects:
                continue
            imported_subjects.add(referencing_subject)
            editor.import_entity(URIRef(referencing_subject))

    # Depth-first traversal with an explicit stack of (uri, depth) pairs.
    to_visit = [(subject, 1)]
    while to_visit:
        current_subject, current_depth = to_visit.pop()
        if current_depth > max_depth or current_subject in imported_subjects:
            continue

        imported_subjects.add(current_subject)
        editor.import_entity(URIRef(current_subject))

        query = f"""
            SELECT ?p ?o
            WHERE {{
                <{current_subject}> ?p ?o .
                FILTER(isIRI(?o))
            }}
        """

        sparql = SPARQLWrapper(editor.dataset_endpoint)
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        linked_rows = sparql.query().convert()["results"]["bindings"]

        # Pushed in reverse so the first returned object is visited first,
        # together with everything below it, before its siblings.
        for row in reversed(linked_rows):
            to_visit.append((row["o"]["value"], current_depth + 1))

    return editor

759 

760 

def get_entity_types(subject_uri: str) -> List[str]:
    """
    Look up the rdf:type values of an entity in the dataset.

    Args:
        subject_uri: URI of the entity

    Returns:
        The type URIs, in the order the endpoint returned them
    """
    type_query = f"""
        SELECT ?type WHERE {{
            <{subject_uri}> a ?type .
        }}
    """

    endpoint = get_sparql()
    endpoint.setQuery(type_query)
    endpoint.setReturnFormat(JSON)
    bindings = endpoint.query().convert()["results"]["bindings"]

    return [binding["type"]["value"] for binding in bindings]