Coverage for heritrace/utils/sparql_utils.py: 99%

346 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-08-01 22:12 +0000

1import os 

2from collections import defaultdict 

3from concurrent.futures import ProcessPoolExecutor, as_completed 

4from typing import List 

5 

6from heritrace.editor import Editor 

7from heritrace.extensions import (get_change_tracking_config, 

8 get_classes_with_multiple_shapes, 

9 get_custom_filter, get_dataset_is_quadstore, 

10 get_display_rules, get_provenance_sparql, 

11 get_sparql) 

12from heritrace.utils.converters import convert_to_datetime 

13from heritrace.utils.display_rules_utils import (find_matching_rule, 

14 get_highest_priority_class, 

15 get_sortable_properties, 

16 is_entity_type_visible) 

17from heritrace.utils.shacl_utils import (determine_shape_for_classes, 

18 determine_shape_for_entity_triples) 

19from heritrace.utils.virtuoso_utils import (VIRTUOSO_EXCLUDED_GRAPHS, 

20 is_virtuoso) 

21from rdflib import RDF, XSD, ConjunctiveGraph, Graph, Literal, URIRef 

22from rdflib.plugins.sparql.algebra import translateUpdate 

23from rdflib.plugins.sparql.parser import parseUpdate 

24from SPARQLWrapper import JSON 

25from time_agnostic_library.agnostic_entity import AgnosticEntity 

26 

27 

def _get_entities_with_enhanced_shape_detection(class_uri: str, classes_with_multiple_shapes: set):
    """
    Group the visible instances of a class by the shape detected for each one.

    Every triple of every instance of ``class_uri`` is fetched with a single
    query, the triples are bucketed per subject, and each bucket is passed to
    ``determine_shape_for_entity_triples`` to resolve that entity's shape.
    Entities with no detected shape, or whose (class, shape) pair is not
    visible, are left out.

    Args:
        class_uri: The class URI to get entities for
        classes_with_multiple_shapes: Set of classes that have multiple shapes
            (kept for interface compatibility; this function does not read it)

    Returns:
        Dict[str, List]: Dictionary mapping shape URIs to lists of
        ``{"uri", "class", "shape"}`` entity info dicts
    """
    endpoint = get_sparql()

    if not is_virtuoso():
        query = f"""
            SELECT DISTINCT ?subject ?p ?o
            WHERE {{
                ?subject a <{class_uri}> .
                ?subject ?p ?o .
            }}
        """
    else:
        # On Virtuoso the pattern is scoped to named graphs and the excluded graphs are filtered out.
        query = f"""
            SELECT DISTINCT ?subject ?p ?o
            WHERE {{
                GRAPH ?g {{
                    ?subject a <{class_uri}> .
                    ?subject ?p ?o .
                }}
                FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
            }}
        """

    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    rows = endpoint.query().convert()["results"]["bindings"]

    # Bucket the flat result rows into one triple list per subject.
    triples_by_subject = defaultdict(list)
    for row in rows:
        entity = row["subject"]["value"]
        triples_by_subject[entity].append(
            (entity, row["p"]["value"], row["o"]["value"])
        )

    grouped = defaultdict(list)
    for entity, entity_triples in triples_by_subject.items():
        detected_shape = determine_shape_for_entity_triples(entity_triples)
        if not detected_shape:
            continue
        if not is_entity_type_visible((class_uri, detected_shape)):
            continue
        grouped[detected_shape].append(
            {"uri": entity, "class": class_uri, "shape": detected_shape}
        )

    return grouped

86 

87 

def get_available_classes():
    """
    List the entity classes found in the triplestore, with labels and counts.

    Classes that have multiple shapes produce one entry per detected shape
    (counted from the entities grouped under that shape); every other class
    produces a single entry, provided its (class, shape) pair is visible.

    Returns:
        list: Dictionaries with ``uri``, ``label``, ``count`` and ``shape``,
        sorted case-insensitively by label
    """
    endpoint = get_sparql()
    labeller = get_custom_filter()

    if not is_virtuoso():
        classes_query = """
            SELECT DISTINCT ?class (COUNT(DISTINCT ?subject) as ?count)
            WHERE {
                ?subject a ?class .
            }
            GROUP BY ?class
            ORDER BY DESC(?count)
        """
    else:
        classes_query = f"""
            SELECT DISTINCT ?class (COUNT(DISTINCT ?subject) as ?count)
            WHERE {{
                GRAPH ?g {{
                    ?subject a ?class .
                }}
                FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
            }}
            GROUP BY ?class
            ORDER BY DESC(?count)
        """

    endpoint.setQuery(classes_query)
    endpoint.setReturnFormat(JSON)
    class_rows = endpoint.query().convert()

    multi_shape_classes = get_classes_with_multiple_shapes()

    catalog = []
    for row in class_rows["results"]["bindings"]:
        class_uri = row["class"]["value"]
        instance_count = int(row["count"]["value"])

        if class_uri not in multi_shape_classes:
            # Single-shape class: one entry, counted by the aggregate query.
            only_shape = determine_shape_for_classes([class_uri])
            key = (class_uri, only_shape)
            if is_entity_type_visible(key):
                catalog.append({
                    "uri": class_uri,
                    "label": labeller.human_readable_class(key),
                    "count": instance_count,
                    "shape": only_shape,
                })
            continue

        # Multi-shape class: one entry per shape, counted from the grouped entities.
        members_by_shape = _get_entities_with_enhanced_shape_detection(
            class_uri, multi_shape_classes
        )
        for shape_uri, members in members_by_shape.items():
            if not members:
                continue
            catalog.append({
                "uri": class_uri,
                "label": labeller.human_readable_class((class_uri, shape_uri)),
                "count": len(members),
                "shape": shape_uri,
            })

    return sorted(catalog, key=lambda entry: entry["label"].lower())

160 

161 

def build_sort_clause(sort_property: str, entity_type: str, shape_uri: str = None) -> str:
    """
    Build a SPARQL sort clause based on the sortableBy configuration.

    The clause is only produced when the display rule matching
    ``entity_type`` / ``shape_uri`` lists ``sort_property`` among its
    ``sortableBy`` entries, so the property URI placed in the query always
    comes from the configuration.

    Args:
        sort_property: The property to sort by
        entity_type: The entity type URI
        shape_uri: Optional shape URI for more specific sorting rules

    Returns:
        An ``OPTIONAL`` pattern binding ``?sortValue``, or an empty string
        when sorting by this property is not configured
    """
    if not sort_property or not entity_type:
        return ""

    rule = find_matching_rule(entity_type, shape_uri)
    if not rule:
        return ""

    # "sortableBy" may be present but empty (None); treat that like an absent key
    # instead of failing while iterating it.
    sortable_entries = rule.get("sortableBy") or []
    if not any(entry.get("property") == sort_property for entry in sortable_entries):
        return ""

    return f"OPTIONAL {{ ?subject <{sort_property}> ?sortValue }}"

191 

192 

def get_entities_for_class(
    selected_class, page, per_page, sort_property=None, sort_direction="ASC", selected_shape=None
):
    """
    Retrieve entities for a specific class with pagination and sorting.

    Two strategies are used:

    * When ``selected_shape`` is given and ``selected_class`` has multiple
      shapes, all triples of the class instances are fetched, the shape of
      each entity is detected, and filtering, sorting (by label) and
      pagination happen in Python.
    * Otherwise pagination and sorting are delegated to the triplestore.

    Args:
        selected_class (str): URI of the class to fetch entities for
        page (int): Current page number (values below 1 are treated as 1)
        per_page (int): Number of items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC');
            anything other than 'DESC' (case-insensitive) sorts ascending
        selected_shape (str, optional): URI of the shape to filter by

    Returns:
        tuple: (list of ``{"uri", "label"}`` dicts, total count)
    """
    sparql = get_sparql()
    custom_filter = get_custom_filter()
    classes_with_multiple_shapes = get_classes_with_multiple_shapes()

    # A negative OFFSET is rejected by SPARQL and a negative slice start wraps around.
    offset = max(0, (page - 1) * per_page)

    if is_virtuoso():
        excluded_graphs = f"<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>"

        def scoped(pattern: str) -> str:
            # Virtuoso: match inside named graphs, skipping the excluded ones.
            return f"""
                GRAPH ?g {{
                    {pattern}
                }}
                FILTER(?g NOT IN ({excluded_graphs}))
            """
    else:
        def scoped(pattern: str) -> str:
            return pattern

    membership = f"?subject a <{selected_class}> ."

    if selected_shape and selected_class in classes_with_multiple_shapes:
        all_triples_pattern = scoped(f"{membership} ?subject ?p ?o .")
        sparql.setQuery(f"""
            SELECT DISTINCT ?subject ?p ?o
            WHERE {{
                {all_triples_pattern}
            }}
        """)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        entities_triples = defaultdict(list)
        for binding in results["results"]["bindings"]:
            subject = binding["subject"]["value"]
            entities_triples[subject].append(
                (subject, binding["p"]["value"], binding["o"]["value"])
            )

        entity_key = (selected_class, selected_shape)
        filtered_entities = [
            {
                "uri": subject_uri,
                "label": custom_filter.human_readable_entity(subject_uri, entity_key, None),
            }
            for subject_uri, triples in entities_triples.items()
            if determine_shape_for_entity_triples(list(triples)) == selected_shape
        ]

        # In this branch the only available sort key is the human-readable label.
        if sort_property and sort_direction:
            filtered_entities.sort(
                key=lambda entity: entity["label"].lower(),
                reverse=sort_direction.upper() == "DESC",
            )

        return filtered_entities[offset:offset + per_page], len(filtered_entities)

    sort_clause = ""
    if sort_property:
        sort_clause = build_sort_clause(sort_property, selected_class, selected_shape)

    if sort_clause:
        # Only the two SPARQL order modifiers are allowed into the query text.
        direction = "DESC" if str(sort_direction).upper() == "DESC" else "ASC"
        projection = "?subject ?sortValue"
        # ?subject breaks ties so consecutive pages do not overlap.
        order_clause = f"ORDER BY {direction}(?sortValue) ?subject"
        # NOTE(review): a multi-valued sort property yields one row per value,
        # so a subject can appear more than once — confirm whether that is acceptable.
    else:
        projection = "?subject"
        order_clause = "ORDER BY ?subject"

    listing_pattern = scoped(f"{membership} {sort_clause}")
    counting_pattern = scoped(membership)

    entities_query = f"""
        SELECT DISTINCT {projection}
        WHERE {{
            {listing_pattern}
        }}
        {order_clause}
        LIMIT {per_page}
        OFFSET {offset}
    """

    count_query = f"""
        SELECT (COUNT(DISTINCT ?subject) as ?count)
        WHERE {{
            {counting_pattern}
        }}
    """

    sparql.setQuery(count_query)
    sparql.setReturnFormat(JSON)
    count_results = sparql.query().convert()
    total_count = int(count_results["results"]["bindings"][0]["count"]["value"])

    sparql.setQuery(entities_query)
    entities_results = sparql.query().convert()

    # The shape depends only on the call arguments, so resolve it once.
    shape = selected_shape if selected_shape else determine_shape_for_classes([selected_class])
    entity_key = (selected_class, shape)

    entities = []
    for result in entities_results["results"]["bindings"]:
        subject_uri = result["subject"]["value"]
        entities.append({
            "uri": subject_uri,
            "label": custom_filter.human_readable_entity(subject_uri, entity_key, None),
        })

    return entities, total_count

338 

339 

def get_catalog_data(
    selected_class: str,
    page: int,
    per_page: int,
    sort_property: str = None,
    sort_direction: str = "ASC",
    selected_shape: str = None
) -> dict:
    """
    Get catalog data with pagination and sorting.

    When no ``sort_property`` is given and the selected class has sortable
    properties, the first of them becomes the sort property. Without a
    selected class nothing is queried and an empty catalog is returned.

    Args:
        selected_class (str): Selected class URI
        page (int): Current page number
        per_page (int): Items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC')
        selected_shape (str, optional): URI of the shape to use for sorting rules

    Returns:
        dict: Catalog data including entities, pagination info, and sort settings
    """
    entities = []
    total_count = 0
    sortable_properties = []

    if selected_class:
        sortable_properties = get_sortable_properties(
            (selected_class, selected_shape)
        )

        if not sort_property and sortable_properties:
            sort_property = sortable_properties[0]["property"]

        entities, total_count = get_entities_for_class(
            selected_class, page, per_page, sort_property, sort_direction, selected_shape
        )

    # Ceiling division; a non-positive page size cannot be paged, so report zero pages
    # instead of dividing by zero.
    if total_count > 0 and per_page > 0:
        total_pages = (total_count + per_page - 1) // per_page
    else:
        total_pages = 0

    return {
        "entities": entities,
        "total_pages": total_pages,
        "current_page": page,
        "per_page": per_page,
        "total_count": total_count,
        "sort_property": sort_property,
        "sort_direction": sort_direction,
        "sortable_properties": sortable_properties,
        "selected_class": selected_class,
        "selected_shape": selected_shape,
    }

393 

394 

def fetch_data_graph_for_subject(subject: str) -> Graph | ConjunctiveGraph:
    """
    Fetch all triples/quads associated with a subject from the dataset.
    Handles both triplestore and quadstore cases appropriately.

    Literals without an explicit datatype are given ``xsd:string``; every
    non-literal object is wrapped as a ``URIRef``.

    Args:
        subject (str): The URI of the subject to fetch data for

    Returns:
        Graph|ConjunctiveGraph: A graph containing all triples/quads for the subject
    """
    # The store type cannot change during the call, so look it up once.
    is_quadstore = get_dataset_is_quadstore()
    g = ConjunctiveGraph() if is_quadstore else Graph()
    sparql = get_sparql()

    if is_virtuoso():
        # For virtuoso we need to explicitly query the graph
        query = f"""
            SELECT ?predicate ?object ?g WHERE {{
                GRAPH ?g {{
                    <{subject}> ?predicate ?object.
                }}
                FILTER(?g NOT IN (<{'>, <'.join(VIRTUOSO_EXCLUDED_GRAPHS)}>))
            }}
        """
    elif is_quadstore:
        # For non-virtuoso quadstore, we need to query all graphs
        query = f"""
            SELECT ?predicate ?object ?g WHERE {{
                GRAPH ?g {{
                    <{subject}> ?predicate ?object.
                }}
            }}
        """
    else:
        # For regular triplestore
        query = f"""
            SELECT ?predicate ?object WHERE {{
                <{subject}> ?predicate ?object.
            }}
        """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    query_results = sparql.query().convert()
    results = query_results.get("results", {}).get("bindings", [])

    subject_ref = URIRef(subject)
    for result in results:
        obj_data = result["object"]
        if obj_data["type"] in {"literal", "typed-literal"}:
            # Untyped literals get an explicit string datatype to match
            # time-agnostic library behavior.
            # NOTE(review): an "xml:lang" tag on the binding is not carried over — confirm intended.
            if "datatype" in obj_data:
                datatype = URIRef(obj_data["datatype"])
            else:
                datatype = XSD.string
            value = Literal(obj_data["value"], datatype=datatype)
        else:
            # NOTE(review): "bnode" bindings also end up here as URIRef — confirm intended.
            value = URIRef(obj_data["value"])

        predicate_ref = URIRef(result["predicate"]["value"])

        # Add triple/quad based on store type
        if is_quadstore:
            g.add((subject_ref, predicate_ref, value, URIRef(result["g"]["value"])))
        else:
            g.add((subject_ref, predicate_ref, value))

    return g

471 

472 

def parse_sparql_update(query) -> dict:
    """
    Extract the triples added and removed by a SPARQL update request.

    Only ``INSERT DATA`` and ``DELETE DATA`` operations are considered; any
    other operation in the request is ignored. Triples written directly in
    the data block and triples written inside ``GRAPH`` blocks are both
    collected; the graph name itself is discarded.

    Args:
        query: The SPARQL update request text

    Returns:
        dict: ``{"Additions": [...], "Deletions": [...]}`` with lists of
        (subject, predicate, object) tuples; a key is present only when the
        request contains at least one triple of that kind
    """
    operations = translateUpdate(parseUpdate(query)).algebra
    bucket_for_operation = {"DeleteData": "Deletions", "InsertData": "Additions"}
    modifications = {}

    for operation in operations:
        bucket = bucket_for_operation.get(operation.name)
        if bucket is None:
            continue

        # An operation can carry default-graph triples and GRAPH blocks at the
        # same time; collect both rather than letting one hide the other.
        changes = list(getattr(operation, "triples", None) or [])
        quads = getattr(operation, "quads", None) or {}
        for graph_triples in quads.values():
            changes.extend((triple[0], triple[1], triple[2]) for triple in graph_triples)

        if changes:
            modifications.setdefault(bucket, []).extend(changes)

    return modifications

502 

503 

def fetch_current_state_with_related_entities(
    provenance: dict,
) -> Graph | ConjunctiveGraph:
    """
    Merge the current data of every entity named in the provenance into one graph.

    Each key of ``provenance`` is treated as an entity URI; its current data
    is fetched with ``fetch_data_graph_for_subject`` and copied into the
    result, as quads for a quadstore dataset and as triples otherwise.

    Args:
        provenance (dict): Dictionary containing provenance metadata for main entity and related entities

    Returns:
        Graph|ConjunctiveGraph: A graph containing the current state of all entities
    """
    merged = ConjunctiveGraph() if get_dataset_is_quadstore() else Graph()

    for uri in provenance:
        entity_graph = fetch_data_graph_for_subject(uri)

        # Quadstores keep the graph name of each statement, so copy quads there.
        if get_dataset_is_quadstore():
            statements = entity_graph.quads()
        else:
            statements = entity_graph

        for statement in statements:
            merged.add(statement)

    return merged

530 

531 

def get_deleted_entities_with_filtering(
    page=1,
    per_page=50,
    sort_property="deletionTime",
    sort_direction="DESC",
    selected_class=None,
    selected_shape=None,
):
    """
    Fetch and process deleted entities from the provenance graph, with filtering and sorting.

    The provenance endpoint is queried for the last snapshot of each entity
    that has an invalidation time and no later snapshot derived from it.
    Every hit is expanded by ``process_deleted_entity`` in a process pool,
    then the entities are sorted, filtered by class and paginated.

    Args:
        page (int): Page number (values below 1 are treated as 1)
        per_page (int): Number of entities per page
        sort_property (str): ``"deletionTime"`` or a sortable property URI
        sort_direction (str): 'ASC' or 'DESC'
        selected_class (str, optional): Class URI to filter by; defaults to the
            first available class (ordered by label)
        selected_shape (str, optional): Shape URI; derived from the class when omitted

    Returns:
        tuple: (paginated entities, available classes, selected class,
        selected shape, sortable properties, total count)
    """
    sortable_properties = [
        {"property": "deletionTime", "displayName": "Deletion Time", "sortType": "date"}
    ]
    provenance_sparql = get_provenance_sparql()
    custom_filter = get_custom_filter()

    prov_query = """
        SELECT DISTINCT ?entity ?lastSnapshot ?deletionTime ?agent ?lastValidSnapshotTime
        WHERE {
            ?lastSnapshot a <http://www.w3.org/ns/prov#Entity> ;
                <http://www.w3.org/ns/prov#specializationOf> ?entity ;
                <http://www.w3.org/ns/prov#generatedAtTime> ?deletionTime ;
                <http://www.w3.org/ns/prov#invalidatedAtTime> ?invalidationTime ;
                <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastValidSnapshot.

            ?lastValidSnapshot <http://www.w3.org/ns/prov#generatedAtTime> ?lastValidSnapshotTime .

            OPTIONAL { ?lastSnapshot <http://www.w3.org/ns/prov#wasAttributedTo> ?agent . }

            FILTER NOT EXISTS {
                ?laterSnapshot <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastSnapshot .
            }
        }
    """
    provenance_sparql.setQuery(prov_query)
    provenance_sparql.setReturnFormat(JSON)
    prov_results = provenance_sparql.query().convert()

    results_bindings = prov_results["results"]["bindings"]
    if not results_bindings:
        return [], [], None, None, [], 0

    # The workers compute sort values only for the properties they are handed,
    # so the class-specific properties must be known before the work is submitted.
    class_properties_loaded = False
    if selected_class:
        if selected_shape is None:
            selected_shape = determine_shape_for_classes([selected_class])
        sortable_properties.extend(
            get_sortable_properties((selected_class, selected_shape))
        )
        class_properties_loaded = True

    deleted_entities = []
    max_workers = max(1, min(os.cpu_count() or 4, len(results_bindings)))
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        future_to_entity = {
            executor.submit(process_deleted_entity, result, sortable_properties): result
            for result in results_bindings
        }
        for future in as_completed(future_to_entity):
            entity_info = future.result()
            if entity_info is not None:
                deleted_entities.append(entity_info)

    class_counts = {}
    for entity in deleted_entities:
        for type_uri in entity["entity_types"]:
            class_counts[type_uri] = class_counts.get(type_uri, 0) + 1

    available_classes = [
        {
            "uri": class_uri,
            "label": custom_filter.human_readable_class(
                (class_uri, determine_shape_for_classes([class_uri]))
            ),
            "count": count,
        }
        for class_uri, count in class_counts.items()
    ]

    reverse_sort = sort_direction.upper() == "DESC"
    if sort_property == "deletionTime":
        deleted_entities.sort(key=lambda e: e["deletionTime"], reverse=reverse_sort)
    else:
        deleted_entities.sort(
            key=lambda e: e["sort_values"].get(sort_property, "").lower(),
            reverse=reverse_sort,
        )

    available_classes.sort(key=lambda x: x["label"].lower())
    if not selected_class and available_classes:
        selected_class = available_classes[0]["uri"]

    # When the class was only defaulted just now, its properties are still reported
    # to the caller even though the workers could not compute values for them.
    if selected_class and not class_properties_loaded:
        if selected_shape is None:
            selected_shape = determine_shape_for_classes([selected_class])
        sortable_properties.extend(
            get_sortable_properties((selected_class, selected_shape))
        )

    if selected_class:
        filtered_entities = [
            entity
            for entity in deleted_entities
            if selected_class in entity["entity_types"]
        ]
    else:
        filtered_entities = deleted_entities

    total_count = len(filtered_entities)
    # A negative slice start would wrap around to the end of the list.
    offset = max(0, (page - 1) * per_page)
    paginated_entities = filtered_entities[offset : offset + per_page]

    return paginated_entities, available_classes, selected_class, selected_shape, sortable_properties, total_count

636 

637 

def process_deleted_entity(result: dict, sortable_properties: list) -> dict | None:
    """
    Build the display record of one deleted entity from its last valid snapshot.

    The entity state at ``lastValidSnapshotTime`` is reconstructed through
    ``AgnosticEntity``. ``None`` is returned when that state does not contain
    the entity or when none of its types is visible.

    Args:
        result: One binding row of the deleted-entities provenance query
        sortable_properties: Property descriptors (dicts with a ``"property"``
            key) for which a sort value is extracted

    Returns:
        dict | None: The entity record, or ``None`` when it must be skipped
    """
    tracking_config = get_change_tracking_config()
    labeller = get_custom_filter()

    entity_uri = result["entity"]["value"]
    snapshot_time = result["lastValidSnapshotTime"]["value"]

    history = AgnosticEntity(
        res=entity_uri,
        config=tracking_config,
        include_related_objects=True,
        include_merged_entities=True,
        include_reverse_relations=True,
    )
    state, _, _ = history.get_state_at_time((snapshot_time, snapshot_time))

    if entity_uri not in state:
        return None

    snapshot_key = convert_to_datetime(snapshot_time, stringify=True)
    snapshot: ConjunctiveGraph = state[entity_uri][snapshot_key]
    entity_ref = URIRef(entity_uri)

    def object_strings(predicate) -> list:
        # String form of every object the entity has for this predicate in the snapshot.
        return [str(o) for _, _, o in snapshot.triples((entity_ref, predicate, None))]

    entity_types = object_strings(RDF.type)
    main_type = get_highest_priority_class(entity_types)
    main_shape = determine_shape_for_classes([main_type])

    visible_types = []
    for candidate in entity_types:
        if is_entity_type_visible((candidate, determine_shape_for_classes([candidate]))):
            visible_types.append(candidate)
    if not visible_types:
        return None

    sort_values = {}
    for descriptor in sortable_properties:
        property_uri = descriptor["property"]
        found = object_strings(URIRef(property_uri))
        # Only the first value is kept; a missing property sorts as the empty string.
        sort_values[property_uri] = found[0] if found else ""

    main_key = (main_type, main_shape)
    return {
        "uri": entity_uri,
        "deletionTime": result["deletionTime"]["value"],
        "deletedBy": labeller.format_agent_reference(
            result.get("agent", {}).get("value", "")
        ),
        "lastValidSnapshotTime": snapshot_time,
        "type": labeller.human_readable_predicate(main_type, main_key),
        "label": labeller.human_readable_entity(entity_uri, main_key, snapshot),
        "entity_types": visible_types,
        "sort_values": sort_values,
    }

698 

699 

def find_orphaned_entities(subject, entity_type, predicate=None, object_value=None):
    """
    Find entities that would become orphaned after deleting a triple or an entire entity,
    including intermediate relation entities.

    An entity is considered orphaned if:
    1. It has no incoming references from other entities (except from the entity being deleted)
    2. It does not reference any entities that are subjects of other triples

    For intermediate relations, an entity is also considered orphaned if:
    1. It connects to the entity being deleted
    2. It has no other valid connections after the deletion
    3. It is directly involved in the deletion operation (if predicate and object_value are specified)

    Intermediate relation classes are read from the ``intermediateRelation``
    entries of the display rules whose target class equals ``entity_type``.
    When there are none, no intermediate query is run.

    Args:
        subject (str): The URI of the subject being deleted
        entity_type (str): The type of the entity being deleted
        predicate (str, optional): The predicate being deleted
        object_value (str, optional): The object value being deleted

    Returns:
        tuple: Lists of (orphaned_entities, intermediate_orphans), each item a
        ``{"uri", "type"}`` dict
    """
    sparql = get_sparql()
    display_rules = get_display_rules()

    intermediate_classes = set()
    for rule in display_rules:
        if "target" in rule and "class" in rule["target"] and rule["target"]["class"] == entity_type:
            for prop in rule.get("displayProperties", []):
                if "intermediateRelation" in prop:
                    intermediate_classes.add(prop["intermediateRelation"]["class"])

    # Sorted so the generated query text is the same on every run.
    intermediate_list = ", ".join(f"<{cls}>" for cls in sorted(intermediate_classes))

    # How the candidate entity is linked to the subject being deleted.
    if predicate and object_value:
        link_pattern = (
            f"<{subject}> <{predicate}> ?entity .\n"
            f"            FILTER(?entity = <{object_value}>)"
        )
    elif predicate:
        # Predicate without a value: every object of that predicate is a candidate.
        link_pattern = f"<{subject}> <{predicate}> ?entity ."
    else:
        # If no specific predicate, get all connected entities
        link_pattern = f"<{subject}> ?p ?entity ."

    self_reference_filter = "" if predicate else f"FILTER(?connectedEntity != <{subject}>)"
    # An empty IN list would not express "exclude nothing", so omit the filter entirely.
    exclude_intermediate = (
        f"FILTER(?type NOT IN ({intermediate_list}))" if intermediate_classes else ""
    )

    orphan_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            {link_pattern}

            FILTER(isIRI(?entity))
            ?entity a ?type .

            # No incoming references from other entities
            FILTER NOT EXISTS {{
                ?other ?anyPredicate ?entity .
                FILTER(?other != <{subject}>)
            }}

            # No outgoing references to active entities
            FILTER NOT EXISTS {{
                ?entity ?outgoingPredicate ?connectedEntity .
                ?connectedEntity ?furtherPredicate ?furtherObject .
                {self_reference_filter}
            }}

            # Exclude intermediate relation entities
            {exclude_intermediate}
        }}
    """

    # Query to find orphaned intermediate relations
    intermediate_query = None
    if intermediate_classes:
        if predicate and object_value:
            intermediate_query = f"""
                SELECT DISTINCT ?entity ?type
                WHERE {{
                    <{object_value}> a ?type .
                    FILTER(?type IN ({intermediate_list}))
                    BIND(<{object_value}> AS ?entity)
                }}
            """
        else:
            # If we are deleting the whole entity, find all connected intermediate entities
            intermediate_query = f"""
                SELECT DISTINCT ?entity ?type
                WHERE {{
                    # Find intermediate relations connected to the entity being deleted
                    {{
                        <{subject}> ?p ?entity .
                        ?entity a ?type .
                        FILTER(?type IN ({intermediate_list}))
                    }} UNION {{
                        ?entity ?p <{subject}> .
                        ?entity a ?type .
                        FILTER(?type IN ({intermediate_list}))
                    }}
                }}
            """

    def run(query):
        # Execute one query and reduce each row to its entity URI and type.
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        bindings = sparql.query().convert()["results"]["bindings"]
        return [
            {"uri": row["entity"]["value"], "type": row["type"]["value"]}
            for row in bindings
        ]

    orphaned = run(orphan_query)
    intermediate_orphans = run(intermediate_query) if intermediate_query else []

    return orphaned, intermediate_orphans

810 

811 

def import_entity_graph(editor: Editor, subject: str, max_depth: int = 5, include_referencing_entities: bool = False):
    """
    Import the subject and the entities reachable from it, up to a depth limit.

    Starting from ``subject`` (depth 1), each visited entity is imported into
    the editor and its IRI objects (excluding ``rdf:type`` values) are
    visited next, depth-first, until ``max_depth`` is exceeded. Entities are
    imported at most once.

    Args:
        editor (Editor): The Editor instance to use for importing.
        subject (str): The URI of the subject to start the import from.
        max_depth (int): The maximum depth of traversal (default is 5).
        include_referencing_entities (bool): Whether to include entities that have the subject as their object (default False).
            Useful when deleting an entity to ensure all references are properly removed.

    Returns:
        Editor: The updated Editor instance with all imported entities.
    """
    seen = set()

    if include_referencing_entities:
        endpoint = get_sparql()

        # Quadstores need the pattern wrapped in a GRAPH block.
        if not editor.dataset_is_quadstore:
            referencing_query = f"""
                SELECT DISTINCT ?s
                WHERE {{
                    ?s ?p <{subject}> .
                    FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
                }}
            """
        else:
            referencing_query = f"""
                SELECT DISTINCT ?s
                WHERE {{
                    GRAPH ?g {{
                        ?s ?p <{subject}> .
                    }}
                    FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
                }}
            """

        endpoint.setQuery(referencing_query)
        endpoint.setReturnFormat(JSON)
        referencing_rows = endpoint.query().convert()["results"]["bindings"]

        for row in referencing_rows:
            referrer = row["s"]["value"]
            if referrer == subject or referrer in seen:
                continue
            # Marked as seen, so the traversal below will not import or expand it again.
            seen.add(referrer)
            editor.import_entity(URIRef(referrer))

    # Explicit stack instead of recursion; children are pushed in reverse so they
    # are popped in the order the query returned them.
    pending = [(subject, 1)]
    while pending:
        current, depth = pending.pop()
        if depth > max_depth or current in seen:
            continue

        seen.add(current)
        editor.import_entity(URIRef(current))

        outgoing_query = f"""
            SELECT ?p ?o
            WHERE {{
                <{current}> ?p ?o .
                FILTER(isIRI(?o))
                FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
            }}
        """

        endpoint = get_sparql()
        endpoint.setQuery(outgoing_query)
        endpoint.setReturnFormat(JSON)
        outgoing_rows = endpoint.query().convert()["results"]["bindings"]

        targets = [row["o"]["value"] for row in outgoing_rows]
        pending.extend((target, depth + 1) for target in reversed(targets))

    return editor

894 

895 

def get_entity_types(subject_uri: str) -> List[str]:
    """
    Return the URIs of all RDF types asserted for an entity.

    Args:
        subject_uri: URI of the entity

    Returns:
        List of type URIs, in the order the endpoint returned them
    """
    endpoint = get_sparql()
    endpoint.setQuery(f"""
        SELECT ?type WHERE {{
            <{subject_uri}> a ?type .
        }}
    """)
    endpoint.setReturnFormat(JSON)

    rows = endpoint.query().convert()["results"]["bindings"]
    type_uris = []
    for row in rows:
        type_uris.append(row["type"]["value"])
    return type_uris

919 

920 

def collect_referenced_entities(data, existing_entities=None):
    """
    Recursively collect all URIs of existing entities referenced in the structured data.

    This function traverses the structured data to find explicit references to existing entities
    that need to be imported into the editor before calling preexisting_finished().

    A dict is a reference when ``is_existing_entity`` is exactly ``True`` and
    it has an ``entity_uri`` key; references whose URI is empty or not a
    string are skipped. A dict with ``entity_type`` describes a new entity
    and only its ``properties`` are searched. Any other dict, list or tuple
    is searched element by element; other values are ignored.

    Args:
        data: The structured data (can be dict, list, tuple, or string)
        existing_entities: Set to collect URIs (created if None)

    Returns:
        Set of URIs (strings) of existing entities that should be imported
    """
    if existing_entities is None:
        existing_entities = set()

    if isinstance(data, dict):
        if data.get("is_existing_entity") is True and "entity_uri" in data:
            uri = data["entity_uri"]
            # A blank or missing URI cannot be imported, so do not collect it.
            if isinstance(uri, str) and uri.strip():
                existing_entities.add(uri)
        elif "entity_type" in data:
            # If it's an entity with entity_type, it's a new entity being created
            for prop_values in data.get("properties", {}).values():
                collect_referenced_entities(prop_values, existing_entities)
        else:
            for value in data.values():
                collect_referenced_entities(value, existing_entities)
    elif isinstance(data, (list, tuple)):
        for item in data:
            collect_referenced_entities(item, existing_entities)

    return existing_entities

957 

958 

959def import_referenced_entities(editor, structured_data): 

960 """ 

961 Import all existing entities referenced in structured data into the editor. 

962  

963 This function should be called before editor.preexisting_finished() to ensure 

964 that all existing entities that will be linked have their snapshots created. 

965  

966 Args: 

967 editor: The Editor instance 

968 structured_data: The structured data containing entity references 

969 """ 

970 referenced_entities = collect_referenced_entities(structured_data) 

971 for entity_uri in referenced_entities: 

972 try: 

973 editor.import_entity(entity_uri) 

974 except Exception as e: 

975 print(f"Warning: Could not import entity {entity_uri}: {e}") 

976 continue