Coverage for heritrace/utils/sparql_utils.py: 95%

403 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-10-13 17:12 +0000

1import os 

2from collections import defaultdict 

3from concurrent.futures import ProcessPoolExecutor, as_completed 

4from typing import List 

5 

6from rdflib import RDF, ConjunctiveGraph, Graph, Literal, URIRef 

7from rdflib.plugins.sparql.algebra import translateUpdate 

8from rdflib.plugins.sparql.parser import parseUpdate 

9from SPARQLWrapper import JSON 

10from time_agnostic_library.agnostic_entity import AgnosticEntity 

11 

12from heritrace.editor import Editor 

13from heritrace.extensions import (get_change_tracking_config, 

14 get_classes_with_multiple_shapes, 

15 get_custom_filter, get_dataset_is_quadstore, 

16 get_display_rules, get_provenance_sparql, 

17 get_shacl_graph, get_sparql) 

18from heritrace.utils.converters import convert_to_datetime 

19from heritrace.utils.display_rules_utils import (find_matching_rule, 

20 get_highest_priority_class, 

21 get_sortable_properties, 

22 is_entity_type_visible) 

23from heritrace.utils.shacl_utils import (determine_shape_for_classes, 

24 determine_shape_for_entity_triples) 

25from heritrace.utils.virtuoso_utils import (VIRTUOSO_EXCLUDED_GRAPHS, 

26 is_virtuoso) 

27 

# Module-level cache for the list built by get_available_classes(); it stays
# None until precompute_available_classes_cache() fills it.
_AVAILABLE_CLASSES_CACHE = None
# Cap applied when counting or loading instances of a class; taken from the
# COUNT_LIMIT environment variable and defaulting to 10000.
COUNT_LIMIT = int(os.getenv("COUNT_LIMIT", "10000"))

30 

31 

def precompute_available_classes_cache():
    """Fill the module-level available-classes cache and return its content.

    Meant to be called at application startup so that later calls to
    get_available_classes() are answered from the cache.
    """
    global _AVAILABLE_CLASSES_CACHE
    computed_classes = get_available_classes()
    _AVAILABLE_CLASSES_CACHE = computed_classes
    return computed_classes

37 

38 

def _wrap_virtuoso_graph_pattern(pattern: str) -> str:
    """Return *pattern*, wrapped in a GRAPH clause when the store is Virtuoso.

    On Virtuoso the pattern is placed inside ``GRAPH ?g { ... }`` followed by
    a filter excluding every graph listed in VIRTUOSO_EXCLUDED_GRAPHS. On any
    other store the pattern is returned unchanged.
    """
    if not is_virtuoso():
        return pattern

    excluded_graphs = ">, <".join(VIRTUOSO_EXCLUDED_GRAPHS)
    return f"""
            GRAPH ?g {{
                {pattern}
            }}
            FILTER(?g NOT IN (<{excluded_graphs}>))
        """

49 

50 

51def _build_count_query_with_limit(class_uri: str, limit: int) -> str: 

52 """Build a COUNT query with LIMIT for a specific class.""" 

53 

54 return f""" 

55 SELECT (COUNT(?subject) as ?count) 

56 WHERE {{ 

57 {{ 

58 SELECT DISTINCT ?subject 

59 WHERE {{ 

60 ?subject a <{class_uri}> . 

61 }} 

62 LIMIT {limit} 

63 }} 

64 }} 

65 """ 

66 

67 

def _count_class_instances(class_uri: str, limit: int = COUNT_LIMIT) -> tuple:
    """
    Count the instances of *class_uri*, capped at *limit*.

    The query is issued with ``limit + 1`` so that a result above *limit*
    reveals that more instances exist than the cap allows to report.

    Returns:
        tuple: (display_count, numeric_count). When the cap is exceeded the
            pair is ("<limit>+", limit); otherwise it is (str(count), count).
    """
    endpoint = get_sparql()
    endpoint.setQuery(_build_count_query_with_limit(class_uri, limit + 1))
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()

    first_binding = response["results"]["bindings"][0]
    found = int(first_binding["count"]["value"])

    if found <= limit:
        return str(found), found
    return f"{limit}+", limit

87 

88 

def _get_entities_with_enhanced_shape_detection(class_uri: str, classes_with_multiple_shapes: set, limit: int = COUNT_LIMIT):
    """
    Group the entities of a multi-shape class by the shape detected for each.

    At most *limit* subjects of *class_uri* are fetched; their triples are then
    loaded and passed to determine_shape_for_entity_triples(). Entities with no
    detected shape, or whose (class, shape) pair is not visible, are left out.

    Returns:
        defaultdict(list): shape URI -> list of {"uri", "class", "shape"}
            dicts. Empty when *class_uri* is not in
            *classes_with_multiple_shapes* or has no instances.
    """
    # Nothing to do unless this class is known to have several shapes
    if not classes_with_multiple_shapes:
        return defaultdict(list)
    if class_uri not in classes_with_multiple_shapes:
        return defaultdict(list)

    sparql = get_sparql()

    def run_select(query_text):
        # Send one SELECT query and return the decoded JSON bindings
        sparql.setQuery(query_text)
        sparql.setReturnFormat(JSON)
        return sparql.query().convert()["results"]["bindings"]

    subject_rows = run_select(f"""
        SELECT DISTINCT ?subject
        WHERE {{
            ?subject a <{class_uri}> .
        }}
        LIMIT {limit}
    """)
    subjects = [row["subject"]["value"] for row in subject_rows]

    if not subjects:
        return defaultdict(list)

    # Restrict the triple fetch to the subjects selected above
    subjects_filter = " ".join(f"(<{uri}>)" for uri in subjects)
    pattern_with_filter = f"?subject a <{class_uri}> . ?subject ?p ?o . VALUES (?subject) {{ {subjects_filter} }}"

    triple_rows = run_select(f"""
        SELECT ?subject ?p ?o
        WHERE {{
            {pattern_with_filter}
        }}
    """)

    triples_by_subject = defaultdict(list)
    for row in triple_rows:
        triple = (row["subject"]["value"], row["p"]["value"], row["o"]["value"])
        triples_by_subject[triple[0]].append(triple)

    shape_to_entities = defaultdict(list)
    for subject_uri, subject_triples in triples_by_subject.items():
        shape_uri = determine_shape_for_entity_triples(subject_triples)
        if not shape_uri:
            continue
        if not is_entity_type_visible((class_uri, shape_uri)):
            continue
        shape_to_entities[shape_uri].append(
            {"uri": subject_uri, "class": class_uri, "shape": shape_uri}
        )

    return shape_to_entities

152 

153 

def get_classes_from_shacl_or_display_rules():
    """Collect class URIs from the SHACL graph, else from the display rules.

    Every ``sh:targetClass`` object found in the SHACL graph is collected. Only
    when that yields nothing are the display rules consulted, taking
    ``rule["target"]["class"]`` from each rule that defines it.

    Returns:
        list: The distinct class URIs as strings, in no particular order.
    """
    SH_TARGET_CLASS = URIRef("http://www.w3.org/ns/shacl#targetClass")

    shacl_graph = get_shacl_graph()
    if shacl_graph:
        classes = {
            str(target_class)
            for shape in shacl_graph.subjects(SH_TARGET_CLASS, None, unique=True)
            for target_class in shacl_graph.objects(shape, SH_TARGET_CLASS, unique=True)
        }
    else:
        classes = set()

    if not classes:
        # Fall back to the classes named by the display rules
        for rule in get_display_rules() or []:
            if "target" in rule and "class" in rule["target"]:
                classes.add(rule["target"]["class"])

    return list(classes)

173 

174 

def get_available_classes():
    """
    Build the list of entity classes with their instance counts and labels.

    When the module-level cache has already been filled it is returned as is.
    Otherwise the class URIs come from the SHACL/display-rules configuration
    or, if that is empty, from a ``SELECT DISTINCT ?class`` query. Classes that
    have multiple shapes produce one entry per detected shape.

    Returns:
        list: Dicts with "uri", "label", "count" (display string),
            "count_numeric" and "shape", sorted by lower-cased label.
    """
    if _AVAILABLE_CLASSES_CACHE is not None:
        return _AVAILABLE_CLASSES_CACHE

    custom_filter = get_custom_filter()
    class_uris = get_classes_from_shacl_or_display_rules()

    if not class_uris:
        # No configured classes: discover them from the dataset itself
        sparql = get_sparql()
        wrapped_pattern = _wrap_virtuoso_graph_pattern("?subject a ?class .")
        sparql.setQuery(f"""
            SELECT DISTINCT ?class
            WHERE {{
                {wrapped_pattern}
            }}
        """)
        sparql.setReturnFormat(JSON)
        discovered = sparql.query().convert()
        class_uris = [row["class"]["value"] for row in discovered["results"]["bindings"]]

    # One capped COUNT query per class
    counted_classes = []
    for uri in class_uris:
        shown, number = _count_class_instances(uri)
        counted_classes.append(
            {"uri": uri, "display_count": shown, "numeric_count": number}
        )

    # Most populated classes first
    ranked_classes = sorted(
        counted_classes, key=lambda item: item["numeric_count"], reverse=True
    )

    multi_shape_classes = get_classes_with_multiple_shapes()
    entries = []

    for counted in ranked_classes:
        class_uri = counted["uri"]

        if not (multi_shape_classes and class_uri in multi_shape_classes):
            # Single-shape class: reuse the count computed above
            shape_uri = determine_shape_for_classes([class_uri])
            entity_key = (class_uri, shape_uri)
            if is_entity_type_visible(entity_key):
                entries.append({
                    "uri": class_uri,
                    "label": custom_filter.human_readable_class(entity_key),
                    "count": counted["display_count"],
                    "count_numeric": counted["numeric_count"],
                    "shape": shape_uri,
                })
            continue

        # Multi-shape class: one entry per shape that has entities
        by_shape = _get_entities_with_enhanced_shape_detection(
            class_uri, multi_shape_classes, limit=COUNT_LIMIT
        )
        for shape_uri, shape_entities in by_shape.items():
            if not shape_entities:
                continue
            how_many = len(shape_entities)
            entries.append({
                "uri": class_uri,
                "label": custom_filter.human_readable_class((class_uri, shape_uri)),
                "count": f"{how_many}+" if how_many >= COUNT_LIMIT else str(how_many),
                "count_numeric": how_many,
                "shape": shape_uri,
            })

    return sorted(entries, key=lambda entry: entry["label"].lower())

256 

257 

def build_sort_clause(sort_property: str, entity_type: str, shape_uri: str = None) -> str:
    """
    Produce the SPARQL fragment that binds ``?sortValue`` for a sort property.

    The fragment is only produced when the display rule matching the entity
    type (and shape) lists *sort_property* in its ``sortableBy`` entries.

    Args:
        sort_property: The property to sort by
        entity_type: The entity type URI
        shape_uri: Optional shape URI for more specific sorting rules

    Returns:
        An ``OPTIONAL { ?subject <property> ?sortValue }`` clause, or an empty
        string when sorting by that property is not configured.
    """
    if not (sort_property and entity_type):
        return ""

    rule = find_matching_rule(entity_type, shape_uri)
    if not rule or "sortableBy" not in rule:
        return ""

    property_is_sortable = any(
        entry.get("property") == sort_property for entry in rule["sortableBy"]
    )
    if not property_is_sortable:
        return ""

    return f"OPTIONAL {{ ?subject <{sort_property}> ?sortValue }}"

287 

288 

def get_entities_for_class(
    selected_class, page, per_page, sort_property=None, sort_direction="ASC", selected_shape=None
):
    """
    Retrieve entities for a specific class with pagination and sorting.

    Args:
        selected_class (str): URI of the class to retrieve entities for
        page (int): Page number (1-indexed)
        per_page (int): Number of entities per page
        sort_property (str, optional): Property URI to sort by. Defaults to None.
        sort_direction (str, optional): Sort direction ("ASC" or "DESC"), matched
            case-insensitively. Any other value falls back to "ASC" in the
            SPARQL ORDER BY clause. Defaults to "ASC".
        selected_shape (str, optional): Shape URI for filtering entities. Defaults to None.

    Returns:
        tuple: (list of entities, total count). Each entity is a dict with
            "uri" and "label". On the shape-filtering path the total is only
            the number of matching entities found in the fetched window.

    Performance Notes:
        - If sort_property is None, NO ORDER BY clause is applied to the SPARQL query.
          This significantly improves performance for large datasets by avoiding expensive
          sorting operations on URIs.
        - Without explicit ordering, the triplestore returns results in its natural order,
          which is deterministic within a session but may vary after database reloads.
        - For optimal performance with large datasets, configure display_rules.yaml without
          sortableBy properties to prevent users from triggering expensive sort operations.
    """
    sparql = get_sparql()
    custom_filter = get_custom_filter()
    classes_with_multiple_shapes = get_classes_with_multiple_shapes()

    # The direction is interpolated into the query text, so only the two
    # SPARQL keywords are allowed through; anything else becomes ASC.
    normalized_direction = str(sort_direction).upper() if sort_direction else "ASC"
    if normalized_direction not in ("ASC", "DESC"):
        normalized_direction = "ASC"

    # Guard against a missing/empty multi-shape set before the membership
    # test, as get_available_classes() does.
    use_shape_filtering = bool(
        selected_shape
        and classes_with_multiple_shapes
        and selected_class in classes_with_multiple_shapes
    )

    offset = (page - 1) * per_page

    if use_shape_filtering:
        # For shape filtering, we need to fetch entities and check their shape
        # Use a larger LIMIT to ensure we get enough entities after filtering
        fetch_limit = per_page * 5  # Safety margin for filtering

        subjects_query = f"""
            SELECT DISTINCT ?subject
            WHERE {{
                ?subject a <{selected_class}> .
            }}
            LIMIT {fetch_limit}
            OFFSET {offset}
        """

        sparql.setQuery(subjects_query)
        sparql.setReturnFormat(JSON)
        subjects_results = sparql.query().convert()

        subjects = [r["subject"]["value"] for r in subjects_results["results"]["bindings"]]

        if not subjects:
            return [], 0

        # Now fetch triples for these specific subjects
        subjects_filter = " ".join(f"(<{s}>)" for s in subjects)

        triples_query = f"""
            SELECT ?subject ?p ?o
            WHERE {{
                ?subject a <{selected_class}> . ?subject ?p ?o . VALUES (?subject) {{ {subjects_filter} }}
            }}
        """

        sparql.setQuery(triples_query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        entities_triples = defaultdict(list)
        for binding in results["results"]["bindings"]:
            subject = binding["subject"]["value"]
            predicate = binding["p"]["value"]
            obj = binding["o"]["value"]
            entities_triples[subject].append((subject, predicate, obj))

        # Keep only the entities whose detected shape is the requested one
        filtered_entities = []
        for subject_uri, triples in entities_triples.items():
            entity_shape = determine_shape_for_entity_triples(list(triples))
            if entity_shape == selected_shape:
                entity_label = custom_filter.human_readable_entity(
                    subject_uri, (selected_class, selected_shape), None
                )
                filtered_entities.append({"uri": subject_uri, "label": entity_label})

        # On this path sorting is done in Python, on the label
        if sort_property and sort_direction:
            filtered_entities.sort(
                key=lambda x: x["label"].lower(),
                reverse=normalized_direction == "DESC",
            )

        # For shape-filtered results, we can't accurately determine total_count without scanning all entities
        # Return the number of filtered entities as an approximation
        total_count = len(filtered_entities)
        return filtered_entities[:per_page], total_count

    # Standard pagination path
    sort_clause = ""
    order_clause = ""

    if sort_property:
        sort_clause = build_sort_clause(sort_property, selected_class, selected_shape)
        if sort_clause:
            order_clause = f"ORDER BY {normalized_direction}(?sortValue)"

    # Project ?sortValue only when the sort clause actually binds it
    select_vars = "?subject ?sortValue" if sort_clause else "?subject"

    entities_query = f"""
        SELECT {select_vars}
        WHERE {{
            ?subject a <{selected_class}> . {sort_clause}
        }}
        {order_clause}
        LIMIT {per_page}
        OFFSET {offset}
    """

    # The total comes from the class list rather than from a new COUNT query
    available_classes = get_available_classes()

    class_info = next(
        (c for c in available_classes
         if c["uri"] == selected_class and c.get("shape") == selected_shape),
        None
    )
    total_count = class_info.get("count_numeric", 0) if class_info else 0

    sparql.setQuery(entities_query)
    sparql.setReturnFormat(JSON)
    entities_results = sparql.query().convert()

    entities = []
    shape = selected_shape if selected_shape else determine_shape_for_classes([selected_class])

    for result in entities_results["results"]["bindings"]:
        subject_uri = result["subject"]["value"]
        entity_label = custom_filter.human_readable_entity(
            subject_uri, (selected_class, shape), None
        )
        entities.append({"uri": subject_uri, "label": entity_label})

    return entities, total_count

428 

429 

def get_catalog_data(
    selected_class: str,
    page: int,
    per_page: int,
    sort_property: str = None,
    sort_direction: str = "ASC",
    selected_shape: str = None
) -> dict:
    """
    Assemble the data needed to render one catalog page.

    When a class is selected, its sortable properties are looked up, the first
    of them becomes the sort property if none was given, and the entities for
    the requested page are fetched. Without a selected class the result holds
    empty lists and zero counts.

    Args:
        selected_class (str): Selected class URI
        page (int): Current page number
        per_page (int): Items per page
        sort_property (str, optional): Property to sort by
        sort_direction (str, optional): Sort direction ('ASC' or 'DESC')
        selected_shape (str, optional): URI of the shape to use for sorting rules

    Returns:
        dict: Catalog data including entities, pagination info, and sort settings
    """
    entities, total_count, sortable_properties = [], 0, []

    if selected_class:
        sortable_properties = get_sortable_properties((selected_class, selected_shape))

        # Default to the first configured sortable property
        if sortable_properties and not sort_property:
            sort_property = sortable_properties[0]["property"]

        entities, total_count = get_entities_for_class(
            selected_class, page, per_page, sort_property, sort_direction, selected_shape
        )

    if total_count > 0:
        # Ceiling division: a partially filled last page still counts
        total_pages = (total_count + per_page - 1) // per_page
    else:
        total_pages = 0

    return {
        "entities": entities,
        "total_pages": total_pages,
        "current_page": page,
        "per_page": per_page,
        "total_count": total_count,
        "sort_property": sort_property,
        "sort_direction": sort_direction,
        "sortable_properties": sortable_properties,
        "selected_class": selected_class,
        "selected_shape": selected_shape,
    }

483 

484 

def fetch_data_graph_for_subject(subject: str) -> Graph | ConjunctiveGraph:
    """
    Load every triple (or quad) having *subject* as its subject into a graph.

    The query shape depends on the store: Virtuoso is always queried through
    ``GRAPH ?g`` with the excluded graphs filtered out, other quadstores are
    queried through ``GRAPH ?g`` without a filter, and plain triplestores are
    queried without a graph variable.

    Args:
        subject (str): The URI of the subject to fetch data for

    Returns:
        Graph|ConjunctiveGraph: A ConjunctiveGraph holding quads when the
            dataset is a quadstore, otherwise a Graph holding triples
    """
    g = ConjunctiveGraph() if get_dataset_is_quadstore() else Graph()
    sparql = get_sparql()

    if is_virtuoso():
        # For virtuoso we need to explicitly query the graph
        excluded_graphs = ">, <".join(VIRTUOSO_EXCLUDED_GRAPHS)
        query = f"""
        SELECT ?predicate ?object ?g WHERE {{
            GRAPH ?g {{
                <{subject}> ?predicate ?object.
            }}
            FILTER(?g NOT IN (<{excluded_graphs}>))
        }}
        """
    elif get_dataset_is_quadstore():
        # For non-virtuoso quadstore, we need to query all graphs
        query = f"""
            SELECT ?predicate ?object ?g WHERE {{
                GRAPH ?g {{
                    <{subject}> ?predicate ?object.
                }}
            }}
            """
    else:
        # For regular triplestore
        query = f"""
            SELECT ?predicate ?object WHERE {{
                <{subject}> ?predicate ?object.
            }}
            """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    response = sparql.query().convert()
    rows = response.get("results", {}).get("bindings", [])

    for row in rows:
        # Turn the JSON binding into an rdflib term
        obj_data = row["object"]
        is_literal = obj_data["type"] in {"literal", "typed-literal"}
        if not is_literal:
            value = URIRef(obj_data["value"])
        elif "datatype" in obj_data:
            value = Literal(obj_data["value"], datatype=URIRef(obj_data["datatype"]))
        else:
            # Create literal without explicit datatype to match Reader.import_entities_from_triplestore
            value = Literal(obj_data["value"])

        # Add a quad or a triple depending on the store type
        if get_dataset_is_quadstore():
            graph_uri = URIRef(row["g"]["value"])
            quad = (
                URIRef(subject),
                URIRef(row["predicate"]["value"]),
                value,
                graph_uri,
            )
            g.add(quad)
        else:
            g.add((URIRef(subject), URIRef(row["predicate"]["value"]), value))

    return g

561 

562 

def parse_sparql_update(query) -> dict:
    """Extract the triples inserted and deleted by a SPARQL UPDATE string.

    Only ``INSERT DATA`` and ``DELETE DATA`` operations are considered. For
    each of them the triples are taken from the operation's quads when present
    (the graph is dropped), otherwise from its plain triples.

    Args:
        query: The SPARQL UPDATE text to parse.

    Returns:
        dict: May contain "Additions" and/or "Deletions", each a list of
            (subject, predicate, object) tuples. A key is absent when no
            matching operation contributed triples.
    """
    operations = translateUpdate(parseUpdate(query)).algebra
    bucket_for_operation = {"DeleteData": "Deletions", "InsertData": "Additions"}

    def flatten_quads(quads):
        # quads maps graph -> triples; only the triples are kept
        return [
            (triple[0], triple[1], triple[2])
            for graph_triples in quads.values()
            for triple in graph_triples
        ]

    modifications = {}
    for operation in operations:
        bucket = bucket_for_operation.get(operation.name)
        if bucket is None:
            continue

        if hasattr(operation, "quads") and operation.quads:
            changed = flatten_quads(operation.quads)
        else:
            changed = operation.triples

        if changed:
            modifications.setdefault(bucket, []).extend(changed)

    return modifications

592 

593 

def fetch_current_state_with_related_entities(
    provenance: dict,
) -> Graph | ConjunctiveGraph:
    """
    Merge the current data of every entity listed in *provenance* into one graph.

    Args:
        provenance (dict): Dictionary containing provenance metadata for main entity and related entities;
            only its keys (entity URIs) are used here

    Returns:
        Graph|ConjunctiveGraph: The union of the graphs fetched for each
            entity; a ConjunctiveGraph when the dataset is a quadstore
    """
    combined_graph = ConjunctiveGraph() if get_dataset_is_quadstore() else Graph()

    # Fetch state for all entities mentioned in provenance
    for entity_uri in provenance:
        entity_graph = fetch_data_graph_for_subject(entity_uri)

        # Copy quads for a quadstore, plain triples otherwise
        statements = entity_graph.quads() if get_dataset_is_quadstore() else entity_graph
        for statement in statements:
            combined_graph.add(statement)

    return combined_graph

620 

621 

def get_deleted_entities_with_filtering(
    page=1,
    per_page=50,
    sort_property="deletionTime",
    sort_direction="DESC",
    selected_class=None,
    selected_shape=None,
):
    """
    Fetch and process deleted entities from the provenance graph, with filtering and sorting.

    Candidate snapshots are read from the provenance endpoint, each one is
    handed to process_deleted_entity() in a process pool, and the surviving
    entities are sorted, filtered by class and paginated.

    Args:
        page (int): Page number (1-indexed).
        per_page (int): Number of entities per page.
        sort_property (str): "deletionTime" or the URI of a sortable property.
        sort_direction (str): "DESC" (case-insensitive) sorts descending; any
            other value sorts ascending.
        selected_class (str, optional): Class URI to filter by. When omitted,
            the first available class (by label) is selected.
        selected_shape (str, optional): Shape URI; derived from the selected
            class when omitted.

    Returns:
        tuple: (paginated_entities, available_classes, selected_class,
            selected_shape, sortable_properties, total_count). When the
            provenance query returns nothing this is
            ([], [], None, None, [], 0).
    """
    sortable_properties = [
        {"property": "deletionTime", "displayName": "Deletion Time", "sortType": "date"}
    ]
    provenance_sparql = get_provenance_sparql()
    custom_filter = get_custom_filter()

    prov_query = """
    SELECT DISTINCT ?entity ?lastSnapshot ?deletionTime ?agent ?lastValidSnapshotTime
    WHERE {
        ?lastSnapshot a <http://www.w3.org/ns/prov#Entity> ;
                     <http://www.w3.org/ns/prov#specializationOf> ?entity ;
                     <http://www.w3.org/ns/prov#generatedAtTime> ?deletionTime ;
                     <http://www.w3.org/ns/prov#invalidatedAtTime> ?invalidationTime ;
                     <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastValidSnapshot.

        ?lastValidSnapshot <http://www.w3.org/ns/prov#generatedAtTime> ?lastValidSnapshotTime .

        OPTIONAL { ?lastSnapshot <http://www.w3.org/ns/prov#wasAttributedTo> ?agent . }

        FILTER NOT EXISTS {
            ?laterSnapshot <http://www.w3.org/ns/prov#wasDerivedFrom> ?lastSnapshot .
        }
    }
    """
    provenance_sparql.setQuery(prov_query)
    provenance_sparql.setReturnFormat(JSON)
    prov_results = provenance_sparql.query().convert()

    results_bindings = prov_results["results"]["bindings"]
    if not results_bindings:
        return [], [], None, None, [], 0

    # When the caller already chose a class, resolve its sortable properties
    # BEFORE the workers run: process_deleted_entity() only extracts sort
    # values for the properties it is given, so adding them afterwards left
    # every non-"deletionTime" sort key empty.
    class_properties_resolved = False
    if selected_class:
        if selected_shape is None:
            selected_shape = determine_shape_for_classes([selected_class])
        sortable_properties.extend(
            get_sortable_properties((selected_class, selected_shape))
        )
        class_properties_resolved = True

    deleted_entities = []
    max_workers = max(1, min(os.cpu_count() or 4, len(results_bindings)))
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        future_to_entity = {
            executor.submit(process_deleted_entity, result, sortable_properties): result
            for result in results_bindings
        }
        for future in as_completed(future_to_entity):
            entity_info = future.result()
            # None means the entity was filtered out by the worker
            if entity_info is not None:
                deleted_entities.append(entity_info)

    # Count deleted entities per visible class
    class_counts = {}
    for entity in deleted_entities:
        for type_uri in entity["entity_types"]:
            class_counts[type_uri] = class_counts.get(type_uri, 0) + 1

    available_classes = [
        {
            "uri": class_uri,
            "label": custom_filter.human_readable_class((class_uri, determine_shape_for_classes([class_uri]))),
            "count": count,
        }
        for class_uri, count in class_counts.items()
    ]

    reverse_sort = sort_direction.upper() == "DESC"
    if sort_property == "deletionTime":
        deleted_entities.sort(key=lambda e: e["deletionTime"], reverse=reverse_sort)
    else:
        deleted_entities.sort(
            key=lambda e: e["sort_values"].get(sort_property, "").lower(),
            reverse=reverse_sort,
        )

    available_classes.sort(key=lambda x: x["label"].lower())
    if not selected_class and available_classes:
        selected_class = available_classes[0]["uri"]

    # Class picked by default above: its sortable properties can only be
    # reported here, after processing. Sort values for them were not
    # extracted in this call.
    if selected_class and not class_properties_resolved:
        if selected_shape is None:
            selected_shape = determine_shape_for_classes([selected_class])
        sortable_properties.extend(
            get_sortable_properties((selected_class, selected_shape))
        )

    if selected_class:
        filtered_entities = [
            entity
            for entity in deleted_entities
            if selected_class in entity["entity_types"]
        ]
    else:
        filtered_entities = deleted_entities

    total_count = len(filtered_entities)
    offset = (page - 1) * per_page
    paginated_entities = filtered_entities[offset : offset + per_page]

    return paginated_entities, available_classes, selected_class, selected_shape, sortable_properties, total_count

726 

727 

def process_deleted_entity(result: dict, sortable_properties: list) -> dict | None:
    """
    Build the display record for one deleted entity, or None to skip it.

    The entity's state at its last valid snapshot time is reconstructed with
    AgnosticEntity. The entity is skipped when it is absent from that state or
    when none of its types is visible.

    Args:
        result (dict): One binding of the provenance query; "entity",
            "lastValidSnapshotTime" and "deletionTime" are read, "agent" is
            optional.
        sortable_properties (list): Dicts whose "property" value names a
            predicate to extract a sort value for.

    Returns:
        dict | None: Record with "uri", "deletionTime", "deletedBy",
            "lastValidSnapshotTime", "type", "label", "entity_types" and
            "sort_values"; None when the entity is filtered out.
    """
    change_tracking_config = get_change_tracking_config()
    custom_filter = get_custom_filter()

    entity_uri = result["entity"]["value"]
    snapshot_time = result["lastValidSnapshotTime"]["value"]

    history = AgnosticEntity(
        res=entity_uri,
        config=change_tracking_config,
        include_related_objects=True,
        include_merged_entities=True,
        include_reverse_relations=True,
    )
    # Same instant as both ends of the interval: the state at that moment
    state, _, _ = history.get_state_at_time((snapshot_time, snapshot_time))

    if entity_uri not in state:
        return None

    state_key = convert_to_datetime(snapshot_time, stringify=True)
    last_valid_state = state[entity_uri][state_key]
    entity_ref = URIRef(entity_uri)

    entity_types = [
        str(type_term)
        for _, _, type_term in last_valid_state.triples((entity_ref, RDF.type, None))
    ]
    highest_priority_type = get_highest_priority_class(entity_types)
    shape = determine_shape_for_classes([highest_priority_type])

    visible_types = []
    for type_uri in entity_types:
        if is_entity_type_visible((type_uri, determine_shape_for_classes([type_uri]))):
            visible_types.append(type_uri)
    if not visible_types:
        return None

    # First value found for each sortable property, "" when there is none
    sort_values = {}
    for prop in sortable_properties:
        prop_uri = prop["property"]
        found_values = [
            str(value)
            for _, _, value in last_valid_state.triples(
                (entity_ref, URIRef(prop_uri), None)
            )
        ]
        sort_values[prop_uri] = found_values[0] if found_values else ""

    entity_key = (highest_priority_type, shape)
    return {
        "uri": entity_uri,
        "deletionTime": result["deletionTime"]["value"],
        "deletedBy": custom_filter.format_agent_reference(
            result.get("agent", {}).get("value", "")
        ),
        "lastValidSnapshotTime": snapshot_time,
        "type": custom_filter.human_readable_predicate(
            highest_priority_type, entity_key
        ),
        "label": custom_filter.human_readable_entity(
            entity_uri, entity_key, last_valid_state
        ),
        "entity_types": visible_types,
        "sort_values": sort_values,
    }

788 

789 

def find_orphaned_entities(subject, entity_type, predicate=None, object_value=None):
    """
    Find entities that would become orphaned after deleting a triple or an entire entity,
    including intermediate relation entities.

    An entity is considered orphaned if:
    1. It has no incoming references from other entities (except from the entity being deleted)
    2. It does not reference any entities that are subjects of other triples

    For intermediate relations, an entity is also considered orphaned if:
    1. It connects to the entity being deleted
    2. It has no other valid connections after the deletion
    3. It is directly involved in the deletion operation (if predicate and object_value are specified)

    Candidate selection depends on the arguments:
    - predicate and object_value: only that object is considered
    - predicate alone: every object of ``<subject> <predicate>`` is considered
    - neither: every object of any predicate of the subject is considered

    Args:
        subject (str): The URI of the subject being deleted
        entity_type (str): The type of the entity being deleted
        predicate (str, optional): The predicate being deleted
        object_value (str, optional): The object value being deleted

    Returns:
        tuple: Lists of (orphaned_entities, intermediate_orphans); each item is
            a dict with "uri" and "type"
    """
    sparql = get_sparql()
    # Tolerate a missing rule set, as get_classes_from_shacl_or_display_rules() does
    display_rules = get_display_rules() or []

    # Classes used as intermediate relations by the rule(s) for entity_type
    intermediate_classes = set()
    for rule in display_rules:
        if "target" in rule and "class" in rule["target"] and rule["target"]["class"] == entity_type:
            for prop in rule.get("displayProperties", []):
                if "intermediateRelation" in prop:
                    intermediate_classes.add(prop["intermediateRelation"]["class"])

    # NOTE(review): with no intermediate classes this renders as "<>" inside
    # the IN / NOT IN lists below — unchanged from the previous behavior.
    intermediate_list = ">, <".join(intermediate_classes)

    # Always tie ?entity to the subject. Previously a predicate given without
    # an object value emitted no binding triple at all, so the query ranged
    # over every typed entity in the dataset.
    predicate_binding = f"<{subject}> <{predicate}> ?entity ." if predicate else ""
    object_filter = (
        f"FILTER(?entity = <{object_value}>)" if predicate and object_value else ""
    )
    any_predicate_binding = f"<{subject}> ?p ?entity ." if not predicate else ""
    self_reference_filter = (
        f"FILTER(?connectedEntity != <{subject}>)" if not predicate else ""
    )

    orphan_query = f"""
    SELECT DISTINCT ?entity ?type
    WHERE {{
        {predicate_binding}
        {object_filter}

        # If no specific predicate, get all connected entities
        {any_predicate_binding}

        FILTER(isIRI(?entity))
        ?entity a ?type .

        # No incoming references from other entities
        FILTER NOT EXISTS {{
            ?other ?anyPredicate ?entity .
            FILTER(?other != <{subject}>)
        }}

        # No outgoing references to active entities
        FILTER NOT EXISTS {{
            ?entity ?outgoingPredicate ?connectedEntity .
            ?connectedEntity ?furtherPredicate ?furtherObject .
            {self_reference_filter}
        }}

        # Exclude intermediate relation entities
        FILTER(?type NOT IN (<{intermediate_list}>))
    }}
    """

    # Query to find orphaned intermediate relations
    if predicate and object_value:
        intermediate_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            <{object_value}> a ?type .
            FILTER(?type IN (<{intermediate_list}>))
            BIND(<{object_value}> AS ?entity)
        }}
        """
    else:
        # If we are deleting the entire entity, find all connected intermediate entities
        intermediate_query = f"""
        SELECT DISTINCT ?entity ?type
        WHERE {{
            # Find intermediate relations connected to the entity being deleted
            {{
                <{subject}> ?p ?entity .
                ?entity a ?type .
                FILTER(?type IN (<{intermediate_list}>))
            }} UNION {{
                ?entity ?p <{subject}> .
                ?entity a ?type .
                FILTER(?type IN (<{intermediate_list}>))
            }}
        }}
        """

    orphaned = []
    intermediate_orphans = []

    # Execute queries and process results
    for query, result_list in [
        (orphan_query, orphaned),
        (intermediate_query, intermediate_orphans),
    ]:
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            result_list.append(
                {"uri": result["entity"]["value"], "type": result["type"]["value"]}
            )

    return orphaned, intermediate_orphans

900 

901 

def import_entity_graph(editor: Editor, subject: str, max_depth: int = 5, include_referencing_entities: bool = False):
    """
    Import *subject* and the entities reachable from it into the editor.

    Starting at *subject* (depth 1), every IRI object of a non ``rdf:type``
    predicate is followed depth-first until *max_depth* is exceeded. Each
    entity is imported once. Optionally, the entities that point at *subject*
    are imported first.

    Args:
        editor (Editor): The Editor instance to use for importing.
        subject (str): The URI of the subject to start the import from.
        max_depth (int): The maximum depth of recursion (default is 5).
        include_referencing_entities (bool): Whether to include entities that have the subject as their object (default False).
            Useful when deleting an entity to ensure all references are properly removed.

    Returns:
        Editor: The updated Editor instance with all imported entities.
    """
    imported_subjects = set()

    # First import referencing entities if needed
    if include_referencing_entities:
        sparql = get_sparql()

        # Build query based on database type
        if editor.dataset_is_quadstore:
            query = f"""
            SELECT DISTINCT ?s
            WHERE {{
                GRAPH ?g {{
                    ?s ?p <{subject}> .
                }}
                FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
            }}
            """
        else:
            query = f"""
            SELECT DISTINCT ?s
            WHERE {{
                ?s ?p <{subject}> .
                FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
            }}
            """

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        referencing_rows = sparql.query().convert()["results"]["bindings"]

        # Import each referencing entity
        for row in referencing_rows:
            referencing_subject = row["s"]["value"]
            if referencing_subject == subject:
                continue
            if referencing_subject in imported_subjects:
                continue
            imported_subjects.add(referencing_subject)
            editor.import_entity(URIRef(referencing_subject))

    # Depth-first walk with an explicit stack of (uri, depth) pairs
    pending = [(subject, 1)]
    while pending:
        current_subject, current_depth = pending.pop()
        if current_depth > max_depth or current_subject in imported_subjects:
            continue

        imported_subjects.add(current_subject)
        editor.import_entity(URIRef(current_subject))

        query = f"""
            SELECT ?p ?o
            WHERE {{
                <{current_subject}> ?p ?o .
                FILTER(isIRI(?o))
                FILTER(?p != <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
            }}
        """

        sparql = get_sparql()
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        outgoing_rows = sparql.query().convert()["results"]["bindings"]

        # Push in reverse so the first linked entity is visited first
        linked_entities = [row["o"]["value"] for row in outgoing_rows]
        pending.extend(
            (linked, current_depth + 1) for linked in reversed(linked_entities)
        )

    return editor

984 

985 

def get_entity_types(subject_uri: str) -> List[str]:
    """
    Return every ``rdf:type`` value the endpoint reports for an entity.

    Args:
        subject_uri: URI of the entity to look up

    Returns:
        List of type URIs, one per result row, in the order the endpoint
        returned them
    """
    endpoint = get_sparql()

    type_query = f"""
        SELECT ?type WHERE {{
            <{subject_uri}> a ?type .
        }}
    """

    endpoint.setQuery(type_query)
    endpoint.setReturnFormat(JSON)
    rows = endpoint.query().convert()["results"]["bindings"]

    return [row["type"]["value"] for row in rows]

1009 

1010 

def collect_referenced_entities(data, existing_entities=None):
    """
    Gather the URIs of all existing entities referenced anywhere in the structured data.

    The data is walked depth-first. Three kinds of dict are distinguished:

    - a reference to an existing entity (``is_existing_entity`` is exactly
      ``True`` and ``entity_uri`` is present): its URI is collected and the
      dict is not explored further;
    - a new entity description (has ``entity_type``): only the values of its
      ``properties`` mapping are explored;
    - any other dict: all of its values are explored.

    Lists are explored item by item; every other value is ignored.

    Args:
        data: The structured data (can be dict, list, or string)
        existing_entities: Set to collect URIs into (created if None)

    Returns:
        Set of URIs (strings) of existing entities that should be imported;
        this is the same set object as ``existing_entities`` when one was given
    """
    found = set() if existing_entities is None else existing_entities

    # Explicit work stack; children are pushed reversed so nodes are still
    # handled in the same pre-order as a plain recursive walk.
    pending = [data]
    while pending:
        node = pending.pop()

        if isinstance(node, list):
            pending.extend(reversed(node))
            continue
        if not isinstance(node, dict):
            continue

        if node.get("is_existing_entity") is True and "entity_uri" in node:
            found.add(node["entity_uri"])
        elif "entity_type" in node:
            # A new entity being created: only its property values can hold references.
            children = list(node.get("properties", {}).values())
            pending.extend(reversed(children))
        else:
            pending.extend(reversed(list(node.values())))

    return found

1047 

1048 

def import_referenced_entities(editor, structured_data):
    """
    Load into the editor every existing entity that the structured data refers to.

    Intended to be called before editor.preexisting_finished(), so that the
    existing entities about to be linked are already present in the editor.
    The import is best effort: a failure on one entity is reported on standard
    output and the remaining entities are still attempted.

    Args:
        editor: The Editor instance
        structured_data: The structured data containing entity references
    """
    for entity_uri in collect_referenced_entities(structured_data):
        try:
            editor.import_entity(entity_uri)
        except Exception as e:
            # Report the failure and carry on with the next URI.
            print(f"Warning: Could not import entity {entity_uri}: {e}")