Coverage for heritrace / routes / entity / _history.py: 88%

199 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-07-02 10:16 +0000

1# SPDX-FileCopyrightText: 2024-2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5import re 

6from datetime import datetime 

7 

8from flask import abort, render_template 

9from flask_babel import gettext 

10from flask_login import login_required 

11from rdflib import RDF, Graph, Literal, URIRef 

12from SPARQLWrapper import JSON 

13from time_agnostic_library.agnostic_entity import AgnosticEntity 

14 

15from heritrace.extensions import ( 

16 get_change_tracking_config, 

17 get_custom_filter, 

18 get_dataset_is_quadstore, 

19 get_provenance_sparql, 

20) 

21from heritrace.routes.entity._blueprint import entity_bp 

22from heritrace.routes.entity._rendering import generate_modification_text 

23from heritrace.routes.entity._types import _DATETIME_MIN_UTC, HistoryContext 

24from heritrace.sparql import get_sparql_bindings 

25from heritrace.utils.converters import convert_to_datetime 

26from heritrace.utils.display_rules_utils import ( 

27 get_grouped_triples, 

28 get_highest_priority_class, 

29) 

30from heritrace.utils.shacl_utils import determine_shape_for_entity_triples 

31from heritrace.utils.shacl_validation import get_valid_predicates 

32from heritrace.utils.sparql_utils import ( 

33 convert_to_rdflib_graphs, 

34 determine_shape_for_classes, 

35 get_triples_from_graph, 

36 parse_sparql_update, 

37) 

38from heritrace.utils.uri_utils import is_valid_url 

39 

40 

def _timeline_date(moment: datetime) -> dict[str, int]:
    """Break *moment* into the year-to-second mapping used by timeline events."""
    return {
        "year": moment.year,
        "month": moment.month,
        "day": moment.day,
        "hour": moment.hour,
        "minute": moment.minute,
        "second": moment.second,
    }


@entity_bp.route("/entity-history/<path:entity_uri>")
@login_required
def entity_history(entity_uri: str) -> str:
    """Render the history timeline page for an entity.

    The entity's snapshots and provenance records are loaded through
    ``AgnosticEntity`` (related objects, merged entities and reverse
    relations included). Provenance records are ordered by
    ``generatedAtTime`` and turned into one timeline event per snapshot,
    each carrying the responsible agent, primary source, description, the
    rendered modifications, a "View version" link and, for every snapshot
    except the most recent one, a "Restore" form.

    Args:
        entity_uri: URI of the entity, taken from the request path.

    Returns:
        The rendered ``entity/history.jinja`` template.

    Raises:
        werkzeug.exceptions.NotFound: via ``abort(404)`` when no snapshot is
            recorded for ``entity_uri``.
        AssertionError: when ``convert_to_datetime`` returns ``None`` for the
            ``generatedAtTime`` of a provenance record.
    """
    # Imported locally so this function does not rely on a module-level
    # import that the rest of the file does not otherwise need.
    from html import escape

    entity_uri_ref = URIRef(entity_uri)
    custom_filter = get_custom_filter()
    change_tracking_config = get_change_tracking_config()

    agnostic_entity = AgnosticEntity(
        res=entity_uri,
        config=change_tracking_config,
        include_related_objects=True,
        include_merged_entities=True,
        include_reverse_relations=True,
    )
    history, provenance = agnostic_entity.get_history(include_prov_metadata=True)
    history = convert_to_rdflib_graphs(history, is_quadstore=get_dataset_is_quadstore())

    entity_snapshots = history.get(entity_uri, {})
    sorted_metadata = sorted(
        provenance.get(entity_uri, {}).items(),
        key=lambda x: convert_to_datetime(x[1]["generatedAtTime"]) or _DATETIME_MIN_UTC,
    )
    sorted_timestamps: list[str] = [
        dt.isoformat()
        for _, meta in sorted_metadata
        if (dt := convert_to_datetime(meta["generatedAtTime"])) is not None
    ]

    # Same policy as ``entity_version``: an entity without any recorded
    # snapshot is a 404 rather than an unhandled KeyError/IndexError.
    if not entity_snapshots or not sorted_timestamps:
        abort(404)

    # When the newest record carries ``invalidatedAtTime`` (a deletion), the
    # classes, shape and labels are read from the snapshot just before it.
    latest_metadata = sorted_metadata[-1][1]
    is_latest_deletion = bool(latest_metadata.get("invalidatedAtTime"))
    context_index = -2 if is_latest_deletion and len(sorted_timestamps) > 1 else -1
    context_snapshot = entity_snapshots[sorted_timestamps[context_index]]

    entity_classes = [
        str(triple[2])
        for triple in get_triples_from_graph(
            context_snapshot, (entity_uri_ref, RDF.type, None)
        )
    ]
    highest_priority_class = get_highest_priority_class(entity_classes)

    snapshot_entity_shape = determine_shape_for_entity_triples(
        list(get_triples_from_graph(context_snapshot, (entity_uri_ref, None, None)))
    )

    # None of these inputs change between snapshots, so build the context once.
    history_ctx = HistoryContext(
        entity_uri=entity_uri,
        highest_priority_class=highest_priority_class,
        entity_shape=snapshot_entity_shape,
        history=history,
        sorted_timestamps=sorted_timestamps,
        custom_filter=custom_filter,
    )

    # ``entity_uri`` comes straight from the request path and is placed inside
    # single-quoted HTML attributes below, so it must be attribute-escaped.
    safe_entity_uri = escape(entity_uri, quote=True)
    last_index = len(sorted_metadata) - 1

    events = []
    for i, (_snapshot_uri, metadata) in enumerate(sorted_metadata):
        date = convert_to_datetime(metadata["generatedAtTime"])
        if date is None:
            msg = "date must not be None"
            raise AssertionError(msg)
        snapshot_graph = entity_snapshots[date.isoformat()]
        safe_generated_at = escape(str(metadata["generatedAtTime"]), quote=True)

        responsible_agent = custom_filter.format_agent_reference(
            metadata["wasAttributedTo"]
        )
        primary_source = custom_filter.format_source_reference(
            metadata["hadPrimarySource"]
        )

        description = _format_snapshot_description(
            metadata,
            history_ctx,
            context_snapshot,
            i,
        )
        modifications = metadata.get("hasUpdateQuery", "")
        modification_text = ""
        if modifications:
            parsed_modifications = parse_sparql_update(modifications)
            modification_text = generate_modification_text(
                parsed_modifications,
                history_ctx,
                snapshot_graph,
                date.isoformat(),
            )

        # Every snapshot except the most recent one can be restored.
        restore_button = ""
        if i < last_index:
            restore_label = gettext("Restore")
            restore_action = f"/restore-version/{safe_entity_uri}/{safe_generated_at}"
            restore_button = f"""
                <form action='{restore_action}'
                    method='post'
                    class='d-inline restore-form'>
                    <button type='submit'
                        class='btn btn-success restore-btn'>
                        <i class='bi
                            bi-arrow-counterclockwise
                            me-1'></i>{restore_label}
                    </button>
                </form>
            """

        event = {
            "start_date": _timeline_date(date),
            "text": {
                "headline": gettext("Snapshot") + " " + str(i + 1),
                "text": (
                    f"<p><strong>"
                    f"{gettext('Responsible agent')}"
                    f":</strong>"
                    f" {responsible_agent}</p>"
                    f"<p><strong>"
                    f"{gettext('Primary source')}"
                    f":</strong>"
                    f" {primary_source}</p>"
                    f"<p><strong>"
                    f"{gettext('Description')}"
                    f":</strong>"
                    f" {description}</p>"
                    f'<div class="modifications mb-3">'
                    f"{modification_text}"
                    f"</div>"
                    f'<div class="d-flex gap-2 mt-2">'
                    f"<a href='/entity-version/"
                    f"{safe_entity_uri}/"
                    f"{safe_generated_at}'"
                    f" class='btn btn-outline-primary"
                    f" view-version'"
                    f" target='_self'>"
                    f"{gettext('View version')}</a>"
                    f"{restore_button}"
                    f"</div>"
                ),
            },
            "autolink": False,
        }

        # An event ends where the following snapshot begins.
        if i < last_index:
            next_date = convert_to_datetime(
                sorted_metadata[i + 1][1]["generatedAtTime"]
            )
            if next_date is None:
                msg = "next_date must not be None"
                raise AssertionError(msg)
            event["end_date"] = _timeline_date(next_date)

        events.append(event)

    entity_label = custom_filter.human_readable_entity(
        entity_uri, (highest_priority_class, snapshot_entity_shape), context_snapshot
    )

    timeline_data = {
        "entityUri": entity_uri,
        "entityLabel": entity_label,
        "entityClasses": list(entity_classes),
        "entityShape": snapshot_entity_shape,
        "events": events,
    }

    return render_template("entity/history.jinja", timeline_data=timeline_data)

225 

226 

def _format_snapshot_description(
    metadata: dict,
    ctx: HistoryContext,
    context_snapshot: Graph,
    current_index: int,
) -> str:
    """Return a snapshot description with entity URIs replaced by labels.

    Two substitutions are attempted on ``metadata["description"]``:

    * For a merge snapshot (``wasDerivedFrom`` is a list with more than one
      item) the URI following "merged with" is replaced by the label the
      merged entity had in the snapshot preceding ``current_index``, when
      that snapshot exists and yields a label different from the URI.
    * The quoted URI of the entity itself is replaced by its label as
      computed against ``context_snapshot``.

    Args:
        metadata: Provenance record of the snapshot being described.
        ctx: History context providing the entity URI, its highest priority
            class, the snapshot graphs, their ordered timestamps and the
            filter used to compute labels.
        context_snapshot: Graph used to compute the entity's own label.
        current_index: Position of the snapshot in ``ctx.sorted_timestamps``;
            values of 0 or less disable the merged-entity lookup.

    Returns:
        The description text, possibly with labels substituted.
    """
    text = metadata.get("description", "")

    derived_from = metadata.get("wasDerivedFrom")
    merge_match = None
    if isinstance(derived_from, list) and len(derived_from) > 1:
        merge_match = re.search(r"merged with [‘’]?([^’’<>\s]+)[‘’]?", text)  # noqa: RUF001

    if merge_match is not None and is_valid_url(merge_match.group(1)):
        merged_uri = merge_match.group(1)
        merged_label = None

        # The merged entity is looked up in the snapshot before this one.
        earlier_graph = None
        if current_index > 0:
            earlier_graph = ctx.history.get(ctx.entity_uri, {}).get(
                ctx.sorted_timestamps[current_index - 1]
            )

        if earlier_graph:
            merged_classes = [
                str(rdf_type)
                for _, _, rdf_type in get_triples_from_graph(
                    earlier_graph,
                    (URIRef(merged_uri), RDF.type, None),
                )
            ]
            top_merged_class = (
                get_highest_priority_class(merged_classes) if merged_classes else None
            )
            merged_label = ctx.custom_filter.human_readable_entity(
                merged_uri,
                (top_merged_class, determine_shape_for_classes(merged_classes)),
                earlier_graph,
            )

        if merged_label and merged_label != merged_uri:
            text = text.replace(
                merge_match.group(0), f"merged with '{merged_label}'"
            )

    own_class = ctx.highest_priority_class
    own_shape = determine_shape_for_classes([own_class]) if own_class else None
    own_label = ctx.custom_filter.human_readable_entity(
        ctx.entity_uri, (own_class, own_shape), context_snapshot
    )
    if own_label and own_label != ctx.entity_uri:
        text = text.replace(f"'{ctx.entity_uri}'", f"'{own_label}'")

    return text

295 

296 

# Characters the SPARQL grammar forbids inside an IRI reference (``<...>``):
# ``< > " { } | ^ ` \`` plus every code point from U+0000 to U+0020.
_SPARQL_IRI_FORBIDDEN = re.compile(r'[<>"{}|^`\\\x00-\x20]')


def _resolve_timestamp(entity_uri: str, timestamp: str) -> tuple[str, datetime]:
    """Turn the ``timestamp`` path segment into a timestamp string and datetime.

    If ``timestamp`` parses with ``datetime.fromisoformat`` it is returned
    unchanged together with the parsed value. Otherwise it is treated as the
    last segment of the IRI ``<entity_uri>/prov/se/<timestamp>`` and that
    resource's ``prov:generatedAtTime`` is fetched from the provenance
    endpoint.

    Args:
        entity_uri: URI of the entity, taken from the request path.
        timestamp: ISO 8601 timestamp or provenance snapshot suffix, taken
            from the request path.

    Returns:
        ``(timestamp_string, parsed_datetime)``.

    Raises:
        werkzeug.exceptions.NotFound: via ``abort(404)`` when the IRI built
            from the arguments cannot be written as a SPARQL IRI reference,
            or when the query returns no binding.
        ValueError: when the stored ``generatedAtTime`` is not accepted by
            ``datetime.fromisoformat``.
    """
    try:
        return timestamp, datetime.fromisoformat(timestamp)
    except ValueError:
        # Not an ISO timestamp: fall through to the provenance lookup.
        pass

    # Both parts come from the request path. A character that is illegal in
    # an IRI reference could close the ``<...>`` early and inject query text,
    # and no stored IRI can contain one, so answer 404 without querying.
    snapshot_iri = f"{entity_uri}/prov/se/{timestamp}"
    if _SPARQL_IRI_FORBIDDEN.search(snapshot_iri):
        abort(404)

    provenance_sparql = get_provenance_sparql()
    query_timestamp = f"""
        SELECT ?generation_time
        WHERE {{
            <{snapshot_iri}>
                <http://www.w3.org/ns/prov#generatedAtTime>
                    ?generation_time.
        }}
    """
    provenance_sparql.setQuery(query_timestamp)
    provenance_sparql.setReturnFormat(JSON)
    try:
        bindings = get_sparql_bindings(provenance_sparql.queryAndConvert())
        generation_time = bindings[0]["generation_time"]["value"]
    except IndexError:
        # No binding: the snapshot does not exist.
        abort(404)
    return generation_time, datetime.fromisoformat(generation_time)

320 

321 

def _find_closest_metadata(
    entity_metadata: dict,
    timestamp_dt: datetime,
    latest_timestamp: str,
) -> tuple[dict | None, dict | None]:
    """Pick the provenance records closest to a timestamp and at the latest one.

    Args:
        entity_metadata: Provenance records of the entity; only the values
            are inspected and each must provide ``generatedAtTime``.
        timestamp_dt: Moment the closest record is searched for.
        latest_timestamp: ``generatedAtTime`` string identifying the latest
            record (compared by string equality).

    Returns:
        ``(closest_metadata, latest_metadata)``. ``closest_metadata`` is the
        record whose ``generatedAtTime`` has the smallest absolute distance
        from ``timestamp_dt`` (the first one wins on ties) and
        ``latest_metadata`` is the record whose ``generatedAtTime`` equals
        ``latest_timestamp``. Either is ``None`` when no record qualifies.

    Raises:
        AssertionError: when ``convert_to_datetime`` returns ``None`` for a
            record's ``generatedAtTime``.
    """
    closest_metadata = None
    min_time_diff = None
    latest_metadata = None

    # ``astimezone()`` is applied to both operands, as ``entity_version`` does
    # when choosing the closest history timestamp: subtracting a naive
    # datetime from an aware one raises TypeError, and a timestamp taken from
    # the URL may carry no offset.
    target = timestamp_dt.astimezone()

    for meta in entity_metadata.values():
        meta_time = convert_to_datetime(meta["generatedAtTime"])
        if meta_time is None:
            msg = "meta_time must not be None"
            raise AssertionError(msg)
        time_diff = abs((meta_time.astimezone() - target).total_seconds())

        # Strict comparison keeps the first record on ties.
        if min_time_diff is None or time_diff < min_time_diff:
            closest_metadata = meta
            min_time_diff = time_diff

        if meta["generatedAtTime"] == latest_timestamp:
            latest_metadata = meta

    return closest_metadata, latest_metadata

350 

351 

352def _compute_version_navigation( 

353 snapshot_times: list[datetime], 

354 timestamp_dt: datetime, 

355) -> tuple[str | None, str | None]: 

356 next_snapshot_timestamp = None 

357 prev_snapshot_timestamp = None 

358 

359 for snap_time in snapshot_times: 

360 if snap_time > timestamp_dt: 

361 next_snapshot_timestamp = snap_time.isoformat() 

362 break 

363 

364 for snap_time in reversed(snapshot_times): 

365 if snap_time < timestamp_dt: 

366 prev_snapshot_timestamp = snap_time.isoformat() 

367 break 

368 

369 return prev_snapshot_timestamp, next_snapshot_timestamp 

370 

371 

def _prepare_modifications(
    closest_metadata: dict,
    ctx: HistoryContext,
    context_version: Graph,
    closest_timestamp: str,
    sorted_timestamps: list[str],
) -> tuple[str, dict]:
    """Render a snapshot's modifications and format its description.

    Args:
        closest_metadata: Provenance record of the snapshot; its
            ``description`` entry is overwritten in place when present and
            non-empty.
        ctx: History context forwarded to the rendering helpers.
        context_version: Graph forwarded to the rendering helpers.
        closest_timestamp: Timestamp of the snapshot.
        sorted_timestamps: Ordered snapshot timestamps, used to locate
            ``closest_timestamp``.

    Returns:
        ``(modifications, closest_metadata)`` where ``modifications`` is the
        text generated from ``hasUpdateQuery`` (empty when the record has no
        update query) and ``closest_metadata`` is the same dictionary that
        was passed in.
    """
    update_query = closest_metadata.get("hasUpdateQuery")
    if update_query:
        modifications = generate_modification_text(
            parse_sparql_update(update_query),
            ctx,
            context_version,
            closest_timestamp,
        )
    else:
        modifications = ""

    if closest_metadata.get("description"):
        # -1 marks a timestamp that is absent from the ordered list.
        position = (
            sorted_timestamps.index(closest_timestamp)
            if closest_timestamp in sorted_timestamps
            else -1
        )
        closest_metadata["description"] = _format_snapshot_description(
            closest_metadata,
            ctx,
            context_version,
            position,
        )

    return modifications, closest_metadata

405 

406 

@entity_bp.route("/entity-version/<path:entity_uri>/<timestamp>")
@login_required
def entity_version(entity_uri: str, timestamp: str) -> str:
    """Render the page showing one historical version of an entity.

    The requested ``timestamp`` is resolved to a datetime, the snapshot
    closest to it is selected, and its triples, provenance record,
    modifications and previous/next navigation are passed to the
    ``entity/version.jinja`` template.

    Args:
        entity_uri: URI of the entity, taken from the request path.
        timestamp: ISO 8601 timestamp or provenance snapshot suffix, taken
            from the request path.

    Returns:
        The rendered ``entity/version.jinja`` template.

    Raises:
        werkzeug.exceptions.NotFound: via ``abort(404)`` when the entity has
            no snapshot, or when no closest or latest provenance record can
            be found.
        AssertionError: when ``convert_to_datetime`` returns ``None`` for the
            ``generatedAtTime`` of the selected provenance record.
    """
    entity_uri_ref = URIRef(entity_uri)
    custom_filter = get_custom_filter()
    change_tracking_config = get_change_tracking_config()

    timestamp, timestamp_dt = _resolve_timestamp(entity_uri, timestamp)

    agnostic_entity = AgnosticEntity(
        res=entity_uri,
        config=change_tracking_config,
        include_related_objects=True,
        include_merged_entities=True,
        include_reverse_relations=True,
    )
    history, provenance = agnostic_entity.get_history(include_prov_metadata=True)
    history = convert_to_rdflib_graphs(history, is_quadstore=get_dataset_is_quadstore())
    main_entity_history = history.get(entity_uri, {})
    sorted_timestamps = sorted(
        main_entity_history.keys(),
        key=lambda t: convert_to_datetime(t) or _DATETIME_MIN_UTC,
    )

    if not sorted_timestamps:
        abort(404)

    # ``astimezone()`` on both sides lets naive and aware values be compared.
    closest_timestamp = min(
        sorted_timestamps,
        key=lambda t: abs(
            (convert_to_datetime(t) or _DATETIME_MIN_UTC).astimezone()
            - timestamp_dt.astimezone()
        ),
    )

    version = main_entity_history[closest_timestamp]
    triples: list[tuple[URIRef, URIRef, URIRef | Literal]] = [
        (URIRef(str(s)), URIRef(str(p)), URIRef(str(o)) if isinstance(o, URIRef) else o)  # type: ignore[misc]
        for s, p, o in get_triples_from_graph(version, (entity_uri_ref, None, None))
    ]

    entity_metadata = provenance.get(entity_uri, {})
    # The list is already in chronological order; taking its last item avoids
    # the lexicographic string comparison that ``max()`` would perform.
    latest_timestamp = sorted_timestamps[-1]
    closest_metadata, latest_metadata = _find_closest_metadata(
        entity_metadata, timestamp_dt, latest_timestamp
    )

    if closest_metadata is None or latest_metadata is None:
        abort(404)

    is_deletion_snapshot = (
        closest_timestamp == latest_timestamp
        and "invalidatedAtTime" in latest_metadata
        and latest_metadata["invalidatedAtTime"]
    ) or len(triples) == 0

    # For a deletion snapshot the preceding snapshot, when there is one,
    # supplies the classes, shape and display context.
    context_version = version
    if is_deletion_snapshot and len(sorted_timestamps) > 1:
        current_index = sorted_timestamps.index(closest_timestamp)
        if current_index > 0:
            context_version = main_entity_history[sorted_timestamps[current_index - 1]]

    # ``context_version`` is ``version`` itself unless it was replaced above,
    # so a single lookup covers both the deletion and the regular case.
    subject_classes = [
        str(o)
        for _, _, o in get_triples_from_graph(
            context_version, (entity_uri_ref, RDF.type, None)
        )
    ]

    highest_priority_class = get_highest_priority_class(subject_classes)

    entity_shape = determine_shape_for_entity_triples(
        list(get_triples_from_graph(context_version, (entity_uri_ref, None, None)))
    )

    _, _, _, _, _, valid_predicates_set = get_valid_predicates(
        triples, highest_priority_class=URIRef(highest_priority_class or "")
    )

    grouped_triples, _relevant_properties = get_grouped_triples(
        entity_uri_ref,
        triples,
        list(valid_predicates_set),
        historical_snapshot=context_version,
        entity_key=(highest_priority_class, entity_shape),
    )

    snapshot_times: list[datetime] = [
        dt
        for meta in entity_metadata.values()
        if (dt := convert_to_datetime(meta["generatedAtTime"])) is not None
    ]
    snapshot_times = sorted(set(snapshot_times))

    # Number and navigate from the snapshot actually selected. The requested
    # timestamp need not coincide with a snapshot time, in which case
    # ``snapshot_times.index(timestamp_dt)`` would raise ValueError.
    snapshot_dt = convert_to_datetime(closest_metadata["generatedAtTime"])
    if snapshot_dt is None:
        msg = "snapshot_dt must not be None"
        raise AssertionError(msg)
    version_number = snapshot_times.index(snapshot_dt) + 1

    prev_snapshot_timestamp, next_snapshot_timestamp = _compute_version_navigation(
        snapshot_times, snapshot_dt
    )

    version_history_ctx = HistoryContext(
        entity_uri=entity_uri,
        highest_priority_class=highest_priority_class,
        entity_shape=entity_shape,
        history=history,
        sorted_timestamps=sorted_timestamps,
        custom_filter=custom_filter,
    )

    modifications, closest_metadata = _prepare_modifications(
        closest_metadata,
        version_history_ctx,
        context_version,
        closest_timestamp,
        sorted_timestamps,
    )

    # From here on the template is keyed by the provenance timestamp.
    closest_timestamp = closest_metadata["generatedAtTime"]

    return render_template(
        "entity/version.jinja",
        subject=entity_uri,
        entity_type=highest_priority_class,
        entity_shape=entity_shape,
        metadata={closest_timestamp: closest_metadata},
        timestamp=closest_timestamp,
        next_snapshot_timestamp=next_snapshot_timestamp,
        prev_snapshot_timestamp=prev_snapshot_timestamp,
        modifications=modifications,
        grouped_triples=grouped_triples,
        version_number=version_number,
        version=context_version,
    )

546 )