Coverage for heritrace / routes / entity / _history.py: 88%
199 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-07-02 10:16 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-07-02 10:16 +0000
# SPDX-FileCopyrightText: 2024-2026 Arcangelo Massari <arcangelo.massari@unibo.it>
#
# SPDX-License-Identifier: ISC
import html
import re
from datetime import datetime

from flask import abort, render_template
from flask_babel import gettext
from flask_login import login_required
from rdflib import RDF, Graph, Literal, URIRef
from SPARQLWrapper import JSON
from time_agnostic_library.agnostic_entity import AgnosticEntity

from heritrace.extensions import (
    get_change_tracking_config,
    get_custom_filter,
    get_dataset_is_quadstore,
    get_provenance_sparql,
)
from heritrace.routes.entity._blueprint import entity_bp
from heritrace.routes.entity._rendering import generate_modification_text
from heritrace.routes.entity._types import _DATETIME_MIN_UTC, HistoryContext
from heritrace.sparql import get_sparql_bindings
from heritrace.utils.converters import convert_to_datetime
from heritrace.utils.display_rules_utils import (
    get_grouped_triples,
    get_highest_priority_class,
)
from heritrace.utils.shacl_utils import determine_shape_for_entity_triples
from heritrace.utils.shacl_validation import get_valid_predicates
from heritrace.utils.sparql_utils import (
    convert_to_rdflib_graphs,
    determine_shape_for_classes,
    get_triples_from_graph,
    parse_sparql_update,
)
from heritrace.utils.uri_utils import is_valid_url
def _timeline_date_parts(moment: datetime) -> dict[str, int]:
    """Split *moment* into the date-part mapping stored on timeline events."""
    return {
        "year": moment.year,
        "month": moment.month,
        "day": moment.day,
        "hour": moment.hour,
        "minute": moment.minute,
        "second": moment.second,
    }


def _render_restore_button(restore_action: str) -> str:
    """Return the HTML form posting to *restore_action*.

    *restore_action* is placed inside a single-quoted attribute, so the caller
    must pass a value that is already HTML-escaped.
    """
    restore_label = gettext("Restore")
    return f"""
        <form action='{restore_action}'
              method='post'
              class='d-inline restore-form'>
            <button type='submit'
                    class='btn btn-success restore-btn'>
                <i class='bi
                          bi-arrow-counterclockwise
                          me-1'></i>{restore_label}
            </button>
        </form>
    """


@entity_bp.route("/entity-history/<path:entity_uri>")
@login_required
def entity_history(entity_uri: str) -> str:
    """Render the timeline of all provenance snapshots of *entity_uri*.

    One timeline event is built per snapshot, ordered by ``generatedAtTime``.
    Every snapshot except the last one also gets a "Restore" button.

    Aborts with 404 when no provenance or history is recorded for the entity.

    Raises:
        AssertionError: if a snapshot's ``generatedAtTime`` cannot be
            converted to a datetime.
    """
    entity_uri_ref = URIRef(entity_uri)
    custom_filter = get_custom_filter()
    change_tracking_config = get_change_tracking_config()

    agnostic_entity = AgnosticEntity(
        res=entity_uri,
        config=change_tracking_config,
        include_related_objects=True,
        include_merged_entities=True,
        include_reverse_relations=True,
    )
    history, provenance = agnostic_entity.get_history(include_prov_metadata=True)
    history = convert_to_rdflib_graphs(history, is_quadstore=get_dataset_is_quadstore())

    # An unknown entity must yield 404, not a KeyError/IndexError (HTTP 500).
    entity_provenance = provenance.get(entity_uri)
    entity_snapshots = history.get(entity_uri)
    if not entity_provenance or not entity_snapshots:
        abort(404)

    sorted_metadata = sorted(
        entity_provenance.items(),
        key=lambda x: convert_to_datetime(x[1]["generatedAtTime"]) or _DATETIME_MIN_UTC,
    )
    sorted_timestamps: list[str] = [
        dt.isoformat()
        for _, meta in sorted_metadata
        if (dt := convert_to_datetime(meta["generatedAtTime"])) is not None
    ]
    if not sorted_timestamps:
        abort(404)

    # When the latest snapshot records a deletion, the snapshot before it is
    # used as the context for classes, shape and labels.
    latest_metadata = sorted_metadata[-1][1]
    is_latest_deletion = bool(latest_metadata.get("invalidatedAtTime"))
    if is_latest_deletion and len(sorted_timestamps) > 1:
        context_snapshot = entity_snapshots[sorted_timestamps[-2]]
    else:
        context_snapshot = entity_snapshots[sorted_timestamps[-1]]

    entity_classes = [
        str(triple[2])
        for triple in get_triples_from_graph(
            context_snapshot, (entity_uri_ref, RDF.type, None)
        )
    ]
    highest_priority_class = get_highest_priority_class(entity_classes)

    snapshot_entity_shape = determine_shape_for_entity_triples(
        list(get_triples_from_graph(context_snapshot, (entity_uri_ref, None, None)))
    )

    # Every field is the same for all snapshots, so build the context once.
    history_ctx = HistoryContext(
        entity_uri=entity_uri,
        highest_priority_class=highest_priority_class,
        entity_shape=snapshot_entity_shape,
        history=history,
        sorted_timestamps=sorted_timestamps,
        custom_filter=custom_filter,
    )

    # entity_uri comes straight from the request path and ends up inside
    # single-quoted HTML attributes: escape it so it cannot break out of them.
    escaped_entity_uri = html.escape(entity_uri, quote=True)
    snapshot_count = len(sorted_metadata)

    events = []
    for i, (_snapshot_uri, metadata) in enumerate(sorted_metadata):
        date = convert_to_datetime(metadata["generatedAtTime"])
        if date is None:
            msg = "date must not be None"
            raise AssertionError(msg)
        snapshot_timestamp = date.isoformat()
        snapshot_graph = entity_snapshots[snapshot_timestamp]

        responsible_agent = custom_filter.format_agent_reference(
            metadata["wasAttributedTo"]
        )
        primary_source = custom_filter.format_source_reference(
            metadata["hadPrimarySource"]
        )

        description = _format_snapshot_description(
            metadata,
            history_ctx,
            context_snapshot,
            i,
        )

        modification_text = ""
        modifications = metadata.get("hasUpdateQuery", "")
        if modifications:
            modification_text = generate_modification_text(
                parse_sparql_update(modifications),
                history_ctx,
                snapshot_graph,
                snapshot_timestamp,
            )

        escaped_generated_at = html.escape(str(metadata["generatedAtTime"]), quote=True)
        has_next_snapshot = i + 1 < snapshot_count

        # Only snapshots that are not the latest one can be restored.
        restore_button = ""
        if has_next_snapshot:
            restore_button = _render_restore_button(
                f"/restore-version/{escaped_entity_uri}/{escaped_generated_at}"
            )

        event = {
            "start_date": _timeline_date_parts(date),
            "text": {
                "headline": gettext("Snapshot") + " " + str(i + 1),
                "text": (
                    f"<p><strong>"
                    f"{gettext('Responsible agent')}"
                    f":</strong>"
                    f" {responsible_agent}</p>"
                    f"<p><strong>"
                    f"{gettext('Primary source')}"
                    f":</strong>"
                    f" {primary_source}</p>"
                    f"<p><strong>"
                    f"{gettext('Description')}"
                    f":</strong>"
                    f" {description}</p>"
                    f'<div class="modifications mb-3">'
                    f"{modification_text}"
                    f"</div>"
                    f'<div class="d-flex gap-2 mt-2">'
                    f"<a href='/entity-version/"
                    f"{escaped_entity_uri}/"
                    f"{escaped_generated_at}'"
                    f" class='btn btn-outline-primary"
                    f" view-version'"
                    f" target='_self'>"
                    f"{gettext('View version')}</a>"
                    f"{restore_button}"
                    f"</div>"
                ),
            },
            "autolink": False,
        }

        # A snapshot's event ends where the following snapshot begins.
        if has_next_snapshot:
            next_date = convert_to_datetime(
                sorted_metadata[i + 1][1]["generatedAtTime"]
            )
            if next_date is None:
                msg = "next_date must not be None"
                raise AssertionError(msg)
            event["end_date"] = _timeline_date_parts(next_date)

        events.append(event)

    entity_label = custom_filter.human_readable_entity(
        entity_uri, (highest_priority_class, snapshot_entity_shape), context_snapshot
    )

    timeline_data = {
        "entityUri": entity_uri,
        "entityLabel": entity_label,
        "entityClasses": list(entity_classes),
        "entityShape": snapshot_entity_shape,
        "events": events,
    }

    return render_template("entity/history.jinja", timeline_data=timeline_data)
227def _format_snapshot_description(
228 metadata: dict,
229 ctx: HistoryContext,
230 context_snapshot: Graph,
231 current_index: int,
232) -> str:
233 description = metadata.get("description", "")
234 is_merge_snapshot = False
235 was_derived_from = metadata.get("wasDerivedFrom")
236 if isinstance(was_derived_from, list) and len(was_derived_from) > 1:
237 is_merge_snapshot = True
239 if is_merge_snapshot:
240 match = re.search(r"merged with [‘’]?([^’’<>\s]+)[‘’]?", description) # noqa: RUF001
241 if match:
242 potential_merged_uri = match.group(1)
243 if is_valid_url(potential_merged_uri):
244 merged_entity_uri_from_desc = potential_merged_uri
245 merged_entity_label = None
246 if current_index > 0:
247 previous_snapshot_timestamp = ctx.sorted_timestamps[
248 current_index - 1
249 ]
250 previous_snapshot_graph = ctx.history.get(ctx.entity_uri, {}).get(
251 previous_snapshot_timestamp
252 )
253 if previous_snapshot_graph:
254 raw_merged_entity_classes = [
255 str(o)
256 for s, p, o in get_triples_from_graph(
257 previous_snapshot_graph,
258 (URIRef(merged_entity_uri_from_desc), RDF.type, None),
259 )
260 ]
261 highest_priority_merged_class = (
262 get_highest_priority_class(raw_merged_entity_classes)
263 if raw_merged_entity_classes
264 else None
265 )
267 shape = determine_shape_for_classes(raw_merged_entity_classes)
268 merged_entity_label = ctx.custom_filter.human_readable_entity(
269 merged_entity_uri_from_desc,
270 (highest_priority_merged_class, shape),
271 previous_snapshot_graph,
272 )
273 if (
274 merged_entity_label
275 and merged_entity_label != merged_entity_uri_from_desc
276 ):
277 description = description.replace(
278 match.group(0), f"merged with '{merged_entity_label}'"
279 )
281 shape = (
282 determine_shape_for_classes([ctx.highest_priority_class])
283 if ctx.highest_priority_class
284 else None
285 )
286 entity_label_for_desc = ctx.custom_filter.human_readable_entity(
287 ctx.entity_uri, (ctx.highest_priority_class, shape), context_snapshot
288 )
289 if entity_label_for_desc and entity_label_for_desc != ctx.entity_uri:
290 description = description.replace(
291 f"'{ctx.entity_uri}'", f"'{entity_label_for_desc}'"
292 )
294 return description
297def _resolve_timestamp(entity_uri: str, timestamp: str) -> tuple[str, datetime]:
298 try:
299 return timestamp, datetime.fromisoformat(timestamp)
300 except ValueError:
301 pass
303 provenance_sparql = get_provenance_sparql()
304 query_timestamp = f"""
305 SELECT ?generation_time
306 WHERE {{
307 <{entity_uri}/prov/se/{timestamp}>
308 <http://www.w3.org/ns/prov#generatedAtTime>
309 ?generation_time.
310 }}
311 """
312 provenance_sparql.setQuery(query_timestamp)
313 provenance_sparql.setReturnFormat(JSON)
314 try:
315 bindings = get_sparql_bindings(provenance_sparql.queryAndConvert())
316 generation_time = bindings[0]["generation_time"]["value"]
317 except IndexError:
318 abort(404)
319 return generation_time, datetime.fromisoformat(generation_time)
322def _find_closest_metadata(
323 entity_metadata: dict,
324 timestamp_dt: datetime,
325 latest_timestamp: str,
326) -> tuple[dict | None, dict | None]:
327 closest_metadata = None
328 min_time_diff = None
329 latest_metadata = None
331 for meta in entity_metadata.values():
332 meta_time = convert_to_datetime(meta["generatedAtTime"])
333 if meta_time is None:
334 msg = "meta_time must not be None"
335 raise AssertionError(msg)
336 time_diff = abs((meta_time - timestamp_dt).total_seconds())
338 if (
339 closest_metadata is None
340 or min_time_diff is None
341 or time_diff < min_time_diff
342 ):
343 closest_metadata = meta
344 min_time_diff = time_diff
346 if meta["generatedAtTime"] == latest_timestamp:
347 latest_metadata = meta
349 return closest_metadata, latest_metadata
352def _compute_version_navigation(
353 snapshot_times: list[datetime],
354 timestamp_dt: datetime,
355) -> tuple[str | None, str | None]:
356 next_snapshot_timestamp = None
357 prev_snapshot_timestamp = None
359 for snap_time in snapshot_times:
360 if snap_time > timestamp_dt:
361 next_snapshot_timestamp = snap_time.isoformat()
362 break
364 for snap_time in reversed(snapshot_times):
365 if snap_time < timestamp_dt:
366 prev_snapshot_timestamp = snap_time.isoformat()
367 break
369 return prev_snapshot_timestamp, next_snapshot_timestamp
372def _prepare_modifications(
373 closest_metadata: dict,
374 ctx: HistoryContext,
375 context_version: Graph,
376 closest_timestamp: str,
377 sorted_timestamps: list[str],
378) -> tuple[str, dict]:
379 modifications = ""
380 if closest_metadata.get("hasUpdateQuery"):
381 sparql_query = closest_metadata["hasUpdateQuery"]
382 parsed_modifications = parse_sparql_update(sparql_query)
383 modifications = generate_modification_text(
384 parsed_modifications,
385 ctx,
386 context_version,
387 closest_timestamp,
388 )
390 try:
391 current_index = sorted_timestamps.index(closest_timestamp)
392 except ValueError:
393 current_index = -1
395 if closest_metadata.get("description"):
396 formatted_description = _format_snapshot_description(
397 closest_metadata,
398 ctx,
399 context_version,
400 current_index,
401 )
402 closest_metadata["description"] = formatted_description
404 return modifications, closest_metadata
@entity_bp.route("/entity-version/<path:entity_uri>/<timestamp>")
@login_required
def entity_version(entity_uri: str, timestamp: str) -> str:
    """Render the snapshot of *entity_uri* closest to *timestamp*.

    *timestamp* is resolved by ``_resolve_timestamp``. The view shows the
    snapshot whose time is nearest to it. For a deletion snapshot, the
    preceding snapshot (when there is one) supplies the classes, shape and
    graph used for display.

    Aborts with 404 when the entity has no history or no matching provenance
    metadata.
    """
    entity_uri_ref = URIRef(entity_uri)
    custom_filter = get_custom_filter()
    change_tracking_config = get_change_tracking_config()

    timestamp, timestamp_dt = _resolve_timestamp(entity_uri, timestamp)

    agnostic_entity = AgnosticEntity(
        res=entity_uri,
        config=change_tracking_config,
        include_related_objects=True,
        include_merged_entities=True,
        include_reverse_relations=True,
    )
    history, provenance = agnostic_entity.get_history(include_prov_metadata=True)
    history = convert_to_rdflib_graphs(history, is_quadstore=get_dataset_is_quadstore())
    main_entity_history = history.get(entity_uri, {})
    sorted_timestamps = sorted(
        main_entity_history.keys(),
        key=lambda t: convert_to_datetime(t) or _DATETIME_MIN_UTC,
    )

    if not sorted_timestamps:
        abort(404)

    closest_timestamp = min(
        sorted_timestamps,
        key=lambda t: abs(
            (convert_to_datetime(t) or _DATETIME_MIN_UTC).astimezone()
            - timestamp_dt.astimezone()
        ),
    )

    version = main_entity_history[closest_timestamp]
    triples: list[tuple[URIRef, URIRef, URIRef | Literal]] = [
        (URIRef(str(s)), URIRef(str(p)), URIRef(str(o)) if isinstance(o, URIRef) else o)  # type: ignore[misc]
        for s, p, o in get_triples_from_graph(version, (entity_uri_ref, None, None))
    ]

    entity_metadata = provenance.get(entity_uri, {})
    # The list is sorted chronologically, so the last item is the latest
    # snapshot. max() would compare the strings lexicographically.
    latest_timestamp = sorted_timestamps[-1]
    closest_metadata, latest_metadata = _find_closest_metadata(
        entity_metadata, timestamp_dt, latest_timestamp
    )

    if closest_metadata is None or latest_metadata is None:
        abort(404)

    is_deletion_snapshot = (
        closest_timestamp == latest_timestamp
        and bool(latest_metadata.get("invalidatedAtTime"))
    ) or len(triples) == 0

    # A deletion snapshot is displayed using the preceding snapshot, if any.
    context_version = version
    if is_deletion_snapshot and len(sorted_timestamps) > 1:
        current_index = sorted_timestamps.index(closest_timestamp)
        if current_index > 0:
            context_version = main_entity_history[sorted_timestamps[current_index - 1]]

    # context_version is `version` itself unless the fallback above replaced
    # it, so one lookup covers both the regular and the deletion case.
    subject_classes = [
        str(o)
        for _, _, o in get_triples_from_graph(
            context_version, (entity_uri_ref, RDF.type, None)
        )
    ]

    highest_priority_class = get_highest_priority_class(subject_classes)

    entity_shape = determine_shape_for_entity_triples(
        list(get_triples_from_graph(context_version, (entity_uri_ref, None, None)))
    )

    _, _, _, _, _, valid_predicates_set = get_valid_predicates(
        triples, highest_priority_class=URIRef(highest_priority_class or "")
    )

    grouped_triples, _relevant_properties = get_grouped_triples(
        entity_uri_ref,
        triples,
        list(valid_predicates_set),
        historical_snapshot=context_version,
        entity_key=(highest_priority_class, entity_shape),
    )

    snapshot_times: list[datetime] = sorted(
        {
            dt
            for meta in entity_metadata.values()
            if (dt := convert_to_datetime(meta["generatedAtTime"])) is not None
        }
    )

    # Number and navigate from the snapshot actually displayed. The requested
    # timestamp may not coincide with any snapshot time, in which case
    # snapshot_times.index(timestamp_dt) would raise ValueError.
    closest_dt = convert_to_datetime(closest_metadata["generatedAtTime"])
    if closest_dt is None:
        abort(404)
    version_number = snapshot_times.index(closest_dt) + 1

    prev_snapshot_timestamp, next_snapshot_timestamp = _compute_version_navigation(
        snapshot_times, closest_dt
    )

    version_history_ctx = HistoryContext(
        entity_uri=entity_uri,
        highest_priority_class=highest_priority_class,
        entity_shape=entity_shape,
        history=history,
        sorted_timestamps=sorted_timestamps,
        custom_filter=custom_filter,
    )

    modifications, closest_metadata = _prepare_modifications(
        closest_metadata,
        version_history_ctx,
        context_version,
        closest_timestamp,
        sorted_timestamps,
    )

    closest_timestamp = closest_metadata["generatedAtTime"]

    return render_template(
        "entity/version.jinja",
        subject=entity_uri,
        entity_type=highest_priority_class,
        entity_shape=entity_shape,
        metadata={closest_timestamp: closest_metadata},
        timestamp=closest_timestamp,
        next_snapshot_timestamp=next_snapshot_timestamp,
        prev_snapshot_timestamp=prev_snapshot_timestamp,
        modifications=modifications,
        grouped_triples=grouped_triples,
        version_number=version_number,
        version=context_version,
    )