Coverage for test/fix_provenance_test.py: 99%
357 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
1import json
2import os
3import shutil
4import unittest
5import zipfile
7from oc_meta.run.fixer.prov.fix import ProvenanceProcessor
8from rdflib import ConjunctiveGraph, Literal, Namespace, URIRef
9from rdflib.namespace import XSD
12class TestProvenanceFixing(unittest.TestCase):
def setUp(self):
    """Prepare a processor, a scratch directory and a sample provenance archive.

    The sample JSON-LD graph describes three snapshots of one entity with
    deliberately uneven data: ``se/2`` carries only an invalidation time,
    ``se/3`` has a generation time and points to ``se/2`` through
    ``wasDerivedFrom``, and ``se/1`` has a generation time written without
    a timezone designator.  The graph is serialized as ``se.json`` inside
    ``test_se.zip`` in the scratch directory.
    """
    self.processor = ProvenanceProcessor(log_dir='test/fix_provenance_logs')

    self.temp_dir = "test_temp_dir"
    # exist_ok=True replaces the former exists()/makedirs() pair, which
    # could fail if the directory appeared between the check and the call.
    os.makedirs(self.temp_dir, exist_ok=True)

    # Sample JSON-LD data
    self.test_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/2",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-31T22:08:21+00:00"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/3",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-31T22:08:21+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264"
                }],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/2"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-13T15:05:18.218917"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264"
                }]
            }
        ],
        "@id": "https://w3id.org/oc/meta/br/06504122264/prov/"
    }

    # Create test zip file
    self.test_zip_path = os.path.join(self.temp_dir, "test_se.zip")
    with zipfile.ZipFile(self.test_zip_path, 'w') as zf:
        zf.writestr('se.json', json.dumps(self.test_data))
def tearDown(self):
    """Remove the scratch directory and everything in it, if it still exists."""
    scratch_dir = self.temp_dir
    if not os.path.exists(scratch_dir):
        return
    shutil.rmtree(scratch_dir)
def test_extract_snapshot_number(self):
    """Check the snapshot number extracted from snapshot URIs.

    A string that is not a snapshot URI is expected to yield 0.
    """
    expected_by_uri = {
        "https://w3id.org/oc/meta/br/06504122264/prov/se/1": 1,
        "https://w3id.org/oc/meta/br/06504122264/prov/se/42": 42,
        "invalid_uri": 0,
    }

    for uri, expected in expected_by_uri.items():
        with self.subTest(uri=uri):
            extracted = self.processor._extract_snapshot_number(uri)
            self.assertEqual(extracted, expected)
def test_get_entity_from_prov_graph(self):
    """Check that the entity URI is derived from a provenance graph URI.

    In both samples the expected entity URI is the graph URI without its
    trailing ``/prov/`` segment.
    """
    expected_by_graph = {
        "https://w3id.org/oc/meta/br/06504122264/prov/":
            "https://w3id.org/oc/meta/br/06504122264",
        "https://example.org/resource/prov/":
            "https://example.org/resource",
    }

    for graph_uri, expected in expected_by_graph.items():
        with self.subTest(graph_uri=graph_uri):
            entity_uri = self.processor._get_entity_from_prov_graph(graph_uri)
            self.assertEqual(entity_uri, expected)
def test_collect_snapshot_info(self):
    """Check the snapshot records collected from a two-snapshot graph.

    Both snapshots share the same generation time; the result is expected
    to list them as numbers 1 and 2, with snapshot 1 holding exactly that
    one generation time.
    """
    prov = Namespace("http://www.w3.org/ns/prov#")
    graph = ConjunctiveGraph()

    # Two snapshots of the same entity, generated at the same instant
    first_uri = URIRef("https://w3id.org/oc/meta/br/06504122264/prov/se/1")
    second_uri = URIRef("https://w3id.org/oc/meta/br/06504122264/prov/se/2")
    generated_at = Literal("2023-12-13T15:05:18.218917", datatype=XSD.dateTime)
    for snapshot_uri in (first_uri, second_uri):
        graph.add((snapshot_uri, prov.generatedAtTime, generated_at))

    collected = self.processor._collect_snapshot_info(graph)

    self.assertEqual(len(collected), 2)
    self.assertEqual(collected[0]['number'], 1)
    self.assertEqual(collected[1]['number'], 2)
    first_times = collected[0]['generation_times']
    self.assertEqual(len(first_times), 1)
    self.assertEqual(str(first_times[0]), str(generated_at))
def test_multiple_timestamps(self):
    """Check that removing multiple timestamps strips all of them from the snapshot."""
    prov = Namespace("http://www.w3.org/ns/prov#")
    graph = ConjunctiveGraph()

    subject = URIRef("https://w3id.org/oc/meta/br/06504122264/prov/se/1")
    earlier = Literal("2023-12-13T15:05:18+00:00", datatype=XSD.dateTime)
    later = Literal("2023-12-13T16:05:18+00:00", datatype=XSD.dateTime)
    for stamp in (earlier, later):
        graph.add((subject, prov.generatedAtTime, stamp))

    # Exercise the removal of the multiple timestamps
    self.processor._remove_multiple_timestamps(
        graph, subject, prov.generatedAtTime, [earlier, later])

    # No generation time may be left on the snapshot
    leftover = list(graph.objects(subject, prov.generatedAtTime))
    self.assertEqual(len(leftover), 0)
def test_process_file_with_multiple_timestamps(self):
    """Test processing a file that contains snapshots with multiple timestamps.

    The input holds three snapshots that carry only generation times, and
    snapshot 1 has two of them.  After processing, the test expects the
    three snapshots to be chained through ``wasDerivedFrom``, each to be a
    ``specializationOf`` the entity, snapshot 1 to keep a single generation
    time, snapshots 1 and 2 to be invalidated at the generation time of
    their successor, and every timestamp to carry a UTC marker.
    """
    # Build test data with multiple timestamps and the full snapshot chain
    test_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [
                    {
                        "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                        "@value": "2023-12-13T15:05:18+00:00"
                    },
                    {
                        "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                        "@value": "2023-12-13T16:05:18+00:00"
                    }
                ]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/2",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [
                    {
                        "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                        "@value": "2023-12-22T18:06:49+00:00"
                    }
                ]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/3",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [
                    {
                        "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                        "@value": "2023-12-31T22:08:21+00:00"
                    }
                ]
            }
        ],
        "@id": "https://w3id.org/oc/meta/br/06504122264/prov/"
    }

    test_file = os.path.join(self.temp_dir, "multiple_timestamps.zip")
    with zipfile.ZipFile(test_file, 'w') as zf:
        zf.writestr('se.json', json.dumps(test_data))

    # Process the file
    result = self.processor.process_file(test_file, 'test/fix_provenance_logs')
    self.assertIsNotNone(result)

    # Read back the content of the rewritten file
    with zipfile.ZipFile(test_file, 'r') as zf:
        with zf.open('se.json') as f:
            fixed_data = json.load(f)

    # Basic structure: the first named graph holds the snapshots
    graph_data = fixed_data[0]['@graph']

    def first_values(predicate, field):
        """Map each snapshot id to ``field`` of its first ``predicate`` value.

        Snapshots that do not carry ``predicate`` are left out of the map.
        """
        return {
            item['@id']: item[predicate][0].get(field)
            for item in graph_data
            if predicate in item
        }

    # Check the snapshot ids
    snapshot_ids = {item['@id'] for item in graph_data}
    expected_ids = {
        "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
        "https://w3id.org/oc/meta/br/06504122264/prov/se/2",
        "https://w3id.org/oc/meta/br/06504122264/prov/se/3"
    }
    self.assertEqual(snapshot_ids, expected_ids)

    # Check the wasDerivedFrom relations
    derived_from = first_values('http://www.w3.org/ns/prov#wasDerivedFrom', '@id')
    expected_derived = {
        "https://w3id.org/oc/meta/br/06504122264/prov/se/2":
            "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
        "https://w3id.org/oc/meta/br/06504122264/prov/se/3":
            "https://w3id.org/oc/meta/br/06504122264/prov/se/2"
    }
    self.assertEqual(derived_from, expected_derived)

    # Check the specializationOf relations
    specialization_of = first_values('http://www.w3.org/ns/prov#specializationOf', '@id')
    expected_specialization = {
        "https://w3id.org/oc/meta/br/06504122264/prov/se/1":
            "https://w3id.org/oc/meta/br/06504122264",
        "https://w3id.org/oc/meta/br/06504122264/prov/se/2":
            "https://w3id.org/oc/meta/br/06504122264",
        "https://w3id.org/oc/meta/br/06504122264/prov/se/3":
            "https://w3id.org/oc/meta/br/06504122264"
    }
    self.assertEqual(specialization_of, expected_specialization)

    # Collect the generation timestamps (checked for UTC further down)
    generated_times = first_values('http://www.w3.org/ns/prov#generatedAtTime', '@value')

    # Check that the first snapshot has a single timestamp
    first_snapshot = next(item for item in graph_data
                          if item['@id'].endswith('/prov/se/1'))
    self.assertEqual(
        len(first_snapshot.get('http://www.w3.org/ns/prov#generatedAtTime', [])),
        1,
        "First snapshot should have exactly one generatedAtTime"
    )

    # Check the invalidation timestamps
    invalidated_times = first_values('http://www.w3.org/ns/prov#invalidatedAtTime', '@value')
    expected_invalidated = {
        'https://w3id.org/oc/meta/br/06504122264/prov/se/1':
            '2023-12-22T18:06:49+00:00',
        "https://w3id.org/oc/meta/br/06504122264/prov/se/2":
            "2023-12-31T22:08:21+00:00"
    }
    self.assertEqual(invalidated_times, expected_invalidated)

    # Check that every timestamp is in UTC
    for timestamp in generated_times.values():
        self.assertTrue(
            '+00:00' in timestamp or 'Z' in timestamp,
            f"Generated timestamp {timestamp} should be in UTC"
        )

    for timestamp in invalidated_times.values():
        self.assertTrue(
            '+00:00' in timestamp or 'Z' in timestamp,
            f"Invalidated timestamp {timestamp} should be in UTC"
        )
def test_normalize_timestamps(self):
    """Check which timestamps ``_normalize_timestamp`` reports as changed.

    Values already marked as UTC are expected to be reported unchanged;
    values without a timezone or with another offset are expected to be
    reported as changed and to come back carrying a UTC marker.
    """
    should_change_by_timestamp = {
        "2023-12-13T15:05:18.218917": True,   # No timezone - should be converted
        "2023-12-13T15:05:18+00:00": False,   # Already UTC - no change needed
        "2023-12-13T15:05:18Z": False,        # Already UTC - no change needed
        "2023-12-13T15:05:18+01:00": True,    # Different timezone - should be converted
    }

    for timestamp_str, should_change in should_change_by_timestamp.items():
        with self.subTest(timestamp=timestamp_str):
            source = Literal(timestamp_str, datatype=XSD.dateTime)
            normalized, changed = self.processor._normalize_timestamp(source)
            self.assertEqual(changed, should_change)
            if not changed:
                continue
            # Only a rewritten literal is checked for the UTC marker
            self.assertTrue('+00:00' in str(normalized) or 'Z' in str(normalized))
def test_missing_snapshots(self):
    """Check that a gap in the snapshot sequence gets filled.

    The input holds snapshots 1, 3 and 4.  After processing, snapshot 2 is
    expected to exist, be typed as a prov Entity, specialize the entity,
    derive from snapshot 1 and carry both a generation and an invalidation
    time.
    """
    spec_of = 'http://www.w3.org/ns/prov#specializationOf'
    derived = 'http://www.w3.org/ns/prov#wasDerivedFrom'

    # Snapshot 2 is absent from the sequence 1, 3, 4
    test_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-13T15:05:18+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-22T18:06:49+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/3",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-31T22:08:21+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/4",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-01-15T10:30:00+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264"
                }]
            }
        ],
        "@id": "https://w3id.org/oc/meta/br/06504122264/prov/"
    }

    archive_path = os.path.join(self.temp_dir, "missing_snapshot.zip")
    with zipfile.ZipFile(archive_path, 'w') as archive:
        archive.writestr('se.json', json.dumps(test_data))

    # Run the fixer on the archive
    outcome = self.processor.process_file(archive_path, 'test/fix_provenance_logs')
    self.assertTrue(outcome)

    # Load the rewritten graph
    with zipfile.ZipFile(archive_path, 'r') as archive:
        with archive.open('se.json') as handle:
            fixed_data = json.loads(handle.read())
            graph_data = fixed_data[0]['@graph']

    # The missing snapshot 2 must now be present
    present_ids = {entry['@id'] for entry in graph_data}
    self.assertIn(
        "https://w3id.org/oc/meta/br/06504122264/prov/se/2",
        present_ids,
        "Missing snapshot 2 should have been created"
    )

    # Pick the created snapshot
    snapshot_2 = next(entry for entry in graph_data
                      if entry['@id'].endswith('/prov/se/2'))

    # Basic typing of the created snapshot
    self.assertIn('@type', snapshot_2)
    self.assertIn('http://www.w3.org/ns/prov#Entity', snapshot_2['@type'])

    # specializationOf must point to the entity, wasDerivedFrom to snapshot 1
    expected_targets = (
        (spec_of, "https://w3id.org/oc/meta/br/06504122264"),
        (derived, "https://w3id.org/oc/meta/br/06504122264/prov/se/1"),
    )
    for predicate, target in expected_targets:
        self.assertIn(predicate, snapshot_2)
        self.assertEqual(snapshot_2[predicate][0]['@id'], target)

    # Both timestamps must have been filled in
    for predicate in ('http://www.w3.org/ns/prov#generatedAtTime',
                      'http://www.w3.org/ns/prov#invalidatedAtTime'):
        self.assertIn(predicate, snapshot_2)
def test_multiple_missing_snapshots(self):
    """Test handling of multiple consecutive missing snapshots.

    The input holds snapshots 1, 4 and 5.  After processing, every snapshot
    found in the output is expected to be a prov Entity, specialize the
    entity and have a UTC generation time; every snapshot but the first is
    expected to derive from the snapshot numbered just before it; and where
    a snapshot has an invalidation time, that time is expected to equal the
    generation time of the next snapshot.
    """
    # Test data with snapshots 2 and 3 missing from sequence 1,4,5
    test_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-13T15:05:18+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-22T18:06:49+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/4",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-01-15T10:30:00+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/5",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-01-20T14:45:00+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264"
                }]
            }
        ],
        "@id": "https://w3id.org/oc/meta/br/06504122264/prov/"
    }

    test_file = os.path.join(self.temp_dir, "multiple_missing_snapshots.zip")
    with zipfile.ZipFile(test_file, 'w') as zf:
        zf.writestr('se.json', json.dumps(test_data))

    result = self.processor.process_file(test_file, 'test/fix_provenance_logs')
    self.assertTrue(result)

    with zipfile.ZipFile(test_file, 'r') as zf:
        with zf.open('se.json') as f:
            fixed_data = json.load(f)

    graph_data = fixed_data[0]['@graph']
    # Collect the snapshots keyed by their number
    snapshots = {
        int(item['@id'].split('/se/')[-1]): item
        for item in graph_data
        if '/prov/se/' in item['@id']
    }

    # Without this guard every loop below would be skipped on an empty
    # result and the test would pass without checking anything.
    self.assertTrue(snapshots, "No snapshots found in the processed file")

    ordered_nums = sorted(snapshots)
    first_num = ordered_nums[0]

    # Check that every snapshot has the basic properties
    for num, snapshot in snapshots.items():
        # Check the type
        self.assertIn('@type', snapshot)
        self.assertIn('http://www.w3.org/ns/prov#Entity', snapshot['@type'])

        # Check specializationOf
        self.assertIn('http://www.w3.org/ns/prov#specializationOf', snapshot)
        self.assertEqual(
            snapshot['http://www.w3.org/ns/prov#specializationOf'][0]['@id'],
            "https://w3id.org/oc/meta/br/06504122264"
        )

        # Check the generation timestamp
        self.assertIn('http://www.w3.org/ns/prov#generatedAtTime', snapshot)
        gen_time = snapshot['http://www.w3.org/ns/prov#generatedAtTime'][0]['@value']
        self.assertTrue('+00:00' in gen_time or 'Z' in gen_time)

        # Check wasDerivedFrom for every snapshot but the first
        if num > first_num:
            self.assertIn('http://www.w3.org/ns/prov#wasDerivedFrom', snapshot)

    # Check temporal consistency between consecutive snapshots
    for curr_num, next_num in zip(ordered_nums, ordered_nums[1:]):
        curr_snapshot = snapshots[curr_num]
        next_snapshot = snapshots[next_num]

        # Only snapshots that have an invalidation time are compared
        if 'http://www.w3.org/ns/prov#invalidatedAtTime' in curr_snapshot:
            curr_inv_time = self.processor._convert_to_utc(
                curr_snapshot['http://www.w3.org/ns/prov#invalidatedAtTime'][0]['@value']
            )
            next_gen_time = self.processor._convert_to_utc(
                next_snapshot['http://www.w3.org/ns/prov#generatedAtTime'][0]['@value']
            )
            self.assertEqual(
                curr_inv_time,
                next_gen_time,
                f"Invalidation time of snapshot {curr_num} should match generation time of {next_num}"
            )

    # Check that the snapshots are linked correctly: each one (except the
    # first) must derive from the snapshot numbered just before it
    for prev_num, num in zip(ordered_nums, ordered_nums[1:]):
        curr_snapshot = snapshots[num]
        derived_from = curr_snapshot['http://www.w3.org/ns/prov#wasDerivedFrom'][0]['@id']
        expected_derived = f"https://w3id.org/oc/meta/br/06504122264/prov/se/{prev_num}"
        self.assertEqual(
            derived_from,
            expected_derived,
            f"Snapshot {num} should be derived from snapshot {prev_num}"
        )
def test_timestamp_inference(self):
    """Test timestamp inference for missing snapshots.

    Snapshot 1 is invalidated at 14:00 and snapshot 3 is generated at
    18:00 on the same day; snapshot 2 is absent.  The created snapshot 2 is
    expected to be generated at 14:00 and invalidated at 18:00.
    """
    # Create test data where we can verify timestamp inference logic
    test_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-13T12:00:00+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-13T14:00:00+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/3",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-13T18:00:00+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/br/06504122264"
                }]
            }
        ],
        "@id": "https://w3id.org/oc/meta/br/06504122264/prov/"
    }

    test_file = os.path.join(self.temp_dir, "timestamp_inference.zip")
    with zipfile.ZipFile(test_file, 'w') as zf:
        zf.writestr('se.json', json.dumps(test_data))

    result = self.processor.process_file(test_file, 'test/fix_provenance_logs')
    self.assertTrue(result)

    with zipfile.ZipFile(test_file, 'r') as zf:
        with zf.open('se.json') as f:
            fixed_data = json.load(f)

    graph_data = fixed_data[0]['@graph']

    # Find the created snapshot 2.  The default keeps a missing snapshot
    # from surfacing as a StopIteration error instead of a test failure.
    snapshot_2 = next(
        (item for item in graph_data if item['@id'].endswith('/prov/se/2')),
        None
    )
    self.assertIsNotNone(snapshot_2, "Missing snapshot 2 should have been created")

    # Verify timestamps were inferred correctly
    self.assertIn('http://www.w3.org/ns/prov#generatedAtTime', snapshot_2)
    gen_time = snapshot_2['http://www.w3.org/ns/prov#generatedAtTime'][0]['@value']
    self.assertEqual(gen_time, "2023-12-13T14:00:00+00:00")

    self.assertIn('http://www.w3.org/ns/prov#invalidatedAtTime', snapshot_2)
    inv_time = snapshot_2['http://www.w3.org/ns/prov#invalidatedAtTime'][0]['@value']
    self.assertEqual(inv_time, "2023-12-13T18:00:00+00:00")
def test_multiple_descriptions_merge(self):
    """Check how a snapshot with merge and non-merge descriptions is cleaned up.

    Snapshot 2 starts with two "merged with" descriptions and one
    "modified" description; both merge descriptions are expected to
    survive and the "modified" one to be dropped.
    """
    description_key = 'http://purl.org/dc/terms/description'

    test_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been created."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-13T15:05:18+00:00"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/2",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [
                    {
                        "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been merged with 'https://w3id.org/oc/meta/br/06504122265'."
                    },
                    {
                        "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been merged with 'https://w3id.org/oc/meta/br/06504122266'."
                    },
                    {
                        "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been modified."
                    }
                ],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-31T22:08:21+00:00"
                }]
            }
        ],
        "@id": "https://w3id.org/oc/meta/br/06504122264/prov/"
    }

    archive_path = os.path.join(self.temp_dir, "multiple_descriptions_merge.zip")
    with zipfile.ZipFile(archive_path, 'w') as archive:
        archive.writestr('se.json', json.dumps(test_data))

    outcome = self.processor.process_file(archive_path, 'test/fix_provenance_logs')
    self.assertTrue(outcome)

    with zipfile.ZipFile(archive_path, 'r') as archive:
        with archive.open('se.json') as handle:
            fixed_data = json.loads(handle.read())

    # Locate snapshot 2 in the rewritten graph
    snapshot = next(entry for entry in fixed_data[0]['@graph']
                    if entry['@id'].endswith('/prov/se/2'))

    kept = snapshot.get(description_key, [])

    # Both merge descriptions must still be there
    merge_count = sum(
        1 for desc in kept if "has been merged with" in desc['@value'])
    self.assertEqual(merge_count, 2,
                     "Both merge descriptions should be preserved")

    # The non-merge description must be gone
    modified_count = sum(
        1 for desc in kept if "has been modified" in desc['@value'])
    self.assertEqual(modified_count, 0,
                     "Non-merge description should be removed")
def test_multiple_descriptions_first_snapshot(self):
    """Test handling of multiple descriptions in the first snapshot.

    The only snapshot carries a "created", a "modified" and a "merged with"
    description; after processing it is expected to keep just the
    "created" one.
    """
    test_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [
                    {
                        "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been created."
                    },
                    {
                        "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been modified."
                    },
                    {
                        "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been merged with 'https://w3id.org/oc/meta/br/06504122265'."
                    }
                ],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-13T15:05:18+00:00"
                }]
            }
        ],
        "@id": "https://w3id.org/oc/meta/br/06504122264/prov/"
    }

    test_file = os.path.join(self.temp_dir, "first_snapshot_descriptions.zip")
    with zipfile.ZipFile(test_file, 'w') as zf:
        zf.writestr('se.json', json.dumps(test_data))

    result = self.processor.process_file(test_file, 'test/fix_provenance_logs')
    self.assertTrue(result)

    with zipfile.ZipFile(test_file, 'r') as zf:
        with zf.open('se.json') as f:
            fixed_data = json.load(f)

    snapshot = next(item for item in fixed_data[0]['@graph']
                    if item['@id'].endswith('/prov/se/1'))

    descriptions = snapshot.get('http://purl.org/dc/terms/description', [])

    # Verify that only creation description was kept
    self.assertEqual(len(descriptions), 1,
                     "First snapshot should have exactly one description")
    # assertIn shows the offending description text when the check fails
    self.assertIn("has been created", descriptions[0]['@value'],
                  "First snapshot should keep only creation description")
def test_multiple_descriptions_last_snapshot(self):
    """Test handling of multiple descriptions in the last snapshot.

    Snapshot 2, the last of two, carries a "modified" and a "deleted"
    description; after processing it is expected to keep just the
    "deleted" one.
    """
    test_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been created."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-13T15:05:18+00:00"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/2",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [
                    {
                        "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been modified."
                    },
                    {
                        "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been deleted."
                    }
                ],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-31T22:08:21+00:00"
                }]
            }
        ],
        "@id": "https://w3id.org/oc/meta/br/06504122264/prov/"
    }

    test_file = os.path.join(self.temp_dir, "last_snapshot_descriptions.zip")
    with zipfile.ZipFile(test_file, 'w') as zf:
        zf.writestr('se.json', json.dumps(test_data))

    result = self.processor.process_file(test_file, 'test/fix_provenance_logs')
    self.assertTrue(result)

    with zipfile.ZipFile(test_file, 'r') as zf:
        with zf.open('se.json') as f:
            fixed_data = json.load(f)

    last_snapshot = next(item for item in fixed_data[0]['@graph']
                         if item['@id'].endswith('/prov/se/2'))

    descriptions = last_snapshot.get('http://purl.org/dc/terms/description', [])

    # Verify that only deletion description was kept
    self.assertEqual(len(descriptions), 1,
                     "Last snapshot should have exactly one description")
    # assertIn shows the offending description text when the check fails
    self.assertIn("has been deleted", descriptions[0]['@value'],
                  "Last snapshot should keep deletion description")
def test_multiple_descriptions_middle_snapshot(self):
    """Test handling of multiple descriptions in a middle snapshot.

    Snapshot 2, the middle of three, carries a "modified" and a "created"
    description; after processing it is expected to keep just the
    "modified" one.
    """
    test_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/1",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been created."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-13T15:05:18+00:00"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/2",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [
                    {
                        "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been modified."
                    },
                    {
                        "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been created."
                    }
                ],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-31T22:08:21+00:00"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/br/06504122264/prov/se/3",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/br/06504122264' has been deleted."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-01-01T10:00:00+00:00"
                }]
            }
        ],
        "@id": "https://w3id.org/oc/meta/br/06504122264/prov/"
    }

    test_file = os.path.join(self.temp_dir, "middle_snapshot_descriptions.zip")
    with zipfile.ZipFile(test_file, 'w') as zf:
        zf.writestr('se.json', json.dumps(test_data))

    result = self.processor.process_file(test_file, 'test/fix_provenance_logs')
    self.assertTrue(result)

    with zipfile.ZipFile(test_file, 'r') as zf:
        with zf.open('se.json') as f:
            fixed_data = json.load(f)

    middle_snapshot = next(item for item in fixed_data[0]['@graph']
                           if item['@id'].endswith('/prov/se/2'))

    descriptions = middle_snapshot.get('http://purl.org/dc/terms/description', [])

    # Verify that only modification description was kept
    self.assertEqual(len(descriptions), 1,
                     "Middle snapshot should have exactly one description")
    # assertIn shows the offending description text when the check fails
    self.assertIn("has been modified", descriptions[0]['@value'],
                  "Middle snapshot should keep modification description")
814 def test_real_case_multiple_timestamps_and_incomplete_snapshot(self):
815 """Test the real case scenario of entity 0623074134 with multiple timestamps
816 and an incomplete snapshot."""
817 test_data = {
818 "@graph": [
819 {
820 "@id": "https://w3id.org/oc/meta/id/0623074134/prov/se/4",
821 "@type": ["http://www.w3.org/ns/prov#Entity"],
822 "http://purl.org/dc/terms/description": [{
823 "@value": "The entity 'https://w3id.org/oc/meta/id/0623074134' has been merged with 'https://w3id.org/oc/meta/id/063301371593'."
824 }],
825 "http://www.w3.org/ns/prov#generatedAtTime": [{
826 "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
827 "@value": "2024-10-23T20:52:32+00:00"
828 }],
829 "http://www.w3.org/ns/prov#specializationOf": [{
830 "@id": "https://w3id.org/oc/meta/id/0623074134"
831 }],
832 "http://www.w3.org/ns/prov#wasAttributedTo": [{
833 "@id": "http://orcid.org/0000-0002-8420-0696"
834 }],
835 "http://www.w3.org/ns/prov#wasDerivedFrom": [
836 {
837 "@id": "https://w3id.org/oc/meta/id/0623074134/prov/se/3"
838 },
839 {
840 "@id": "https://w3id.org/oc/meta/id/063301371593/prov/se/1"
841 }
842 ]
843 },
844 {
845 "@id": "https://w3id.org/oc/meta/id/0623074134/prov/se/3",
846 "@type": ["http://www.w3.org/ns/prov#Entity"],
847 "http://purl.org/dc/terms/description": [{
848 "@value": "The entity 'https://w3id.org/oc/meta/id/0623074134' has been modified."
849 }],
850 "http://www.w3.org/ns/prov#generatedAtTime": [{
851 "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
852 "@value": "2024-06-06T18:55:36+00:00"
853 }],
854 "http://www.w3.org/ns/prov#invalidatedAtTime": [{
855 "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
856 "@value": "2024-10-23T20:52:32+00:00"
857 }],
858 "http://www.w3.org/ns/prov#specializationOf": [{
859 "@id": "https://w3id.org/oc/meta/id/0623074134"
860 }],
861 "http://www.w3.org/ns/prov#wasDerivedFrom": [{
862 "@id": "https://w3id.org/oc/meta/id/0623074134/prov/se/2"
863 }]
864 },
865 {
866 "@id": "https://w3id.org/oc/meta/id/0623074134/prov/se/1",
867 "@type": ["http://www.w3.org/ns/prov#Entity"],
868 "http://purl.org/dc/terms/description": [{
869 "@value": "The entity 'https://w3id.org/oc/meta/id/0623074134' has been created."
870 }],
871 "http://www.w3.org/ns/prov#generatedAtTime": [
872 {
873 "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
874 "@value": "2024-03-27T18:03:23+00:00"
875 },
876 {
877 "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
878 "@value": "2023-12-13T16:14:48.836637"
879 }
880 ],
881 "http://www.w3.org/ns/prov#specializationOf": [{
882 "@id": "https://w3id.org/oc/meta/id/0623074134"
883 }]
884 },
885 {
886 "@id": "https://w3id.org/oc/meta/id/0623074134/prov/se/2",
887 "@type": ["http://www.w3.org/ns/prov#Entity"],
888 "http://www.w3.org/ns/prov#invalidatedAtTime": [{
889 "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
890 "@value": "2024-06-06T18:55:36+00:00"
891 }]
892 }
893 ],
894 "@id": "https://w3id.org/oc/meta/id/0623074134/prov/"
895 }
897 test_file = os.path.join(self.temp_dir, "real_case_test.zip")
898 with zipfile.ZipFile(test_file, 'w') as zf:
899 zf.writestr('se.json', json.dumps(test_data))
901 result = self.processor.process_file(test_file, 'test/fix_provenance_logs')
902 self.assertTrue(result)
904 # Verify the fixed data
905 with zipfile.ZipFile(test_file, 'r') as zf:
906 with zf.open('se.json') as f:
907 fixed_data = json.loads(f.read())
909 graph_data = fixed_data[0]['@graph']
911 # Test specific issues from the case study:
913 # 1. Check that snapshot 1 has only one generatedAtTime (should keep the earliest)
914 snapshot_1 = next(item for item in graph_data
915 if item['@id'].endswith('/prov/se/1'))
916 gen_times_1 = snapshot_1.get('http://www.w3.org/ns/prov#generatedAtTime', [])
917 self.assertEqual(len(gen_times_1), 1,
918 "Snapshot 1 should have exactly one generatedAtTime")
919 self.assertEqual(
920 gen_times_1[0]['@value'],
921 "2023-12-13T15:14:48.836637+00:00",
922 "Should keep the earliest timestamp"
923 )
925 # 2. Check that snapshot 2 is complete with all required properties
926 snapshot_2 = next(item for item in graph_data
927 if item['@id'].endswith('/prov/se/2'))
928 self.assertIn('@type', snapshot_2)
929 self.assertIn('http://www.w3.org/ns/prov#specializationOf', snapshot_2)
930 self.assertIn('http://www.w3.org/ns/prov#wasDerivedFrom', snapshot_2)
931 self.assertEqual(
932 snapshot_2['http://www.w3.org/ns/prov#wasDerivedFrom'][0]['@id'],
933 "https://w3id.org/oc/meta/id/0623074134/prov/se/1"
934 )
936 # 3. Check that all snapshots form a proper chain
937 for i in range(2, 5): # Check snapshots 2 through 4
938 current = next(item for item in graph_data
939 if item['@id'].endswith(f'/prov/se/{i}'))
940 self.assertIn('http://www.w3.org/ns/prov#wasDerivedFrom', current)
941 derived_from = current['http://www.w3.org/ns/prov#wasDerivedFrom'][0]['@id']
942 self.assertTrue(
943 derived_from.endswith(f'/prov/se/{i-1}'),
944 f"Snapshot {i} should be derived from snapshot {i-1}"
945 )
947 # 4. Check bi-directional timestamp consistency
948 snapshots = {}
949 for i in range(1, 5): # Get all snapshots
950 snapshots[i] = next(item for item in graph_data
951 if item['@id'].endswith(f'/prov/se/{i}'))
953 # 4.1 Check forward: invalidatedAtTime matches next snapshot's generatedAtTime
954 for i in range(1, 4): # Check snapshots 1 through 3
955 current = snapshots[i]
956 next_snapshot = snapshots[i + 1]
958 self.assertIn('http://www.w3.org/ns/prov#invalidatedAtTime', current,
959 f"Snapshot {i} should have invalidatedAtTime")
960 self.assertIn('http://www.w3.org/ns/prov#generatedAtTime', next_snapshot,
961 f"Snapshot {i+1} should have generatedAtTime")
963 inv_time = current['http://www.w3.org/ns/prov#invalidatedAtTime'][0]['@value']
964 gen_time = next_snapshot['http://www.w3.org/ns/prov#generatedAtTime'][0]['@value']
966 self.assertEqual(
967 inv_time,
968 gen_time,
969 f"Invalidation time of snapshot {i} should match generation time of snapshot {i+1}"
970 )
972 # 4.2 Check backward: generatedAtTime matches previous snapshot's invalidatedAtTime
973 for i in range(2, 5): # Check snapshots 2 through 4
974 current = snapshots[i]
975 prev_snapshot = snapshots[i - 1]
977 self.assertIn('http://www.w3.org/ns/prov#generatedAtTime', current,
978 f"Snapshot {i} should have generatedAtTime")
979 self.assertIn('http://www.w3.org/ns/prov#invalidatedAtTime', prev_snapshot,
980 f"Snapshot {i-1} should have invalidatedAtTime")
982 gen_time = current['http://www.w3.org/ns/prov#generatedAtTime'][0]['@value']
983 inv_time = prev_snapshot['http://www.w3.org/ns/prov#invalidatedAtTime'][0]['@value']
985 self.assertEqual(
986 gen_time,
987 inv_time,
988 f"Generation time of snapshot {i} should match invalidation time of snapshot {i-1}"
989 )
991 # 5. Check that merge-related wasDerivedFrom is preserved in snapshot 4
992 snapshot_4 = next(item for item in graph_data
993 if item['@id'].endswith('/prov/se/4'))
994 derived_from_ids = [ref['@id'] for ref in
995 snapshot_4['http://www.w3.org/ns/prov#wasDerivedFrom']]
996 self.assertIn(
997 "https://w3id.org/oc/meta/id/063301371593/prov/se/1",
998 derived_from_ids,
999 "Merge-related wasDerivedFrom should be preserved"
1000 )
def test_original_unresolved_issues_scenario(self):
    """Check that a five-snapshot provenance file with known issues is repaired.

    The input graph has a snapshot se/1 carrying two generatedAtTime values
    and a snapshot se/2 that only has a type and an invalidatedAtTime.
    After processing, the test expects:

    * exactly one generatedAtTime on se/1;
    * exactly one description on se/3 and on se/4;
    * a wasDerivedFrom chain se/5 -> se/4 -> se/3 -> se/2 -> se/1.

    A failing assertion means the fixer did not resolve the corresponding
    issue, and the failure message shows which one.
    """
    # Test data taken from the originally reported case
    original_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/ra/06440227509/prov/se/5",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06440227509' has been deleted."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-12-08T01:23:24+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-12-08T01:23:24+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/ra/06440227509"
                }],
                "http://www.w3.org/ns/prov#wasAttributedTo": [{
                    "@id": "https://orcid.org/0000-0002-8420-0696"
                }],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [{
                    "@id": "https://w3id.org/oc/meta/ra/06440227509/prov/se/4"
                }],
                "https://w3id.org/oc/ontology/hasUpdateQuery": [{
                    "@value": "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06440227509> <http://xmlns.com/foaf/0.1/givenName> \"R.\" .<https://w3id.org/oc/meta/ra/06440227509> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/063501394354> .<https://w3id.org/oc/meta/ra/06440227509> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Agent> .<https://w3id.org/oc/meta/ra/06440227509> <http://xmlns.com/foaf/0.1/familyName> \"Stępniewski\" . } }"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06440227509/prov/se/1",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06440227509' has been created."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [
                    {
                        "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                        "@value": "2023-12-13T15:53:04.544275"
                    },
                    {
                        "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                        "@value": "2024-03-27T20:20:19+00:00"
                    }
                ],
                "http://www.w3.org/ns/prov#hadPrimarySource": [{
                    "@id": "https://openalex.s3.amazonaws.com/browse.html"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/ra/06440227509"
                }],
                "http://www.w3.org/ns/prov#wasAttributedTo": [{
                    "@id": "https://w3id.org/oc/meta/prov/pa/1"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06440227509/prov/se/4",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06440227509' has been modified."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-12-04T21:15:55+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-12-08T01:23:24+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/ra/06440227509"
                }],
                "http://www.w3.org/ns/prov#wasAttributedTo": [{
                    "@id": "https://orcid.org/0000-0002-8420-0696"
                }],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [{
                    "@id": "https://w3id.org/oc/meta/ra/06440227509/prov/se/3"
                }],
                "https://w3id.org/oc/ontology/hasUpdateQuery": [{
                    "@value": "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06440227509> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06904873317> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06440227509> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/063501394354> . } }"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06440227509/prov/se/2",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-24T23:21:33+00:00"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06440227509/prov/se/3",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06440227509' has been modified."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2023-12-24T23:21:33+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-12-04T21:15:55+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [{
                    "@id": "https://w3id.org/oc/meta/ra/06440227509"
                }],
                "http://www.w3.org/ns/prov#wasAttributedTo": [{
                    "@id": "https://w3id.org/oc/meta/prov/pa/1"
                }],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [{
                    "@id": "https://w3id.org/oc/meta/ra/06440227509/prov/se/2"
                }],
                "https://w3id.org/oc/ontology/hasUpdateQuery": [{
                    "@value": "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06440227509> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/0644082006> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06440227509> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06904873317> . } }"
                }]
            }
        ],
        "@id": "https://w3id.org/oc/meta/ra/06440227509/prov/"
    }

    test_file = os.path.join(self.temp_dir, "original_unresolved_issues.zip")
    with zipfile.ZipFile(test_file, 'w') as zf:
        zf.writestr('se.json', json.dumps(original_data))

    # Process the file with the fixer. assertTrue (rather than
    # assertIsNotNone) matches the sibling tests and also rejects a
    # False result, which assertIsNotNone would let through.
    result = self.processor.process_file(test_file, 'test/fix_provenance_logs')
    self.assertTrue(result, "Process should complete without errors")

    # Read back the rewritten data
    with zipfile.ZipFile(test_file, 'r') as zf:
        with zf.open('se.json') as f:
            fixed_data = json.loads(f.read())

    graph_data = fixed_data[0]['@graph']

    def find_snapshot(suffix):
        """Return the first graph item whose @id ends with *suffix*, or None."""
        return next((x for x in graph_data if x['@id'].endswith(suffix)), None)

    # 1. Snapshot se/1 had multiple generatedAtTime values; only one
    #    should remain after processing.
    snapshot_1 = find_snapshot('/prov/se/1')
    self.assertIsNotNone(snapshot_1, "Snapshot se/1 should exist")
    gen_times_1 = snapshot_1.get('http://www.w3.org/ns/prov#generatedAtTime', [])
    self.assertEqual(
        len(gen_times_1), 1,
        "Snapshot se/1 should have only one generatedAtTime after processing"
    )

    # 2. Description consistency on se/3 and se/4: each should keep a
    #    single description.
    snapshot_3 = find_snapshot('/prov/se/3')
    self.assertIsNotNone(snapshot_3, "Snapshot se/3 should exist")
    desc_3 = snapshot_3.get('http://purl.org/dc/terms/description', [])
    self.assertEqual(len(desc_3), 1, "Snapshot se/3 should have exactly one description")

    snapshot_4 = find_snapshot('/prov/se/4')
    self.assertIsNotNone(snapshot_4, "Snapshot se/4 should exist")
    desc_4 = snapshot_4.get('http://purl.org/dc/terms/description', [])
    self.assertEqual(len(desc_4), 1, "Snapshot se/4 should have exactly one description")

    # 3. wasDerivedFrom chain: every snapshot except the first should
    #    point to the previous one. Expected sequence:
    #    se/1 (created), se/2, se/3, se/4, se/5 (deleted), i.e.
    #      se/2 -> se/1
    #      se/3 -> se/2
    #      se/4 -> se/3
    #      se/5 -> se/4
    def get_derived_from(snap_id):
        """Return the snapshot number of the first wasDerivedFrom target, or None."""
        snap = find_snapshot(snap_id)
        if snap and 'http://www.w3.org/ns/prov#wasDerivedFrom' in snap:
            return snap['http://www.w3.org/ns/prov#wasDerivedFrom'][0]['@id'].split('/se/')[-1]
        return None

    self.assertEqual(get_derived_from('/prov/se/2'), '1', "se/2 should derive from se/1")
    self.assertEqual(get_derived_from('/prov/se/3'), '2', "se/3 should derive from se/2")
    self.assertEqual(get_derived_from('/prov/se/4'), '3', "se/4 should derive from se/3")
    self.assertEqual(get_derived_from('/prov/se/5'), '4', "se/5 should derive from se/4")
def test_complex_merge_chain_scenario(self):
    """Exercise the fixer on a long merge chain whose givenName value oscillates.

    The fixture holds snapshots se/2 through se/10 (se/1 is absent from the
    input), several of which are merges with other entities. After
    processing, the test checks timestamp continuity between consecutive
    snapshots, that merge snapshots keep at least two wasDerivedFrom
    links, that snapshot numbers run continuously from 1, and that
    snapshot 1 carries a creation description.
    """
    merge_chain_data = {
        "@graph": [
            {
                "@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/9",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06490509042' has been modified."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-12-04T18:44:08+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-12-16T03:14:25+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042"}
                ],
                "http://www.w3.org/ns/prov#wasAttributedTo": [
                    {"@id": "https://orcid.org/0000-0002-8420-0696"}
                ],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/8"}
                ],
                "https://w3id.org/oc/ontology/hasUpdateQuery": [{
                    "@value": "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06320156505> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/063201438132> . } }"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/5",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06490509042' has been merged with 'https://w3id.org/oc/meta/ra/06530192638'."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:29:52+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:30:27+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042"}
                ],
                "http://www.w3.org/ns/prov#wasAttributedTo": [
                    {"@id": "https://w3id.org/oc/meta/prov/pa/1"}
                ],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/4"},
                    {"@id": "https://w3id.org/oc/meta/ra/06530192638/prov/se/2"}
                ],
                "https://w3id.org/oc/ontology/hasUpdateQuery": [{
                    "@value": "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora E\" . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora E.\" . } }"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/4",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06490509042' has been merged with 'https://w3id.org/oc/meta/ra/065047414'."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:24:47+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:29:52+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042"}
                ],
                "http://www.w3.org/ns/prov#wasAttributedTo": [
                    {"@id": "https://w3id.org/oc/meta/prov/pa/1"}
                ],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/3"},
                    {"@id": "https://w3id.org/oc/meta/ra/065047414/prov/se/2"}
                ],
                "https://w3id.org/oc/ontology/hasUpdateQuery": [{
                    "@value": "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora Elizabeth\" . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora E\" . } }"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/7",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06490509042' has been merged with 'https://w3id.org/oc/meta/ra/0612010691345'."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:31:00+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:31:43+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042"}
                ],
                "http://www.w3.org/ns/prov#wasAttributedTo": [
                    {"@id": "https://w3id.org/oc/meta/prov/pa/1"}
                ],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [
                    {"@id": "https://w3id.org/oc/meta/ra/0612010691345/prov/se/1"},
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/6"}
                ],
                "https://w3id.org/oc/ontology/hasUpdateQuery": [{
                    "@value": "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora\" . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora E.\" . } }"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/10",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06490509042' has been deleted."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-12-16T03:14:25+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-12-16T03:14:25+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042"}
                ],
                "http://www.w3.org/ns/prov#wasAttributedTo": [
                    {"@id": "https://orcid.org/0000-0002-8420-0696"}
                ],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/9"}
                ],
                "https://w3id.org/oc/ontology/hasUpdateQuery": [{
                    "@value": "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora E.\" .<https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/familyName> \"Serralde-Zúñiga\" .<https://w3id.org/oc/meta/ra/06490509042> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Agent> .<https://w3id.org/oc/meta/ra/06490509042> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/063201438132> . } }"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/3",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06490509042' has been modified."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:24:19+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:24:47+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042"}
                ],
                "http://www.w3.org/ns/prov#wasAttributedTo": [
                    {"@id": "https://w3id.org/oc/meta/prov/pa/1"}
                ],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/2"}
                ],
                "https://w3id.org/oc/ontology/hasUpdateQuery": [{
                    "@value": "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora E.\" . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora Elizabeth\" . } }"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/8",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:31:43+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-12-04T18:44:08+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042"}
                ],
                "http://www.w3.org/ns/prov#wasAttributedTo": [
                    {"@id": "https://w3id.org/oc/meta/prov/pa/1"}
                ],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [
                    {"@id": "https://w3id.org/oc/meta/ra/06320390920/prov/se/1"},
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/7"}
                ]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/6",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://purl.org/dc/terms/description": [{
                    "@value": "The entity 'https://w3id.org/oc/meta/ra/06490509042' has been merged with 'https://w3id.org/oc/meta/ra/06520239458'."
                }],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:30:27+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:31:00+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042"}
                ],
                "http://www.w3.org/ns/prov#wasAttributedTo": [
                    {"@id": "https://w3id.org/oc/meta/prov/pa/1"}
                ],
                "http://www.w3.org/ns/prov#wasDerivedFrom": [
                    {"@id": "https://w3id.org/oc/meta/ra/06520239458/prov/se/1"},
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/5"}
                ],
                "https://w3id.org/oc/ontology/hasUpdateQuery": [{
                    "@value": "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora E.\" . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06490509042> <http://xmlns.com/foaf/0.1/givenName> \"Aurora\" . } }"
                }]
            },
            {
                "@id": "https://w3id.org/oc/meta/ra/06490509042/prov/se/2",
                "@type": ["http://www.w3.org/ns/prov#Entity"],
                "http://www.w3.org/ns/prov#generatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2022-12-20T00:00:00+00:00"
                }],
                "http://www.w3.org/ns/prov#invalidatedAtTime": [{
                    "@type": "http://www.w3.org/2001/XMLSchema#dateTime",
                    "@value": "2024-02-21T06:24:19+00:00"
                }],
                "http://www.w3.org/ns/prov#specializationOf": [
                    {"@id": "https://w3id.org/oc/meta/ra/06490509042"}
                ]
            }
        ],
        "@id": "https://w3id.org/oc/meta/ra/06490509042/prov/"
    }

    archive_path = os.path.join(self.temp_dir, "complex_merge_chain.zip")
    with zipfile.ZipFile(archive_path, 'w') as archive:
        archive.writestr('se.json', json.dumps(merge_chain_data))

    # Run the fixer on the archive
    outcome = self.processor.process_file(archive_path, 'test/fix_provenance_logs')
    self.assertTrue(outcome)

    # Load the rewritten graph back from the archive
    with zipfile.ZipFile(archive_path, 'r') as archive:
        with archive.open('se.json') as handle:
            processed = json.loads(handle.read())

    graph_data = processed[0]['@graph']

    def snapshot_index(snap):
        """Return the numeric suffix of a snapshot @id, or 0 for non-snapshots."""
        if '/prov/se/' not in snap['@id']:
            return 0
        return int(snap['@id'].split('/se/')[-1])

    def first_value(snapshot, pred):
        """Return the first '@value' found under *pred*, or None if there is none."""
        for entry in snapshot.get(pred, []):
            if '@value' in entry:
                return entry['@value']
        return None

    def is_merge(snap):
        """Tell whether any description of *snap* mentions a merge."""
        for desc in snap.get('http://purl.org/dc/terms/description', []):
            if "has been merged with" in str(desc.get('@value', '')):
                return True
        return False

    # Sort the snapshots by snapshot number
    ordered = sorted(graph_data, key=snapshot_index)

    # 1. Check the temporal chain: where both values are present, a
    #    snapshot's invalidation time equals the next one's generation time.
    for curr, next_snap in zip(ordered, ordered[1:]):
        curr_inv = first_value(curr, "http://www.w3.org/ns/prov#invalidatedAtTime")
        next_gen = first_value(next_snap, "http://www.w3.org/ns/prov#generatedAtTime")
        if not (curr_inv and next_gen):
            continue
        self.assertEqual(curr_inv, next_gen,
            f"Timestamp mismatch between snapshots {curr['@id']} and {next_snap['@id']}")

    # 2. Check merge consistency: merge snapshots keep at least two sources
    for merged_snap in (s for s in graph_data if is_merge(s)):
        sources = []
        for ref in merged_snap.get('http://www.w3.org/ns/prov#wasDerivedFrom', []):
            sources.append(ref['@id'])
        self.assertGreaterEqual(len(sources), 2,
            f"Merge snapshot {merged_snap['@id']} should have at least 2 wasDerivedFrom relations")

    # Verify snapshot sequence completeness
    snapshot_numbers = {
        int(item['@id'].split('/se/')[-1])
        for item in graph_data
        if '/prov/se/' in item['@id']
    }

    # Check that sequence starts at 1
    self.assertIn(1, snapshot_numbers,
                  "Snapshot sequence should start with number 1")

    # Check sequence continuity
    highest = max(snapshot_numbers)
    self.assertEqual(snapshot_numbers, set(range(1, highest + 1)),
                     f"Snapshot sequence should be continuous from 1 to {highest}")

    # Verify that snapshot 1 has creation description
    snapshot_1 = next((s for s in graph_data if s['@id'].endswith('/prov/se/1')), None)
    self.assertIsNotNone(snapshot_1, "Snapshot 1 should exist")
    has_creation = any(
        "has been created" in d.get('@value', '')
        for d in snapshot_1.get('http://purl.org/dc/terms/description', [])
    )
    self.assertTrue(has_creation, "First snapshot should have creation description")
# Run the test suite through unittest's runner when this file is executed directly.
if __name__ == '__main__':
    unittest.main()