Coverage for test/editor_test.py: 97%
358 statements
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2022 Arcangelo Massari <arcangelo.massari@unibo.it>
#
# Permission to use, copy, modify, and/or distribute this software for any purpose
# with or without fee is hereby granted, provided that the above copyright notice
# and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.
import json
import os
import unittest
from shutil import rmtree
from test.test_utils import (PROV_SERVER, REDIS_CACHE_DB, REDIS_DB, REDIS_HOST,
                             REDIS_PORT, SERVER, reset_redis_counters,
                             reset_server)

import yaml
from oc_meta.plugins.editor import EntityCache, MetaEditor
from oc_meta.run.meta_process import run_meta_process
from oc_ocdm import Storer
from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler
from oc_ocdm.graph import GraphSet
from oc_ocdm.prov import ProvSet
from oc_ocdm.reader import Reader
from rdflib import URIRef
from sparqlite import SPARQLClient

BASE = os.path.join("test", "editor")
OUTPUT = os.path.join(BASE, "output")
META_CONFIG = os.path.join(BASE, "meta_config.yaml")


def get_counter_handler():
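    """Return a RedisCounterHandler connected to the test Redis database."""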
    return RedisCounterHandler(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB)


class TestEditor(unittest.TestCase):
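    """Integration tests for MetaEditor: setUp repopulates the triplestore
    via run_meta_process before each test, so every test starts from the
    same dataset."""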
    @classmethod
    def setUpClass(cls):
        cls.counter_handler = get_counter_handler()
        cls.original_generate_rdf_files = None

    def setUp(self):
        reset_server()
        reset_redis_counters()
        if os.path.exists(OUTPUT):
            rmtree(OUTPUT)

        # Create a temporary directory for the cache files
        self.temp_dir = os.path.join("test", "temp_editor_test")
        if os.path.exists(self.temp_dir):
            rmtree(self.temp_dir)
        os.makedirs(self.temp_dir)

        # Set up the cache files
        self.cache_file = os.path.join(self.temp_dir, "ts_upload_cache.json")
        self.failed_file = os.path.join(self.temp_dir, "failed_queries.txt")
        self.stop_file = os.path.join(self.temp_dir, ".stop_upload")

        # Create separate directories for data and provenance update queries
        self.data_update_dir = os.path.join(self.temp_dir, "to_be_uploaded_data")
        self.prov_update_dir = os.path.join(self.temp_dir, "to_be_uploaded_prov")
        os.makedirs(self.data_update_dir, exist_ok=True)
        os.makedirs(self.prov_update_dir, exist_ok=True)

        with open(META_CONFIG, encoding="utf-8") as file:
            settings = yaml.full_load(file)
        # Update the settings to use Redis and the cache files
        settings.update(
            {
                "redis_host": REDIS_HOST,
                "redis_port": REDIS_PORT,
                "redis_db": REDIS_DB,
                "redis_cache_db": REDIS_CACHE_DB,
                "ts_upload_cache": self.cache_file,
                "ts_failed_queries": self.failed_file,
                "ts_stop_file": self.stop_file,
                "triplestore_url": SERVER,
                "provenance_triplestore_url": PROV_SERVER,
                "data_update_dir": self.data_update_dir,
                "prov_update_dir": self.prov_update_dir,
            }
        )
        run_meta_process(settings=settings, meta_config_path=META_CONFIG)

    def tearDown(self):
        if os.path.exists(OUTPUT):
            rmtree(OUTPUT)
        if os.path.exists(self.temp_dir):
            rmtree(self.temp_dir)
        reset_redis_counters()

        if self.original_generate_rdf_files is not None:
            with open(META_CONFIG, encoding="utf-8") as file:
                settings = yaml.full_load(file)
            settings["generate_rdf_files"] = self.original_generate_rdf_files
            with open(META_CONFIG, "w", encoding="utf-8") as file:
                yaml.dump(settings, file)
            self.original_generate_rdf_files = None

    def test_update_property(self):
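        """Rewire the hasNext chain among four agent roles, then verify the
        change in the data triplestore, the provenance triplestore, and the
        serialized JSON-LD files (including the recorded update queries)."""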
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/0601"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/0604"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/0604"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/0603"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/0603"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/0602"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/0602"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/0605"),
        )

        with SPARQLClient(SERVER, timeout=60) as client:
            result = client.query("""
                ASK {
                    GRAPH <https://w3id.org/oc/meta/ar/> {
                        <https://w3id.org/oc/meta/ar/0601> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0604> .
                    }
                }
            """)
            self.assertTrue(result["boolean"], "AR/0601 → AR/0604 relationship not found in triplestore")

            result = client.query("""
                ASK {
                    GRAPH <https://w3id.org/oc/meta/ar/> {
                        <https://w3id.org/oc/meta/ar/0604> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0603> .
                    }
                }
            """)
            self.assertTrue(result["boolean"], "AR/0604 → AR/0603 relationship not found in triplestore")

            result = client.query("""
                ASK {
                    GRAPH <https://w3id.org/oc/meta/ar/> {
                        <https://w3id.org/oc/meta/ar/0603> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0602> .
                    }
                }
            """)
            self.assertTrue(result["boolean"], "AR/0603 → AR/0602 relationship not found in triplestore")

            result = client.query("""
                ASK {
                    GRAPH <https://w3id.org/oc/meta/ar/> {
                        <https://w3id.org/oc/meta/ar/0602> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0605> .
                    }
                }
            """)
            self.assertTrue(result["boolean"], "AR/0602 → AR/0605 relationship not found in triplestore")

        with SPARQLClient(PROV_SERVER, timeout=60) as client:
            prov_result = client.query("""
                ASK {
                    ?s <http://www.w3.org/ns/prov#specializationOf> <https://w3id.org/oc/meta/ar/0601> ;
                       <http://www.w3.org/ns/prov#generatedAtTime> ?time .
                }
            """)
            self.assertTrue(prov_result["boolean"], "Provenance for AR/0601 not found in triplestore")

        with open(
            os.path.join(OUTPUT, "rdf", "ar", "060", "10000", "1000.json"),
            "r",
            encoding="utf-8",
        ) as file:
            ar_data = json.load(file)
            for graph in ar_data:
                graph_data = graph["@graph"]
                for ar in graph_data:
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/0601":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/0604",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/0603":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/0602",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/0604":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/0603",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/0602":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/0605",
                        )
        with open(
            os.path.join(
                OUTPUT, "rdf", "ar", "060", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            ar_prov = json.load(f)
            for graph in ar_prov:
                graph_prov = graph["@graph"]
                for ar in graph_prov:
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/0601/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0601> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0602> . } } ; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0601> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0604> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/0603/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0603> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0604> . } } ; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0603> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0602> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/0604/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0604> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0605> . } } ; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0604> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0603> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/0602/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0602> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0603> . } } ; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0602> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0605> . } }",
                        )

    def test_delete_property(self):
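        """Delete the title of br/0601 and verify both the serialized data and
        the DELETE DATA update query recorded in the provenance snapshot."""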
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.delete(URIRef("https://w3id.org/oc/meta/br/0601"), "has_title")
        with open(
            os.path.join(OUTPUT, "rdf", "br", "060", "10000", "1000.json"),
            "r",
            encoding="utf8",
        ) as f:
            br_data = json.load(f)
            for graph in br_data:
                graph_data = graph["@graph"]
                for br in graph_data:
                    if br["@id"] == "https://w3id.org/oc/meta/br/0601":
                        self.assertNotIn("http://purl.org/dc/terms/title", br)
        with open(
            os.path.join(
                OUTPUT, "rdf", "br", "060", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            br_prov = json.load(f)
            for graph in br_prov:
                graph_prov = graph["@graph"]
                for br in graph_prov:
                    if br["@id"] == "https://w3id.org/oc/meta/br/0601/prov/se/2":
                        self.assertEqual(
                            br["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            'DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { <https://w3id.org/oc/meta/br/0601> <http://purl.org/dc/terms/title> "A Review Of Hemolytic Uremic Syndrome In Patients Treated With Gemcitabine Therapy"^^<http://www.w3.org/2001/XMLSchema#string> . } }',
                        )

    def test_delete_entity(self):
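        """Delete id/0601 entirely and verify that its triples, as well as the
        dangling reference from br/0601, are removed and recorded in the
        provenance update queries."""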
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.delete(URIRef("https://w3id.org/oc/meta/id/0601"))
        with open(
            os.path.join(OUTPUT, "rdf", "id", "060", "10000", "1000.json"),
            "r",
            encoding="utf8",
        ) as f:
            id_data = json.load(f)
            for graph in id_data:
                graph_data = graph["@graph"]
                for identifier in graph_data:
                    if identifier["@id"] == "https://w3id.org/oc/meta/id/0601":
                        self.fail("id/0601 should no longer appear in the serialized data")
        with open(
            os.path.join(
                OUTPUT, "rdf", "id", "060", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            id_prov = json.load(f)
            for graph in id_prov:
                graph_prov = graph["@graph"]
                for identifier in graph_prov:
                    if identifier["@id"] == "https://w3id.org/oc/meta/id/0601/prov/se/2":
                        update_query = (
                            identifier["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"]
                            .replace(
                                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/id/> { ",
                                "",
                            )
                            .replace(" . } }", "")
                            .replace("\n", "")
                            .split(" .")
                        )
                        self.assertEqual(
                            set(update_query),
                            {
                                "<https://w3id.org/oc/meta/id/0601> <http://purl.org/spar/datacite/usesIdentifierScheme> <http://purl.org/spar/datacite/doi>",
                                "<https://w3id.org/oc/meta/id/0601> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/datacite/Identifier>",
                                '<https://w3id.org/oc/meta/id/0601> <http://www.essepuntato.it/2010/06/literalreification/hasLiteralValue> "10.1002/(sici)1097-0142(19990501)85:9<2023::aid-cncr21>3.0.co;2-2"^^<http://www.w3.org/2001/XMLSchema#string>',
                            },
                        )
        with open(
            os.path.join(
                OUTPUT, "rdf", "br", "060", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            ra_prov = json.load(f)
            for graph in ra_prov:
                graph_prov = graph["@graph"]
                for ra in graph_prov:
                    if ra["@id"] == "https://w3id.org/oc/meta/br/0601/prov/se/2":
                        self.assertEqual(
                            ra["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { <https://w3id.org/oc/meta/br/0601> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/0601> . } }",
                        )

    def test_merge(self):
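        """Merge ra/06010 into ra/0607 and verify the Redis snapshot counters,
        the identifiers attached to the surviving entity, and the provenance
        generated for both entities."""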
        base_iri = "https://w3id.org/oc/meta/"
        resp_agent = "https://orcid.org/0000-0002-8420-0696"
        g_set = GraphSet(
            base_iri,
            supplier_prefix="060",
            wanted_label=False,
            custom_counter_handler=self.counter_handler,
        )
        endpoint = "http://127.0.0.1:8805/sparql"

        # Create the test entities
        ra = g_set.add_ra(
            resp_agent=resp_agent, res=URIRef("https://w3id.org/oc/meta/ra/06010")
        )
        ra.has_name("Wiley")

        reader = Reader()
        id_0605 = reader.import_entity_from_triplestore(
            g_set,
            endpoint,
            URIRef("https://w3id.org/oc/meta/id/0605"),
            resp_agent,
            enable_validation=False,
        )
        id_0609 = g_set.add_id(resp_agent=resp_agent)
        id_0609.create_crossref("313")

        ra.has_identifier(id_0605)
        ra.has_identifier(id_0609)

        # Generate provenance
        provset = ProvSet(
            g_set,
            base_iri,
            wanted_label=False,
            supplier_prefix="060",
            custom_counter_handler=self.counter_handler,
        )
        provset.generate_provenance()

        # Store and upload the data
        rdf_dir = os.path.join(OUTPUT, "rdf") + os.sep
        graph_storer = Storer(
            g_set, dir_split=10000, n_file_item=1000, zip_output=False
        )
        prov_storer = Storer(
            provset, dir_split=10000, n_file_item=1000, zip_output=False
        )

        graph_storer.store_all(rdf_dir, base_iri)
        prov_storer.store_all(rdf_dir, base_iri)
        graph_storer.upload_all(endpoint)
        g_set.commit_changes()

        # Perform the merge
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.merge(
            g_set,
            URIRef("https://w3id.org/oc/meta/ra/0607"),
            URIRef("https://w3id.org/oc/meta/ra/06010"),
        )
        editor.save(g_set)

        # Check the Redis counters: one snapshot each for ra/0601-ra/0606,
        # two for ra/0607, which was modified by the merge
        for identifier, expected in [(1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 1), (7, 2)]:
            self.assertEqual(
                self.counter_handler.read_counter(
                    "ra", prov_short_name="se", identifier=identifier, supplier_prefix="060"
                ),
                expected,
            )

        # Verify the merged data
        for filepath in [
            os.path.join(OUTPUT, "rdf", "ra", "060", "10000", "1000.json"),
            os.path.join(
                OUTPUT, "rdf", "ra", "060", "10000", "1000", "prov", "se.json"
            ),
        ]:
            with open(filepath, "r", encoding="utf8") as f:
                data = json.load(f)
                for graph in data:
                    graph_data = graph["@graph"]
                    for entity in graph_data:
                        if entity["@id"] == "https://w3id.org/oc/meta/ra/0607":
                            identifiers = {
                                identifier["@id"]
                                for identifier in entity["http://purl.org/spar/datacite/hasIdentifier"]
                            }
                            self.assertEqual(
                                identifiers,
                                {str(id_0605.res), str(id_0609.res)},
                            )
                        elif entity["@id"] == "https://w3id.org/oc/meta/ra/06010":
                            self.fail("ra/06010 should have been removed by the merge")
                        # elif entity['@id'] == 'https://w3id.org/oc/meta/ar/06010':
                        #     self.assertEqual(entity['http://purl.org/spar/pro/isHeldBy'][0]['@id'], 'https://w3id.org/oc/meta/ra/0607')
                        elif entity["@id"] in {
                            "https://w3id.org/oc/meta/ra/0607/prov/se/1",
                            "https://w3id.org/oc/meta/ra/06010/prov/se/1",
                        }:
                            self.assertIn(
                                "http://www.w3.org/ns/prov#invalidatedAtTime", entity
                            )
                        elif entity["@id"] == "https://w3id.org/oc/meta/ra/0607/prov/se/3":
                            self.assertEqual(
                                entity["http://purl.org/dc/terms/description"][0]["@value"],
                                "The entity 'https://w3id.org/oc/meta/ra/0607' has been merged with 'https://w3id.org/oc/meta/ra/06010'.",
                            )
                            self.assertEqual(
                                entity["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                                "INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/0607> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06011> . } }",
                            )
                        elif entity["@id"] == "https://w3id.org/oc/meta/ra/06010/prov/se/2":
                            update_query = (
                                entity["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"]
                                .replace(
                                    "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { ",
                                    "",
                                )
                                .replace(" . } }", "")
                                .replace("\n", "")
                                .split(" .")
                            )
                            self.assertEqual(
                                set(update_query),
                                {
                                    '<https://w3id.org/oc/meta/ra/06010> <http://xmlns.com/foaf/0.1/name> "Wiley"^^<http://www.w3.org/2001/XMLSchema#string>',
                                    f"<https://w3id.org/oc/meta/ra/06010> <http://purl.org/spar/datacite/hasIdentifier> <{id_0609.res}>",
                                    f"<https://w3id.org/oc/meta/ra/06010> <http://purl.org/spar/datacite/hasIdentifier> <{id_0605.res}>",
                                    "<https://w3id.org/oc/meta/ra/06010> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Agent>",
                                },
                            )

    def test_delete_entity_with_inferred_type(self):
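        """Strip the explicit rdf:type from br/0605, then delete it twice,
        verifying the provenance chain produced when the editor has to work
        without a declared type."""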
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")

        with SPARQLClient(SERVER, timeout=60) as client:
            # Remove the type from the entity
            delete_type_query = """
                DELETE {
                    GRAPH <https://w3id.org/oc/meta/br/> {
                        <https://w3id.org/oc/meta/br/0605> a <http://purl.org/spar/fabio/Expression> .
                    }
                }
                WHERE {
                    GRAPH <https://w3id.org/oc/meta/br/> {
                        <https://w3id.org/oc/meta/br/0605> a <http://purl.org/spar/fabio/Expression> .
                    }
                }
            """
            client.update(delete_type_query)

            # Ensure the entity exists before deletion
            select_query = """
                SELECT ?s WHERE {
                    GRAPH <https://w3id.org/oc/meta/br/> {
                        ?s <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date> .
                    }
                }
            """
            result = client.query(select_query)
            self.assertEqual(len(result["results"]["bindings"]), 1)

        # Perform the deletion
        editor.delete(URIRef("https://w3id.org/oc/meta/br/0605"))

        # Ensure the entity has been deleted
        with SPARQLClient(SERVER, timeout=60) as client:
            result = client.query(select_query)
            self.assertEqual(len(result["results"]["bindings"]), 0)

        # Verify the provenance information
        prov_path = os.path.join(
            OUTPUT, "rdf", "br", "060", "10000", "1000", "prov", "se.json"
        )
        with open(prov_path, "r", encoding="utf8") as f:
            prov_data = json.load(f)
        br_0605_prov_se_2 = None
        br_0605_prov_se_1 = None
        for graph in prov_data:
            for entity in graph["@graph"]:
                if entity["@id"] == "https://w3id.org/oc/meta/br/0605/prov/se/2":
                    br_0605_prov_se_2 = entity
                if entity["@id"] == "https://w3id.org/oc/meta/br/0605/prov/se/1":
                    br_0605_prov_se_1 = entity

        self.assertIsNotNone(br_0605_prov_se_2)
        self.assertEqual(
            br_0605_prov_se_2["http://purl.org/dc/terms/description"][0]["@value"],
            "The entity 'https://w3id.org/oc/meta/br/0605' has been deleted.",
        )
        self.assertEqual(
            br_0605_prov_se_2["@type"][0], "http://www.w3.org/ns/prov#Entity"
        )
        self.assertEqual(
            br_0605_prov_se_2["http://www.w3.org/ns/prov#specializationOf"][0]["@id"],
            "https://w3id.org/oc/meta/br/0605",
        )
        self.assertEqual(
            br_0605_prov_se_2["http://www.w3.org/ns/prov#wasAttributedTo"][0]["@id"],
            "https://orcid.org/0000-0002-8420-0696",
        )
        self.assertIn(
            "http://www.w3.org/ns/prov#invalidatedAtTime", br_0605_prov_se_2
        )
        self.assertIn(
            "http://www.w3.org/ns/prov#generatedAtTime", br_0605_prov_se_2
        )
        self.assertEqual(
            len(br_0605_prov_se_1["http://www.w3.org/ns/prov#generatedAtTime"]), 1
        )
        self.assertIn(
            "https://w3id.org/oc/ontology/hasUpdateQuery", br_0605_prov_se_2
        )
        update_query_value = br_0605_prov_se_2[
            "https://w3id.org/oc/ontology/hasUpdateQuery"
        ][0]["@value"]
        update_query_triples = (
            update_query_value.replace(
                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { ", ""
            )
            .replace(" } }", "")
            .strip()
        )
        actual_triples = set(
            triple.strip()
            for triple in update_query_triples.split(" .")
            if triple.strip()
        )
        expected_triples = {
            "<https://w3id.org/oc/meta/br/0605> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/0606>",
            '<https://w3id.org/oc/meta/br/0605> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date>',
        }
        self.assertEqual(actual_triples, expected_triples)

        self.assertIsNotNone(br_0605_prov_se_1)
        self.assertEqual(
            br_0605_prov_se_1["http://purl.org/dc/terms/description"][0]["@value"],
            "The entity 'https://w3id.org/oc/meta/br/0605' has been created.",
        )
        self.assertEqual(
            br_0605_prov_se_1["@type"][0], "http://www.w3.org/ns/prov#Entity"
        )
        self.assertEqual(
            br_0605_prov_se_1["http://www.w3.org/ns/prov#specializationOf"][0]["@id"],
            "https://w3id.org/oc/meta/br/0605",
        )
        self.assertEqual(
            br_0605_prov_se_1["http://www.w3.org/ns/prov#wasAttributedTo"][0]["@id"],
            "https://w3id.org/oc/meta/prov/pa/1",
        )
        self.assertIn(
            "http://www.w3.org/ns/prov#generatedAtTime", br_0605_prov_se_1
        )
        self.assertEqual(
            len(br_0605_prov_se_1["http://www.w3.org/ns/prov#generatedAtTime"]), 1
        )
        self.assertEqual(
            len(br_0605_prov_se_2["http://www.w3.org/ns/prov#invalidatedAtTime"]),
            1,
        )
        self.assertIn(
            "http://www.w3.org/ns/prov#hadPrimarySource", br_0605_prov_se_1
        )

        # Reinsert the publication date
        sparql_update_query = """
            INSERT DATA {
                GRAPH <https://w3id.org/oc/meta/br/> {
                    <https://w3id.org/oc/meta/br/0605> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date> .
                }
            }
        """
        with SPARQLClient(SERVER, timeout=60) as client:
            client.update(sparql_update_query)

        # Perform the deletion again
        editor.delete(URIRef("https://w3id.org/oc/meta/br/0605"))

        # Verify the provenance graph for the entity
        prov_path = os.path.join(
            OUTPUT, "rdf", "br", "060", "10000", "1000", "prov", "se.json"
        )
        with open(prov_path, "r", encoding="utf8") as f:
            prov_data = json.load(f)
        for graph in prov_data:
            for entity in graph["@graph"]:
                if "https://w3id.org/oc/meta/br/0605" in entity["@id"]:
                    if entity["@id"] == "https://w3id.org/oc/meta/br/0605/prov/se/1":
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#generatedAtTime"]),
                            1,
                        )
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#invalidatedAtTime"]),
                            1,
                        )
                    elif entity["@id"] == "https://w3id.org/oc/meta/br/0605/prov/se/2":
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#generatedAtTime"]),
                            1,
                        )
                        # self.assertEqual(len(entity['http://www.w3.org/ns/prov#invalidatedAtTime']), 2)
                    elif entity["@id"] == "https://w3id.org/oc/meta/br/0605/prov/se/3":
                        self.assertEqual(
                            entity["http://purl.org/dc/terms/description"][0]["@value"],
                            "The entity 'https://w3id.org/oc/meta/br/0605' has been deleted.",
                        )
                        self.assertIn(
                            "https://w3id.org/oc/ontology/hasUpdateQuery", entity
                        )
                        update_query_value = entity[
                            "https://w3id.org/oc/ontology/hasUpdateQuery"
                        ][0]["@value"]
                        update_query_triples = (
                            update_query_value.replace(
                                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { ",
                                "",
                            )
                            .replace(" } }", "")
                            .strip()
                        )
                        actual_triples = set(
                            triple.strip()
                            for triple in update_query_triples.split(" .")
                            if triple.strip()
                        )
                        expected_triples = {
                            '<https://w3id.org/oc/meta/br/0605> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date>'
                        }
                        self.assertEqual(actual_triples, expected_triples)
                        self.assertEqual(
                            entity["@type"][0], "http://www.w3.org/ns/prov#Entity"
                        )
                        self.assertEqual(
                            entity["http://www.w3.org/ns/prov#specializationOf"][0]["@id"],
                            "https://w3id.org/oc/meta/br/0605",
                        )
                        self.assertEqual(
                            entity["http://www.w3.org/ns/prov#wasAttributedTo"][0]["@id"],
                            "https://orcid.org/0000-0002-8420-0696",
                        )
                        self.assertIn(
                            "http://www.w3.org/ns/prov#invalidatedAtTime", entity
                        )
                        self.assertIn(
                            "http://www.w3.org/ns/prov#generatedAtTime", entity
                        )
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#generatedAtTime"]),
                            1,
                        )
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#invalidatedAtTime"]),
                            1,
                        )
                        self.assertEqual(
                            entity["http://www.w3.org/ns/prov#wasDerivedFrom"][0]["@id"],
                            "https://w3id.org/oc/meta/br/0605/prov/se/2",
                        )

    def test_no_rdf_files_generation(self):
        """When generate_rdf_files is False, updates must reach the
        triplestore but must not be written to the RDF files."""
        with open(META_CONFIG, encoding="utf-8") as file:
            settings = yaml.full_load(file)
        self.original_generate_rdf_files = settings.get("generate_rdf_files", True)

        settings["generate_rdf_files"] = False
        with open(META_CONFIG, "w", encoding="utf-8") as file:
            yaml.dump(settings, file)

        os.makedirs(os.path.join(OUTPUT, "rdf", "br", "060", "10000"), exist_ok=True)

        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")

        self.assertFalse(editor.generate_rdf_files, "generate_rdf_files should be False")

        g_set = GraphSet(base_iri="https://w3id.org/oc/meta/")
        br = g_set.add_br(
            res=URIRef("https://w3id.org/oc/meta/br/0603"),
            resp_agent="https://orcid.org/0000-0002-8420-0696",
        )
        br.has_title("Original Title")
        editor.save(g_set)

        editor.update_property(
            URIRef("https://w3id.org/oc/meta/br/0603"),
            "has_title",
            "New Test Title",
        )

        with SPARQLClient(SERVER, timeout=60) as client:
            debug_result = client.query("""
                SELECT ?p ?o
                WHERE {
                    GRAPH ?g {
                        <https://w3id.org/oc/meta/br/0603> ?p ?o .
                    }
                }
            """)

            title_found = False
            if debug_result["results"]["bindings"]:
                for binding in debug_result["results"]["bindings"]:
                    predicate = binding.get("p", {}).get("value")
                    obj = binding.get("o", {}).get("value")

                    # Check whether this is the title property with the expected value
                    if predicate == "http://purl.org/dc/terms/title" and obj == "New Test Title":
                        title_found = True
            else:
                print("No properties found for BR/0603")

            self.assertTrue(title_found, "Title update not found in triplestore")

        with SPARQLClient(PROV_SERVER, timeout=60) as client:
            prov_result = client.query("""
                ASK {
                    ?s <http://www.w3.org/ns/prov#specializationOf> <https://w3id.org/oc/meta/br/0603> .
                }
            """)
            self.assertTrue(prov_result["boolean"], "Provenance for BR/0603 not found in triplestore")

        target_file = os.path.join(OUTPUT, "rdf", "br", "060", "10000", "1000.json")
        if os.path.exists(target_file):
            with open(target_file, "r", encoding="utf-8") as file:
                try:
                    data = json.load(file)
                    contains_update = False
                    for graph in data:
                        for entity in graph.get("@graph", []):
                            if entity.get("@id") == "https://w3id.org/oc/meta/br/0603":
                                titles = entity.get("http://purl.org/dc/terms/title", [])
                                for title in titles:
                                    if title.get("@value") == "New Test Title":
                                        contains_update = True
                                        break
                    self.assertFalse(contains_update, "RDF file should not contain the update")
                except json.JSONDecodeError:
                    pass

    def test_merge_caches_entities(self):
        """Verify that entities are correctly cached across successive merges."""
        base_iri = "https://w3id.org/oc/meta/"
        resp_agent = "https://orcid.org/0000-0002-8420-0696"
        g_set = GraphSet(
            base_iri,
            supplier_prefix="060",
            wanted_label=False,
            custom_counter_handler=self.counter_handler,
        )
        endpoint = "http://127.0.0.1:8805/sparql"

        # Prepare the test entities
        ra = g_set.add_ra(
            resp_agent=resp_agent, res=URIRef("https://w3id.org/oc/meta/ra/06010")
        )
        ra.has_name("Wiley")

        reader = Reader()
        id_0605 = reader.import_entity_from_triplestore(
            g_set,
            endpoint,
            URIRef("https://w3id.org/oc/meta/id/0605"),
            resp_agent,
            enable_validation=False,
        )
        id_0609 = g_set.add_id(resp_agent=resp_agent)
        id_0609.create_crossref("313")

        ra.has_identifier(id_0605)
        ra.has_identifier(id_0609)

        # Generate provenance
        provset = ProvSet(
            g_set,
            base_iri,
            wanted_label=False,
            supplier_prefix="060",
            custom_counter_handler=self.counter_handler,
        )
        provset.generate_provenance()

        # Store and upload the data
        rdf_dir = os.path.join(OUTPUT, "rdf") + os.sep
        graph_storer = Storer(
            g_set, dir_split=10000, n_file_item=1000, zip_output=False
        )
        prov_storer = Storer(
            provset, dir_split=10000, n_file_item=1000, zip_output=False
        )

        graph_storer.store_all(rdf_dir, base_iri)
        prov_storer.store_all(rdf_dir, base_iri)
        graph_storer.upload_all(endpoint)
        g_set.commit_changes()

        # Run the cache test
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")

        # First merge
        editor.merge(
            g_set,
            URIRef("https://w3id.org/oc/meta/ra/0607"),
            URIRef("https://w3id.org/oc/meta/ra/06010"),
        )

        # Verify that the main entities are cached
        self.assertTrue(
            editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/ra/0607"))
        )
        self.assertTrue(
            editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/ra/06010"))
        )

        # Verify that the related entities are cached
        self.assertTrue(editor.entity_cache.is_cached(id_0609.res))
        self.assertTrue(editor.entity_cache.is_cached(id_0605.res))


class TestEntityCache(unittest.TestCase):
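    """Unit tests for the EntityCache add/is_cached/clear interface."""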
    def setUp(self):
        self.cache = EntityCache()
        self.entity = URIRef("https://w3id.org/oc/meta/ra/0607")

    def test_add_and_is_cached(self):
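        """An entity is reported as cached only after it has been added."""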
        self.assertFalse(self.cache.is_cached(self.entity))
        self.cache.add(self.entity)
        self.assertTrue(self.cache.is_cached(self.entity))

    def test_clear(self):
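        """clear() removes previously added entities from the cache."""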
        self.cache.add(self.entity)
        self.cache.clear()
        self.assertFalse(self.cache.is_cached(self.entity))


if __name__ == "__main__":  # pragma: no cover
    unittest.main()