Coverage for test/editor_test.py: 97%
379 statements
« prev ^ index » next — coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2022 Arcangelo Massari <arcangelo.massari@unibo.it>
#
# Permission to use, copy, modify, and/or distribute this software for any purpose
# with or without fee is hereby granted, provided that the above copyright notice
# and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.
17import json
18import os
19import unittest
20from shutil import rmtree
21from test.test_utils import (PROV_SERVER, REDIS_CACHE_DB, REDIS_DB, REDIS_HOST,
22 REDIS_PORT, SERVER, reset_redis_counters,
23 reset_server)
25import yaml
26from oc_meta.plugins.editor import EntityCache, MetaEditor
27from oc_meta.run.meta_process import run_meta_process
28from oc_ocdm import Storer
29from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler
30from oc_ocdm.graph import GraphSet
31from oc_ocdm.prov import ProvSet
32from oc_ocdm.reader import Reader
33from rdflib import URIRef
34from SPARQLWrapper import JSON, POST, SPARQLWrapper
# Root directory holding the fixtures for these editor tests.
BASE = os.path.join("test", "editor")
# Directory where run_meta_process writes its RDF output; wiped per test.
OUTPUT = os.path.join(BASE, "output")
# YAML configuration consumed by MetaEditor and run_meta_process.
META_CONFIG = os.path.join(BASE, "meta_config.yaml")
def get_counter_handler():
    """Return a RedisCounterHandler wired to the test Redis instance."""
    connection_params = {"host": REDIS_HOST, "port": REDIS_PORT, "db": REDIS_DB}
    return RedisCounterHandler(**connection_params)
class TestEditor(unittest.TestCase):
    """Integration tests for MetaEditor against live triplestores and Redis.

    setUp seeds the data/provenance stores via run_meta_process; each test
    then exercises one editor operation (update, delete, merge) and checks
    the triplestore plus the JSON-LD files written under OUTPUT.
    """

    @classmethod
    def setUpClass(cls):
        # Redis-backed counter handler shared by all tests in the class.
        cls.counter_handler = get_counter_handler()
        # Sentinel used by tearDown to restore META_CONFIG after tests that
        # temporarily flip the generate_rdf_files setting.
        cls.original_generate_rdf_files = None
    def setUp(self):
        """Reset external state and seed the stores for the next test.

        Clears the triplestore and the Redis counters, recreates the
        temporary working directory, then runs the Meta process with the
        test configuration so every test starts from the same data.
        """
        reset_server()
        reset_redis_counters()
        if os.path.exists(OUTPUT):
            rmtree(OUTPUT)

        # Create temporary directory for cache files
        self.temp_dir = os.path.join("test", "temp_editor_test")
        if os.path.exists(self.temp_dir):
            rmtree(self.temp_dir)
        os.makedirs(self.temp_dir)

        # Setup cache files
        self.cache_file = os.path.join(self.temp_dir, "ts_upload_cache.json")
        self.failed_file = os.path.join(self.temp_dir, "failed_queries.txt")
        self.stop_file = os.path.join(self.temp_dir, ".stop_upload")

        # Create separate directories for data and provenance update queries
        self.data_update_dir = os.path.join(self.temp_dir, "to_be_uploaded_data")
        self.prov_update_dir = os.path.join(self.temp_dir, "to_be_uploaded_prov")
        os.makedirs(self.data_update_dir, exist_ok=True)
        os.makedirs(self.prov_update_dir, exist_ok=True)

        with open(META_CONFIG, encoding="utf-8") as file:
            settings = yaml.full_load(file)
        # Update settings to use Redis and cache files; note the overrides
        # live only in memory — META_CONFIG on disk is not rewritten here.
        settings.update(
            {
                "redis_host": REDIS_HOST,
                "redis_port": REDIS_PORT,
                "redis_db": REDIS_DB,
                "redis_cache_db": REDIS_CACHE_DB,
                "ts_upload_cache": self.cache_file,
                "ts_failed_queries": self.failed_file,
                "ts_stop_file": self.stop_file,
                "triplestore_url": SERVER,
                "provenance_triplestore_url": PROV_SERVER,
                "data_update_dir": self.data_update_dir,
                "prov_update_dir": self.prov_update_dir
            }
        )
        run_meta_process(settings=settings, meta_config_path=META_CONFIG)
    def tearDown(self):
        """Remove test artefacts and restore any mutated configuration."""
        if os.path.exists(OUTPUT):
            rmtree(OUTPUT)
        if os.path.exists(self.temp_dir):
            rmtree(self.temp_dir)
        reset_redis_counters()

        # If a test changed generate_rdf_files in META_CONFIG on disk,
        # write the original value back so later tests see the pristine file.
        if self.original_generate_rdf_files is not None:
            with open(META_CONFIG, encoding="utf-8") as file:
                settings = yaml.full_load(file)
            settings["generate_rdf_files"] = self.original_generate_rdf_files
            with open(META_CONFIG, "w", encoding="utf-8") as file:
                yaml.dump(settings, file)
            self.original_generate_rdf_files = None
    def test_update_property(self):
        """Rewire the agent-role has_next chain and verify the change everywhere.

        The chain is reordered to 06101 → 06104 → 06103 → 06102 → 06105; the
        update must be visible in the data triplestore, in the provenance
        triplestore, and in the JSON-LD files under OUTPUT (both the data
        file and the prov snapshot carrying the exact DELETE/INSERT queries).
        """
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/06101"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/06104"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/06104"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/06103"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/06103"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/06102"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/06102"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/06105"),
        )

        # Each link of the new chain must exist in the data triplestore.
        sparql = SPARQLWrapper(SERVER)

        sparql.setQuery("""
            ASK {
                GRAPH <https://w3id.org/oc/meta/ar/> {
                    <https://w3id.org/oc/meta/ar/06101> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06104> .
                }
            }
        """)
        sparql.setReturnFormat(JSON)
        result = sparql.queryAndConvert()
        self.assertTrue(result["boolean"], "AR/06101 → AR/06104 relationship not found in triplestore")

        sparql.setQuery("""
            ASK {
                GRAPH <https://w3id.org/oc/meta/ar/> {
                    <https://w3id.org/oc/meta/ar/06104> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06103> .
                }
            }
        """)
        result = sparql.queryAndConvert()
        self.assertTrue(result["boolean"], "AR/06104 → AR/06103 relationship not found in triplestore")

        sparql.setQuery("""
            ASK {
                GRAPH <https://w3id.org/oc/meta/ar/> {
                    <https://w3id.org/oc/meta/ar/06103> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06102> .
                }
            }
        """)
        result = sparql.queryAndConvert()
        self.assertTrue(result["boolean"], "AR/06103 → AR/06102 relationship not found in triplestore")

        sparql.setQuery("""
            ASK {
                GRAPH <https://w3id.org/oc/meta/ar/> {
                    <https://w3id.org/oc/meta/ar/06102> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06105> .
                }
            }
        """)
        result = sparql.queryAndConvert()
        self.assertTrue(result["boolean"], "AR/06102 → AR/06105 relationship not found in triplestore")

        # A provenance snapshot must have been recorded for the edited entity.
        prov_sparql = SPARQLWrapper(PROV_SERVER)
        prov_sparql.setQuery("""
            ASK {
                ?s <http://www.w3.org/ns/prov#specializationOf> <https://w3id.org/oc/meta/ar/06101> ;
                   <http://www.w3.org/ns/prov#generatedAtTime> ?time .
            }
        """)
        prov_sparql.setReturnFormat(JSON)
        prov_result = prov_sparql.queryAndConvert()
        self.assertTrue(prov_result["boolean"], "Provenance for AR/06101 not found in triplestore")

        # The JSON-LD data file must reflect the new chain as well.
        with open(
            os.path.join(OUTPUT, "rdf", "ar", "0610", "10000", "1000.json"),
            "r",
            encoding="utf-8",
        ) as file:
            ar_data = json.load(file)
            for graph in ar_data:
                graph_data = graph["@graph"]
                for ar in graph_data:
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/06101":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/06104",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/06103":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/06102",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/06104":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/06103",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/06102":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/06105",
                        )
        # The prov/se.json snapshots must carry the exact update queries.
        with open(
            os.path.join(
                OUTPUT, "rdf", "ar", "0610", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            ar_prov = json.load(f)
            for graph in ar_prov:
                graph_prov = graph["@graph"]
                for ar in graph_prov:
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/06101/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0][
                                "@value"
                            ],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06101> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06102> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06101> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06104> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/06103/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0][
                                "@value"
                            ],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06103> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06104> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06103> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06102> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/06104/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0][
                                "@value"
                            ],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06104> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06105> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06104> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06103> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/06102/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0][
                                "@value"
                            ],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06102> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06103> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06102> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06105> . } }",
                        )
    def test_delete_property(self):
        """Delete a single property (the title of br/06101).

        The title must disappear from the JSON-LD data file, and the prov
        snapshot must record the exact DELETE DATA query that removed it.
        """
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.delete(URIRef("https://w3id.org/oc/meta/br/06101"), "has_title")
        with open(
            os.path.join(OUTPUT, "rdf", "br", "0610", "10000", "1000.json"),
            "r",
            encoding="utf8",
        ) as f:
            br_data = json.load(f)
            for graph in br_data:
                graph_data = graph["@graph"]
                for br in graph_data:
                    if br["@id"] == "https://w3id.org/oc/meta/br/06101":
                        self.assertFalse("http://purl.org/dc/terms/title" in br)
        with open(
            os.path.join(
                OUTPUT, "rdf", "br", "0610", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            br_prov = json.load(f)
            for graph in br_prov:
                graph_prov = graph["@graph"]
                for br in graph_prov:
                    if br["@id"] == "https://w3id.org/oc/meta/br/06101/prov/se/2":
                        self.assertEqual(
                            br["https://w3id.org/oc/ontology/hasUpdateQuery"][0][
                                "@value"
                            ],
                            'DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { <https://w3id.org/oc/meta/br/06101> <http://purl.org/dc/terms/title> "A Review Of Hemolytic Uremic Syndrome In Patients Treated With Gemcitabine Therapy"^^<http://www.w3.org/2001/XMLSchema#string> . } }',
                        )
    def test_delete_entity(self):
        """Delete id/06101 entirely and verify the cascading effects.

        The identifier must vanish from the data file, its prov snapshot must
        record every deleted triple, and the referring br/06101 must get a
        snapshot recording the removal of its hasIdentifier link.
        """
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.delete(URIRef("https://w3id.org/oc/meta/id/06101"))
        with open(
            os.path.join(OUTPUT, "rdf", "id", "0610", "10000", "1000.json"),
            "r",
            encoding="utf8",
        ) as f:
            id_data = json.load(f)
            for graph in id_data:
                graph_data = graph["@graph"]
                for identifier in graph_data:
                    # The deleted entity must no longer appear in the data file.
                    if identifier["@id"] == "https://w3id.org/oc/meta/id/06101":
                        self.fail()
        with open(
            os.path.join(
                OUTPUT, "rdf", "id", "0610", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            id_prov = json.load(f)
            for graph in id_prov:
                graph_prov = graph["@graph"]
                for identifier in graph_prov:
                    if (
                        identifier["@id"]
                        == "https://w3id.org/oc/meta/id/06101/prov/se/2"
                    ):
                        # Strip the DELETE DATA wrapper and compare the triples
                        # as a set, since their serialization order is not fixed.
                        update_query = (
                            identifier["https://w3id.org/oc/ontology/hasUpdateQuery"][
                                0
                            ]["@value"]
                            .replace(
                                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/id/> { ",
                                "",
                            )
                            .replace(" . } }", "")
                            .replace("\n", "")
                            .split(" .")
                        )
                        self.assertEqual(
                            set(update_query),
                            {
                                "<https://w3id.org/oc/meta/id/06101> <http://purl.org/spar/datacite/usesIdentifierScheme> <http://purl.org/spar/datacite/doi>",
                                "<https://w3id.org/oc/meta/id/06101> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/datacite/Identifier>",
                                '<https://w3id.org/oc/meta/id/06101> <http://www.essepuntato.it/2010/06/literalreification/hasLiteralValue> "10.1002/(sici)1097-0142(19990501)85:9<2023::aid-cncr21>3.0.co;2-2"^^<http://www.w3.org/2001/XMLSchema#string>',
                            },
                        )
        # NOTE(review): despite the local name ra_prov, this is the br
        # provenance file — it checks the referring bibliographic resource.
        with open(
            os.path.join(
                OUTPUT, "rdf", "br", "0610", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            ra_prov = json.load(f)
            for graph in ra_prov:
                graph_prov = graph["@graph"]
                for ra in graph_prov:
                    if ra["@id"] == "https://w3id.org/oc/meta/br/06101/prov/se/2":
                        self.assertEqual(
                            ra["https://w3id.org/oc/ontology/hasUpdateQuery"][0][
                                "@value"
                            ],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { <https://w3id.org/oc/meta/br/06101> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06101> . } }",
                        )
    def test_merge(self):
        """Merge ra/06205 into ra/06107 and verify counters, data and provenance.

        Builds a new responsible agent (0620 prefix) with two identifiers,
        uploads it, merges it into the pre-existing ra/06107, then checks the
        Redis provenance counters and the JSON-LD output files.
        """
        base_iri = "https://w3id.org/oc/meta/"
        resp_agent = "https://orcid.org/0000-0002-8420-0696"
        g_set = GraphSet(
            base_iri,
            supplier_prefix="0620",
            wanted_label=False,
            custom_counter_handler=self.counter_handler,
        )
        # NOTE(review): hard-coded endpoint — presumably this must equal the
        # SERVER constant (setUp writes SERVER as triplestore_url); confirm
        # and consider reusing the constant instead.
        endpoint = "http://127.0.0.1:8805/sparql"

        # Create entities testing
        ra = g_set.add_ra(
            resp_agent=resp_agent, res=URIRef("https://w3id.org/oc/meta/ra/06205")
        )
        ra.has_name("Wiley")

        reader = Reader()
        id_06105 = reader.import_entity_from_triplestore(
            g_set,
            endpoint,
            URIRef("https://w3id.org/oc/meta/id/06105"),
            resp_agent,
            enable_validation=False,
        )
        id_06203 = g_set.add_id(resp_agent=resp_agent)
        id_06203.create_crossref("313")

        ra.has_identifier(id_06105)
        ra.has_identifier(id_06203)

        # Generate provenance
        provset = ProvSet(
            g_set,
            base_iri,
            wanted_label=False,
            supplier_prefix="0620",
            custom_counter_handler=self.counter_handler,
        )
        provset.generate_provenance()

        # Store and upload data
        rdf_dir = os.path.join(OUTPUT, "rdf") + os.sep
        graph_storer = Storer(
            g_set, dir_split=10000, n_file_item=1000, zip_output=False
        )
        prov_storer = Storer(
            provset, dir_split=10000, n_file_item=1000, zip_output=False
        )

        graph_storer.store_all(rdf_dir, base_iri)
        prov_storer.store_all(rdf_dir, base_iri)
        graph_storer.upload_all(endpoint)
        g_set.commit_changes()

        # Perform merge
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.merge(
            g_set,
            URIRef("https://w3id.org/oc/meta/ra/06107"),
            URIRef("https://w3id.org/oc/meta/ra/06205"),
        )
        editor.save(g_set)

        # Check Redis counters: the 0610-prefixed agents each keep a single
        # snapshot, except the merge target ra/06107 (identifier 7) which
        # gains a second one.
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=1, supplier_prefix="0610"
            ),
            1,
        )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=2, supplier_prefix="0610"
            ),
            1,
        )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=3, supplier_prefix="0610"
            ),
            1,
        )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=4, supplier_prefix="0610"
            ),
            1,
        )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=5, supplier_prefix="0610"
            ),
            1,
        )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=6, supplier_prefix="0610"
            ),
            1,
        )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=7, supplier_prefix="0610"
            ),
            2,
        )

        # For the 0620 prefix only the merged-away ra/06205 (identifier 5)
        # has snapshots: creation plus the merge deprecation.
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=1, supplier_prefix="0620"
            ),
            0,
        )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=2, supplier_prefix="0620"
            ),
            0,
        )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=3, supplier_prefix="0620"
            ),
            0,
        )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=4, supplier_prefix="0620"
            ),
            0,
        )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=5, supplier_prefix="0620"
            ),
            2,
        )

        # Verify merged data
        for filepath in [
            os.path.join(OUTPUT, "rdf", "ra", "0610", "10000", "1000.json"),
            # os.path.join(OUTPUT, 'rdf', 'ar', '0620', '10000', '1000.json'),
            os.path.join(
                OUTPUT, "rdf", "ra", "0620", "10000", "1000", "prov", "se.json"
            ),
            os.path.join(
                OUTPUT, "rdf", "ra", "0610", "10000", "1000", "prov", "se.json"
            ),
        ]:
            with open(filepath, "r", encoding="utf8") as f:
                data = json.load(f)
                for graph in data:
                    graph_data = graph["@graph"]
                    for entity in graph_data:
                        if entity["@id"] == "https://w3id.org/oc/meta/ra/06107":
                            # The surviving agent must own both identifiers.
                            identifiers = {
                                identifier["@id"]
                                for identifier in entity[
                                    "http://purl.org/spar/datacite/hasIdentifier"
                                ]
                            }
                            self.assertEqual(
                                identifiers,
                                {
                                    "https://w3id.org/oc/meta/id/06105",
                                    "https://w3id.org/oc/meta/id/06201",
                                },
                            )
                        elif entity["@id"] == "https://w3id.org/oc/meta/ra/06205":
                            # The merged-away agent must not survive in the data.
                            self.fail()
                        # elif entity['@id'] == 'https://w3id.org/oc/meta/ar/06205':
                        #     self.assertEqual(entity['http://purl.org/spar/pro/isHeldBy'][0]['@id'], 'https://w3id.org/oc/meta/ra/06107')
                        elif entity["@id"] in {
                            "https://w3id.org/oc/meta/ra/06107/prov/se/1",
                            "https://w3id.org/oc/meta/ra/06205/prov/se/1",
                        }:
                            self.assertTrue(
                                "http://www.w3.org/ns/prov#invalidatedAtTime" in entity
                            )
                        elif (
                            entity["@id"]
                            == "https://w3id.org/oc/meta/ra/06107/prov/se/3"
                        ):
                            self.assertEqual(
                                entity["http://purl.org/dc/terms/description"][0][
                                    "@value"
                                ],
                                "The entity 'https://w3id.org/oc/meta/ra/06107' has been merged with 'https://w3id.org/oc/meta/ra/06205'.",
                            )
                            self.assertEqual(
                                entity["https://w3id.org/oc/ontology/hasUpdateQuery"][
                                    0
                                ]["@value"],
                                "INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06107> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06206> . } }",
                            )
                        elif (
                            entity["@id"]
                            == "https://w3id.org/oc/meta/ra/06205/prov/se/2"
                        ):
                            # Compare the deleted triples as a set; order in the
                            # serialized query is not deterministic.
                            update_query = (
                                entity["https://w3id.org/oc/ontology/hasUpdateQuery"][
                                    0
                                ]["@value"]
                                .replace(
                                    "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { ",
                                    "",
                                )
                                .replace(" . } }", "")
                                .replace("\n", "")
                                .split(" .")
                            )
                            self.assertEqual(
                                set(update_query),
                                {
                                    '<https://w3id.org/oc/meta/ra/06205> <http://xmlns.com/foaf/0.1/name> "Wiley"^^<http://www.w3.org/2001/XMLSchema#string>',
                                    "<https://w3id.org/oc/meta/ra/06205> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06201>",
                                    "<https://w3id.org/oc/meta/ra/06205> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06105>",
                                    "<https://w3id.org/oc/meta/ra/06205> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Agent>",
                                },
                            )
    def test_delete_entity_with_inferred_type(self):
        """Delete br/06105 after removing its explicit rdf:type.

        Verifies the editor can delete an entity whose type must be inferred,
        that the provenance snapshots (se/1 creation, se/2 deletion) are
        complete, and that a second deletion after re-inserting data yields a
        correct se/3 snapshot.
        """
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        endpoint = SPARQLWrapper(SERVER)

        # Remove the type from the entity
        delete_type_query = """
        DELETE {
            GRAPH <https://w3id.org/oc/meta/br/> {
                <https://w3id.org/oc/meta/br/06105> a <http://purl.org/spar/fabio/Expression> .
            }
        }
        WHERE {
            GRAPH <https://w3id.org/oc/meta/br/> {
                <https://w3id.org/oc/meta/br/06105> a <http://purl.org/spar/fabio/Expression> .
            }
        }
        """
        endpoint.setQuery(delete_type_query)
        endpoint.setMethod(POST)
        endpoint.query()

        # Ensure the entity exists before deletion
        select_query = """
        SELECT ?s WHERE {
            GRAPH <https://w3id.org/oc/meta/br/> {
                ?s <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date> .
            }
        }
        """
        endpoint.setQuery(select_query)
        endpoint.setReturnFormat("json")
        result = endpoint.query().convert()
        self.assertEqual(len(result["results"]["bindings"]), 1)

        # Perform deletion
        editor.delete(URIRef("https://w3id.org/oc/meta/br/06105"))

        # Ensure the entity is deleted (the wrapper still holds select_query,
        # so this re-runs the same SELECT).
        result = endpoint.query().convert()
        self.assertEqual(len(result["results"]["bindings"]), 0)

        # Verify provenance information
        prov_path = os.path.join(
            OUTPUT, "rdf", "br", "0610", "10000", "1000", "prov", "se.json"
        )
        with open(prov_path, "r", encoding="utf8") as f:
            prov_data = json.load(f)
            br_06105_prov_se_2 = None
            br_06105_prov_se_1 = None
            for graph in prov_data:
                for entity in graph["@graph"]:
                    if entity["@id"] == "https://w3id.org/oc/meta/br/06105/prov/se/2":
                        br_06105_prov_se_2 = entity
                    if entity["@id"] == "https://w3id.org/oc/meta/br/06105/prov/se/1":
                        br_06105_prov_se_1 = entity

        # se/2 is the deletion snapshot: attributed to the editor's agent,
        # carrying both timestamps and the full DELETE DATA query.
        self.assertIsNotNone(br_06105_prov_se_2)
        self.assertEqual(
            br_06105_prov_se_2["http://purl.org/dc/terms/description"][0]["@value"],
            "The entity 'https://w3id.org/oc/meta/br/06105' has been deleted.",
        )
        self.assertEqual(
            br_06105_prov_se_2["@type"][0], "http://www.w3.org/ns/prov#Entity"
        )
        self.assertEqual(
            br_06105_prov_se_2["http://www.w3.org/ns/prov#specializationOf"][0][
                "@id"
            ],
            "https://w3id.org/oc/meta/br/06105",
        )
        self.assertEqual(
            br_06105_prov_se_2["http://www.w3.org/ns/prov#wasAttributedTo"][0][
                "@id"
            ],
            "https://orcid.org/0000-0002-8420-0696",
        )
        self.assertIn(
            "http://www.w3.org/ns/prov#invalidatedAtTime", br_06105_prov_se_2
        )
        self.assertIn(
            "http://www.w3.org/ns/prov#generatedAtTime", br_06105_prov_se_2
        )
        self.assertEqual(
            len(br_06105_prov_se_1["http://www.w3.org/ns/prov#generatedAtTime"]), 1
        )
        self.assertIn(
            "https://w3id.org/oc/ontology/hasUpdateQuery", br_06105_prov_se_2
        )
        update_query_value = br_06105_prov_se_2[
            "https://w3id.org/oc/ontology/hasUpdateQuery"
        ][0]["@value"]
        # Strip the DELETE DATA wrapper and compare triples as a set.
        update_query_triples = (
            update_query_value.replace(
                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { ", ""
            )
            .replace(" } }", "")
            .strip()
        )
        actual_triples = set(
            triple.strip()
            for triple in update_query_triples.split(" .")
            if triple.strip()
        )
        expected_triples = {
            "<https://w3id.org/oc/meta/br/06105> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06106>",
            '<https://w3id.org/oc/meta/br/06105> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date>',
        }
        self.assertEqual(actual_triples, expected_triples)

        # se/1 is the original creation snapshot, untouched by the deletion.
        self.assertIsNotNone(br_06105_prov_se_1)
        self.assertEqual(
            br_06105_prov_se_1["http://purl.org/dc/terms/description"][0]["@value"],
            "The entity 'https://w3id.org/oc/meta/br/06105' has been created.",
        )
        self.assertEqual(
            br_06105_prov_se_1["@type"][0], "http://www.w3.org/ns/prov#Entity"
        )
        self.assertEqual(
            br_06105_prov_se_1["http://www.w3.org/ns/prov#specializationOf"][0][
                "@id"
            ],
            "https://w3id.org/oc/meta/br/06105",
        )
        self.assertEqual(
            br_06105_prov_se_1["http://www.w3.org/ns/prov#wasAttributedTo"][0][
                "@id"
            ],
            "https://w3id.org/oc/meta/prov/pa/1",
        )
        self.assertIn(
            "http://www.w3.org/ns/prov#generatedAtTime", br_06105_prov_se_1
        )
        self.assertEqual(
            len(br_06105_prov_se_1["http://www.w3.org/ns/prov#generatedAtTime"]), 1
        )
        self.assertEqual(
            len(br_06105_prov_se_2["http://www.w3.org/ns/prov#invalidatedAtTime"]),
            1,
        )
        self.assertIn(
            "http://www.w3.org/ns/prov#hadPrimarySource", br_06105_prov_se_1
        )

        # Reinsert the publication date
        sparql_update_query = f"""
        INSERT DATA {{
            GRAPH <https://w3id.org/oc/meta/br/> {{
                <https://w3id.org/oc/meta/br/06105> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date> .
            }}
        }}
        """
        endpoint = SPARQLWrapper(SERVER)
        endpoint.setQuery(sparql_update_query)
        endpoint.setMethod(POST)
        endpoint.query()

        # Perform deletion again
        editor.delete(URIRef("https://w3id.org/oc/meta/br/06105"))

        # Verify the provenance graph for the entity after the second deletion
        prov_path = os.path.join(
            OUTPUT, "rdf", "br", "0610", "10000", "1000", "prov", "se.json"
        )
        with open(prov_path, "r", encoding="utf8") as f:
            prov_data = json.load(f)
            for graph in prov_data:
                for entity in graph["@graph"]:
                    if "https://w3id.org/oc/meta/br/06105" in entity["@id"]:
                        if (
                            entity["@id"]
                            == "https://w3id.org/oc/meta/br/06105/prov/se/1"
                        ):
                            self.assertEqual(
                                len(
                                    entity["http://www.w3.org/ns/prov#generatedAtTime"]
                                ),
                                1,
                            )
                            self.assertEqual(
                                len(
                                    entity[
                                        "http://www.w3.org/ns/prov#invalidatedAtTime"
                                    ]
                                ),
                                1,
                            )
                        elif (
                            entity["@id"]
                            == "https://w3id.org/oc/meta/br/06105/prov/se/2"
                        ):
                            self.assertEqual(
                                len(
                                    entity["http://www.w3.org/ns/prov#generatedAtTime"]
                                ),
                                1,
                            )
                            # self.assertEqual(len(entity['http://www.w3.org/ns/prov#invalidatedAtTime']), 2)
                        elif (
                            entity["@id"]
                            == "https://w3id.org/oc/meta/br/06105/prov/se/3"
                        ):
                            # se/3 records the second deletion: only the
                            # re-inserted publication date was left to remove.
                            self.assertEqual(
                                entity["http://purl.org/dc/terms/description"][0][
                                    "@value"
                                ],
                                "The entity 'https://w3id.org/oc/meta/br/06105' has been deleted.",
                            )
                            self.assertIn(
                                "https://w3id.org/oc/ontology/hasUpdateQuery", entity
                            )
                            update_query_value = entity[
                                "https://w3id.org/oc/ontology/hasUpdateQuery"
                            ][0]["@value"]
                            update_query_triples = (
                                update_query_value.replace(
                                    "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { ",
                                    "",
                                )
                                .replace(" } }", "")
                                .strip()
                            )
                            actual_triples = set(
                                triple.strip()
                                for triple in update_query_triples.split(" .")
                                if triple.strip()
                            )
                            expected_triples = {
                                '<https://w3id.org/oc/meta/br/06105> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date>'
                            }
                            self.assertEqual(actual_triples, expected_triples)
                            self.assertEqual(
                                entity["@type"][0], "http://www.w3.org/ns/prov#Entity"
                            )
                            self.assertEqual(
                                entity["http://www.w3.org/ns/prov#specializationOf"][0][
                                    "@id"
                                ],
                                "https://w3id.org/oc/meta/br/06105",
                            )
                            self.assertEqual(
                                entity["http://www.w3.org/ns/prov#wasAttributedTo"][0][
                                    "@id"
                                ],
                                "https://orcid.org/0000-0002-8420-0696",
                            )
                            self.assertIn(
                                "http://www.w3.org/ns/prov#invalidatedAtTime", entity
                            )
                            self.assertIn(
                                "http://www.w3.org/ns/prov#generatedAtTime", entity
                            )
                            self.assertEqual(
                                len(
                                    entity["http://www.w3.org/ns/prov#generatedAtTime"]
                                ),
                                1,
                            )
                            self.assertEqual(
                                len(
                                    entity[
                                        "http://www.w3.org/ns/prov#invalidatedAtTime"
                                    ]
                                ),
                                1,
                            )
                            self.assertEqual(
                                entity["http://www.w3.org/ns/prov#wasDerivedFrom"][0][
                                    "@id"
                                ],
                                "https://w3id.org/oc/meta/br/06105/prov/se/2",
                            )
    def test_no_rdf_files_generation(self):
        """Test that when generate_rdf_files is False, data is still updated in triplestore but not in files"""
        # Flip the flag on disk and remember the original so tearDown can
        # restore META_CONFIG after this test.
        with open(META_CONFIG, encoding="utf-8") as file:
            settings = yaml.full_load(file)
            self.original_generate_rdf_files = settings.get("generate_rdf_files", True)

        settings["generate_rdf_files"] = False
        with open(META_CONFIG, "w", encoding="utf-8") as file:
            yaml.dump(settings, file)

        os.makedirs(os.path.join(OUTPUT, "rdf", "br", "0610", "10000"), exist_ok=True)

        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")

        self.assertFalse(editor.generate_rdf_files, "generate_rdf_files should be False")

        # Create the target entity, then update its title via the editor.
        g_set = GraphSet(base_iri="https://w3id.org/oc/meta/")
        br = g_set.add_br(res=URIRef("https://w3id.org/oc/meta/br/06103"), resp_agent="https://orcid.org/0000-0002-8420-0696")
        br.has_title("Original Title")
        editor.save(g_set)

        editor.update_property(
            URIRef("https://w3id.org/oc/meta/br/06103"),
            "has_title",
            "New Test Title",
        )

        # The update must be visible in the triplestore even with file
        # generation disabled.
        sparql = SPARQLWrapper(SERVER)
        sparql.setQuery("""
            SELECT ?p ?o
            WHERE {
                GRAPH ?g {
                    <https://w3id.org/oc/meta/br/06103> ?p ?o .
                }
            }
        """)
        sparql.setReturnFormat(JSON)
        debug_result = sparql.queryAndConvert()

        title_found = False
        if debug_result["results"]["bindings"]:
            for binding in debug_result["results"]["bindings"]:
                predicate = binding.get('p', {}).get('value')
                obj = binding.get('o', {}).get('value')

                # Check if this is our title property with the expected value
                if predicate == "http://purl.org/dc/terms/title" and obj == "New Test Title":
                    title_found = True
        else:
            print("No properties found for BR/06103")

        self.assertTrue(title_found, "Title update not found in triplestore")

        # Provenance must still be written to the provenance triplestore.
        prov_sparql = SPARQLWrapper(PROV_SERVER)
        prov_sparql.setQuery("""
            ASK {
                ?s <http://www.w3.org/ns/prov#specializationOf> <https://w3id.org/oc/meta/br/06103> .
            }
        """)
        prov_sparql.setReturnFormat(JSON)
        prov_result = prov_sparql.queryAndConvert()
        self.assertTrue(prov_result["boolean"], "Provenance for BR/06103 not found in triplestore")

        # If the data file exists at all, it must NOT contain the update.
        target_file = os.path.join(OUTPUT, "rdf", "br", "0610", "10000", "1000.json")
        if os.path.exists(target_file):
            with open(target_file, "r", encoding="utf-8") as file:
                try:
                    data = json.load(file)
                    contains_update = False
                    for graph in data:
                        for entity in graph.get("@graph", []):
                            if entity.get("@id") == "https://w3id.org/oc/meta/br/06103":
                                titles = entity.get("http://purl.org/dc/terms/title", [])
                                for title in titles:
                                    if title.get("@value") == "New Test Title":
                                        contains_update = True
                                        break
                    self.assertFalse(contains_update, "RDF file should not contain the update")
                except json.JSONDecodeError:
                    # An unparsable leftover file is acceptable here: it proves
                    # no fresh JSON-LD was written for the update.
                    pass
931 def test_merge_caches_entities(self):
932 """Verifica che le entità vengano correttamente cachate durante merge successivi"""
933 base_iri = "https://w3id.org/oc/meta/"
934 resp_agent = "https://orcid.org/0000-0002-8420-0696"
935 g_set = GraphSet(
936 base_iri,
937 supplier_prefix="0620",
938 wanted_label=False,
939 custom_counter_handler=self.counter_handler,
940 )
941 endpoint = "http://127.0.0.1:8805/sparql"
943 # Prepara le entità di test
944 ra = g_set.add_ra(
945 resp_agent=resp_agent, res=URIRef("https://w3id.org/oc/meta/ra/06205")
946 )
947 ra.has_name("Wiley")
949 reader = Reader()
950 id_06105 = reader.import_entity_from_triplestore(
951 g_set,
952 endpoint,
953 URIRef("https://w3id.org/oc/meta/id/06105"),
954 resp_agent,
955 enable_validation=False,
956 )
957 id_06203 = g_set.add_id(resp_agent=resp_agent)
958 id_06203.create_crossref("313")
960 ra.has_identifier(id_06105)
961 ra.has_identifier(id_06203)
963 # Genera provenance
964 provset = ProvSet(
965 g_set,
966 base_iri,
967 wanted_label=False,
968 supplier_prefix="0620",
969 custom_counter_handler=self.counter_handler,
970 )
971 provset.generate_provenance()
973 # Salva e carica i dati
974 rdf_dir = os.path.join(OUTPUT, "rdf") + os.sep
975 graph_storer = Storer(
976 g_set, dir_split=10000, n_file_item=1000, zip_output=False
977 )
978 prov_storer = Storer(
979 provset, dir_split=10000, n_file_item=1000, zip_output=False
980 )
982 graph_storer.store_all(rdf_dir, base_iri)
983 prov_storer.store_all(rdf_dir, base_iri)
984 graph_storer.upload_all(endpoint)
985 g_set.commit_changes()
987 # Esegui il test della cache
988 editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
990 # Prima fusione
991 editor.merge(
992 g_set,
993 URIRef("https://w3id.org/oc/meta/ra/06107"),
994 URIRef("https://w3id.org/oc/meta/ra/06205"),
995 )
997 # Verifica che le entità principali siano in cache
998 self.assertTrue(
999 editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/ra/06107"))
1000 )
1001 self.assertTrue(
1002 editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/ra/06205"))
1003 )
1005 # Verifica che le entità correlate siano in cache
1006 self.assertTrue(
1007 editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/id/06201"))
1008 )
1009 self.assertTrue(
1010 editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/id/06105"))
1011 )
class TestEntityCache(unittest.TestCase):
    """Unit tests for the EntityCache helper used by MetaEditor."""

    def setUp(self):
        self.cache = EntityCache()
        self.entity = URIRef("https://w3id.org/oc/meta/ra/06107")

    def test_add_and_is_cached(self):
        """An entity is reported as cached only after it has been added."""
        cache, entity = self.cache, self.entity
        self.assertFalse(cache.is_cached(entity))
        cache.add(entity)
        self.assertTrue(cache.is_cached(entity))

    def test_clear(self):
        """clear() empties the cache, forgetting previously added entities."""
        cache, entity = self.cache, self.entity
        cache.add(entity)
        cache.clear()
        self.assertFalse(cache.is_cached(entity))
1030if __name__ == "__main__": # pragma: no cover
1031 unittest.main()