Coverage for test/editor_test.py: 97% (358 statements)
coverage.py v6.5.0, created at 2025-12-20 08:55 +0000

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2022 Arcangelo Massari <arcangelo.massari@unibo.it>
#
# Permission to use, copy, modify, and/or distribute this software for any purpose
# with or without fee is hereby granted, provided that the above copyright notice
# and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.


import json
import os
import unittest
from shutil import rmtree
from test.test_utils import (PROV_SERVER, REDIS_CACHE_DB, REDIS_DB, REDIS_HOST,
                             REDIS_PORT, SERVER, reset_redis_counters,
                             reset_server)

import yaml
from oc_meta.plugins.editor import EntityCache, MetaEditor
from oc_meta.run.meta_process import run_meta_process
from oc_ocdm import Storer
from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler
from oc_ocdm.graph import GraphSet
from oc_ocdm.prov import ProvSet
from oc_ocdm.reader import Reader
from rdflib import URIRef
from sparqlite import SPARQLClient


BASE = os.path.join("test", "editor")
OUTPUT = os.path.join(BASE, "output")
META_CONFIG = os.path.join(BASE, "meta_config.yaml")


def get_counter_handler():
    return RedisCounterHandler(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB)

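
# Note: these tests assume the live services configured in test.test_utils:
# a data triplestore (SERVER), a provenance triplestore (PROV_SERVER), and a
# Redis instance (REDIS_HOST:REDIS_PORT) used for counters and caching.
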

class TestEditor(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.counter_handler = get_counter_handler()
        cls.original_generate_rdf_files = None


    def setUp(self):
        reset_server()
        reset_redis_counters()
        if os.path.exists(OUTPUT):
            rmtree(OUTPUT)

        # Create a temporary directory for cache files
        self.temp_dir = os.path.join("test", "temp_editor_test")
        if os.path.exists(self.temp_dir):
            rmtree(self.temp_dir)
        os.makedirs(self.temp_dir)

        # Set up the cache files
        self.cache_file = os.path.join(self.temp_dir, "ts_upload_cache.json")
        self.failed_file = os.path.join(self.temp_dir, "failed_queries.txt")
        self.stop_file = os.path.join(self.temp_dir, ".stop_upload")

        # Create separate directories for data and provenance update queries
        self.data_update_dir = os.path.join(self.temp_dir, "to_be_uploaded_data")
        self.prov_update_dir = os.path.join(self.temp_dir, "to_be_uploaded_prov")
        os.makedirs(self.data_update_dir, exist_ok=True)
        os.makedirs(self.prov_update_dir, exist_ok=True)

        with open(META_CONFIG, encoding="utf-8") as file:
            settings = yaml.full_load(file)
        # Update the settings to use Redis and the cache files
        settings.update(
            {
                "redis_host": REDIS_HOST,
                "redis_port": REDIS_PORT,
                "redis_db": REDIS_DB,
                "redis_cache_db": REDIS_CACHE_DB,
                "ts_upload_cache": self.cache_file,
                "ts_failed_queries": self.failed_file,
                "ts_stop_file": self.stop_file,
                "triplestore_url": SERVER,
                "provenance_triplestore_url": PROV_SERVER,
                "data_update_dir": self.data_update_dir,
                "prov_update_dir": self.prov_update_dir,
            }
        )
        run_meta_process(settings=settings, meta_config_path=META_CONFIG)


    def tearDown(self):
        if os.path.exists(OUTPUT):
            rmtree(OUTPUT)
        if os.path.exists(self.temp_dir):
            rmtree(self.temp_dir)
        reset_redis_counters()

        if self.original_generate_rdf_files is not None:
            with open(META_CONFIG, encoding="utf-8") as file:
                settings = yaml.full_load(file)
            settings["generate_rdf_files"] = self.original_generate_rdf_files
            with open(META_CONFIG, "w", encoding="utf-8") as file:
                yaml.dump(settings, file)
            self.original_generate_rdf_files = None

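    # Each test below edits entities through MetaEditor and then checks that
    # the change landed everywhere it should: in the data triplestore, in the
    # provenance triplestore, and (unless generate_rdf_files is disabled) in
    # the JSON-LD files under OUTPUT/rdf.
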

    def test_update_property(self):
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/0601"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/0604"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/0604"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/0603"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/0603"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/0602"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/0602"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/0605"),
        )

        with SPARQLClient(SERVER, timeout=60) as client:
            result = client.query("""
                ASK {
                    GRAPH <https://w3id.org/oc/meta/ar/> {
                        <https://w3id.org/oc/meta/ar/0601> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0604> .
                    }
                }
            """)
            self.assertTrue(result["boolean"], "AR/0601 → AR/0604 relationship not found in triplestore")

            result = client.query("""
                ASK {
                    GRAPH <https://w3id.org/oc/meta/ar/> {
                        <https://w3id.org/oc/meta/ar/0604> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0603> .
                    }
                }
            """)
            self.assertTrue(result["boolean"], "AR/0604 → AR/0603 relationship not found in triplestore")

            result = client.query("""
                ASK {
                    GRAPH <https://w3id.org/oc/meta/ar/> {
                        <https://w3id.org/oc/meta/ar/0603> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0602> .
                    }
                }
            """)
            self.assertTrue(result["boolean"], "AR/0603 → AR/0602 relationship not found in triplestore")

            result = client.query("""
                ASK {
                    GRAPH <https://w3id.org/oc/meta/ar/> {
                        <https://w3id.org/oc/meta/ar/0602> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0605> .
                    }
                }
            """)
            self.assertTrue(result["boolean"], "AR/0602 → AR/0605 relationship not found in triplestore")

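        # Each update should also have produced a provenance snapshot
        # (prov:specializationOf the edited AR) in the provenance triplestore.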

        with SPARQLClient(PROV_SERVER, timeout=60) as client:
            prov_result = client.query("""
                ASK {
                    ?s <http://www.w3.org/ns/prov#specializationOf> <https://w3id.org/oc/meta/ar/0601> ;
                       <http://www.w3.org/ns/prov#generatedAtTime> ?time .
                }
            """)
            self.assertTrue(prov_result["boolean"], "Provenance for AR/0601 not found in triplestore")

        with open(
            os.path.join(OUTPUT, "rdf", "ar", "060", "10000", "1000.json"),
            "r",
            encoding="utf-8",
        ) as file:
            ar_data = json.load(file)
            for graph in ar_data:
                graph_data = graph["@graph"]
                for ar in graph_data:
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/0601":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/0604",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/0603":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/0602",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/0604":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/0603",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/0602":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/0605",
                        )
        with open(
            os.path.join(
                OUTPUT, "rdf", "ar", "060", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            ar_prov = json.load(f)
            for graph in ar_prov:
                graph_prov = graph["@graph"]
                for ar in graph_prov:
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/0601/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0601> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0602> . } } ; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0601> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0604> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/0603/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0603> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0604> . } } ; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0603> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0602> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/0604/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0604> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0605> . } } ; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0604> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0603> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/0602/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0602> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0603> . } } ; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/0602> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/0605> . } }",
                        )

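    # As in the test above, every change is captured in provenance as an
    # oco:hasUpdateQuery literal. Where the order of triples inside a single
    # DELETE DATA block is not deterministic, the tests compare sets of
    # triples rather than raw strings.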

    def test_delete_property(self):
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.delete(URIRef("https://w3id.org/oc/meta/br/0601"), "has_title")
        with open(
            os.path.join(OUTPUT, "rdf", "br", "060", "10000", "1000.json"),
            "r",
            encoding="utf8",
        ) as f:
            br_data = json.load(f)
            for graph in br_data:
                graph_data = graph["@graph"]
                for br in graph_data:
                    if br["@id"] == "https://w3id.org/oc/meta/br/0601":
                        self.assertNotIn("http://purl.org/dc/terms/title", br)
        with open(
            os.path.join(
                OUTPUT, "rdf", "br", "060", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            br_prov = json.load(f)
            for graph in br_prov:
                graph_prov = graph["@graph"]
                for br in graph_prov:
                    if br["@id"] == "https://w3id.org/oc/meta/br/0601/prov/se/2":
                        self.assertEqual(
                            br["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            'DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { <https://w3id.org/oc/meta/br/0601> <http://purl.org/dc/terms/title> "A Review Of Hemolytic Uremic Syndrome In Patients Treated With Gemcitabine Therapy"^^<http://www.w3.org/2001/XMLSchema#string> . } }',
                        )

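    # Deleting a whole entity must also remove inbound references to it: the
    # test below checks that id/0601 disappears from the RDF file, that its
    # outgoing triples are recorded in the deletion query, and that br/0601
    # loses its datacite:hasIdentifier link (tracked in br/0601's provenance).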

    def test_delete_entity(self):
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.delete(URIRef("https://w3id.org/oc/meta/id/0601"))
        with open(
            os.path.join(OUTPUT, "rdf", "id", "060", "10000", "1000.json"),
            "r",
            encoding="utf8",
        ) as f:
            id_data = json.load(f)
            for graph in id_data:
                graph_data = graph["@graph"]
                for identifier in graph_data:
                    if identifier["@id"] == "https://w3id.org/oc/meta/id/0601":
                        self.fail("id/0601 should no longer appear in the RDF file")
        with open(
            os.path.join(
                OUTPUT, "rdf", "id", "060", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            id_prov = json.load(f)
            for graph in id_prov:
                graph_prov = graph["@graph"]
                for identifier in graph_prov:
                    if identifier["@id"] == "https://w3id.org/oc/meta/id/0601/prov/se/2":
                        update_query = (
                            identifier["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"]
                            .replace(
                                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/id/> { ",
                                "",
                            )
                            .replace(" . } }", "")
                            .replace("\n", "")
                            .split(" .")
                        )
                        self.assertEqual(
                            set(update_query),
                            {
                                "<https://w3id.org/oc/meta/id/0601> <http://purl.org/spar/datacite/usesIdentifierScheme> <http://purl.org/spar/datacite/doi>",
                                "<https://w3id.org/oc/meta/id/0601> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/datacite/Identifier>",
                                '<https://w3id.org/oc/meta/id/0601> <http://www.essepuntato.it/2010/06/literalreification/hasLiteralValue> "10.1002/(sici)1097-0142(19990501)85:9<2023::aid-cncr21>3.0.co;2-2"^^<http://www.w3.org/2001/XMLSchema#string>',
                            },
                        )
        with open(
            os.path.join(
                OUTPUT, "rdf", "br", "060", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf8",
        ) as f:
            br_prov = json.load(f)
            for graph in br_prov:
                graph_prov = graph["@graph"]
                for br in graph_prov:
                    if br["@id"] == "https://w3id.org/oc/meta/br/0601/prov/se/2":
                        self.assertEqual(
                            br["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { <https://w3id.org/oc/meta/br/0601> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/0601> . } }",
                        )


    def test_merge(self):
        base_iri = "https://w3id.org/oc/meta/"
        resp_agent = "https://orcid.org/0000-0002-8420-0696"
        g_set = GraphSet(
            base_iri,
            supplier_prefix="060",
            wanted_label=False,
            custom_counter_handler=self.counter_handler,
        )
        endpoint = "http://127.0.0.1:8805/sparql"

        # Create the test entities
        ra = g_set.add_ra(
            resp_agent=resp_agent, res=URIRef("https://w3id.org/oc/meta/ra/06010")
        )
        ra.has_name("Wiley")

        reader = Reader()
        id_0605 = reader.import_entity_from_triplestore(
            g_set,
            endpoint,
            URIRef("https://w3id.org/oc/meta/id/0605"),
            resp_agent,
            enable_validation=False,
        )
        id_0609 = g_set.add_id(resp_agent=resp_agent)
        id_0609.create_crossref("313")

        ra.has_identifier(id_0605)
        ra.has_identifier(id_0609)

        # Generate provenance
        provset = ProvSet(
            g_set,
            base_iri,
            wanted_label=False,
            supplier_prefix="060",
            custom_counter_handler=self.counter_handler,
        )
        provset.generate_provenance()

        # Store and upload the data
        rdf_dir = os.path.join(OUTPUT, "rdf") + os.sep
        graph_storer = Storer(
            g_set, dir_split=10000, n_file_item=1000, zip_output=False
        )
        prov_storer = Storer(
            provset, dir_split=10000, n_file_item=1000, zip_output=False
        )

        graph_storer.store_all(rdf_dir, base_iri)
        prov_storer.store_all(rdf_dir, base_iri)
        graph_storer.upload_all(endpoint)
        g_set.commit_changes()

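        # ra/06010 will be absorbed into ra/0607: the surviving entity keeps
        # both identifiers, while ra/06010 is deleted and the deletion is
        # recorded in its provenance snapshot (se/2).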

        # Perform the merge
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.merge(
            g_set,
            URIRef("https://w3id.org/oc/meta/ra/0607"),
            URIRef("https://w3id.org/oc/meta/ra/06010"),
        )
        editor.save(g_set)

        # Check the Redis provenance (se) counters: one snapshot each for
        # identifiers 1-6, two for identifier 7
        for identifier in range(1, 7):
            with self.subTest(identifier=identifier):
                self.assertEqual(
                    self.counter_handler.read_counter(
                        "ra", prov_short_name="se", identifier=identifier, supplier_prefix="060"
                    ),
                    1,
                )
        self.assertEqual(
            self.counter_handler.read_counter(
                "ra", prov_short_name="se", identifier=7, supplier_prefix="060"
            ),
            2,
        )

        # Verify the merged data
        for filepath in [
            os.path.join(OUTPUT, "rdf", "ra", "060", "10000", "1000.json"),
            os.path.join(
                OUTPUT, "rdf", "ra", "060", "10000", "1000", "prov", "se.json"
            ),
        ]:
            with open(filepath, "r", encoding="utf8") as f:
                data = json.load(f)
                for graph in data:
                    graph_data = graph["@graph"]
                    for entity in graph_data:
                        if entity["@id"] == "https://w3id.org/oc/meta/ra/0607":
                            identifiers = {
                                identifier["@id"]
                                for identifier in entity[
                                    "http://purl.org/spar/datacite/hasIdentifier"
                                ]
                            }
                            self.assertEqual(
                                identifiers,
                                {
                                    str(id_0605.res),
                                    str(id_0609.res),
                                },
                            )
                        elif entity["@id"] == "https://w3id.org/oc/meta/ra/06010":
                            self.fail("ra/06010 should no longer appear after the merge")
                        # elif entity['@id'] == 'https://w3id.org/oc/meta/ar/06010':
                        #     self.assertEqual(entity['http://purl.org/spar/pro/isHeldBy'][0]['@id'], 'https://w3id.org/oc/meta/ra/0607')
                        elif entity["@id"] in {
                            "https://w3id.org/oc/meta/ra/0607/prov/se/1",
                            "https://w3id.org/oc/meta/ra/06010/prov/se/1",
                        }:
                            self.assertIn(
                                "http://www.w3.org/ns/prov#invalidatedAtTime", entity
                            )
                        elif entity["@id"] == "https://w3id.org/oc/meta/ra/0607/prov/se/3":
                            self.assertEqual(
                                entity["http://purl.org/dc/terms/description"][0]["@value"],
                                "The entity 'https://w3id.org/oc/meta/ra/0607' has been merged with 'https://w3id.org/oc/meta/ra/06010'.",
                            )
                            self.assertEqual(
                                entity["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                                "INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/0607> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06011> . } }",
                            )
                        elif entity["@id"] == "https://w3id.org/oc/meta/ra/06010/prov/se/2":
                            update_query = (
                                entity["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"]
                                .replace(
                                    "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { ",
                                    "",
                                )
                                .replace(" . } }", "")
                                .replace("\n", "")
                                .split(" .")
                            )
                            self.assertEqual(
                                set(update_query),
                                {
                                    '<https://w3id.org/oc/meta/ra/06010> <http://xmlns.com/foaf/0.1/name> "Wiley"^^<http://www.w3.org/2001/XMLSchema#string>',
                                    f"<https://w3id.org/oc/meta/ra/06010> <http://purl.org/spar/datacite/hasIdentifier> <{id_0609.res}>",
                                    f"<https://w3id.org/oc/meta/ra/06010> <http://purl.org/spar/datacite/hasIdentifier> <{id_0605.res}>",
                                    "<https://w3id.org/oc/meta/ra/06010> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Agent>",
                                },
                            )

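    # The next test deletes an entity whose explicit rdf:type triple has been
    # removed beforehand, so MetaEditor has to operate on an inferred type
    # (hence the test name) and still record complete provenance.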

    def test_delete_entity_with_inferred_type(self):
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")

        with SPARQLClient(SERVER, timeout=60) as client:
            # Remove the type from the entity
            delete_type_query = """
                DELETE {
                    GRAPH <https://w3id.org/oc/meta/br/> {
                        <https://w3id.org/oc/meta/br/0605> a <http://purl.org/spar/fabio/Expression> .
                    }
                }
                WHERE {
                    GRAPH <https://w3id.org/oc/meta/br/> {
                        <https://w3id.org/oc/meta/br/0605> a <http://purl.org/spar/fabio/Expression> .
                    }
                }
            """
            client.update(delete_type_query)

            # Ensure the entity exists before deletion
            select_query = """
                SELECT ?s WHERE {
                    GRAPH <https://w3id.org/oc/meta/br/> {
                        ?s <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date> .
                    }
                }
            """
            result = client.query(select_query)
            self.assertEqual(len(result["results"]["bindings"]), 1)

        # Perform the deletion
        editor.delete(URIRef("https://w3id.org/oc/meta/br/0605"))

        # Ensure the entity has been deleted
        with SPARQLClient(SERVER, timeout=60) as client:
            result = client.query(select_query)
            self.assertEqual(len(result["results"]["bindings"]), 0)

        # Verify the provenance information
        prov_path = os.path.join(
            OUTPUT, "rdf", "br", "060", "10000", "1000", "prov", "se.json"
        )
        with open(prov_path, "r", encoding="utf8") as f:
            prov_data = json.load(f)
        br_0605_prov_se_2 = None
        br_0605_prov_se_1 = None
        for graph in prov_data:
            for entity in graph["@graph"]:
                if entity["@id"] == "https://w3id.org/oc/meta/br/0605/prov/se/2":
                    br_0605_prov_se_2 = entity
                if entity["@id"] == "https://w3id.org/oc/meta/br/0605/prov/se/1":
                    br_0605_prov_se_1 = entity

        self.assertIsNotNone(br_0605_prov_se_2)
        self.assertEqual(
            br_0605_prov_se_2["http://purl.org/dc/terms/description"][0]["@value"],
            "The entity 'https://w3id.org/oc/meta/br/0605' has been deleted.",
        )
        self.assertEqual(
            br_0605_prov_se_2["@type"][0], "http://www.w3.org/ns/prov#Entity"
        )
        self.assertEqual(
            br_0605_prov_se_2["http://www.w3.org/ns/prov#specializationOf"][0]["@id"],
            "https://w3id.org/oc/meta/br/0605",
        )
        self.assertEqual(
            br_0605_prov_se_2["http://www.w3.org/ns/prov#wasAttributedTo"][0]["@id"],
            "https://orcid.org/0000-0002-8420-0696",
        )
        self.assertIn("http://www.w3.org/ns/prov#invalidatedAtTime", br_0605_prov_se_2)
        self.assertIn("http://www.w3.org/ns/prov#generatedAtTime", br_0605_prov_se_2)
        self.assertEqual(
            len(br_0605_prov_se_1["http://www.w3.org/ns/prov#generatedAtTime"]), 1
        )
        self.assertIn("https://w3id.org/oc/ontology/hasUpdateQuery", br_0605_prov_se_2)
        update_query_value = br_0605_prov_se_2[
            "https://w3id.org/oc/ontology/hasUpdateQuery"
        ][0]["@value"]
        update_query_triples = (
            update_query_value.replace(
                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { ", ""
            )
            .replace(" } }", "")
            .strip()
        )
        actual_triples = set(
            triple.strip()
            for triple in update_query_triples.split(" .")
            if triple.strip()
        )
        expected_triples = {
            "<https://w3id.org/oc/meta/br/0605> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/0606>",
            '<https://w3id.org/oc/meta/br/0605> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date>',
        }
        self.assertEqual(actual_triples, expected_triples)

        self.assertIsNotNone(br_0605_prov_se_1)
        self.assertEqual(
            br_0605_prov_se_1["http://purl.org/dc/terms/description"][0]["@value"],
            "The entity 'https://w3id.org/oc/meta/br/0605' has been created.",
        )
        self.assertEqual(
            br_0605_prov_se_1["@type"][0], "http://www.w3.org/ns/prov#Entity"
        )
        self.assertEqual(
            br_0605_prov_se_1["http://www.w3.org/ns/prov#specializationOf"][0]["@id"],
            "https://w3id.org/oc/meta/br/0605",
        )
        self.assertEqual(
            br_0605_prov_se_1["http://www.w3.org/ns/prov#wasAttributedTo"][0]["@id"],
            "https://w3id.org/oc/meta/prov/pa/1",
        )
        self.assertIn("http://www.w3.org/ns/prov#generatedAtTime", br_0605_prov_se_1)
        self.assertEqual(
            len(br_0605_prov_se_1["http://www.w3.org/ns/prov#generatedAtTime"]), 1
        )
        self.assertEqual(
            len(br_0605_prov_se_2["http://www.w3.org/ns/prov#invalidatedAtTime"]), 1
        )
        self.assertIn("http://www.w3.org/ns/prov#hadPrimarySource", br_0605_prov_se_1)

        # Reinsert the publication date
        sparql_update_query = """
            INSERT DATA {
                GRAPH <https://w3id.org/oc/meta/br/> {
                    <https://w3id.org/oc/meta/br/0605> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date> .
                }
            }
        """
        with SPARQLClient(SERVER, timeout=60) as client:
            client.update(sparql_update_query)

        # Perform the deletion again
        editor.delete(URIRef("https://w3id.org/oc/meta/br/0605"))

        # Verify the provenance graph for the entity
        prov_path = os.path.join(
            OUTPUT, "rdf", "br", "060", "10000", "1000", "prov", "se.json"
        )
        with open(prov_path, "r", encoding="utf8") as f:
            prov_data = json.load(f)
        for graph in prov_data:
            for entity in graph["@graph"]:
                if "https://w3id.org/oc/meta/br/0605" in entity["@id"]:
                    if entity["@id"] == "https://w3id.org/oc/meta/br/0605/prov/se/1":
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#generatedAtTime"]), 1
                        )
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#invalidatedAtTime"]), 1
                        )
                    elif entity["@id"] == "https://w3id.org/oc/meta/br/0605/prov/se/2":
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#generatedAtTime"]), 1
                        )
                        # self.assertEqual(len(entity['http://www.w3.org/ns/prov#invalidatedAtTime']), 2)
                    elif entity["@id"] == "https://w3id.org/oc/meta/br/0605/prov/se/3":
                        self.assertEqual(
                            entity["http://purl.org/dc/terms/description"][0]["@value"],
                            "The entity 'https://w3id.org/oc/meta/br/0605' has been deleted.",
                        )
                        self.assertIn("https://w3id.org/oc/ontology/hasUpdateQuery", entity)
                        update_query_value = entity[
                            "https://w3id.org/oc/ontology/hasUpdateQuery"
                        ][0]["@value"]
                        update_query_triples = (
                            update_query_value.replace(
                                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { ", ""
                            )
                            .replace(" } }", "")
                            .strip()
                        )
                        actual_triples = set(
                            triple.strip()
                            for triple in update_query_triples.split(" .")
                            if triple.strip()
                        )
                        expected_triples = {
                            '<https://w3id.org/oc/meta/br/0605> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date>'
                        }
                        self.assertEqual(actual_triples, expected_triples)
                        self.assertEqual(
                            entity["@type"][0], "http://www.w3.org/ns/prov#Entity"
                        )
                        self.assertEqual(
                            entity["http://www.w3.org/ns/prov#specializationOf"][0]["@id"],
                            "https://w3id.org/oc/meta/br/0605",
                        )
                        self.assertEqual(
                            entity["http://www.w3.org/ns/prov#wasAttributedTo"][0]["@id"],
                            "https://orcid.org/0000-0002-8420-0696",
                        )
                        self.assertIn("http://www.w3.org/ns/prov#invalidatedAtTime", entity)
                        self.assertIn("http://www.w3.org/ns/prov#generatedAtTime", entity)
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#generatedAtTime"]), 1
                        )
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#invalidatedAtTime"]), 1
                        )
                        self.assertEqual(
                            entity["http://www.w3.org/ns/prov#wasDerivedFrom"][0]["@id"],
                            "https://w3id.org/oc/meta/br/0605/prov/se/2",
                        )


    def test_no_rdf_files_generation(self):
        """Test that, when generate_rdf_files is False, data is still updated in the triplestore but not in the RDF files."""
        with open(META_CONFIG, encoding="utf-8") as file:
            settings = yaml.full_load(file)
        self.original_generate_rdf_files = settings.get("generate_rdf_files", True)

        settings["generate_rdf_files"] = False
        with open(META_CONFIG, "w", encoding="utf-8") as file:
            yaml.dump(settings, file)

        os.makedirs(os.path.join(OUTPUT, "rdf", "br", "060", "10000"), exist_ok=True)

        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")

        self.assertFalse(editor.generate_rdf_files, "generate_rdf_files should be False")

        g_set = GraphSet(base_iri="https://w3id.org/oc/meta/")
        br = g_set.add_br(
            res=URIRef("https://w3id.org/oc/meta/br/0603"),
            resp_agent="https://orcid.org/0000-0002-8420-0696",
        )
        br.has_title("Original Title")
        editor.save(g_set)

        editor.update_property(
            URIRef("https://w3id.org/oc/meta/br/0603"),
            "has_title",
            "New Test Title",
        )

        with SPARQLClient(SERVER, timeout=60) as client:
            debug_result = client.query("""
                SELECT ?p ?o
                WHERE {
                    GRAPH ?g {
                        <https://w3id.org/oc/meta/br/0603> ?p ?o .
                    }
                }
            """)

            title_found = False
            if debug_result["results"]["bindings"]:
                for binding in debug_result["results"]["bindings"]:
                    predicate = binding.get("p", {}).get("value")
                    obj = binding.get("o", {}).get("value")

                    # Check whether this is the title property with the expected value
                    if predicate == "http://purl.org/dc/terms/title" and obj == "New Test Title":
                        title_found = True
            else:
                print("No properties found for BR/0603")

            self.assertTrue(title_found, "Title update not found in triplestore")

        with SPARQLClient(PROV_SERVER, timeout=60) as client:
            prov_result = client.query("""
                ASK {
                    ?s <http://www.w3.org/ns/prov#specializationOf> <https://w3id.org/oc/meta/br/0603> .
                }
            """)
            self.assertTrue(prov_result["boolean"], "Provenance for BR/0603 not found in triplestore")

        target_file = os.path.join(OUTPUT, "rdf", "br", "060", "10000", "1000.json")
        if os.path.exists(target_file):
            with open(target_file, "r", encoding="utf-8") as file:
                try:
                    data = json.load(file)
                    contains_update = False
                    for graph in data:
                        for entity in graph.get("@graph", []):
                            if entity.get("@id") == "https://w3id.org/oc/meta/br/0603":
                                titles = entity.get("http://purl.org/dc/terms/title", [])
                                for title in titles:
                                    if title.get("@value") == "New Test Title":
                                        contains_update = True
                                        break
                    self.assertFalse(contains_update, "RDF file should not contain the update")
                except json.JSONDecodeError:
                    pass

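    # MetaEditor keeps an EntityCache of the entities it has already handled:
    # the test below merges once and then checks that both merge subjects and
    # their related identifier entities are marked as cached, so that
    # successive merges can reuse them.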

    def test_merge_caches_entities(self):
        """Verify that entities are correctly cached across successive merges."""
        base_iri = "https://w3id.org/oc/meta/"
        resp_agent = "https://orcid.org/0000-0002-8420-0696"
        g_set = GraphSet(
            base_iri,
            supplier_prefix="060",
            wanted_label=False,
            custom_counter_handler=self.counter_handler,
        )
        endpoint = "http://127.0.0.1:8805/sparql"

        # Prepare the test entities
        ra = g_set.add_ra(
            resp_agent=resp_agent, res=URIRef("https://w3id.org/oc/meta/ra/06010")
        )
        ra.has_name("Wiley")

        reader = Reader()
        id_0605 = reader.import_entity_from_triplestore(
            g_set,
            endpoint,
            URIRef("https://w3id.org/oc/meta/id/0605"),
            resp_agent,
            enable_validation=False,
        )
        id_0609 = g_set.add_id(resp_agent=resp_agent)
        id_0609.create_crossref("313")

        ra.has_identifier(id_0605)
        ra.has_identifier(id_0609)

        # Generate provenance
        provset = ProvSet(
            g_set,
            base_iri,
            wanted_label=False,
            supplier_prefix="060",
            custom_counter_handler=self.counter_handler,
        )
        provset.generate_provenance()

        # Store and upload the data
        rdf_dir = os.path.join(OUTPUT, "rdf") + os.sep
        graph_storer = Storer(
            g_set, dir_split=10000, n_file_item=1000, zip_output=False
        )
        prov_storer = Storer(
            provset, dir_split=10000, n_file_item=1000, zip_output=False
        )

        graph_storer.store_all(rdf_dir, base_iri)
        prov_storer.store_all(rdf_dir, base_iri)
        graph_storer.upload_all(endpoint)
        g_set.commit_changes()

        # Run the cache test
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")

        # First merge
        editor.merge(
            g_set,
            URIRef("https://w3id.org/oc/meta/ra/0607"),
            URIRef("https://w3id.org/oc/meta/ra/06010"),
        )

        # Verify that the main entities are cached
        self.assertTrue(
            editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/ra/0607"))
        )
        self.assertTrue(
            editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/ra/06010"))
        )

        # Verify that the related entities are cached as well
        self.assertTrue(editor.entity_cache.is_cached(id_0609.res))
        self.assertTrue(editor.entity_cache.is_cached(id_0605.res))

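# EntityCache, as exercised by the tests below, is a simple membership store:
# add() marks an entity as seen, is_cached() checks for it, clear() resets it.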

class TestEntityCache(unittest.TestCase):
    def setUp(self):
        self.cache = EntityCache()
        self.entity = URIRef("https://w3id.org/oc/meta/ra/0607")

    def test_add_and_is_cached(self):
        self.assertFalse(self.cache.is_cached(self.entity))
        self.cache.add(self.entity)
        self.assertTrue(self.cache.is_cached(self.entity))

    def test_clear(self):
        self.cache.add(self.entity)
        self.cache.clear()
        self.assertFalse(self.cache.is_cached(self.entity))


if __name__ == "__main__":  # pragma: no cover
    unittest.main()