Coverage for test/editor_test.py: 97%

379 statements  

coverage.py v6.5.0, created at 2025-07-14 14:06 +0000

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2022 Arcangelo Massari <arcangelo.massari@unibo.it>
#
# Permission to use, copy, modify, and/or distribute this software for any purpose
# with or without fee is hereby granted, provided that the above copyright notice
# and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.

import json
import os
import unittest
from shutil import rmtree

import yaml
from oc_meta.plugins.editor import EntityCache, MetaEditor
from oc_meta.run.meta_process import run_meta_process
from oc_ocdm import Storer
from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler
from oc_ocdm.graph import GraphSet
from oc_ocdm.prov import ProvSet
from oc_ocdm.reader import Reader
from rdflib import URIRef
from SPARQLWrapper import JSON, POST, SPARQLWrapper

from test.test_utils import (PROV_SERVER, REDIS_CACHE_DB, REDIS_DB, REDIS_HOST,
                             REDIS_PORT, SERVER, reset_redis_counters,
                             reset_server)

BASE = os.path.join("test", "editor")
OUTPUT = os.path.join(BASE, "output")
META_CONFIG = os.path.join(BASE, "meta_config.yaml")


def get_counter_handler():
    return RedisCounterHandler(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB)


class TestEditor(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.counter_handler = get_counter_handler()
        cls.original_generate_rdf_files = None

    def setUp(self):
        reset_server()
        reset_redis_counters()
        if os.path.exists(OUTPUT):
            rmtree(OUTPUT)

        # Create a temporary directory for cache files
        self.temp_dir = os.path.join("test", "temp_editor_test")
        if os.path.exists(self.temp_dir):
            rmtree(self.temp_dir)
        os.makedirs(self.temp_dir)

        # Set up the cache files
        self.cache_file = os.path.join(self.temp_dir, "ts_upload_cache.json")
        self.failed_file = os.path.join(self.temp_dir, "failed_queries.txt")
        self.stop_file = os.path.join(self.temp_dir, ".stop_upload")

        # Create separate directories for data and provenance update queries
        self.data_update_dir = os.path.join(self.temp_dir, "to_be_uploaded_data")
        self.prov_update_dir = os.path.join(self.temp_dir, "to_be_uploaded_prov")
        os.makedirs(self.data_update_dir, exist_ok=True)
        os.makedirs(self.prov_update_dir, exist_ok=True)

        with open(META_CONFIG, encoding="utf-8") as file:
            settings = yaml.full_load(file)
        # Update the settings to use Redis and the cache files
        settings.update(
            {
                "redis_host": REDIS_HOST,
                "redis_port": REDIS_PORT,
                "redis_db": REDIS_DB,
                "redis_cache_db": REDIS_CACHE_DB,
                "ts_upload_cache": self.cache_file,
                "ts_failed_queries": self.failed_file,
                "ts_stop_file": self.stop_file,
                "triplestore_url": SERVER,
                "provenance_triplestore_url": PROV_SERVER,
                "data_update_dir": self.data_update_dir,
                "prov_update_dir": self.prov_update_dir,
            }
        )
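
        # Run the Meta process so that every test starts from a triplestore and an
        # RDF output directory already populated with the 0610 test entities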
        run_meta_process(settings=settings, meta_config_path=META_CONFIG)

    def tearDown(self):
        if os.path.exists(OUTPUT):
            rmtree(OUTPUT)
        if os.path.exists(self.temp_dir):
            rmtree(self.temp_dir)
        reset_redis_counters()

        if self.original_generate_rdf_files is not None:
            with open(META_CONFIG, encoding="utf-8") as file:
                settings = yaml.full_load(file)
            settings["generate_rdf_files"] = self.original_generate_rdf_files
            with open(META_CONFIG, "w", encoding="utf-8") as file:
                yaml.dump(settings, file)
            self.original_generate_rdf_files = None

    def test_update_property(self):
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/06101"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/06104"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/06104"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/06103"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/06103"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/06102"),
        )
        editor.update_property(
            URIRef("https://w3id.org/oc/meta/ar/06102"),
            "has_next",
            URIRef("https://w3id.org/oc/meta/ar/06105"),
        )
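
        # Each rewired hasNext relation is verified with an ASK query against the
        # data triplestore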
        sparql = SPARQLWrapper(SERVER)

        sparql.setQuery("""
            ASK {
                GRAPH <https://w3id.org/oc/meta/ar/> {
                    <https://w3id.org/oc/meta/ar/06101> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06104> .
                }
            }
        """)
        sparql.setReturnFormat(JSON)
        result = sparql.queryAndConvert()
        self.assertTrue(result["boolean"], "AR/06101 → AR/06104 relationship not found in triplestore")

        sparql.setQuery("""
            ASK {
                GRAPH <https://w3id.org/oc/meta/ar/> {
                    <https://w3id.org/oc/meta/ar/06104> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06103> .
                }
            }
        """)
        result = sparql.queryAndConvert()
        self.assertTrue(result["boolean"], "AR/06104 → AR/06103 relationship not found in triplestore")

        sparql.setQuery("""
            ASK {
                GRAPH <https://w3id.org/oc/meta/ar/> {
                    <https://w3id.org/oc/meta/ar/06103> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06102> .
                }
            }
        """)
        result = sparql.queryAndConvert()
        self.assertTrue(result["boolean"], "AR/06103 → AR/06102 relationship not found in triplestore")

        sparql.setQuery("""
            ASK {
                GRAPH <https://w3id.org/oc/meta/ar/> {
                    <https://w3id.org/oc/meta/ar/06102> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06105> .
                }
            }
        """)
        result = sparql.queryAndConvert()
        self.assertTrue(result["boolean"], "AR/06102 → AR/06105 relationship not found in triplestore")
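
        # The update should also have produced a provenance snapshot in the
        # provenance triplestore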
        prov_sparql = SPARQLWrapper(PROV_SERVER)
        prov_sparql.setQuery("""
            ASK {
                ?s <http://www.w3.org/ns/prov#specializationOf> <https://w3id.org/oc/meta/ar/06101> ;
                   <http://www.w3.org/ns/prov#generatedAtTime> ?time .
            }
        """)
        prov_sparql.setReturnFormat(JSON)
        prov_result = prov_sparql.queryAndConvert()
        self.assertTrue(prov_result["boolean"], "Provenance for AR/06101 not found in triplestore")
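
        # The same changes must be reflected in the JSON-LD files persisted on disk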
        with open(
            os.path.join(OUTPUT, "rdf", "ar", "0610", "10000", "1000.json"),
            "r",
            encoding="utf-8",
        ) as file:
            ar_data = json.load(file)
            for graph in ar_data:
                graph_data = graph["@graph"]
                for ar in graph_data:
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/06101":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/06104",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/06103":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/06102",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/06104":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/06103",
                        )
                    elif ar["@id"] == "https://w3id.org/oc/meta/ar/06102":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasNext"][0]["@id"],
                            "https://w3id.org/oc/meta/ar/06105",
                        )
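
        # Each change must be tracked in the provenance file as a paired
        # DELETE DATA / INSERT DATA update query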
        with open(
            os.path.join(
                OUTPUT, "rdf", "ar", "0610", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf-8",
        ) as f:
            ar_prov = json.load(f)
            for graph in ar_prov:
                graph_prov = graph["@graph"]
                for ar in graph_prov:
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/06101/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06101> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06102> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06101> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06104> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/06103/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06103> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06104> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06103> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06102> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/06104/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06104> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06105> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06104> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06103> . } }",
                        )
                    if ar["@id"] == "https://w3id.org/oc/meta/ar/06102/prov/se/2":
                        self.assertEqual(
                            ar["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06102> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06103> . } }; INSERT DATA { GRAPH <https://w3id.org/oc/meta/ar/> { <https://w3id.org/oc/meta/ar/06102> <https://w3id.org/oc/ontology/hasNext> <https://w3id.org/oc/meta/ar/06105> . } }",
                        )

    def test_delete_property(self):
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.delete(URIRef("https://w3id.org/oc/meta/br/06101"), "has_title")
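
        # Removing "has_title" should leave no dcterms:title on br/06101 in the
        # persisted JSON-LD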
        with open(
            os.path.join(OUTPUT, "rdf", "br", "0610", "10000", "1000.json"),
            "r",
            encoding="utf-8",
        ) as f:
            br_data = json.load(f)
            for graph in br_data:
                graph_data = graph["@graph"]
                for br in graph_data:
                    if br["@id"] == "https://w3id.org/oc/meta/br/06101":
                        self.assertNotIn("http://purl.org/dc/terms/title", br)
        with open(
            os.path.join(
                OUTPUT, "rdf", "br", "0610", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf-8",
        ) as f:
            br_prov = json.load(f)
            for graph in br_prov:
                graph_prov = graph["@graph"]
                for br in graph_prov:
                    if br["@id"] == "https://w3id.org/oc/meta/br/06101/prov/se/2":
                        self.assertEqual(
                            br["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            'DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { <https://w3id.org/oc/meta/br/06101> <http://purl.org/dc/terms/title> "A Review Of Hemolytic Uremic Syndrome In Patients Treated With Gemcitabine Therapy"^^<http://www.w3.org/2001/XMLSchema#string> . } }',
                        )

    def test_delete_entity(self):
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.delete(URIRef("https://w3id.org/oc/meta/id/06101"))
        with open(
            os.path.join(OUTPUT, "rdf", "id", "0610", "10000", "1000.json"),
            "r",
            encoding="utf-8",
        ) as f:
            id_data = json.load(f)
            for graph in id_data:
                graph_data = graph["@graph"]
                for identifier in graph_data:
                    if identifier["@id"] == "https://w3id.org/oc/meta/id/06101":
                        self.fail()
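
        # The deletion must be recorded in provenance as a DELETE DATA query that
        # removes all of the entity's triples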
        with open(
            os.path.join(
                OUTPUT, "rdf", "id", "0610", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf-8",
        ) as f:
            id_prov = json.load(f)
            for graph in id_prov:
                graph_prov = graph["@graph"]
                for identifier in graph_prov:
                    if identifier["@id"] == "https://w3id.org/oc/meta/id/06101/prov/se/2":
                        update_query = (
                            identifier["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"]
                            .replace(
                                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/id/> { ",
                                "",
                            )
                            .replace(" . } }", "")
                            .replace("\n", "")
                            .split(" .")
                        )
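                        # Compare as a set, since the order of the triples inside
                        # the DELETE DATA body may vary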
                        self.assertEqual(
                            set(update_query),
                            {
                                "<https://w3id.org/oc/meta/id/06101> <http://purl.org/spar/datacite/usesIdentifierScheme> <http://purl.org/spar/datacite/doi>",
                                "<https://w3id.org/oc/meta/id/06101> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/datacite/Identifier>",
                                '<https://w3id.org/oc/meta/id/06101> <http://www.essepuntato.it/2010/06/literalreification/hasLiteralValue> "10.1002/(sici)1097-0142(19990501)85:9<2023::aid-cncr21>3.0.co;2-2"^^<http://www.w3.org/2001/XMLSchema#string>',
                            },
                        )
        with open(
            os.path.join(
                OUTPUT, "rdf", "br", "0610", "10000", "1000", "prov", "se.json"
            ),
            "r",
            encoding="utf-8",
        ) as f:
            ra_prov = json.load(f)
            for graph in ra_prov:
                graph_prov = graph["@graph"]
                for ra in graph_prov:
                    if ra["@id"] == "https://w3id.org/oc/meta/br/06101/prov/se/2":
                        self.assertEqual(
                            ra["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                            "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { <https://w3id.org/oc/meta/br/06101> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06101> . } }",
                        )

    def test_merge(self):
        base_iri = "https://w3id.org/oc/meta/"
        resp_agent = "https://orcid.org/0000-0002-8420-0696"
        g_set = GraphSet(
            base_iri,
            supplier_prefix="0620",
            wanted_label=False,
            custom_counter_handler=self.counter_handler,
        )
        endpoint = "http://127.0.0.1:8805/sparql"
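
        # Entities created here use supplier prefix 0620, while the Meta process in
        # setUp populated the triplestore under prefix 0610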
        # Create test entities
        ra = g_set.add_ra(
            resp_agent=resp_agent, res=URIRef("https://w3id.org/oc/meta/ra/06205")
        )
        ra.has_name("Wiley")

        reader = Reader()
        id_06105 = reader.import_entity_from_triplestore(
            g_set,
            endpoint,
            URIRef("https://w3id.org/oc/meta/id/06105"),
            resp_agent,
            enable_validation=False,
        )
        id_06203 = g_set.add_id(resp_agent=resp_agent)
        id_06203.create_crossref("313")

        ra.has_identifier(id_06105)
        ra.has_identifier(id_06203)

        # Generate provenance
        provset = ProvSet(
            g_set,
            base_iri,
            wanted_label=False,
            supplier_prefix="0620",
            custom_counter_handler=self.counter_handler,
        )
        provset.generate_provenance()

        # Store and upload the data
        rdf_dir = os.path.join(OUTPUT, "rdf") + os.sep
        graph_storer = Storer(g_set, dir_split=10000, n_file_item=1000, zip_output=False)
        prov_storer = Storer(provset, dir_split=10000, n_file_item=1000, zip_output=False)

        graph_storer.store_all(rdf_dir, base_iri)
        prov_storer.store_all(rdf_dir, base_iri)
        graph_storer.upload_all(endpoint)
        g_set.commit_changes()

        # Perform the merge
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        editor.merge(
            g_set,
            URIRef("https://w3id.org/oc/meta/ra/06107"),
            URIRef("https://w3id.org/oc/meta/ra/06205"),
        )
        editor.save(g_set)

        # Check Redis counters
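        # Every pre-existing 0610 ra should have exactly one provenance snapshot,
        # except ra/06107 (the merge survivor), which has two; under the 0620
        # prefix only ra/06205 (the absorbed entity) exists, also with two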
        for identifier, expected in [(1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 1), (7, 2)]:
            self.assertEqual(
                self.counter_handler.read_counter(
                    "ra", prov_short_name="se", identifier=identifier, supplier_prefix="0610"
                ),
                expected,
            )

        for identifier, expected in [(1, 0), (2, 0), (3, 0), (4, 0), (5, 2)]:
            self.assertEqual(
                self.counter_handler.read_counter(
                    "ra", prov_short_name="se", identifier=identifier, supplier_prefix="0620"
                ),
                expected,
            )

        # Verify merged data
        for filepath in [
            os.path.join(OUTPUT, "rdf", "ra", "0610", "10000", "1000.json"),
            # os.path.join(OUTPUT, 'rdf', 'ar', '0620', '10000', '1000.json'),
            os.path.join(
                OUTPUT, "rdf", "ra", "0620", "10000", "1000", "prov", "se.json"
            ),
            os.path.join(
                OUTPUT, "rdf", "ra", "0610", "10000", "1000", "prov", "se.json"
            ),
        ]:
            with open(filepath, "r", encoding="utf-8") as f:
                data = json.load(f)
                for graph in data:
                    graph_data = graph["@graph"]
                    for entity in graph_data:
                        if entity["@id"] == "https://w3id.org/oc/meta/ra/06107":
                            identifiers = {
                                identifier["@id"]
                                for identifier in entity[
                                    "http://purl.org/spar/datacite/hasIdentifier"
                                ]
                            }
                            self.assertEqual(
                                identifiers,
                                {
                                    "https://w3id.org/oc/meta/id/06105",
                                    "https://w3id.org/oc/meta/id/06201",
                                },
                            )
                        elif entity["@id"] == "https://w3id.org/oc/meta/ra/06205":
                            self.fail()
                        # elif entity['@id'] == 'https://w3id.org/oc/meta/ar/06205':
                        #     self.assertEqual(entity['http://purl.org/spar/pro/isHeldBy'][0]['@id'], 'https://w3id.org/oc/meta/ra/06107')
                        elif entity["@id"] in {
                            "https://w3id.org/oc/meta/ra/06107/prov/se/1",
                            "https://w3id.org/oc/meta/ra/06205/prov/se/1",
                        }:
                            self.assertIn(
                                "http://www.w3.org/ns/prov#invalidatedAtTime", entity
                            )
                        elif entity["@id"] == "https://w3id.org/oc/meta/ra/06107/prov/se/3":
                            self.assertEqual(
                                entity["http://purl.org/dc/terms/description"][0]["@value"],
                                "The entity 'https://w3id.org/oc/meta/ra/06107' has been merged with 'https://w3id.org/oc/meta/ra/06205'.",
                            )
                            self.assertEqual(
                                entity["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"],
                                "INSERT DATA { GRAPH <https://w3id.org/oc/meta/ra/> { <https://w3id.org/oc/meta/ra/06107> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06206> . } }",
                            )
                        elif entity["@id"] == "https://w3id.org/oc/meta/ra/06205/prov/se/2":
                            update_query = (
                                entity["https://w3id.org/oc/ontology/hasUpdateQuery"][0]["@value"]
                                .replace(
                                    "DELETE DATA { GRAPH <https://w3id.org/oc/meta/ra/> { ",
                                    "",
                                )
                                .replace(" . } }", "")
                                .replace("\n", "")
                                .split(" .")
                            )
                            self.assertEqual(
                                set(update_query),
                                {
                                    '<https://w3id.org/oc/meta/ra/06205> <http://xmlns.com/foaf/0.1/name> "Wiley"^^<http://www.w3.org/2001/XMLSchema#string>',
                                    "<https://w3id.org/oc/meta/ra/06205> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06201>",
                                    "<https://w3id.org/oc/meta/ra/06205> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06105>",
                                    "<https://w3id.org/oc/meta/ra/06205> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Agent>",
                                },
                            )

    def test_delete_entity_with_inferred_type(self):
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")
        endpoint = SPARQLWrapper(SERVER)

        # Remove the type from the entity
        delete_type_query = """
        DELETE {
            GRAPH <https://w3id.org/oc/meta/br/> {
                <https://w3id.org/oc/meta/br/06105> a <http://purl.org/spar/fabio/Expression> .
            }
        }
        WHERE {
            GRAPH <https://w3id.org/oc/meta/br/> {
                <https://w3id.org/oc/meta/br/06105> a <http://purl.org/spar/fabio/Expression> .
            }
        }
        """
        endpoint.setQuery(delete_type_query)
        endpoint.setMethod(POST)
        endpoint.query()
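
        # With the explicit rdf:type triple removed, MetaEditor has to infer the
        # entity's type before it can delete br/06105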

        # Ensure the entity exists before deletion
        select_query = """
        SELECT ?s WHERE {
            GRAPH <https://w3id.org/oc/meta/br/> {
                ?s <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date> .
            }
        }
        """
        endpoint.setQuery(select_query)
        endpoint.setReturnFormat("json")
        result = endpoint.query().convert()
        self.assertEqual(len(result["results"]["bindings"]), 1)

        # Perform the deletion
        editor.delete(URIRef("https://w3id.org/oc/meta/br/06105"))

        # Ensure the entity is deleted
        result = endpoint.query().convert()
        self.assertEqual(len(result["results"]["bindings"]), 0)

        # Verify the provenance information
        prov_path = os.path.join(
            OUTPUT, "rdf", "br", "0610", "10000", "1000", "prov", "se.json"
        )
        with open(prov_path, "r", encoding="utf-8") as f:
            prov_data = json.load(f)
        br_06105_prov_se_2 = None
        br_06105_prov_se_1 = None
        for graph in prov_data:
            for entity in graph["@graph"]:
                if entity["@id"] == "https://w3id.org/oc/meta/br/06105/prov/se/2":
                    br_06105_prov_se_2 = entity
                if entity["@id"] == "https://w3id.org/oc/meta/br/06105/prov/se/1":
                    br_06105_prov_se_1 = entity

        self.assertIsNotNone(br_06105_prov_se_2)
        self.assertEqual(
            br_06105_prov_se_2["http://purl.org/dc/terms/description"][0]["@value"],
            "The entity 'https://w3id.org/oc/meta/br/06105' has been deleted.",
        )
        self.assertEqual(br_06105_prov_se_2["@type"][0], "http://www.w3.org/ns/prov#Entity")
        self.assertEqual(
            br_06105_prov_se_2["http://www.w3.org/ns/prov#specializationOf"][0]["@id"],
            "https://w3id.org/oc/meta/br/06105",
        )
        self.assertEqual(
            br_06105_prov_se_2["http://www.w3.org/ns/prov#wasAttributedTo"][0]["@id"],
            "https://orcid.org/0000-0002-8420-0696",
        )
        self.assertIn("http://www.w3.org/ns/prov#invalidatedAtTime", br_06105_prov_se_2)
        self.assertIn("http://www.w3.org/ns/prov#generatedAtTime", br_06105_prov_se_2)
        self.assertEqual(
            len(br_06105_prov_se_1["http://www.w3.org/ns/prov#generatedAtTime"]), 1
        )
        self.assertIn("https://w3id.org/oc/ontology/hasUpdateQuery", br_06105_prov_se_2)
        update_query_value = br_06105_prov_se_2[
            "https://w3id.org/oc/ontology/hasUpdateQuery"
        ][0]["@value"]
        update_query_triples = (
            update_query_value.replace(
                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { ", ""
            )
            .replace(" } }", "")
            .strip()
        )
        actual_triples = set(
            triple.strip()
            for triple in update_query_triples.split(" .")
            if triple.strip()
        )
        expected_triples = {
            "<https://w3id.org/oc/meta/br/06105> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/06106>",
            '<https://w3id.org/oc/meta/br/06105> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date>',
        }
        self.assertEqual(actual_triples, expected_triples)

        self.assertIsNotNone(br_06105_prov_se_1)
        self.assertEqual(
            br_06105_prov_se_1["http://purl.org/dc/terms/description"][0]["@value"],
            "The entity 'https://w3id.org/oc/meta/br/06105' has been created.",
        )
        self.assertEqual(br_06105_prov_se_1["@type"][0], "http://www.w3.org/ns/prov#Entity")
        self.assertEqual(
            br_06105_prov_se_1["http://www.w3.org/ns/prov#specializationOf"][0]["@id"],
            "https://w3id.org/oc/meta/br/06105",
        )
        self.assertEqual(
            br_06105_prov_se_1["http://www.w3.org/ns/prov#wasAttributedTo"][0]["@id"],
            "https://w3id.org/oc/meta/prov/pa/1",
        )
        self.assertIn("http://www.w3.org/ns/prov#generatedAtTime", br_06105_prov_se_1)
        self.assertEqual(
            len(br_06105_prov_se_1["http://www.w3.org/ns/prov#generatedAtTime"]), 1
        )
        self.assertEqual(
            len(br_06105_prov_se_2["http://www.w3.org/ns/prov#invalidatedAtTime"]), 1
        )
        self.assertIn("http://www.w3.org/ns/prov#hadPrimarySource", br_06105_prov_se_1)
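
        # Reinserting the date and deleting the entity a second time should produce
        # a third snapshot (se/3) derived from se/2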
        # Reinsert the publication date
        sparql_update_query = """
        INSERT DATA {
            GRAPH <https://w3id.org/oc/meta/br/> {
                <https://w3id.org/oc/meta/br/06105> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date> .
            }
        }
        """
        endpoint = SPARQLWrapper(SERVER)
        endpoint.setQuery(sparql_update_query)
        endpoint.setMethod(POST)
        endpoint.query()

        # Perform the deletion again
        editor.delete(URIRef("https://w3id.org/oc/meta/br/06105"))

        # Verify the provenance graph for the entity
        prov_path = os.path.join(
            OUTPUT, "rdf", "br", "0610", "10000", "1000", "prov", "se.json"
        )
        with open(prov_path, "r", encoding="utf-8") as f:
            prov_data = json.load(f)
        for graph in prov_data:
            for entity in graph["@graph"]:
                if "https://w3id.org/oc/meta/br/06105" in entity["@id"]:
                    if entity["@id"] == "https://w3id.org/oc/meta/br/06105/prov/se/1":
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#generatedAtTime"]), 1
                        )
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#invalidatedAtTime"]), 1
                        )
                    elif entity["@id"] == "https://w3id.org/oc/meta/br/06105/prov/se/2":
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#generatedAtTime"]), 1
                        )
                        # self.assertEqual(len(entity['http://www.w3.org/ns/prov#invalidatedAtTime']), 2)
                    elif entity["@id"] == "https://w3id.org/oc/meta/br/06105/prov/se/3":
                        self.assertEqual(
                            entity["http://purl.org/dc/terms/description"][0]["@value"],
                            "The entity 'https://w3id.org/oc/meta/br/06105' has been deleted.",
                        )
                        self.assertIn("https://w3id.org/oc/ontology/hasUpdateQuery", entity)
                        update_query_value = entity[
                            "https://w3id.org/oc/ontology/hasUpdateQuery"
                        ][0]["@value"]
                        update_query_triples = (
                            update_query_value.replace(
                                "DELETE DATA { GRAPH <https://w3id.org/oc/meta/br/> { ", ""
                            )
                            .replace(" } }", "")
                            .strip()
                        )
                        actual_triples = set(
                            triple.strip()
                            for triple in update_query_triples.split(" .")
                            if triple.strip()
                        )
                        expected_triples = {
                            '<https://w3id.org/oc/meta/br/06105> <http://prismstandard.org/namespaces/basic/2.0/publicationDate> "2024-04-14"^^<http://www.w3.org/2001/XMLSchema#date>'
                        }
                        self.assertEqual(actual_triples, expected_triples)
                        self.assertEqual(entity["@type"][0], "http://www.w3.org/ns/prov#Entity")
                        self.assertEqual(
                            entity["http://www.w3.org/ns/prov#specializationOf"][0]["@id"],
                            "https://w3id.org/oc/meta/br/06105",
                        )
                        self.assertEqual(
                            entity["http://www.w3.org/ns/prov#wasAttributedTo"][0]["@id"],
                            "https://orcid.org/0000-0002-8420-0696",
                        )
                        self.assertIn("http://www.w3.org/ns/prov#invalidatedAtTime", entity)
                        self.assertIn("http://www.w3.org/ns/prov#generatedAtTime", entity)
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#generatedAtTime"]), 1
                        )
                        self.assertEqual(
                            len(entity["http://www.w3.org/ns/prov#invalidatedAtTime"]), 1
                        )
                        self.assertEqual(
                            entity["http://www.w3.org/ns/prov#wasDerivedFrom"][0]["@id"],
                            "https://w3id.org/oc/meta/br/06105/prov/se/2",
                        )

    def test_no_rdf_files_generation(self):
        """Test that when generate_rdf_files is False, data is still updated in the
        triplestore but not in the RDF files."""
        with open(META_CONFIG, encoding="utf-8") as file:
            settings = yaml.full_load(file)
        self.original_generate_rdf_files = settings.get("generate_rdf_files", True)

        settings["generate_rdf_files"] = False
        with open(META_CONFIG, "w", encoding="utf-8") as file:
            yaml.dump(settings, file)

        os.makedirs(os.path.join(OUTPUT, "rdf", "br", "0610", "10000"), exist_ok=True)

        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")

        self.assertFalse(editor.generate_rdf_files, "generate_rdf_files should be False")

        g_set = GraphSet(base_iri="https://w3id.org/oc/meta/")
        br = g_set.add_br(
            res=URIRef("https://w3id.org/oc/meta/br/06103"),
            resp_agent="https://orcid.org/0000-0002-8420-0696",
        )
        br.has_title("Original Title")
        editor.save(g_set)

        editor.update_property(
            URIRef("https://w3id.org/oc/meta/br/06103"),
            "has_title",
            "New Test Title",
        )

        sparql = SPARQLWrapper(SERVER)
        sparql.setQuery("""
            SELECT ?p ?o
            WHERE {
                GRAPH ?g {
                    <https://w3id.org/oc/meta/br/06103> ?p ?o .
                }
            }
        """)
        sparql.setReturnFormat(JSON)
        debug_result = sparql.queryAndConvert()

        title_found = False
        if debug_result["results"]["bindings"]:
            for binding in debug_result["results"]["bindings"]:
                predicate = binding.get("p", {}).get("value")
                obj = binding.get("o", {}).get("value")

                # Check whether this is the title property with the expected value
                if predicate == "http://purl.org/dc/terms/title" and obj == "New Test Title":
                    title_found = True
        else:
            print("No properties found for BR/06103")

        self.assertTrue(title_found, "Title update not found in triplestore")

        prov_sparql = SPARQLWrapper(PROV_SERVER)
        prov_sparql.setQuery("""
            ASK {
                ?s <http://www.w3.org/ns/prov#specializationOf> <https://w3id.org/oc/meta/br/06103> .
            }
        """)
        prov_sparql.setReturnFormat(JSON)
        prov_result = prov_sparql.queryAndConvert()
        self.assertTrue(prov_result["boolean"], "Provenance for BR/06103 not found in triplestore")

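        # With generate_rdf_files disabled, the on-disk RDF file (if it exists at
        # all) must not contain the update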
        target_file = os.path.join(OUTPUT, "rdf", "br", "0610", "10000", "1000.json")
        if os.path.exists(target_file):
            with open(target_file, "r", encoding="utf-8") as file:
                try:
                    data = json.load(file)
                    contains_update = False
                    for graph in data:
                        for entity in graph.get("@graph", []):
                            if entity.get("@id") == "https://w3id.org/oc/meta/br/06103":
                                titles = entity.get("http://purl.org/dc/terms/title", [])
                                for title in titles:
                                    if title.get("@value") == "New Test Title":
                                        contains_update = True
                                        break
                    self.assertFalse(contains_update, "RDF file should not contain the update")
                except json.JSONDecodeError:
                    pass

    def test_merge_caches_entities(self):
        """Verify that entities are correctly cached across successive merges."""
        base_iri = "https://w3id.org/oc/meta/"
        resp_agent = "https://orcid.org/0000-0002-8420-0696"
        g_set = GraphSet(
            base_iri,
            supplier_prefix="0620",
            wanted_label=False,
            custom_counter_handler=self.counter_handler,
        )
        endpoint = "http://127.0.0.1:8805/sparql"

        # Prepare the test entities
        ra = g_set.add_ra(
            resp_agent=resp_agent, res=URIRef("https://w3id.org/oc/meta/ra/06205")
        )
        ra.has_name("Wiley")

        reader = Reader()
        id_06105 = reader.import_entity_from_triplestore(
            g_set,
            endpoint,
            URIRef("https://w3id.org/oc/meta/id/06105"),
            resp_agent,
            enable_validation=False,
        )
        id_06203 = g_set.add_id(resp_agent=resp_agent)
        id_06203.create_crossref("313")

        ra.has_identifier(id_06105)
        ra.has_identifier(id_06203)

        # Generate provenance
        provset = ProvSet(
            g_set,
            base_iri,
            wanted_label=False,
            supplier_prefix="0620",
            custom_counter_handler=self.counter_handler,
        )
        provset.generate_provenance()

        # Store and upload the data
        rdf_dir = os.path.join(OUTPUT, "rdf") + os.sep
        graph_storer = Storer(g_set, dir_split=10000, n_file_item=1000, zip_output=False)
        prov_storer = Storer(provset, dir_split=10000, n_file_item=1000, zip_output=False)

        graph_storer.store_all(rdf_dir, base_iri)
        prov_storer.store_all(rdf_dir, base_iri)
        graph_storer.upload_all(endpoint)
        g_set.commit_changes()

        # Run the cache test
        editor = MetaEditor(META_CONFIG, "https://orcid.org/0000-0002-8420-0696")

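        # merge() is expected to record every entity it touches in
        # editor.entity_cache, presumably so that successive merges can skip
        # re-importing those entities from the triplestore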
        # First merge
        editor.merge(
            g_set,
            URIRef("https://w3id.org/oc/meta/ra/06107"),
            URIRef("https://w3id.org/oc/meta/ra/06205"),
        )

        # Verify that the main entities are cached
        self.assertTrue(
            editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/ra/06107"))
        )
        self.assertTrue(
            editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/ra/06205"))
        )

        # Verify that the related entities are cached
        self.assertTrue(
            editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/id/06201"))
        )
        self.assertTrue(
            editor.entity_cache.is_cached(URIRef("https://w3id.org/oc/meta/id/06105"))
        )

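
# A minimal sanity check of the EntityCache API exercised above (add / is_cached / clear)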
class TestEntityCache(unittest.TestCase):
    def setUp(self):
        self.cache = EntityCache()
        self.entity = URIRef("https://w3id.org/oc/meta/ra/06107")

    def test_add_and_is_cached(self):
        self.assertFalse(self.cache.is_cached(self.entity))
        self.cache.add(self.entity)
        self.assertTrue(self.cache.is_cached(self.entity))

    def test_clear(self):
        self.cache.add(self.entity)
        self.cache.clear()
        self.assertFalse(self.cache.is_cached(self.entity))


if __name__ == "__main__":  # pragma: no cover
    unittest.main()