Coverage for test/ResourceFinder_test.py: 96%

371 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-12-20 08:55 +0000

1import os 

2import unittest 

3 

4from oc_meta.lib.finder import ResourceFinder 

5from oc_ocdm.graph import GraphEntity 

6from rdflib import Dataset, Graph, Literal, URIRef 

7from sparqlite import SPARQLClient 

8 

9 

def get_path(path:str) -> str:
    """Return *path* with every backslash replaced by a forward slash.

    The path is not resolved or made absolute; only the separator
    characters are rewritten.
    """
    return '/'.join(path.split('\\'))

14 

def add_data_ts(server, data_path, batch_size:int=100, default_graph_uri=URIRef("http://default.graph/")):
    """Load an RDF file and upload its statements to a SPARQL endpoint.

    The file format is chosen from the extension of *data_path*:
    ``.nt`` (N-Triples), ``.nq`` (N-Quads) or ``.ttl`` (Turtle).
    Statements are sent with ``INSERT DATA`` updates, *batch_size*
    statements per request.

    Args:
        server: Endpoint passed to ``SPARQLClient``.
        data_path: Path of the RDF file; backslashes are normalised
            through ``get_path``.
        batch_size: Number of statements per update request (>= 1).
        default_graph_uri: Named graph that receives the triples of
            files carrying no graph information (``.nt`` and ``.ttl``).

    Raises:
        ValueError: If the extension is not supported or *batch_size*
            is smaller than 1.
    """
    f_path = get_path(data_path)
    file_extension = os.path.splitext(f_path)[1].lower()

    if file_extension == '.nq':
        dataset = Dataset()
        dataset.parse(location=f_path, format='nquads')
        quads = list(dataset.quads((None, None, None, None)))
    elif file_extension in ('.nt', '.ttl'):
        graph = Graph()
        rdf_format = 'nt' if file_extension == '.nt' else 'turtle'
        graph.parse(location=f_path, format=rdf_format)
        # Triple-only formats carry no context: route everything to the
        # default graph. Turtle files used to be parsed and then dropped
        # because only '.nt' and '.nq' were collected.
        quads = [(subj, pred, obj, default_graph_uri) for subj, pred, obj in graph]
    else:
        raise ValueError(f"Unsupported file extension: {file_extension}")

    if batch_size < 1:
        # A negative step would make range() empty and silently upload nothing.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    with SPARQLClient(server, timeout=60) as client:
        for start in range(0, len(quads), batch_size):
            statements = []
            for subj, pred, obj, ctx in quads[start:start + batch_size]:
                triple = f"{subj.n3()} {pred.n3()} {obj.n3()}"
                if ctx:
                    # The context's n3() form may be wrapped in square
                    # brackets; strip them to obtain a plain <iri>.
                    graph_ref = ctx.n3().replace('[', '').replace(']', '')
                    statements.append(f"GRAPH {graph_ref} {{ {triple} }} ")
                else:
                    statements.append(f"{triple} . ")

            triples_str = "".join(statements)
            client.update(f"INSERT DATA {{ {triples_str} }}")

52 

def reset_server(server) -> None:
    """Clear the br, ra, re, id and ar named graphs on the SPARQL endpoint *server*."""
    graphs_to_clear = {
        'https://w3id.org/oc/meta/br/',
        'https://w3id.org/oc/meta/ra/',
        'https://w3id.org/oc/meta/re/',
        'https://w3id.org/oc/meta/id/',
        'https://w3id.org/oc/meta/ar/',
    }
    with SPARQLClient(server, timeout=60) as client:
        for graph_iri in graphs_to_clear:
            client.update(f'CLEAR GRAPH <{graph_iri}>')

57 

class TestResourceFinder(unittest.TestCase):
    """Tests for ``ResourceFinder`` lookups against a local SPARQL endpoint.

    ``setUpClass`` resets the endpoint, uploads ``real_data.nt`` and
    preloads a shared finder. The ``test_retrieve_ra_sequence_*`` tests
    additionally write hand-made triples straight into the shared
    ``finder.local_g``; each of them uses its own BR/AR/RA numbers.
    """

    @classmethod
    def setUpClass(cls):
        endpoint = 'http://127.0.0.1:8805/sparql'
        base_iri = 'https://w3id.org/oc/meta/'
        real_data_file = os.path.join('test', 'testcases', 'ts', 'real_data.nt')
        cls.finder = ResourceFinder(endpoint, base_iri, Graph())
        # Start from an empty triplestore, then upload the fixture data.
        reset_server(server=endpoint)
        add_data_ts(server=endpoint, data_path=real_data_file)
        cls.finder.get_everything_about_res(
            metavals={
                'omid:br/2373', 'omid:br/2380', 'omid:br/2730', 'omid:br/2374',
                'omid:br/4435', 'omid:br/4436', 'omid:br/4437', 'omid:br/4438',
                'omid:br/0604750', 'omid:br/0605379', 'omid:br/0606696',
            },
            identifiers={'doi:10.1001/.391', 'orcid:0000-0001-6994-8412'},
            vvis={},
        )

    # ------------------------------------------------------------------
    # Private helpers (names do not start with "test", so unittest
    # does not collect them).
    # ------------------------------------------------------------------

    @staticmethod
    def _uri(kind, number):
        """Build the URIRef ``https://w3id.org/oc/meta/<kind>/<number>``."""
        return URIRef(f'https://w3id.org/oc/meta/{kind}/{number}')

    @staticmethod
    def _ar_ids(sequence):
        """Return the first key of every dict in *sequence*."""
        return [list(item)[0] for item in sequence]

    def _add_role(self, br_uri, ar_uri, role, ra_uri=None, next_ars=(), ra_names=()):
        """Insert one agent role into the finder's local graph.

        Triples are added in a fixed order: BR->AR link, role, holder
        (when *ra_uri* is given), every ``has_next`` target in
        *next_ars*, then one literal per ``(predicate, text)`` pair in
        *ra_names* attached to *ra_uri*.
        """
        local_g = self.finder.local_g
        local_g.add((br_uri, GraphEntity.iri_is_document_context_for, ar_uri))
        local_g.add((ar_uri, GraphEntity.iri_with_role, role))
        if ra_uri is not None:
            local_g.add((ar_uri, GraphEntity.iri_is_held_by, ra_uri))
        for following_ar in next_ars:
            local_g.add((ar_uri, GraphEntity.iri_has_next, following_ar))
        for predicate, text in ra_names:
            local_g.add((ra_uri, predicate, Literal(text)))

    # ------------------------------------------------------------------
    # Lookups against the uploaded fixture data
    # ------------------------------------------------------------------

    def test_retrieve_br_from_id(self):
        found = self.finder.retrieve_br_from_id('doi', '10.1001/.391')
        self.assertEqual(found, [(
            '2373',
            'Treatment Of Excessive Anticoagulation With Phytonadione (Vitamin K): A Meta-analysis',
            [('2239', 'doi:10.1001/.391')]
        )])

    def test_retrieve_br_from_id_multiple_ids(self):
        found = self.finder.retrieve_br_from_id('doi', '10.1001/.405')
        self.assertEqual(found, [(
            '2374',
            "Neutropenia In Human Immunodeficiency Virus Infection: Data From The Women's Interagency HIV Study",
            [('2240', 'doi:10.1001/.405'), ('5000', 'doi:10.1001/.406')]
        )])

    def test_retrieve_br_from_meta(self):
        found = self.finder.retrieve_br_from_meta('2373')
        self.assertEqual(
            found,
            ('Treatment Of Excessive Anticoagulation With Phytonadione (Vitamin K): A Meta-analysis', [('2239', 'doi:10.1001/.391')], True),
        )

    def test_retrieve_br_from_meta_multiple_ids(self):
        raw = self.finder.retrieve_br_from_meta('2374')
        # Only the title and the identifiers are compared; the identifiers
        # are turned into a set so their order is irrelevant.
        found = (raw[0], set(raw[1]))
        self.assertEqual(
            found,
            ("Neutropenia In Human Immunodeficiency Virus Infection: Data From The Women's Interagency HIV Study", {('2240', 'doi:10.1001/.405'), ('5000', 'doi:10.1001/.406')}),
        )

    def test_retrieve_metaid_from_id(self):
        found = self.finder.retrieve_metaid_from_id('doi', '10.1001/.391')
        self.assertEqual(found, '2239')

    def test_retrieve_ra_from_meta(self):
        found = self.finder.retrieve_ra_from_meta('3308')
        self.assertEqual(found, ('Dezee, K. J.', [], True))

    def test_retrieve_ra_from_meta_with_orcid(self):
        found = self.finder.retrieve_ra_from_meta('4940')
        self.assertEqual(found, ('Alarcon, Louis H.', [('4475', 'orcid:0000-0001-6994-8412')], True))

    def test_retrieve_ra_from_meta_if_publisher(self):
        found = self.finder.retrieve_ra_from_meta('3309')
        self.assertEqual(found, ('American Medical Association (ama)', [('4274', 'crossref:10')], True))

    def test_retrieve_ra_from_id(self):
        found = self.finder.retrieve_ra_from_id('orcid', '0000-0001-6994-8412', publisher=False)
        # The result is sorted before the comparison.
        self.assertEqual(sorted(found), [
            ('1000000', 'Alarcon, Louis H.', [('4475', 'orcid:0000-0001-6994-8412')]),
            ('4940', 'Alarcon, Louis H.', [('4475', 'orcid:0000-0001-6994-8412')])
        ])

    def test_retrieve_ra_from_id_if_publisher(self):
        found = self.finder.retrieve_ra_from_id('crossref', '10', publisher=True)
        self.assertEqual(found, [('3309', 'American Medical Association (ama)', [('4274', 'crossref:10')])])

    def test_retrieve_ra_sequence_from_br_meta(self):
        found = self.finder.retrieve_ra_sequence_from_br_meta('2380', 'author')
        self.assertEqual(found, [
            {'5343': ('Hodge, James G.', [], '3316')},
            {'5344': ('Anderson, Evan D.', [], '3317')},
            {'5345': ('Kirsch, Thomas D.', [], '3318')},
            {'5346': ('Kelen, Gabor D.', [('4278', 'orcid:0000-0002-3236-8286')], '3319')}
        ])

    def test_retrieve_re_from_br_meta(self):
        found = self.finder.retrieve_re_from_br_meta('2373')
        self.assertEqual(found, ('2011', '391-397'))

    def test_retrieve_br_info_from_meta(self):
        found = self.finder.retrieve_br_info_from_meta('2373')
        self.assertEqual(found, {
            'pub_date': '2006-02-27',
            'type': 'journal article',
            'page': ('2011', '391-397'),
            'issue': '4',
            'volume': '166',
            'venue': 'Archives Of Internal Medicine [omid:br/4387 issn:0003-9926]'
        })

    # ------------------------------------------------------------------
    # Agent-role sequences built by hand in the local graph
    # ------------------------------------------------------------------

    def test_retrieve_ra_sequence_with_loop(self):
        """Test that retrieve_ra_sequence_from_br_meta handles circular references without infinite loops"""
        br = self._uri('br', 9999)
        first_ar = self._uri('ar', 9991)
        second_ar = self._uri('ar', 9992)

        # Circular chain: AR1 -> AR2 -> AR1
        self._add_role(
            br, first_ar, GraphEntity.iri_author, self._uri('ra', 9981),
            next_ars=(second_ar,),
            ra_names=((GraphEntity.iri_given_name, 'John'), (GraphEntity.iri_family_name, 'Doe')),
        )
        self._add_role(
            br, second_ar, GraphEntity.iri_author, self._uri('ra', 9982),
            next_ars=(first_ar,),
            ra_names=((GraphEntity.iri_given_name, 'Jane'), (GraphEntity.iri_family_name, 'Smith')),
        )

        # The call must terminate and yield each AR exactly once.
        result = self.finder.retrieve_ra_sequence_from_br_meta('9999', 'author')

        self.assertEqual(len(result), 2)
        ar_ids = self._ar_ids(result)
        self.assertIn('9991', ar_ids)
        self.assertIn('9992', ar_ids)

    def test_retrieve_ra_sequence_with_self_reference(self):
        """Test that retrieve_ra_sequence_from_br_meta handles self-referencing AR"""
        br = self._uri('br', 9998)
        lone_ar = self._uri('ar', 9981)

        # The AR names itself as its own successor.
        self._add_role(
            br, lone_ar, GraphEntity.iri_author, self._uri('ra', 9971),
            next_ars=(lone_ar,),
            ra_names=((GraphEntity.iri_name, 'Test Publisher'),),
        )

        result = self.finder.retrieve_ra_sequence_from_br_meta('9998', 'author')

        # Exactly one AR, the self-reference being ignored.
        self.assertEqual(len(result), 1)
        self.assertEqual(list(result[0])[0], '9981')

    def test_retrieve_ra_sequence_with_invalid_next(self):
        """Test that retrieve_ra_sequence_from_br_meta handles invalid 'next' references"""
        br = self._uri('br', 9997)
        first_ar = self._uri('ar', 9971)
        orphan_ar = self._uri('ar', 9972)
        missing_ar = self._uri('ar', 9999)

        # AR1 points to an AR that is never described; AR2 is orphaned.
        self._add_role(
            br, first_ar, GraphEntity.iri_author, self._uri('ra', 9961),
            next_ars=(missing_ar,),
            ra_names=((GraphEntity.iri_name, 'Author One'),),
        )
        self._add_role(
            br, orphan_ar, GraphEntity.iri_author, self._uri('ra', 9962),
            ra_names=((GraphEntity.iri_name, 'Author Two'),),
        )

        result = self.finder.retrieve_ra_sequence_from_br_meta('9997', 'author')

        # At least AR1 must be returned (the chain stops at the invalid next);
        # the method finds two start candidates and picks the longest chain.
        self.assertGreaterEqual(len(result), 1)
        self.assertIn('9971', self._ar_ids(result))

    def test_retrieve_ra_sequence_with_missing_is_held_by(self):
        """Test that retrieve_ra_sequence_from_br_meta handles AR without is_held_by gracefully"""
        # Malformed data: the AR has a role but no is_held_by triple.
        self._add_role(self._uri('br', 9996), self._uri('ar', 9961), GraphEntity.iri_author)

        try:
            result = self.finder.retrieve_ra_sequence_from_br_meta('9996', 'author')
            # Any list (empty or not) is acceptable as long as nothing crashes.
            self.assertIsInstance(result, list)
        except (KeyError, UnboundLocalError) as e:
            self.fail(f"Method crashed with missing is_held_by: {e}")

    def test_retrieve_ra_sequence_with_multiple_next_values(self):
        """Test that retrieve_ra_sequence_from_br_meta handles AR with multiple 'next' relationships"""
        br = self._uri('br', 9995)
        first_ar = self._uri('ar', 9951)
        second_ar = self._uri('ar', 9952)
        third_ar = self._uri('ar', 9953)

        # Data error: AR1 declares two different successors.
        self._add_role(
            br, first_ar, GraphEntity.iri_author, self._uri('ra', 9941),
            next_ars=(second_ar, third_ar),
            ra_names=((GraphEntity.iri_name, 'Author One'),),
        )
        self._add_role(
            br, second_ar, GraphEntity.iri_author, self._uri('ra', 9942),
            ra_names=((GraphEntity.iri_name, 'Author Two'),),
        )
        self._add_role(
            br, third_ar, GraphEntity.iri_author, self._uri('ra', 9943),
            ra_names=((GraphEntity.iri_name, 'Author Three'),),
        )

        # Multiple next values should be handled consistently
        # (last one wins in current implementation).
        result = self.finder.retrieve_ra_sequence_from_br_meta('9995', 'author')

        self.assertIsInstance(result, list)
        self.assertGreater(len(result), 0)

    def test_retrieve_ra_sequence_no_ars_for_role(self):
        """Test that retrieve_ra_sequence_from_br_meta returns empty list when no ARs exist for specified role"""
        # The BR only has an editor...
        self._add_role(
            self._uri('br', 9994), self._uri('ar', 9941), GraphEntity.iri_editor, self._uri('ra', 9931),
            ra_names=((GraphEntity.iri_name, 'Editor Name'),),
        )

        # ...so asking for authors must yield nothing.
        result = self.finder.retrieve_ra_sequence_from_br_meta('9994', 'author')

        self.assertEqual(result, [])

    def test_retrieve_ra_sequence_single_ar_no_chain(self):
        """Test that retrieve_ra_sequence_from_br_meta handles single AR without 'next'"""
        self._add_role(
            self._uri('br', 9993), self._uri('ar', 9931), GraphEntity.iri_author, self._uri('ra', 9921),
            ra_names=((GraphEntity.iri_name, 'Single Author'),),
        )

        result = self.finder.retrieve_ra_sequence_from_br_meta('9993', 'author')

        self.assertEqual(len(result), 1)
        self.assertEqual(list(result[0])[0], '9931')

    def test_retrieve_ra_sequence_two_independent_chains(self):
        """Test that retrieve_ra_sequence_from_br_meta picks longest chain when multiple disconnected chains exist"""
        br = self._uri('br', 9992)
        first_ar = self._uri('ar', 9921)
        second_ar = self._uri('ar', 9922)

        # Chain 1: AR1 -> AR2 (length 2)
        self._add_role(
            br, first_ar, GraphEntity.iri_author, self._uri('ra', 9911),
            next_ars=(second_ar,),
            ra_names=((GraphEntity.iri_name, 'Author One'),),
        )
        self._add_role(
            br, second_ar, GraphEntity.iri_author, self._uri('ra', 9912),
            ra_names=((GraphEntity.iri_name, 'Author Two'),),
        )

        # Chain 2: AR3 alone (length 1, disconnected)
        self._add_role(
            br, self._uri('ar', 9923), GraphEntity.iri_author, self._uri('ra', 9913),
            ra_names=((GraphEntity.iri_name, 'Author Three'),),
        )

        result = self.finder.retrieve_ra_sequence_from_br_meta('9992', 'author')

        # The longer chain (two elements) is expected.
        self.assertEqual(len(result), 2)
        ar_ids = self._ar_ids(result)
        self.assertIn('9921', ar_ids)
        self.assertIn('9922', ar_ids)

    def test_retrieve_ra_sequence_editor_role(self):
        """Test that retrieve_ra_sequence_from_br_meta works with editor role"""
        self._add_role(
            self._uri('br', 9991), self._uri('ar', 9911), GraphEntity.iri_editor, self._uri('ra', 9901),
            ra_names=((GraphEntity.iri_name, 'Editor Name'),),
        )

        result = self.finder.retrieve_ra_sequence_from_br_meta('9991', 'editor')

        self.assertEqual(len(result), 1)
        self.assertEqual(list(result[0])[0], '9911')

    def test_retrieve_ra_sequence_publisher_role(self):
        """Test that retrieve_ra_sequence_from_br_meta works with publisher role"""
        self._add_role(
            self._uri('br', 9990), self._uri('ar', 9901), GraphEntity.iri_publisher, self._uri('ra', 9891),
            ra_names=((GraphEntity.iri_name, 'Publisher Name'),),
        )

        result = self.finder.retrieve_ra_sequence_from_br_meta('9990', 'publisher')

        self.assertEqual(len(result), 1)
        self.assertEqual(list(result[0])[0], '9901')

    def test_retrieve_ra_sequence_three_node_loop(self):
        """Test that retrieve_ra_sequence_from_br_meta handles three-node circular loop"""
        br = self._uri('br', 9989)
        first_ar = self._uri('ar', 9891)
        second_ar = self._uri('ar', 9892)
        third_ar = self._uri('ar', 9893)

        # Circular chain: AR1 -> AR2 -> AR3 -> AR1
        self._add_role(
            br, first_ar, GraphEntity.iri_author, self._uri('ra', 9881),
            next_ars=(second_ar,),
            ra_names=((GraphEntity.iri_name, 'Author One'),),
        )
        self._add_role(
            br, second_ar, GraphEntity.iri_author, self._uri('ra', 9882),
            next_ars=(third_ar,),
            ra_names=((GraphEntity.iri_name, 'Author Two'),),
        )
        self._add_role(
            br, third_ar, GraphEntity.iri_author, self._uri('ra', 9883),
            next_ars=(first_ar,),
            ra_names=((GraphEntity.iri_name, 'Author Three'),),
        )

        result = self.finder.retrieve_ra_sequence_from_br_meta('9989', 'author')

        # Each of the three ARs exactly once, the loop being broken.
        self.assertEqual(len(result), 3)
        ar_ids = self._ar_ids(result)
        self.assertIn('9891', ar_ids)
        self.assertIn('9892', ar_ids)
        self.assertIn('9893', ar_ids)

    def test_retrieve_ra_sequence_duplicate_ra(self):
        """Test that retrieve_ra_sequence_from_br_meta returns both ARs when they point to same RA"""
        br = self._uri('br', 9988)
        first_ar = self._uri('ar', 9881)
        second_ar = self._uri('ar', 9882)
        shared_ra = self._uri('ra', 9871)

        # Two chained ARs held by the same RA (duplicate author).
        self._add_role(
            br, first_ar, GraphEntity.iri_author, shared_ra,
            next_ars=(second_ar,),
            ra_names=((GraphEntity.iri_name, 'Same Author'),),
        )
        self._add_role(br, second_ar, GraphEntity.iri_author, shared_ra)

        result = self.finder.retrieve_ra_sequence_from_br_meta('9988', 'author')

        # Both ARs are returned, and the third tuple field of each is RA 9871.
        self.assertEqual(len(result), 2)
        self.assertEqual(list(result[0].values())[0][2], '9871')
        self.assertEqual(list(result[1].values())[0][2], '9871')

494 

495 

class TestVVIQueryIsolation(unittest.TestCase):
    """Test that VVI queries only search under the correct venues."""

    ENDPOINT = 'http://127.0.0.1:8805/sparql'
    BASE_IRI = 'https://w3id.org/oc/meta/'

    @classmethod
    def setUpClass(cls):
        reset_server(server=cls.ENDPOINT)

        # Fixture: two venues with different ISSNs, each with its own volume.
        venue_a_triples = (
            # Venue A (br/9001) with ISSN 1111-1111
            '<https://w3id.org/oc/meta/br/9001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/fabio/Journal> .',
            '<https://w3id.org/oc/meta/br/9001> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/9001> .',
            '<https://w3id.org/oc/meta/id/9001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/datacite/Identifier> .',
            '<https://w3id.org/oc/meta/id/9001> <http://purl.org/spar/datacite/usesIdentifierScheme> <http://purl.org/spar/datacite/issn> .',
            '<https://w3id.org/oc/meta/id/9001> <http://www.essepuntato.it/2010/06/literalreification/hasLiteralValue> "1111-1111"^^<http://www.w3.org/2001/XMLSchema#string> .',
            # Volume 10 of Venue A
            '<https://w3id.org/oc/meta/br/9002> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/fabio/JournalVolume> .',
            '<https://w3id.org/oc/meta/br/9002> <http://purl.org/vocab/frbr/core#partOf> <https://w3id.org/oc/meta/br/9001> .',
            '<https://w3id.org/oc/meta/br/9002> <http://purl.org/spar/fabio/hasSequenceIdentifier> "10"^^<http://www.w3.org/2001/XMLSchema#string> .',
        )
        venue_b_triples = (
            # Venue B (br/9003) with ISSN 2222-2222
            '<https://w3id.org/oc/meta/br/9003> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/fabio/Journal> .',
            '<https://w3id.org/oc/meta/br/9003> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/9002> .',
            '<https://w3id.org/oc/meta/id/9002> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/datacite/Identifier> .',
            '<https://w3id.org/oc/meta/id/9002> <http://purl.org/spar/datacite/usesIdentifierScheme> <http://purl.org/spar/datacite/issn> .',
            '<https://w3id.org/oc/meta/id/9002> <http://www.essepuntato.it/2010/06/literalreification/hasLiteralValue> "2222-2222"^^<http://www.w3.org/2001/XMLSchema#string> .',
            # Volume 20 of Venue B
            '<https://w3id.org/oc/meta/br/9004> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/fabio/JournalVolume> .',
            '<https://w3id.org/oc/meta/br/9004> <http://purl.org/vocab/frbr/core#partOf> <https://w3id.org/oc/meta/br/9003> .',
            '<https://w3id.org/oc/meta/br/9004> <http://purl.org/spar/fabio/hasSequenceIdentifier> "20"^^<http://www.w3.org/2001/XMLSchema#string> .',
        )

        # One update request per statement, all into the br/ named graph.
        with SPARQLClient(cls.ENDPOINT, timeout=60) as client:
            for statement in venue_a_triples + venue_b_triples:
                client.update(f"INSERT DATA {{ GRAPH <https://w3id.org/oc/meta/br/> {{ {statement} }} }}")

    def test_vvi_queries_only_search_correct_venues(self):
        """Test that VVI queries only search under venues matching each tuple's identifiers.

        This test verifies the fix for the bug where VVI queries were incorrectly
        searching under ALL venues instead of just the venues matching each VVI tuple.
        With the bug, searching for volume "10" under venue with ISSN 2222-2222 would
        also incorrectly search under venue with ISSN 1111-1111.
        """
        finder = ResourceFinder(
            self.ENDPOINT,
            self.BASE_IRI,
            Graph(),
            settings={'virtuoso_full_text_search': True},
        )

        # Each VVI tuple should only be searched under its own venue.
        vvis = {
            ("10", "", None, ("issn:1111-1111",)),  # Volume 10 of Venue A
            ("20", "", None, ("issn:2222-2222",)),  # Volume 20 of Venue B
        }
        finder.get_everything_about_res(metavals=set(), identifiers=set(), vvis=vvis)

        venue_a_uri = URIRef('https://w3id.org/oc/meta/br/9001')
        volume_10_uri = URIRef('https://w3id.org/oc/meta/br/9002')
        venue_b_uri = URIRef('https://w3id.org/oc/meta/br/9003')
        volume_20_uri = URIRef('https://w3id.org/oc/meta/br/9004')

        # Volume 10 must have a prebuilt subgraph linking it to Venue A.
        self.assertIn(volume_10_uri, finder.prebuilt_subgraphs)
        self.assertIn(
            (volume_10_uri, GraphEntity.iri_part_of, venue_a_uri),
            finder.prebuilt_subgraphs[volume_10_uri],
            "Volume 10 should be part of Venue A"
        )

        # Volume 20 must have a prebuilt subgraph linking it to Venue B.
        self.assertIn(volume_20_uri, finder.prebuilt_subgraphs)
        self.assertIn(
            (volume_20_uri, GraphEntity.iri_part_of, venue_b_uri),
            finder.prebuilt_subgraphs[volume_20_uri],
            "Volume 20 should be part of Venue B"
        )

577 

578 

# Run the whole suite when this module is executed as a script.
if __name__ == '__main__':  # pragma: no cover
    unittest.main()