Coverage for test/ResourceFinder_test.py: 96%
371 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-20 08:55 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-20 08:55 +0000
1import os
2import unittest
4from oc_meta.lib.finder import ResourceFinder
5from oc_ocdm.graph import GraphEntity
6from rdflib import Dataset, Graph, Literal, URIRef
7from sparqlite import SPARQLClient
def get_path(path: str) -> str:
    """Return *path* with every backslash replaced by a forward slash.

    The string is not resolved, normalised or made absolute; only the
    separator characters are rewritten.
    """
    return path.replace('\\', '/')
def add_data_ts(server, data_path, batch_size: int = 100, default_graph_uri=URIRef("http://default.graph/")):
    """Load an RDF file and push its statements to a SPARQL endpoint.

    The file format is chosen from the (case-insensitive) extension of
    *data_path*: ``.nt`` (N-Triples), ``.ttl`` (Turtle) or ``.nq`` (N-Quads).
    Statements from ``.nt`` and ``.ttl`` files are all placed in
    *default_graph_uri*; statements from ``.nq`` files keep the context
    reported by ``Dataset.quads``.  The data is sent as a sequence of
    ``INSERT DATA`` updates, each carrying at most *batch_size* statements.

    Args:
        server: Endpoint passed to ``SPARQLClient``.
        data_path: Path of the RDF file to upload.
        batch_size: Maximum number of statements per update request.
        default_graph_uri: Named graph used for triple-only formats.  When it
            is falsy the statements are inserted without a ``GRAPH`` clause.

    Raises:
        ValueError: If *batch_size* is smaller than 1 or the file extension
            is not one of the supported ones.
    """
    # A non-positive batch size would either upload nothing (negative step
    # yields an empty range) or fail late inside range(); reject it up front.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    f_path = get_path(data_path)
    file_extension = os.path.splitext(f_path)[1].lower()

    # Formats that carry no graph information: every triple goes to
    # default_graph_uri.  Turtle is handled here too; previously '.ttl' files
    # were parsed but their triples were never queued for upload.
    triple_formats = {'.nt': 'nt', '.ttl': 'turtle'}

    if file_extension in triple_formats:
        g = Graph()
        g.parse(location=f_path, format=triple_formats[file_extension])
        quads = [(subj, pred, obj, default_graph_uri) for subj, pred, obj in g]
    elif file_extension == '.nq':
        g = Dataset()
        g.parse(location=f_path, format='nquads')
        quads = list(g.quads((None, None, None, None)))
    else:
        raise ValueError(f"Unsupported file extension: {file_extension}")

    with SPARQLClient(server, timeout=60) as client:
        for start in range(0, len(quads), batch_size):
            statements = []
            for subj, pred, obj, ctx in quads[start:start + batch_size]:
                triple = f"{subj.n3()} {pred.n3()} {obj.n3()}"
                if ctx:
                    # Square brackets are stripped from the rendered context;
                    # presumably this guards against rdflib versions that
                    # yield the context as a Graph ("[<iri>]") - TODO confirm.
                    graph_ref = ctx.n3().replace('[', '').replace(']', '')
                    statements.append(f"GRAPH {graph_ref} {{ {triple} }}")
                else:
                    statements.append(f"{triple} .")
            payload = " ".join(statements)
            client.update(f"INSERT DATA {{ {payload} }}")
def reset_server(server) -> None:
    """Clear the ``br``, ``ra``, ``re``, ``id`` and ``ar`` graphs on *server*.

    One ``CLEAR GRAPH`` update is issued per graph under
    ``https://w3id.org/oc/meta/``, always in the order listed below.

    NOTE(review): ``http://default.graph/``, the graph ``add_data_ts`` uses
    by default for triple-only files, is not in this list and is therefore
    left untouched - confirm that this is intended.
    """
    # A tuple (not a set) keeps the order of the requests reproducible.
    graphs = (
        'https://w3id.org/oc/meta/br/',
        'https://w3id.org/oc/meta/ra/',
        'https://w3id.org/oc/meta/re/',
        'https://w3id.org/oc/meta/id/',
        'https://w3id.org/oc/meta/ar/',
    )
    with SPARQLClient(server, timeout=60) as client:
        for graph in graphs:
            client.update(f'CLEAR GRAPH <{graph}>')
class TestResourceFinder(unittest.TestCase):
    """Tests for ``ResourceFinder`` lookups.

    ``setUpClass`` clears the endpoint at ``ENDPOINT``, uploads
    ``REAL_DATA_FILE`` and pre-loads a shared finder.  The
    ``test_retrieve_ra_sequence_*`` tests additionally write synthetic
    agent-role chains straight into the finder's local graph; every such test
    uses its own BR/AR/RA numbers so the fixtures do not interfere.
    """

    ENDPOINT = 'http://127.0.0.1:8805/sparql'
    BASE_IRI = 'https://w3id.org/oc/meta/'
    REAL_DATA_FILE = os.path.join('test', 'testcases', 'ts', 'real_data.nt')

    @classmethod
    def setUpClass(cls):
        cls.finder = ResourceFinder(cls.ENDPOINT, cls.BASE_IRI, Graph())
        # Clear ts
        reset_server(server=cls.ENDPOINT)
        # Upload data
        add_data_ts(server=cls.ENDPOINT, data_path=cls.REAL_DATA_FILE)
        cls.finder.get_everything_about_res(
            metavals={
                'omid:br/2373', 'omid:br/2380', 'omid:br/2730', 'omid:br/2374',
                'omid:br/4435', 'omid:br/4436', 'omid:br/4437', 'omid:br/4438',
                'omid:br/0604750', 'omid:br/0605379', 'omid:br/0606696',
            },
            identifiers={'doi:10.1001/.391', 'orcid:0000-0001-6994-8412'},
            # An empty set, matching the set of VVI tuples passed elsewhere
            # in this module ({} would be an empty dict).
            vvis=set(),
        )

    # ------------------------------------------------------------------
    # Fixture helpers
    # ------------------------------------------------------------------

    @classmethod
    def _iri(cls, short_name, number):
        """Return the IRI ``<BASE_IRI><short_name>/<number>`` as a URIRef."""
        return URIRef(f'{cls.BASE_IRI}{short_name}/{number}')

    def _add_role(self, br_uri, ar_uri, role_iri, ra_uri=None, next_uris=()):
        """Add one agent role for *br_uri* to the finder's local graph.

        *ra_uri* is the agent holding the role (omitted to simulate malformed
        data) and *next_uris* lists the targets of the role's ``has_next``
        links (normally zero or one).
        """
        graph = self.finder.local_g
        graph.add((br_uri, GraphEntity.iri_is_document_context_for, ar_uri))
        graph.add((ar_uri, GraphEntity.iri_with_role, role_iri))
        if ra_uri is not None:
            graph.add((ar_uri, GraphEntity.iri_is_held_by, ra_uri))
        for next_uri in next_uris:
            graph.add((ar_uri, GraphEntity.iri_has_next, next_uri))

    def _name_agent(self, ra_uri, name=None, given_name=None, family_name=None):
        """Attach the given name literals to *ra_uri* in the local graph."""
        graph = self.finder.local_g
        if name is not None:
            graph.add((ra_uri, GraphEntity.iri_name, Literal(name)))
        if given_name is not None:
            graph.add((ra_uri, GraphEntity.iri_given_name, Literal(given_name)))
        if family_name is not None:
            graph.add((ra_uri, GraphEntity.iri_family_name, Literal(family_name)))

    @staticmethod
    def _ar_ids(result):
        """Return the first key of every single-entry dict in *result*."""
        return [next(iter(item)) for item in result]

    # ------------------------------------------------------------------
    # Lookups against the uploaded data
    # ------------------------------------------------------------------

    def test_retrieve_br_from_id(self):
        value = '10.1001/.391'
        schema = 'doi'
        output = self.finder.retrieve_br_from_id(schema, value)
        expected_output = [(
            '2373',
            'Treatment Of Excessive Anticoagulation With Phytonadione (Vitamin K): A Meta-analysis',
            [('2239', 'doi:10.1001/.391')]
        )]
        self.assertEqual(output, expected_output)

    def test_retrieve_br_from_id_multiple_ids(self):
        value = '10.1001/.405'
        schema = 'doi'
        output = self.finder.retrieve_br_from_id(schema, value)
        expected_output = [(
            '2374',
            "Neutropenia In Human Immunodeficiency Virus Infection: Data From The Women's Interagency HIV Study",
            [('2240', 'doi:10.1001/.405'), ('5000', 'doi:10.1001/.406')]
        )]
        self.assertEqual(output, expected_output)

    def test_retrieve_br_from_meta(self):
        metaid = '2373'
        output = self.finder.retrieve_br_from_meta(metaid)
        expected_output = ('Treatment Of Excessive Anticoagulation With Phytonadione (Vitamin K): A Meta-analysis', [('2239', 'doi:10.1001/.391')], True)
        self.assertEqual(output, expected_output)

    def test_retrieve_br_from_meta_multiple_ids(self):
        metaid = '2374'
        output = self.finder.retrieve_br_from_meta(metaid)
        # Only the title and the identifiers are compared; the identifiers
        # are turned into a set so that their order does not matter.
        output = (output[0], set(output[1]))
        expected_output = ("Neutropenia In Human Immunodeficiency Virus Infection: Data From The Women's Interagency HIV Study", {('2240', 'doi:10.1001/.405'), ('5000', 'doi:10.1001/.406')})
        self.assertEqual(output, expected_output)

    def test_retrieve_metaid_from_id(self):
        schema = 'doi'
        value = '10.1001/.391'
        output = self.finder.retrieve_metaid_from_id(schema, value)
        expected_output = '2239'
        self.assertEqual(output, expected_output)

    def test_retrieve_ra_from_meta(self):
        metaid = '3308'
        output = self.finder.retrieve_ra_from_meta(metaid)
        expected_output = ('Dezee, K. J.', [], True)
        self.assertEqual(output, expected_output)

    def test_retrieve_ra_from_meta_with_orcid(self):
        metaid = '4940'
        output = self.finder.retrieve_ra_from_meta(metaid)
        expected_output = ('Alarcon, Louis H.', [('4475', 'orcid:0000-0001-6994-8412')], True)
        self.assertEqual(output, expected_output)

    def test_retrieve_ra_from_meta_if_publisher(self):
        metaid = '3309'
        output = self.finder.retrieve_ra_from_meta(metaid)
        expected_output = ('American Medical Association (ama)', [('4274', 'crossref:10')], True)
        self.assertEqual(output, expected_output)

    def test_retrieve_ra_from_id(self):
        schema = 'orcid'
        value = '0000-0001-6994-8412'
        output = self.finder.retrieve_ra_from_id(schema, value, publisher=False)
        expected_output = [
            ('1000000', 'Alarcon, Louis H.', [('4475', 'orcid:0000-0001-6994-8412')]),
            ('4940', 'Alarcon, Louis H.', [('4475', 'orcid:0000-0001-6994-8412')])
        ]
        self.assertEqual(sorted(output), expected_output)

    def test_retrieve_ra_from_id_if_publisher(self):
        schema = 'crossref'
        value = '10'
        output = self.finder.retrieve_ra_from_id(schema, value, publisher=True)
        expected_output = [('3309', 'American Medical Association (ama)', [('4274', 'crossref:10')])]
        self.assertEqual(output, expected_output)

    def test_retrieve_ra_sequence_from_br_meta(self):
        metaid = '2380'
        output = self.finder.retrieve_ra_sequence_from_br_meta(metaid, 'author')
        expected_output = [
            {'5343': ('Hodge, James G.', [], '3316')},
            {'5344': ('Anderson, Evan D.', [], '3317')},
            {'5345': ('Kirsch, Thomas D.', [], '3318')},
            {'5346': ('Kelen, Gabor D.', [('4278', 'orcid:0000-0002-3236-8286')], '3319')}
        ]
        self.assertEqual(output, expected_output)

    def test_retrieve_re_from_br_meta(self):
        metaid = '2373'
        output = self.finder.retrieve_re_from_br_meta(metaid)
        expected_output = ('2011', '391-397')
        self.assertEqual(output, expected_output)

    def test_retrieve_br_info_from_meta(self):
        metaid = '2373'
        output = self.finder.retrieve_br_info_from_meta(metaid)
        expected_output = {
            'pub_date': '2006-02-27',
            'type': 'journal article',
            'page': ('2011', '391-397'),
            'issue': '4',
            'volume': '166',
            'venue': 'Archives Of Internal Medicine [omid:br/4387 issn:0003-9926]'
        }
        self.assertEqual(output, expected_output)

    # ------------------------------------------------------------------
    # Agent-role sequences built directly in the local graph
    # ------------------------------------------------------------------

    def test_retrieve_ra_sequence_with_loop(self):
        """Test that retrieve_ra_sequence_from_br_meta handles circular references without infinite loops"""
        br_uri = self._iri('br', '9999')
        ar1_uri = self._iri('ar', '9991')
        ar2_uri = self._iri('ar', '9992')
        ra1_uri = self._iri('ra', '9981')
        ra2_uri = self._iri('ra', '9982')

        # Create a circular AR chain: AR1 -> AR2 -> AR1 (loop)
        self._add_role(br_uri, ar1_uri, GraphEntity.iri_author, ra1_uri, next_uris=(ar2_uri,))
        self._name_agent(ra1_uri, given_name='John', family_name='Doe')
        self._add_role(br_uri, ar2_uri, GraphEntity.iri_author, ra2_uri, next_uris=(ar1_uri,))
        self._name_agent(ra2_uri, given_name='Jane', family_name='Smith')

        # This should return only 2 ARs (breaking the loop) without hanging
        result = self.finder.retrieve_ra_sequence_from_br_meta('9999', 'author')

        # Should return exactly 2 ARs (not infinite loop)
        self.assertEqual(len(result), 2)
        # Should contain both ARs
        ar_ids = self._ar_ids(result)
        self.assertIn('9991', ar_ids)
        self.assertIn('9992', ar_ids)

    def test_retrieve_ra_sequence_with_self_reference(self):
        """Test that retrieve_ra_sequence_from_br_meta handles self-referencing AR"""
        br_uri = self._iri('br', '9998')
        ar1_uri = self._iri('ar', '9981')
        ra1_uri = self._iri('ra', '9971')

        # Create AR that points to itself
        self._add_role(br_uri, ar1_uri, GraphEntity.iri_author, ra1_uri, next_uris=(ar1_uri,))
        self._name_agent(ra1_uri, name='Test Publisher')

        # This should return only 1 AR (ignoring self-reference)
        result = self.finder.retrieve_ra_sequence_from_br_meta('9998', 'author')

        # Should return exactly 1 AR
        self.assertEqual(len(result), 1)
        self.assertEqual(self._ar_ids(result)[0], '9981')

    def test_retrieve_ra_sequence_with_invalid_next(self):
        """Test that retrieve_ra_sequence_from_br_meta handles invalid 'next' references"""
        br_uri = self._iri('br', '9997')
        ar1_uri = self._iri('ar', '9971')
        ar2_uri = self._iri('ar', '9972')
        ar_invalid_uri = self._iri('ar', '9999')
        ra1_uri = self._iri('ra', '9961')
        ra2_uri = self._iri('ra', '9962')

        # Create AR chain where AR1 -> AR_INVALID (doesn't exist) and AR2 is orphaned
        self._add_role(br_uri, ar1_uri, GraphEntity.iri_author, ra1_uri, next_uris=(ar_invalid_uri,))
        self._name_agent(ra1_uri, name='Author One')
        self._add_role(br_uri, ar2_uri, GraphEntity.iri_author, ra2_uri)
        self._name_agent(ra2_uri, name='Author Two')

        # Should return chain stopping at invalid reference
        result = self.finder.retrieve_ra_sequence_from_br_meta('9997', 'author')

        # Should return at least AR1 (stops at invalid next)
        # The method will find 2 start candidates and pick the longest chain
        self.assertGreaterEqual(len(result), 1)
        self.assertIn('9971', self._ar_ids(result))

    def test_retrieve_ra_sequence_with_missing_is_held_by(self):
        """Test that retrieve_ra_sequence_from_br_meta handles AR without is_held_by gracefully"""
        br_uri = self._iri('br', '9996')
        ar1_uri = self._iri('ar', '9961')

        # Create AR without is_held_by relationship (malformed data)
        self._add_role(br_uri, ar1_uri, GraphEntity.iri_author)

        # Should handle gracefully without crash
        try:
            result = self.finder.retrieve_ra_sequence_from_br_meta('9996', 'author')
            # If it doesn't crash, check result is reasonable (either empty or handles error)
            self.assertIsInstance(result, list)
        except (KeyError, UnboundLocalError) as e:
            self.fail(f"Method crashed with missing is_held_by: {e}")

    def test_retrieve_ra_sequence_with_multiple_next_values(self):
        """Test that retrieve_ra_sequence_from_br_meta handles AR with multiple 'next' relationships"""
        br_uri = self._iri('br', '9995')
        ar1_uri = self._iri('ar', '9951')
        ar2_uri = self._iri('ar', '9952')
        ar3_uri = self._iri('ar', '9953')
        ra1_uri = self._iri('ra', '9941')
        ra2_uri = self._iri('ra', '9942')
        ra3_uri = self._iri('ra', '9943')

        # Create AR1 with multiple 'next' relationships (data error)
        self._add_role(br_uri, ar1_uri, GraphEntity.iri_author, ra1_uri, next_uris=(ar2_uri, ar3_uri))
        self._name_agent(ra1_uri, name='Author One')
        self._add_role(br_uri, ar2_uri, GraphEntity.iri_author, ra2_uri)
        self._name_agent(ra2_uri, name='Author Two')
        self._add_role(br_uri, ar3_uri, GraphEntity.iri_author, ra3_uri)
        self._name_agent(ra3_uri, name='Author Three')

        # Should handle multiple next values consistently (last one wins in current implementation)
        result = self.finder.retrieve_ra_sequence_from_br_meta('9995', 'author')

        # Should return a valid result without crashing
        self.assertIsInstance(result, list)
        self.assertGreater(len(result), 0)

    def test_retrieve_ra_sequence_no_ars_for_role(self):
        """Test that retrieve_ra_sequence_from_br_meta returns empty list when no ARs exist for specified role"""
        br_uri = self._iri('br', '9994')
        ar1_uri = self._iri('ar', '9941')
        ra1_uri = self._iri('ra', '9931')

        # Create BR with editor, but request author
        self._add_role(br_uri, ar1_uri, GraphEntity.iri_editor, ra1_uri)
        self._name_agent(ra1_uri, name='Editor Name')

        # Request author (should be empty)
        result = self.finder.retrieve_ra_sequence_from_br_meta('9994', 'author')

        self.assertEqual(result, [])

    def test_retrieve_ra_sequence_single_ar_no_chain(self):
        """Test that retrieve_ra_sequence_from_br_meta handles single AR without 'next'"""
        br_uri = self._iri('br', '9993')
        ar1_uri = self._iri('ar', '9931')
        ra1_uri = self._iri('ra', '9921')

        # Create single AR without next
        self._add_role(br_uri, ar1_uri, GraphEntity.iri_author, ra1_uri)
        self._name_agent(ra1_uri, name='Single Author')

        result = self.finder.retrieve_ra_sequence_from_br_meta('9993', 'author')

        self.assertEqual(len(result), 1)
        self.assertEqual(self._ar_ids(result)[0], '9931')

    def test_retrieve_ra_sequence_two_independent_chains(self):
        """Test that retrieve_ra_sequence_from_br_meta picks longest chain when multiple disconnected chains exist"""
        br_uri = self._iri('br', '9992')

        # Chain 1: AR1 -> AR2 (length 2)
        ar1_uri = self._iri('ar', '9921')
        ar2_uri = self._iri('ar', '9922')
        ra1_uri = self._iri('ra', '9911')
        ra2_uri = self._iri('ra', '9912')

        self._add_role(br_uri, ar1_uri, GraphEntity.iri_author, ra1_uri, next_uris=(ar2_uri,))
        self._name_agent(ra1_uri, name='Author One')
        self._add_role(br_uri, ar2_uri, GraphEntity.iri_author, ra2_uri)
        self._name_agent(ra2_uri, name='Author Two')

        # Chain 2: AR3 (length 1, disconnected)
        ar3_uri = self._iri('ar', '9923')
        ra3_uri = self._iri('ra', '9913')

        self._add_role(br_uri, ar3_uri, GraphEntity.iri_author, ra3_uri)
        self._name_agent(ra3_uri, name='Author Three')

        result = self.finder.retrieve_ra_sequence_from_br_meta('9992', 'author')

        # Should return the longer chain (chain 1 with 2 elements)
        self.assertEqual(len(result), 2)
        ar_ids = self._ar_ids(result)
        self.assertIn('9921', ar_ids)
        self.assertIn('9922', ar_ids)

    def test_retrieve_ra_sequence_editor_role(self):
        """Test that retrieve_ra_sequence_from_br_meta works with editor role"""
        br_uri = self._iri('br', '9991')
        ar1_uri = self._iri('ar', '9911')
        ra1_uri = self._iri('ra', '9901')

        self._add_role(br_uri, ar1_uri, GraphEntity.iri_editor, ra1_uri)
        self._name_agent(ra1_uri, name='Editor Name')

        result = self.finder.retrieve_ra_sequence_from_br_meta('9991', 'editor')

        self.assertEqual(len(result), 1)
        self.assertEqual(self._ar_ids(result)[0], '9911')

    def test_retrieve_ra_sequence_publisher_role(self):
        """Test that retrieve_ra_sequence_from_br_meta works with publisher role"""
        br_uri = self._iri('br', '9990')
        ar1_uri = self._iri('ar', '9901')
        ra1_uri = self._iri('ra', '9891')

        self._add_role(br_uri, ar1_uri, GraphEntity.iri_publisher, ra1_uri)
        self._name_agent(ra1_uri, name='Publisher Name')

        result = self.finder.retrieve_ra_sequence_from_br_meta('9990', 'publisher')

        self.assertEqual(len(result), 1)
        self.assertEqual(self._ar_ids(result)[0], '9901')

    def test_retrieve_ra_sequence_three_node_loop(self):
        """Test that retrieve_ra_sequence_from_br_meta handles three-node circular loop"""
        br_uri = self._iri('br', '9989')
        ar1_uri = self._iri('ar', '9891')
        ar2_uri = self._iri('ar', '9892')
        ar3_uri = self._iri('ar', '9893')
        ra1_uri = self._iri('ra', '9881')
        ra2_uri = self._iri('ra', '9882')
        ra3_uri = self._iri('ra', '9883')

        # Create circular loop: AR1 -> AR2 -> AR3 -> AR1
        self._add_role(br_uri, ar1_uri, GraphEntity.iri_author, ra1_uri, next_uris=(ar2_uri,))
        self._name_agent(ra1_uri, name='Author One')
        self._add_role(br_uri, ar2_uri, GraphEntity.iri_author, ra2_uri, next_uris=(ar3_uri,))
        self._name_agent(ra2_uri, name='Author Two')
        self._add_role(br_uri, ar3_uri, GraphEntity.iri_author, ra3_uri, next_uris=(ar1_uri,))
        self._name_agent(ra3_uri, name='Author Three')

        result = self.finder.retrieve_ra_sequence_from_br_meta('9989', 'author')

        # Should return exactly 3 ARs (breaking loop)
        self.assertEqual(len(result), 3)
        ar_ids = self._ar_ids(result)
        self.assertIn('9891', ar_ids)
        self.assertIn('9892', ar_ids)
        self.assertIn('9893', ar_ids)

    def test_retrieve_ra_sequence_duplicate_ra(self):
        """Test that retrieve_ra_sequence_from_br_meta returns both ARs when they point to same RA"""
        br_uri = self._iri('br', '9988')
        ar1_uri = self._iri('ar', '9881')
        ar2_uri = self._iri('ar', '9882')
        ra1_uri = self._iri('ra', '9871')

        # Two ARs pointing to same RA (duplicate author)
        self._add_role(br_uri, ar1_uri, GraphEntity.iri_author, ra1_uri, next_uris=(ar2_uri,))
        self._name_agent(ra1_uri, name='Same Author')
        self._add_role(br_uri, ar2_uri, GraphEntity.iri_author, ra1_uri)

        result = self.finder.retrieve_ra_sequence_from_br_meta('9988', 'author')

        # Should return both ARs even though they reference same RA
        self.assertEqual(len(result), 2)
        # Both should reference RA 9871 (third element of each entry's value)
        self.assertEqual(next(iter(result[0].values()))[2], '9871')
        self.assertEqual(next(iter(result[1].values()))[2], '9871')
class TestVVIQueryIsolation(unittest.TestCase):
    """Test that VVI queries only search under the correct venues.

    ``setUpClass`` clears the endpoint and inserts two journals with
    different ISSNs, each owning one volume, into the graph ``BR_GRAPH``.
    """

    ENDPOINT = 'http://127.0.0.1:8805/sparql'
    BASE_IRI = 'https://w3id.org/oc/meta/'
    BR_GRAPH = 'https://w3id.org/oc/meta/br/'

    @classmethod
    def setUpClass(cls):
        reset_server(server=cls.ENDPOINT)

        # Upload test data: two venues with different ISSNs, each with their own volume
        test_triples = [
            # Venue A (br/9001) with ISSN 1111-1111
            '<https://w3id.org/oc/meta/br/9001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/fabio/Journal> .',
            '<https://w3id.org/oc/meta/br/9001> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/9001> .',
            '<https://w3id.org/oc/meta/id/9001> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/datacite/Identifier> .',
            '<https://w3id.org/oc/meta/id/9001> <http://purl.org/spar/datacite/usesIdentifierScheme> <http://purl.org/spar/datacite/issn> .',
            '<https://w3id.org/oc/meta/id/9001> <http://www.essepuntato.it/2010/06/literalreification/hasLiteralValue> "1111-1111"^^<http://www.w3.org/2001/XMLSchema#string> .',
            # Volume 10 of Venue A
            '<https://w3id.org/oc/meta/br/9002> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/fabio/JournalVolume> .',
            '<https://w3id.org/oc/meta/br/9002> <http://purl.org/vocab/frbr/core#partOf> <https://w3id.org/oc/meta/br/9001> .',
            '<https://w3id.org/oc/meta/br/9002> <http://purl.org/spar/fabio/hasSequenceIdentifier> "10"^^<http://www.w3.org/2001/XMLSchema#string> .',
            # Venue B (br/9003) with ISSN 2222-2222
            '<https://w3id.org/oc/meta/br/9003> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/fabio/Journal> .',
            '<https://w3id.org/oc/meta/br/9003> <http://purl.org/spar/datacite/hasIdentifier> <https://w3id.org/oc/meta/id/9002> .',
            '<https://w3id.org/oc/meta/id/9002> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/datacite/Identifier> .',
            '<https://w3id.org/oc/meta/id/9002> <http://purl.org/spar/datacite/usesIdentifierScheme> <http://purl.org/spar/datacite/issn> .',
            '<https://w3id.org/oc/meta/id/9002> <http://www.essepuntato.it/2010/06/literalreification/hasLiteralValue> "2222-2222"^^<http://www.w3.org/2001/XMLSchema#string> .',
            # Volume 20 of Venue B
            '<https://w3id.org/oc/meta/br/9004> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/spar/fabio/JournalVolume> .',
            '<https://w3id.org/oc/meta/br/9004> <http://purl.org/vocab/frbr/core#partOf> <https://w3id.org/oc/meta/br/9003> .',
            '<https://w3id.org/oc/meta/br/9004> <http://purl.org/spar/fabio/hasSequenceIdentifier> "20"^^<http://www.w3.org/2001/XMLSchema#string> .',
        ]

        # Every triple already ends with " .", so they can all be sent in a
        # single INSERT DATA request instead of one request per triple.
        payload = ' '.join(test_triples)
        with SPARQLClient(cls.ENDPOINT, timeout=60) as client:
            client.update(f"INSERT DATA {{ GRAPH <{cls.BR_GRAPH}> {{ {payload} }} }}")

    def test_vvi_queries_only_search_correct_venues(self):
        """Test that VVI queries only search under venues matching each tuple's identifiers.

        This test verifies the fix for the bug where VVI queries were incorrectly
        searching under ALL venues instead of just the venues matching each VVI tuple.
        With the bug, searching for volume "10" under venue with ISSN 2222-2222 would
        also incorrectly search under venue with ISSN 1111-1111.
        """
        settings = {'virtuoso_full_text_search': True}
        finder = ResourceFinder(self.ENDPOINT, self.BASE_IRI, Graph(), settings=settings)

        # VVI tuples: each should only search under its corresponding venue
        vvis = {
            ("10", "", None, ("issn:1111-1111",)),  # Volume 10 of Venue A
            ("20", "", None, ("issn:2222-2222",)),  # Volume 20 of Venue B
        }

        finder.get_everything_about_res(metavals=set(), identifiers=set(), vvis=vvis)

        # Verify both volumes were found
        volume_10_uri = URIRef('https://w3id.org/oc/meta/br/9002')
        volume_20_uri = URIRef('https://w3id.org/oc/meta/br/9004')
        venue_a_uri = URIRef('https://w3id.org/oc/meta/br/9001')
        venue_b_uri = URIRef('https://w3id.org/oc/meta/br/9003')

        # Check that volume 10 is in local graph and is part of venue A (not venue B)
        self.assertIn(volume_10_uri, finder.prebuilt_subgraphs)
        volume_10_graph = finder.prebuilt_subgraphs[volume_10_uri]
        self.assertIn(
            (volume_10_uri, GraphEntity.iri_part_of, venue_a_uri),
            volume_10_graph,
            "Volume 10 should be part of Venue A"
        )

        # Check that volume 20 is in local graph and is part of venue B (not venue A)
        self.assertIn(volume_20_uri, finder.prebuilt_subgraphs)
        volume_20_graph = finder.prebuilt_subgraphs[volume_20_uri]
        self.assertIn(
            (volume_20_uri, GraphEntity.iri_part_of, venue_b_uri),
            volume_20_graph,
            "Volume 20 should be part of Venue B"
        )
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':  # pragma: no cover
    unittest.main()