Coverage for test/check_results_test.py: 99%
173 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-20 08:55 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-20 08:55 +0000
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3# Copyright 2025, Arcangelo Massari <arcangelo.massari@unibo.it>
4#
5# Permission to use, copy, modify, and/or distribute this software for any purpose
6# with or without fee is hereby granted, provided that the above copyright notice
7# and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
15# SOFTWARE.
17import unittest
18from unittest.mock import MagicMock, patch
20from oc_meta.run.meta.check_results import (
21 check_omids_existence,
22 check_provenance_existence,
23 find_file,
24 find_prov_file,
25 parse_identifiers,
26)
class TestParseIdentifiers(unittest.TestCase):
    """Checks for parse_identifiers: splitting, normalisation and bad input."""

    def test_parse_single_identifier(self):
        """One 'schema:value' token becomes a single schema/value dict."""
        parsed = parse_identifiers("doi:10.1234/test")
        self.assertEqual(parsed, [{'schema': 'doi', 'value': '10.1234/test'}])

    def test_parse_multiple_identifiers(self):
        """Space-separated tokens are returned as one dict each, in order."""
        doi_entry = {'schema': 'doi', 'value': '10.1234/test'}
        isbn_entry = {'schema': 'isbn', 'value': '978-3-16-148410-0'}
        parsed = parse_identifiers("doi:10.1234/test isbn:978-3-16-148410-0")
        self.assertEqual(parsed, [doi_entry, isbn_entry])

    def test_parse_identifier_with_colon_in_value(self):
        """Only the first colon separates schema from value."""
        parsed = parse_identifiers("url:http://example.com:8080/path")
        self.assertEqual(
            parsed,
            [{'schema': 'url', 'value': 'http://example.com:8080/path'}],
        )

    def test_parse_empty_string(self):
        """An empty string produces an empty list."""
        self.assertEqual(parse_identifiers(""), [])

    def test_parse_whitespace_only(self):
        """A string made only of whitespace produces an empty list."""
        self.assertEqual(parse_identifiers(" "), [])

    def test_parse_none(self):
        """None is accepted and produces an empty list."""
        self.assertEqual(parse_identifiers(None), [])

    def test_parse_with_uppercase_schema(self):
        """An upper-case schema is returned in lower case."""
        parsed = parse_identifiers("DOI:10.1234/test")
        self.assertEqual(parsed, [{'schema': 'doi', 'value': '10.1234/test'}])

    def test_parse_malformed_identifier(self):
        """A token without any colon is dropped from the result."""
        self.assertEqual(parse_identifiers("malformed"), [])
class TestFindFile(unittest.TestCase):
    """Checks for find_file: mapping an entity URI to its RDF file path."""

    def _locate(self, uri, zipped=True):
        """Call find_file with the fixed layout settings shared by every test."""
        return find_file(
            rdf_dir="/base/rdf",
            dir_split_number=10000,
            items_per_file=1000,
            uri=uri,
            zip_output_rdf=zipped,
        )

    def test_find_file_zip_format(self):
        """With zip output enabled the path ends in '.zip'."""
        located = self._locate("https://w3id.org/oc/meta/br/0605", zipped=True)
        self.assertEqual(located, "/base/rdf/br/060/10000/1000.zip")

    def test_find_file_json_format(self):
        """With zip output disabled the path ends in '.json'."""
        located = self._locate("https://w3id.org/oc/meta/br/0605", zipped=False)
        self.assertEqual(located, "/base/rdf/br/060/10000/1000.json")

    def test_find_file_with_subfolder(self):
        """Number 12345 lands in directory 20000 and file 13000."""
        located = self._locate("https://w3id.org/oc/meta/br/06012345")
        self.assertEqual(located, "/base/rdf/br/060/20000/13000.zip")

    def test_find_file_different_entity_type(self):
        """The entity short name from the URI ('ra') is used in the path."""
        located = self._locate("https://w3id.org/oc/meta/ra/0605")
        self.assertEqual(located, "/base/rdf/ra/060/10000/1000.zip")

    def test_find_file_invalid_uri(self):
        """A string that is not an entity URI yields None."""
        self.assertIsNone(self._locate("invalid-uri"))

    def test_find_file_boundary_values(self):
        """Number 10000 sits exactly on the directory and file boundary."""
        located = self._locate("https://w3id.org/oc/meta/br/06010000")
        self.assertEqual(located, "/base/rdf/br/060/10000/10000.zip")
class TestFindProvFile(unittest.TestCase):
    """Checks for find_prov_file: locating the provenance archive of a data file."""

    # Data archive path shared by all tests in this class.
    DATA_ZIP = "/base/rdf/br/060/10000/1000.zip"

    def test_find_prov_file_exists(self):
        """When os.path.exists reports True the prov/se.zip path is returned."""
        with patch('os.path.exists', return_value=True):
            prov_path = find_prov_file(self.DATA_ZIP)
            self.assertEqual(prov_path, "/base/rdf/br/060/10000/1000/prov/se.zip")

    def test_find_prov_file_not_exists(self):
        """When os.path.exists reports False the result is None."""
        with patch('os.path.exists', return_value=False):
            prov_path = find_prov_file(self.DATA_ZIP)
            self.assertIsNone(prov_path)

    def test_find_prov_file_with_exception(self):
        """An exception raised by os.path.dirname results in None, not a crash."""
        with patch('os.path.dirname', side_effect=Exception("Test error")):
            prov_path = find_prov_file(self.DATA_ZIP)
            self.assertIsNone(prov_path)
class TestCheckOMIDsExistence(unittest.TestCase):
    """Checks for check_omids_existence against a mocked SPARQLClient."""

    # Endpoint URL handed to the function under test; never contacted.
    ENDPOINT = "http://example.com/sparql"

    @staticmethod
    def _wire_client(sparql_cls):
        """Return the mock yielded when the patched SPARQLClient is entered."""
        client = MagicMock()
        sparql_cls.return_value.__enter__.return_value = client
        return client

    @staticmethod
    def _response(*omid_uris):
        """Build a SPARQL JSON result with one 'omid' binding per given URI."""
        rows = [{"omid": {"value": uri}} for uri in omid_uris]
        return {"results": {"bindings": rows}}

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_single_identifier_found(self, mock_sparql_client):
        """One binding maps the identifier to a one-element OMID set."""
        client = self._wire_client(mock_sparql_client)
        client.query.return_value = self._response(
            "https://w3id.org/oc/meta/br/0601"
        )

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.1234/test'}], self.ENDPOINT
        )

        self.assertEqual(
            found, {'doi:10.1234/test': {'https://w3id.org/oc/meta/br/0601'}}
        )
        client.query.assert_called_once()

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_identifier_not_found(self, mock_sparql_client):
        """No bindings maps the identifier to an empty set."""
        client = self._wire_client(mock_sparql_client)
        client.query.return_value = self._response()

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.9999/notfound'}], self.ENDPOINT
        )

        self.assertEqual(found, {'doi:10.9999/notfound': set()})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_multiple_omids_for_identifier(self, mock_sparql_client):
        """Two bindings for one identifier are both collected in its set."""
        first_omid = "https://w3id.org/oc/meta/br/0601"
        second_omid = "https://w3id.org/oc/meta/br/0602"
        client = self._wire_client(mock_sparql_client)
        client.query.return_value = self._response(first_omid, second_omid)

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.1234/duplicate'}], self.ENDPOINT
        )

        self.assertEqual(
            found,
            {
                'doi:10.1234/duplicate': {
                    'https://w3id.org/oc/meta/br/0601',
                    'https://w3id.org/oc/meta/br/0602',
                }
            },
        )

    def test_check_empty_identifiers_list(self):
        """An empty identifier list gives an empty mapping."""
        self.assertEqual(check_omids_existence([], self.ENDPOINT), {})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_sparql_exception_handling(self, mock_sparql_client):
        """A failing query still yields the identifier, mapped to an empty set."""
        client = self._wire_client(mock_sparql_client)
        client.query.side_effect = Exception("SPARQL endpoint unavailable")

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.1234/test'}], self.ENDPOINT
        )

        # The error must not propagate; the identifier simply has no OMIDs.
        self.assertEqual(found, {'doi:10.1234/test': set()})
class TestCheckProvenanceExistence(unittest.TestCase):
    """Checks for check_provenance_existence against a mocked SPARQLClient."""

    # Provenance endpoint URL handed to the function under test; never contacted.
    PROV_ENDPOINT = "http://example.com/prov-sparql"

    @staticmethod
    def _wire_client(sparql_cls):
        """Return the mock yielded when the patched SPARQLClient is entered."""
        client = MagicMock()
        sparql_cls.return_value.__enter__.return_value = client
        return client

    @staticmethod
    def _response(*omid_uris):
        """Build a SPARQL JSON result with one 'omid' binding per given URI."""
        rows = [{"omid": {"value": uri}} for uri in omid_uris]
        return {"results": {"bindings": rows}}

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_exists(self, mock_sparql_client):
        """An OMID present in the bindings is reported as True."""
        client = self._wire_client(mock_sparql_client)
        client.query.return_value = self._response(
            "https://w3id.org/oc/meta/br/0601"
        )

        status = check_provenance_existence(
            ["https://w3id.org/oc/meta/br/0601"], self.PROV_ENDPOINT
        )

        self.assertEqual(status, {"https://w3id.org/oc/meta/br/0601": True})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_not_exists(self, mock_sparql_client):
        """An OMID absent from the bindings is reported as False."""
        client = self._wire_client(mock_sparql_client)
        client.query.return_value = self._response()

        status = check_provenance_existence(
            ["https://w3id.org/oc/meta/br/0601"], self.PROV_ENDPOINT
        )

        self.assertEqual(status, {"https://w3id.org/oc/meta/br/0601": False})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_multiple_omids_mixed_results(self, mock_sparql_client):
        """Only the OMIDs returned by the query are flagged as having provenance."""
        client = self._wire_client(mock_sparql_client)
        client.query.return_value = self._response(
            "https://w3id.org/oc/meta/br/0601",
            "https://w3id.org/oc/meta/br/0603",
        )

        requested = [
            "https://w3id.org/oc/meta/br/0601",
            "https://w3id.org/oc/meta/br/0602",
            "https://w3id.org/oc/meta/br/0603",
        ]
        status = check_provenance_existence(requested, self.PROV_ENDPOINT)

        # 0601 and 0603 appear in the mocked bindings; 0602 does not.
        self.assertTrue(status["https://w3id.org/oc/meta/br/0601"])
        self.assertFalse(status["https://w3id.org/oc/meta/br/0602"])
        self.assertTrue(status["https://w3id.org/oc/meta/br/0603"])

    def test_check_empty_omids_list(self):
        """An empty OMID list gives an empty mapping."""
        self.assertEqual(
            check_provenance_existence([], self.PROV_ENDPOINT), {}
        )

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_batching(self, mock_sparql_client):
        """Twenty-five OMIDs trigger three queries and are all reported False."""
        # 25 OMIDs; three queries are expected, i.e. batches of 10, 10 and 5
        # (assumes the module under test uses a batch size of 10).
        requested = [
            f"https://w3id.org/oc/meta/br/06{i:02d}" for i in range(1, 26)
        ]

        client = self._wire_client(mock_sparql_client)
        client.query.return_value = self._response()

        status = check_provenance_existence(requested, self.PROV_ENDPOINT)

        self.assertEqual(client.query.call_count, 3)

        # Every requested OMID is present in the result and none has provenance.
        self.assertEqual(len(status), 25)
        self.assertTrue(all(not has_prov for has_prov in status.values()))

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_exception_handling(self, mock_sparql_client):
        """A failing query still yields the OMID, reported as False."""
        client = self._wire_client(mock_sparql_client)
        client.query.side_effect = Exception("SPARQL endpoint unavailable")

        status = check_provenance_existence(
            ["https://w3id.org/oc/meta/br/0601"], self.PROV_ENDPOINT
        )

        # The error must not propagate; the OMID is simply marked as lacking
        # provenance.
        self.assertEqual(status, {"https://w3id.org/oc/meta/br/0601": False})
# Allow the module to be executed directly; unittest discovers the TestCase
# classes defined above and runs them.
if __name__ == '__main__':
    unittest.main()