Coverage for test/check_results_test.py: 99%
166 statements
« prev ^ index » next coverage.py v6.5.0, created at 2026-01-15 10:29 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2026-01-15 10:29 +0000
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3# Copyright 2025, Arcangelo Massari <arcangelo.massari@unibo.it>
4#
5# Permission to use, copy, modify, and/or distribute this software for any purpose
6# with or without fee is hereby granted, provided that the above copyright notice
7# and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
15# SOFTWARE.
17import unittest
18from unittest.mock import MagicMock, patch
20from oc_meta.run.meta.check_results import (
21 check_omids_existence,
22 check_provenance_existence,
23 find_file,
24 find_prov_file,
25 parse_identifiers,
26)
class TestParseIdentifiers(unittest.TestCase):
    """Checks parse_identifiers against well-formed, empty and malformed input."""

    def test_parse_single_identifier(self):
        """One 'schema:value' token produces exactly one parsed entry."""
        parsed = parse_identifiers("doi:10.1234/test")
        self.assertEqual(parsed, [{'schema': 'doi', 'value': '10.1234/test'}])

    def test_parse_multiple_identifiers(self):
        """Space-separated tokens are parsed in order, one entry per token."""
        parsed = parse_identifiers("doi:10.1234/test isbn:978-3-16-148410-0")
        doi_entry = {'schema': 'doi', 'value': '10.1234/test'}
        isbn_entry = {'schema': 'isbn', 'value': '978-3-16-148410-0'}
        self.assertEqual(parsed, [doi_entry, isbn_entry])

    def test_parse_identifier_with_colon_in_value(self):
        """Only the first colon separates schema from value."""
        parsed = parse_identifiers("url:http://example.com:8080/path")
        self.assertEqual(
            parsed,
            [{'schema': 'url', 'value': 'http://example.com:8080/path'}],
        )

    def test_parse_empty_string(self):
        """An empty string gives an empty list."""
        self.assertEqual(parse_identifiers(""), [])

    def test_parse_whitespace_only(self):
        """A string holding only whitespace gives an empty list."""
        self.assertEqual(parse_identifiers(" "), [])

    def test_parse_none(self):
        """None is accepted and gives an empty list."""
        self.assertEqual(parse_identifiers(None), [])

    def test_parse_with_uppercase_schema(self):
        """The schema part is normalised to lowercase."""
        parsed = parse_identifiers("DOI:10.1234/test")
        self.assertEqual(parsed, [{'schema': 'doi', 'value': '10.1234/test'}])

    def test_parse_malformed_identifier(self):
        """A token with no colon is dropped from the output."""
        self.assertEqual(parse_identifiers("malformed"), [])
class TestFindFile(unittest.TestCase):
    """Checks the storage path that find_file derives from an entity URI."""

    def _locate(self, uri, zip_output_rdf=True):
        """Call find_file with the layout settings shared by every test here."""
        return find_file(
            rdf_dir="/base/rdf",
            dir_split_number=10000,
            items_per_file=1000,
            uri=uri,
            zip_output_rdf=zip_output_rdf,
        )

    def test_find_file_zip_format(self):
        """With zipped output the resolved path ends in '.zip'."""
        located = self._locate("https://w3id.org/oc/meta/br/0605")
        self.assertEqual(located, "/base/rdf/br/060/10000/1000.zip")

    def test_find_file_json_format(self):
        """With unzipped output the resolved path ends in '.json'."""
        located = self._locate(
            "https://w3id.org/oc/meta/br/0605",
            zip_output_rdf=False,
        )
        self.assertEqual(located, "/base/rdf/br/060/10000/1000.json")

    def test_find_file_with_subfolder(self):
        """A larger entity number lands in later directory and file buckets."""
        located = self._locate("https://w3id.org/oc/meta/br/06012345")
        self.assertEqual(located, "/base/rdf/br/060/20000/13000.zip")

    def test_find_file_different_entity_type(self):
        """The entity type segment of the URI ('ra') is kept in the path."""
        located = self._locate("https://w3id.org/oc/meta/ra/0605")
        self.assertEqual(located, "/base/rdf/ra/060/10000/1000.zip")

    def test_find_file_invalid_uri(self):
        """A string that is not an entity URI resolves to None."""
        self.assertIsNone(self._locate("invalid-uri"))

    def test_find_file_boundary_values(self):
        """A number equal to the directory split size stays in that bucket."""
        located = self._locate("https://w3id.org/oc/meta/br/06010000")
        self.assertEqual(located, "/base/rdf/br/060/10000/10000.zip")
class TestFindProvFile(unittest.TestCase):
    """Checks how find_prov_file maps a data archive to its provenance archive."""

    # Data archive path used as input by every test in this class.
    DATA_ZIP = "/base/rdf/br/060/10000/1000.zip"

    def test_find_prov_file_exists(self):
        """When the candidate path exists, that provenance path is returned."""
        with patch('os.path.exists', return_value=True):
            prov_path = find_prov_file(self.DATA_ZIP)
        self.assertEqual(prov_path, "/base/rdf/br/060/10000/1000/prov/se.zip")

    def test_find_prov_file_not_exists(self):
        """When the candidate path is missing, None is returned."""
        with patch('os.path.exists', return_value=False):
            prov_path = find_prov_file(self.DATA_ZIP)
        self.assertIsNone(prov_path)

    def test_find_prov_file_with_exception(self):
        """An error raised while building the path results in None."""
        with patch('os.path.dirname', side_effect=Exception("Test error")):
            prov_path = find_prov_file(self.DATA_ZIP)
        self.assertIsNone(prov_path)
class TestCheckOMIDsExistence(unittest.TestCase):
    """Checks check_omids_existence against a mocked SPARQL client."""

    @staticmethod
    def _wire_client(mock_sparql_client):
        """Return the mock yielded by 'with SPARQLClient(...)' in the tested code."""
        client = MagicMock()
        mock_sparql_client.return_value.__enter__.return_value = client
        return client

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_single_identifier_found(self, mock_sparql_client):
        """One binding in the response maps the identifier to one OMID."""
        client = self._wire_client(mock_sparql_client)
        client.query.return_value = {
            "results": {
                "bindings": [
                    {"omid": {"value": "https://w3id.org/oc/meta/br/0601"}},
                ],
            },
        }

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.1234/test'}],
            "http://example.com/sparql",
        )

        self.assertEqual(
            found,
            {'doi:10.1234/test': {'https://w3id.org/oc/meta/br/0601'}},
        )
        client.query.assert_called_once()

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_identifier_not_found(self, mock_sparql_client):
        """No bindings in the response maps the identifier to an empty set."""
        client = self._wire_client(mock_sparql_client)
        client.query.return_value = {"results": {"bindings": []}}

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.9999/notfound'}],
            "http://example.com/sparql",
        )

        self.assertEqual(found, {'doi:10.9999/notfound': set()})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_multiple_omids_for_identifier(self, mock_sparql_client):
        """Every OMID bound to the same identifier is collected."""
        client = self._wire_client(mock_sparql_client)
        client.query.return_value = {
            "results": {
                "bindings": [
                    {"omid": {"value": "https://w3id.org/oc/meta/br/0601"}},
                    {"omid": {"value": "https://w3id.org/oc/meta/br/0602"}},
                ],
            },
        }

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.1234/duplicate'}],
            "http://example.com/sparql",
        )

        both_omids = {
            'https://w3id.org/oc/meta/br/0601',
            'https://w3id.org/oc/meta/br/0602',
        }
        self.assertEqual(found, {'doi:10.1234/duplicate': both_omids})

    def test_check_empty_identifiers_list(self):
        """An empty identifier list gives an empty mapping."""
        self.assertEqual(
            check_omids_existence([], "http://example.com/sparql"),
            {},
        )

    @patch('oc_meta.run.meta.check_results.time.sleep')
    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_sparql_exception_handling(self, mock_sparql_client, mock_sleep):
        """An endpoint that always fails makes EndpointError reach the caller."""
        from sparqlite.exceptions import EndpointError

        client = self._wire_client(mock_sparql_client)
        client.query.side_effect = EndpointError("SPARQL endpoint unavailable")

        # time.sleep is patched so any waiting between attempts costs no time;
        # the error is expected to surface once the retries are used up.
        with self.assertRaises(EndpointError):
            check_omids_existence(
                [{'schema': 'doi', 'value': '10.1234/test'}],
                "http://example.com/sparql",
            )
class TestCheckProvenanceExistence(unittest.TestCase):
    """Checks check_provenance_existence against a mocked SPARQL client."""

    @staticmethod
    def _wire_client(mock_sparql_client):
        """Return the mock yielded by 'with SPARQLClient(...)' in the tested code."""
        client = MagicMock()
        mock_sparql_client.return_value.__enter__.return_value = client
        return client

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_exists(self, mock_sparql_client):
        """A True ASK answer marks the OMID as having provenance."""
        client = self._wire_client(mock_sparql_client)
        client.ask.return_value = True

        outcome = check_provenance_existence(
            ["https://w3id.org/oc/meta/br/0601"],
            "http://example.com/prov-sparql",
        )

        self.assertEqual(outcome, {"https://w3id.org/oc/meta/br/0601": True})
        client.ask.assert_called_once()

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_not_exists(self, mock_sparql_client):
        """A False ASK answer marks the OMID as lacking provenance."""
        client = self._wire_client(mock_sparql_client)
        client.ask.return_value = False

        outcome = check_provenance_existence(
            ["https://w3id.org/oc/meta/br/0601"],
            "http://example.com/prov-sparql",
        )

        self.assertEqual(outcome, {"https://w3id.org/oc/meta/br/0601": False})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_multiple_omids_mixed_results(self, mock_sparql_client):
        """Successive ASK answers are matched to the OMIDs in input order."""
        client = self._wire_client(mock_sparql_client)
        client.ask.side_effect = [True, False, True]

        first_omid = "https://w3id.org/oc/meta/br/0601"
        second_omid = "https://w3id.org/oc/meta/br/0602"
        third_omid = "https://w3id.org/oc/meta/br/0603"
        outcome = check_provenance_existence(
            [first_omid, second_omid, third_omid],
            "http://example.com/prov-sparql",
        )

        self.assertTrue(outcome[first_omid])
        self.assertFalse(outcome[second_omid])
        self.assertTrue(outcome[third_omid])

    def test_check_empty_omids_list(self):
        """An empty OMID list gives an empty mapping."""
        self.assertEqual(
            check_provenance_existence([], "http://example.com/prov-sparql"),
            {},
        )

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_individual_queries(self, mock_sparql_client):
        """Five OMIDs trigger five separate ASK calls and five results."""
        client = self._wire_client(mock_sparql_client)
        client.ask.return_value = False
        omids = [f"https://w3id.org/oc/meta/br/06{i:02d}" for i in range(1, 6)]

        outcome = check_provenance_existence(omids, "http://example.com/prov-sparql")

        # One ASK per OMID, and none of them reported provenance.
        self.assertEqual(client.ask.call_count, 5)
        self.assertEqual(len(outcome), 5)
        self.assertFalse(any(outcome.values()))
if __name__ == "__main__":
    # Run the tests in this module when the file is executed as a script.
    unittest.main()