Coverage for test/check_results_test.py: 99%

166 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2026-01-15 10:29 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright 2025, Arcangelo Massari <arcangelo.massari@unibo.it> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16 

17import unittest 

18from unittest.mock import MagicMock, patch 

19 

20from oc_meta.run.meta.check_results import ( 

21 check_omids_existence, 

22 check_provenance_existence, 

23 find_file, 

24 find_prov_file, 

25 parse_identifiers, 

26) 

27 

28 

class TestParseIdentifiers(unittest.TestCase):
    """Exercise parse_identifiers on well-formed, degenerate and malformed input."""

    def test_parse_single_identifier(self):
        """A lone 'schema:value' token yields a single schema/value entry."""
        parsed = parse_identifiers("doi:10.1234/test")
        self.assertEqual(parsed, [{'schema': 'doi', 'value': '10.1234/test'}])

    def test_parse_multiple_identifiers(self):
        """Space-separated tokens yield one entry each, in input order."""
        parsed = parse_identifiers("doi:10.1234/test isbn:978-3-16-148410-0")
        self.assertEqual(
            parsed,
            [
                {'schema': 'doi', 'value': '10.1234/test'},
                {'schema': 'isbn', 'value': '978-3-16-148410-0'},
            ],
        )

    def test_parse_identifier_with_colon_in_value(self):
        """Only the first colon separates schema from value; later colons stay in the value."""
        parsed = parse_identifiers("url:http://example.com:8080/path")
        self.assertEqual(
            parsed,
            [{'schema': 'url', 'value': 'http://example.com:8080/path'}],
        )

    def test_parse_empty_string(self):
        """An empty string produces an empty list."""
        self.assertEqual(parse_identifiers(""), [])

    def test_parse_whitespace_only(self):
        """A string made only of whitespace produces an empty list."""
        self.assertEqual(parse_identifiers(" "), [])

    def test_parse_none(self):
        """None is accepted and produces an empty list."""
        self.assertEqual(parse_identifiers(None), [])

    def test_parse_with_uppercase_schema(self):
        """An upper-case schema comes back lower-cased; the value is untouched."""
        parsed = parse_identifiers("DOI:10.1234/test")
        self.assertEqual(parsed, [{'schema': 'doi', 'value': '10.1234/test'}])

    def test_parse_malformed_identifier(self):
        """A token without any colon is dropped, leaving an empty list."""
        self.assertEqual(parse_identifiers("malformed"), [])

78 

79 

class TestFindFile(unittest.TestCase):
    """Check the file paths that find_file derives from entity URIs."""

    def _locate(self, uri, zipped=True):
        """Call find_file with the directory layout shared by every test here."""
        return find_file(
            rdf_dir="/base/rdf",
            dir_split_number=10000,
            items_per_file=1000,
            uri=uri,
            zip_output_rdf=zipped,
        )

    def test_find_file_zip_format(self):
        """With zipped output enabled the resolved path ends in '.zip'."""
        path = self._locate("https://w3id.org/oc/meta/br/0605")
        self.assertEqual(path, "/base/rdf/br/060/10000/1000.zip")

    def test_find_file_json_format(self):
        """With zipped output disabled the resolved path ends in '.json'."""
        path = self._locate("https://w3id.org/oc/meta/br/0605", zipped=False)
        self.assertEqual(path, "/base/rdf/br/060/10000/1000.json")

    def test_find_file_with_subfolder(self):
        """A larger entity number lands in a later directory and file bucket."""
        path = self._locate("https://w3id.org/oc/meta/br/06012345")
        self.assertEqual(path, "/base/rdf/br/060/20000/13000.zip")

    def test_find_file_different_entity_type(self):
        """The entity short name from the URI ('ra' here) appears in the path."""
        path = self._locate("https://w3id.org/oc/meta/ra/0605")
        self.assertEqual(path, "/base/rdf/ra/060/10000/1000.zip")

    def test_find_file_invalid_uri(self):
        """A string that is not an entity URI resolves to None."""
        self.assertIsNone(self._locate("invalid-uri"))

    def test_find_file_boundary_values(self):
        """An entity number equal to the directory split stays in that directory."""
        path = self._locate("https://w3id.org/oc/meta/br/06010000")
        self.assertEqual(path, "/base/rdf/br/060/10000/10000.zip")

159 

160 

class TestFindProvFile(unittest.TestCase):
    """Check how find_prov_file maps a data archive to its provenance archive."""

    # Data archive path used as input by every test in this class.
    DATA_ZIP = "/base/rdf/br/060/10000/1000.zip"

    def test_find_prov_file_exists(self):
        """When os.path.exists reports True, the 'prov/se.zip' sibling path is returned."""
        with patch('os.path.exists', return_value=True):
            prov_path = find_prov_file(self.DATA_ZIP)
        self.assertEqual(prov_path, "/base/rdf/br/060/10000/1000/prov/se.zip")

    def test_find_prov_file_not_exists(self):
        """When os.path.exists reports False, None is returned."""
        with patch('os.path.exists', return_value=False):
            prov_path = find_prov_file(self.DATA_ZIP)
        self.assertIsNone(prov_path)

    def test_find_prov_file_with_exception(self):
        """An exception raised by os.path.dirname results in None rather than propagating."""
        with patch('os.path.dirname', side_effect=Exception("Test error")):
            prov_path = find_prov_file(self.DATA_ZIP)
        self.assertIsNone(prov_path)

188 

189 

class TestCheckOMIDsExistence(unittest.TestCase):
    """Check check_omids_existence against a mocked SPARQLClient."""

    # Endpoint URL handed to check_omids_existence; never contacted because the client is mocked.
    ENDPOINT = "http://example.com/sparql"

    @staticmethod
    def _install_client(sparql_client_cls):
        """Make the patched SPARQLClient yield a fresh mock from its context manager."""
        client = MagicMock()
        sparql_client_cls.return_value.__enter__.return_value = client
        return client

    @staticmethod
    def _select_response(*omid_uris):
        """Build a SELECT result whose bindings carry the given OMID URIs."""
        return {
            "results": {
                "bindings": [{"omid": {"value": omid_uri}} for omid_uri in omid_uris]
            }
        }

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_single_identifier_found(self, mock_sparql_client):
        """One binding for the identifier maps it to a one-element OMID set."""
        client = self._install_client(mock_sparql_client)
        client.query.return_value = self._select_response(
            "https://w3id.org/oc/meta/br/0601"
        )

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.1234/test'}], self.ENDPOINT
        )

        self.assertEqual(
            found, {'doi:10.1234/test': {'https://w3id.org/oc/meta/br/0601'}}
        )
        client.query.assert_called_once()

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_identifier_not_found(self, mock_sparql_client):
        """No bindings for the identifier maps it to an empty set."""
        client = self._install_client(mock_sparql_client)
        client.query.return_value = self._select_response()

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.9999/notfound'}], self.ENDPOINT
        )

        self.assertEqual(found, {'doi:10.9999/notfound': set()})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_multiple_omids_for_identifier(self, mock_sparql_client):
        """Two bindings for the same identifier are both collected in its set."""
        client = self._install_client(mock_sparql_client)
        client.query.return_value = self._select_response(
            "https://w3id.org/oc/meta/br/0601",
            "https://w3id.org/oc/meta/br/0602",
        )

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.1234/duplicate'}], self.ENDPOINT
        )

        self.assertEqual(
            found,
            {
                'doi:10.1234/duplicate': {
                    'https://w3id.org/oc/meta/br/0601',
                    'https://w3id.org/oc/meta/br/0602',
                }
            },
        )

    def test_check_empty_identifiers_list(self):
        """An empty identifier list yields an empty mapping."""
        self.assertEqual(check_omids_existence([], self.ENDPOINT), {})

    @patch('oc_meta.run.meta.check_results.time.sleep')
    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_sparql_exception_handling(self, mock_sparql_client, mock_sleep):
        """An EndpointError raised by every query ends up propagating to the caller.

        time.sleep is patched out, presumably so that retry back-off does not
        slow the test; the number of retries is not asserted here.
        """
        from sparqlite.exceptions import EndpointError

        client = self._install_client(mock_sparql_client)
        client.query.side_effect = EndpointError("SPARQL endpoint unavailable")

        # Once the retries are exhausted the error is expected to surface.
        with self.assertRaises(EndpointError):
            check_omids_existence(
                [{'schema': 'doi', 'value': '10.1234/test'}], self.ENDPOINT
            )

271 

272 

class TestCheckProvenanceExistence(unittest.TestCase):
    """Check check_provenance_existence against a mocked SPARQLClient."""

    # Endpoint URL handed to check_provenance_existence; never contacted because the client is mocked.
    PROV_ENDPOINT = "http://example.com/prov-sparql"

    @staticmethod
    def _install_client(sparql_client_cls):
        """Make the patched SPARQLClient yield a fresh mock from its context manager."""
        client = MagicMock()
        sparql_client_cls.return_value.__enter__.return_value = client
        return client

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_exists(self, mock_sparql_client):
        """An ASK answer of True is reported as True for that OMID."""
        client = self._install_client(mock_sparql_client)
        client.ask.return_value = True

        outcome = check_provenance_existence(
            ["https://w3id.org/oc/meta/br/0601"], self.PROV_ENDPOINT
        )

        self.assertEqual(outcome, {"https://w3id.org/oc/meta/br/0601": True})
        client.ask.assert_called_once()

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_not_exists(self, mock_sparql_client):
        """An ASK answer of False is reported as False for that OMID."""
        client = self._install_client(mock_sparql_client)
        client.ask.return_value = False

        outcome = check_provenance_existence(
            ["https://w3id.org/oc/meta/br/0601"], self.PROV_ENDPOINT
        )

        self.assertEqual(outcome, {"https://w3id.org/oc/meta/br/0601": False})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_multiple_omids_mixed_results(self, mock_sparql_client):
        """Successive ASK answers are matched to the OMIDs in the order given."""
        client = self._install_client(mock_sparql_client)
        client.ask.side_effect = [True, False, True]

        first = "https://w3id.org/oc/meta/br/0601"
        second = "https://w3id.org/oc/meta/br/0602"
        third = "https://w3id.org/oc/meta/br/0603"

        outcome = check_provenance_existence(
            [first, second, third], self.PROV_ENDPOINT
        )

        self.assertTrue(outcome[first])
        self.assertFalse(outcome[second])
        self.assertTrue(outcome[third])

    def test_check_empty_omids_list(self):
        """An empty OMID list yields an empty mapping."""
        self.assertEqual(check_provenance_existence([], self.PROV_ENDPOINT), {})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_individual_queries(self, mock_sparql_client):
        """Five OMIDs trigger five separate ASK calls and five result entries."""
        client = self._install_client(mock_sparql_client)
        client.ask.return_value = False

        omids = [f"https://w3id.org/oc/meta/br/06{i:02d}" for i in range(1, 6)]
        outcome = check_provenance_existence(omids, self.PROV_ENDPOINT)

        # One ASK per OMID, one entry per OMID, and every entry falsy.
        self.assertEqual(client.ask.call_count, 5)
        self.assertEqual(len(outcome), 5)
        self.assertTrue(not any(outcome.values()))

341 

342 

# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()