Coverage for test/check_results_test.py: 99%

173 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-12-20 08:55 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright 2025, Arcangelo Massari <arcangelo.massari@unibo.it> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16 

17import unittest 

18from unittest.mock import MagicMock, patch 

19 

20from oc_meta.run.meta.check_results import ( 

21 check_omids_existence, 

22 check_provenance_existence, 

23 find_file, 

24 find_prov_file, 

25 parse_identifiers, 

26) 

27 

28 

class TestParseIdentifiers(unittest.TestCase):
    """Unit tests exercising parse_identifiers on well-formed and degenerate input."""

    def test_parse_single_identifier(self):
        """A lone 'schema:value' token produces a one-element list."""
        parsed = parse_identifiers("doi:10.1234/test")
        self.assertEqual(parsed, [{'schema': 'doi', 'value': '10.1234/test'}])

    def test_parse_multiple_identifiers(self):
        """Space-separated tokens produce one entry each, in input order."""
        parsed = parse_identifiers("doi:10.1234/test isbn:978-3-16-148410-0")
        self.assertEqual(
            parsed,
            [
                {'schema': 'doi', 'value': '10.1234/test'},
                {'schema': 'isbn', 'value': '978-3-16-148410-0'},
            ],
        )

    def test_parse_identifier_with_colon_in_value(self):
        """Only the first colon separates schema from value; later ones stay in the value."""
        parsed = parse_identifiers("url:http://example.com:8080/path")
        self.assertEqual(
            parsed,
            [{'schema': 'url', 'value': 'http://example.com:8080/path'}],
        )

    def test_parse_empty_string(self):
        """An empty string is expected to parse to an empty list."""
        self.assertEqual(parse_identifiers(""), [])

    def test_parse_whitespace_only(self):
        """A whitespace-only string is expected to parse to an empty list."""
        self.assertEqual(parse_identifiers(" "), [])

    def test_parse_none(self):
        """None is accepted and is expected to parse to an empty list."""
        self.assertEqual(parse_identifiers(None), [])

    def test_parse_with_uppercase_schema(self):
        """The schema part is expected to come back lowercased."""
        parsed = parse_identifiers("DOI:10.1234/test")
        self.assertEqual(parsed, [{'schema': 'doi', 'value': '10.1234/test'}])

    def test_parse_malformed_identifier(self):
        """A token without any colon is expected to be dropped from the result."""
        self.assertEqual(parse_identifiers("malformed"), [])

78 

79 

class TestFindFile(unittest.TestCase):
    """Unit tests for find_file, which maps an entity URI to its RDF file path."""

    def _locate(self, uri, zip_output_rdf=True):
        """Call find_file with the directory layout shared by every test in this class."""
        return find_file(
            rdf_dir="/base/rdf",
            dir_split_number=10000,
            items_per_file=1000,
            uri=uri,
            zip_output_rdf=zip_output_rdf,
        )

    def test_find_file_zip_format(self):
        """With zipped output enabled the resolved path ends in '.zip'."""
        located = self._locate("https://w3id.org/oc/meta/br/0605", zip_output_rdf=True)
        self.assertEqual(located, "/base/rdf/br/060/10000/1000.zip")

    def test_find_file_json_format(self):
        """With zipped output disabled the resolved path ends in '.json'."""
        located = self._locate("https://w3id.org/oc/meta/br/0605", zip_output_rdf=False)
        self.assertEqual(located, "/base/rdf/br/060/10000/1000.json")

    def test_find_file_with_subfolder(self):
        """A larger entity number lands in a later directory and file bucket."""
        located = self._locate("https://w3id.org/oc/meta/br/06012345", zip_output_rdf=True)
        self.assertEqual(located, "/base/rdf/br/060/20000/13000.zip")

    def test_find_file_different_entity_type(self):
        """The entity type segment of the URI ('ra' here) is reflected in the path."""
        located = self._locate("https://w3id.org/oc/meta/ra/0605", zip_output_rdf=True)
        self.assertEqual(located, "/base/rdf/ra/060/10000/1000.zip")

    def test_find_file_invalid_uri(self):
        """A string that is not an entity URI is expected to resolve to None."""
        self.assertIsNone(self._locate("invalid-uri", zip_output_rdf=True))

    def test_find_file_boundary_values(self):
        """An entity number equal to the directory split stays in that first bucket."""
        located = self._locate("https://w3id.org/oc/meta/br/06010000", zip_output_rdf=True)
        self.assertEqual(located, "/base/rdf/br/060/10000/10000.zip")

159 

160 

class TestFindProvFile(unittest.TestCase):
    """Unit tests for find_prov_file, which derives the provenance archive path."""

    # Data archive path used as input by every test in this class.
    _DATA_ZIP = "/base/rdf/br/060/10000/1000.zip"

    def test_find_prov_file_exists(self):
        """When os.path.exists reports True, the derived 'prov/se.zip' path is returned."""
        with patch('os.path.exists', return_value=True):
            prov_path = find_prov_file(self._DATA_ZIP)
        self.assertEqual(prov_path, "/base/rdf/br/060/10000/1000/prov/se.zip")

    def test_find_prov_file_not_exists(self):
        """When os.path.exists reports False, None is expected."""
        with patch('os.path.exists', return_value=False):
            prov_path = find_prov_file(self._DATA_ZIP)
        self.assertIsNone(prov_path)

    def test_find_prov_file_with_exception(self):
        """When os.path.dirname raises, None is expected instead of a propagated error."""
        with patch('os.path.dirname', side_effect=Exception("Test error")):
            prov_path = find_prov_file(self._DATA_ZIP)
        self.assertIsNone(prov_path)

188 

189 

class TestCheckOMIDsExistence(unittest.TestCase):
    """Unit tests for check_omids_existence, run against a mocked SPARQLClient."""

    @staticmethod
    def _install_client(sparql_cls_mock):
        """Return a MagicMock wired as the object yielded by ``with SPARQLClient(...)``."""
        client = MagicMock()
        sparql_cls_mock.return_value.__enter__.return_value = client
        return client

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_single_identifier_found(self, mock_sparql_client):
        """One binding in the response maps the identifier to a one-element OMID set."""
        client = self._install_client(mock_sparql_client)
        client.query.return_value = {
            "results": {
                "bindings": [
                    {"omid": {"value": "https://w3id.org/oc/meta/br/0601"}},
                ]
            }
        }

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.1234/test'}],
            "http://example.com/sparql",
        )

        self.assertEqual(
            found,
            {'doi:10.1234/test': {'https://w3id.org/oc/meta/br/0601'}},
        )
        client.query.assert_called_once()

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_identifier_not_found(self, mock_sparql_client):
        """An empty bindings list maps the identifier to an empty set."""
        client = self._install_client(mock_sparql_client)
        client.query.return_value = {"results": {"bindings": []}}

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.9999/notfound'}],
            "http://example.com/sparql",
        )

        self.assertEqual(found, {'doi:10.9999/notfound': set()})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_multiple_omids_for_identifier(self, mock_sparql_client):
        """Several bindings for one identifier are all collected into its set."""
        client = self._install_client(mock_sparql_client)
        client.query.return_value = {
            "results": {
                "bindings": [
                    {"omid": {"value": "https://w3id.org/oc/meta/br/0601"}},
                    {"omid": {"value": "https://w3id.org/oc/meta/br/0602"}},
                ]
            }
        }

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.1234/duplicate'}],
            "http://example.com/sparql",
        )

        self.assertEqual(
            found,
            {
                'doi:10.1234/duplicate': {
                    'https://w3id.org/oc/meta/br/0601',
                    'https://w3id.org/oc/meta/br/0602',
                }
            },
        )

    def test_check_empty_identifiers_list(self):
        """An empty identifier list yields an empty mapping (SPARQLClient is not patched here)."""
        self.assertEqual(check_omids_existence([], "http://example.com/sparql"), {})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_sparql_exception_handling(self, mock_sparql_client):
        """A failing query leaves the identifier in the result with an empty set."""
        client = self._install_client(mock_sparql_client)
        client.query.side_effect = Exception("SPARQL endpoint unavailable")

        found = check_omids_existence(
            [{'schema': 'doi', 'value': '10.1234/test'}],
            "http://example.com/sparql",
        )

        # The error must not propagate; the identifier is reported with no OMIDs.
        self.assertEqual(found, {'doi:10.1234/test': set()})

269 

270 

class TestCheckProvenanceExistence(unittest.TestCase):
    """Unit tests for check_provenance_existence, run against a mocked SPARQLClient."""

    @staticmethod
    def _install_client(sparql_cls_mock):
        """Return a MagicMock wired as the object yielded by ``with SPARQLClient(...)``."""
        client = MagicMock()
        sparql_cls_mock.return_value.__enter__.return_value = client
        return client

    @staticmethod
    def _response(*omid_uris):
        """Build a SPARQL JSON result with one 'omid' binding per given URI."""
        return {
            "results": {
                "bindings": [{"omid": {"value": omid_uri}} for omid_uri in omid_uris]
            }
        }

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_exists(self, mock_sparql_client):
        """An OMID present in the response bindings is reported as True."""
        client = self._install_client(mock_sparql_client)
        client.query.return_value = self._response("https://w3id.org/oc/meta/br/0601")

        outcome = check_provenance_existence(
            ["https://w3id.org/oc/meta/br/0601"],
            "http://example.com/prov-sparql",
        )

        self.assertEqual(outcome, {"https://w3id.org/oc/meta/br/0601": True})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_not_exists(self, mock_sparql_client):
        """An OMID absent from the response bindings is reported as False."""
        client = self._install_client(mock_sparql_client)
        client.query.return_value = self._response()

        outcome = check_provenance_existence(
            ["https://w3id.org/oc/meta/br/0601"],
            "http://example.com/prov-sparql",
        )

        self.assertEqual(outcome, {"https://w3id.org/oc/meta/br/0601": False})

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_multiple_omids_mixed_results(self, mock_sparql_client):
        """Only the OMIDs returned by the endpoint are flagged as having provenance."""
        client = self._install_client(mock_sparql_client)
        client.query.return_value = self._response(
            "https://w3id.org/oc/meta/br/0601",
            "https://w3id.org/oc/meta/br/0603",
        )

        outcome = check_provenance_existence(
            [
                "https://w3id.org/oc/meta/br/0601",
                "https://w3id.org/oc/meta/br/0602",
                "https://w3id.org/oc/meta/br/0603",
            ],
            "http://example.com/prov-sparql",
        )

        # 0601 and 0603 appear in the bindings; 0602 does not.
        self.assertTrue(outcome["https://w3id.org/oc/meta/br/0601"])
        self.assertFalse(outcome["https://w3id.org/oc/meta/br/0602"])
        self.assertTrue(outcome["https://w3id.org/oc/meta/br/0603"])

    def test_check_empty_omids_list(self):
        """An empty OMID list yields an empty mapping (SPARQLClient is not patched here)."""
        self.assertEqual(
            check_provenance_existence([], "http://example.com/prov-sparql"),
            {},
        )

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_batching(self, mock_sparql_client):
        """Twenty-five OMIDs are expected to be queried in three separate batches."""
        client = self._install_client(mock_sparql_client)
        client.query.return_value = self._response()

        # NOTE(review): three queries for 25 OMIDs presumably reflects a batch
        # size of 10 in the module under test (10 + 10 + 5) -- confirm there.
        omids = [f"https://w3id.org/oc/meta/br/06{i:02d}" for i in range(1, 26)]

        outcome = check_provenance_existence(omids, "http://example.com/prov-sparql")

        self.assertEqual(client.query.call_count, 3)

        # Every OMID must be present, and each flagged as lacking provenance.
        self.assertEqual(len(outcome), 25)
        self.assertTrue(all(not flag for flag in outcome.values()))

    @patch('oc_meta.run.meta.check_results.SPARQLClient')
    def test_check_provenance_exception_handling(self, mock_sparql_client):
        """A failing query leaves the OMID in the result, flagged False."""
        client = self._install_client(mock_sparql_client)
        client.query.side_effect = Exception("SPARQL endpoint unavailable")

        outcome = check_provenance_existence(
            ["https://w3id.org/oc/meta/br/0601"],
            "http://example.com/prov-sparql",
        )

        # The error must not propagate; the OMID is still reported.
        self.assertEqual(outcome, {"https://w3id.org/oc/meta/br/0601": False})

369 

370 

# Run the whole suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()