Coverage for test/extract_crossref_publishers_test.py: 99%

214 statements  

Generated by coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5import os 

6import tempfile 

7import time 

8import unittest 

9from unittest.mock import MagicMock, patch 

10 

11from oc_ds_converter.crossref.extract_crossref_publishers import ( 

12 get_publishers, 

13 get_via_requests, 

14 is_stale, 

15 process, 

16 store_csv_on_file, 

17) 

18 

19 

class TestIsStale(unittest.TestCase):
    """Checks ``is_stale`` against a missing file, a fresh file and an aged file."""

    def _make_temp_csv(self) -> str:
        """Create a small temporary ``.csv`` file and schedule its removal.

        Returns:
            The path of the file; it is unlinked when the test finishes.
        """
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as handle:
            handle.write('test')
            csv_path = handle.name
        # Cleanup runs whether the test passes or fails.
        self.addCleanup(os.unlink, csv_path)
        return csv_path

    def test_nonexistent_file_is_stale(self) -> None:
        # A path that does not exist must be reported as stale.
        self.assertTrue(is_stale('/nonexistent/file.csv', 30))

    def test_recent_file_is_not_stale(self) -> None:
        # A file written a moment ago, checked against a threshold of 30
        # (presumably days -- confirm against is_stale's signature).
        csv_path = self._make_temp_csv()
        self.assertFalse(is_stale(csv_path, 30))

    def test_old_file_is_stale(self) -> None:
        csv_path = self._make_temp_csv()
        # Move access and modification times 40 days into the past.
        forty_days_ago = time.time() - (40 * 24 * 60 * 60)
        os.utime(csv_path, (forty_days_ago, forty_days_ago))
        self.assertTrue(is_stale(csv_path, 30))

46 

47 

class TestGetViaRequests(unittest.TestCase):
    """Exercises ``get_via_requests`` with the module's ``get`` (and ``sleep``) patched."""

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get')
    def test_successful_request(self, mock_get: MagicMock) -> None:
        # A 200 response whose text is JSON should come back decoded.
        mock_get.return_value = MagicMock(status_code=200, text='{"message": "success"}')

        payload = get_via_requests("https://api.crossref.org/members")

        self.assertEqual(payload, {"message": "success"})

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get')
    def test_404_response(self, mock_get: MagicMock) -> None:
        # A 404 response is expected to yield None.
        mock_get.return_value = MagicMock(status_code=404)

        payload = get_via_requests("https://api.crossref.org/members")

        self.assertIsNone(payload)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.sleep')
    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get')
    def test_retry_on_server_error(self, mock_get: MagicMock, mock_sleep: MagicMock) -> None:
        # First call fails with a 500, second call succeeds.
        server_error = MagicMock(status_code=500)
        success = MagicMock(status_code=200, text='{"data": "ok"}')
        mock_get.side_effect = [server_error, success]

        payload = get_via_requests("https://api.crossref.org/members")

        self.assertEqual(payload, {"data": "ok"})
        self.assertEqual(mock_get.call_count, 2)
        mock_sleep.assert_called_once_with(5)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.sleep')
    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get')
    def test_retry_on_exception(self, mock_get: MagicMock, mock_sleep: MagicMock) -> None:
        # First call raises, second call returns a usable response.
        success = MagicMock(status_code=200, text='{"data": "ok"}')
        mock_get.side_effect = [ConnectionError("Connection refused"), success]

        payload = get_via_requests("https://api.crossref.org/members")

        self.assertEqual(payload, {"data": "ok"})
        self.assertEqual(mock_get.call_count, 2)
        mock_sleep.assert_called_once_with(5)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.sleep')
    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get')
    def test_max_retries_exceeded(self, mock_get: MagicMock, mock_sleep: MagicMock) -> None:
        # Every attempt returns a 500, so the call must end in ConnectionError.
        mock_get.return_value = MagicMock(status_code=500)

        with self.assertRaises(ConnectionError):
            get_via_requests("https://api.crossref.org/members")

        # Five attempts, each followed by a sleep.
        self.assertEqual(mock_get.call_count, 5)
        self.assertEqual(mock_sleep.call_count, 5)

112 

113 

class TestGetPublishers(unittest.TestCase):
    """Checks ``get_publishers`` with ``get_via_requests`` patched."""

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_via_requests')
    def test_successful_response(self, mock_get_via_requests: MagicMock) -> None:
        members = [
            {"id": 1, "primary-name": "Publisher A", "prefix": [{"value": "10.1000"}]},
            {"id": 2, "primary-name": "Publisher B", "prefix": [{"value": "10.2000"}]},
        ]
        api_payload = {"message": {"total-results": 15000, "items": members}}
        mock_get_via_requests.return_value = api_payload

        result = get_publishers(0)

        self.assertIsNotNone(result)
        # The result unpacks into the items, the next offset and the total.
        items, new_offset, total = result  # type: ignore[misc]
        self.assertEqual(len(items), 2)
        self.assertEqual(new_offset, 1000)
        self.assertEqual(total, 15000)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_via_requests')
    def test_none_response(self, mock_get_via_requests: MagicMock) -> None:
        # With no payload from the request layer, the result is None.
        mock_get_via_requests.return_value = None

        self.assertIsNone(get_publishers(0))

142 

143 

class TestStoreCSVOnFile(unittest.TestCase):
    """Checks ``store_csv_on_file`` when creating a file and when appending to one."""

    # Column names passed to store_csv_on_file in every test of this class.
    _HEADER = ("id", "name", "prefix")

    def test_create_new_file(self) -> None:
        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            record = {"id": "1", "name": "Test Publisher", "prefix": "10.1234"}
            store_csv_on_file(csv_path, self._HEADER, record)

            with open(csv_path, "r", encoding="utf8") as src:
                written = src.read()

            # Header and data row are both expected, fully quoted.
            self.assertIn('"id","name","prefix"', written)
            self.assertIn('"1","Test Publisher","10.1234"', written)

    def test_append_to_existing_file(self) -> None:
        records = (
            {"id": "1", "name": "Publisher A", "prefix": "10.1000"},
            {"id": "2", "name": "Publisher B", "prefix": "10.2000"},
        )
        expected_fragments = (
            '"id","name","prefix"',
            '"1","Publisher A","10.1000"',
            '"2","Publisher B","10.2000"',
        )
        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            for record in records:
                store_csv_on_file(csv_path, self._HEADER, record)

            with open(csv_path, "r", encoding="utf8") as src:
                lines = src.readlines()

            # One header line plus one line per stored record, in order.
            self.assertEqual(len(lines), 3)
            for fragment, line in zip(expected_fragments, lines):
                self.assertIn(fragment, line)

169 

170 

class TestProcess(unittest.TestCase):
    """Checks ``process`` end to end with ``get_publishers`` patched."""

    @staticmethod
    def _member(member_id: int, name: str, *prefixes: str) -> dict:
        """Build one publisher record in the shape the mocks in this class return."""
        return {
            "id": member_id,
            "primary-name": name,
            "prefix": [{"value": prefix} for prefix in prefixes],
        }

    @staticmethod
    def _seed_csv(path: str, *rows: str) -> None:
        """Write the CSV header line followed by the given raw row lines."""
        with open(path, "w", encoding="utf8") as out:
            out.write('"id","name","prefix"\n')
            for row in rows:
                out.write(row)

    @staticmethod
    def _lines_of(path: str) -> list:
        """Return every line of the file at ``path``."""
        with open(path, "r", encoding="utf8") as src:
            return src.readlines()

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_new_file(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = (
            [
                self._member(1, "Publisher A", "10.1000"),
                self._member(2, "Publisher B", "10.2000", "10.2001"),
            ],
            1000,
            2,
        )

        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            outcome = process(csv_path)

            self.assertTrue(outcome)
            lines = self._lines_of(csv_path)

            # Header plus one row per (publisher, prefix) pair.
            self.assertEqual(len(lines), 4)
            self.assertIn('"id","name","prefix"', lines[0])
            self.assertIn('"1","Publisher A","10.1000"', lines[1])
            self.assertIn('"2","Publisher B","10.2000"', lines[2])
            self.assertIn('"2","Publisher B","10.2001"', lines[3])

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_with_existing_data_deduplication(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = (
            [
                self._member(1, "Publisher A", "10.1000"),
                self._member(2, "Publisher B", "10.2000"),
            ],
            1000,
            2,
        )

        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            # The file already holds Publisher A before process runs.
            self._seed_csv(csv_path, '"1","Publisher A","10.1000"\n')

            process(csv_path, force=True)

            lines = self._lines_of(csv_path)

            self.assertEqual(len(lines), 3)
            # Publisher A must appear exactly once.
            matching = [line for line in lines if '"1"' in line and "Publisher A" in line]
            self.assertEqual(len(matching), 1)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_html_unescape(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = (
            [self._member(1, "Publisher &amp; Co", "10.1000")],
            1000,
            1,
        )

        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            process(csv_path)

            with open(csv_path, "r", encoding="utf8") as src:
                written = src.read()

            # The HTML entity must be decoded in the stored name.
            self.assertIn("Publisher & Co", written)
            self.assertNotIn("&amp;", written)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_api_failure_breaks_loop(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = None

        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            process(csv_path)

            # Nothing was fetched, so no file should have been created.
            self.assertFalse(os.path.exists(csv_path))

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_multiple_pages(self, mock_get_publishers: MagicMock) -> None:
        first_page = ([self._member(1, "Publisher A", "10.1000")], 1000, 2000)
        second_page = ([self._member(2, "Publisher B", "10.2000")], 2000, 2000)
        mock_get_publishers.side_effect = [first_page, second_page]

        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            process(csv_path)

            lines = self._lines_of(csv_path)

            # Header plus one row from each page, fetched in two calls.
            self.assertEqual(len(lines), 3)
            self.assertEqual(mock_get_publishers.call_count, 2)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_duplicate_prefix_same_publisher(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = (
            [self._member(1, "Publisher A", "10.1000", "10.1000")],
            1000,
            1,
        )

        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            process(csv_path)

            lines = self._lines_of(csv_path)

            # The repeated prefix yields a single data row after the header.
            self.assertEqual(len(lines), 2)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_skips_if_file_recent(self, mock_get_publishers: MagicMock) -> None:
        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            self._seed_csv(csv_path, '"1","Existing Publisher","10.1000"\n')

            outcome = process(csv_path, max_age_days=30, force=False)

            # A freshly written file without force: no update, no fetch.
            self.assertFalse(outcome)
            mock_get_publishers.assert_not_called()

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_force_updates_recent_file(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = (
            [self._member(2, "New Publisher", "10.2000")],
            1000,
            1,
        )

        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            self._seed_csv(csv_path, '"1","Existing Publisher","10.1000"\n')

            outcome = process(csv_path, max_age_days=30, force=True)

            # force=True triggers a fetch even though the file is fresh.
            self.assertTrue(outcome)
            mock_get_publishers.assert_called_once()

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_updates_stale_file(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = (
            [self._member(2, "New Publisher", "10.2000")],
            1000,
            1,
        )

        with tempfile.TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "publishers.csv")
            self._seed_csv(csv_path, '"1","Existing Publisher","10.1000"\n')

            # Age the file 40 days so it exceeds max_age_days=30.
            forty_days_ago = time.time() - (40 * 24 * 60 * 60)
            os.utime(csv_path, (forty_days_ago, forty_days_ago))

            outcome = process(csv_path, max_age_days=30, force=False)

            self.assertTrue(outcome)
            mock_get_publishers.assert_called_once()

338 

339 

# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()