Coverage for test / extract_crossref_publishers_test.py: 99%
214 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
2#
3# SPDX-License-Identifier: ISC
5import os
6import tempfile
7import time
8import unittest
9from unittest.mock import MagicMock, patch
11from oc_ds_converter.crossref.extract_crossref_publishers import (
12 get_publishers,
13 get_via_requests,
14 is_stale,
15 process,
16 store_csv_on_file,
17)
class TestIsStale(unittest.TestCase):
    """Checks ``is_stale`` against a missing file, a fresh file and an aged file."""

    def test_nonexistent_file_is_stale(self) -> None:
        # A path that does not exist must be reported as stale.
        self.assertTrue(is_stale('/nonexistent/file.csv', 30))

    def test_recent_file_is_not_stale(self) -> None:
        # delete=False keeps the file on disk after the handle closes;
        # it is removed explicitly in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as handle:
            handle.write('test')
            csv_path = handle.name
        try:
            self.assertFalse(is_stale(csv_path, 30))
        finally:
            os.unlink(csv_path)

    def test_old_file_is_stale(self) -> None:
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as handle:
            handle.write('test')
            csv_path = handle.name
        try:
            # Push access/modification times 40 days (in seconds) into the past.
            forty_days_ago = time.time() - (40 * 24 * 60 * 60)
            os.utime(csv_path, (forty_days_ago, forty_days_ago))
            self.assertTrue(is_stale(csv_path, 30))
        finally:
            os.unlink(csv_path)
class TestGetViaRequests(unittest.TestCase):
    """Exercises ``get_via_requests`` with the module's ``get`` (and ``sleep``) patched."""

    MEMBERS_URL = "https://api.crossref.org/members"

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get')
    def test_successful_request(self, mock_get: MagicMock) -> None:
        # A 200 response whose text is JSON should come back parsed.
        mock_get.return_value = MagicMock(status_code=200, text='{"message": "success"}')

        payload = get_via_requests(self.MEMBERS_URL)

        self.assertEqual(payload, {"message": "success"})

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get')
    def test_404_response(self, mock_get: MagicMock) -> None:
        # A 404 is expected to yield ``None`` rather than an exception.
        mock_get.return_value = MagicMock(status_code=404)

        payload = get_via_requests(self.MEMBERS_URL)

        self.assertIsNone(payload)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.sleep')
    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get')
    def test_retry_on_server_error(self, mock_get: MagicMock, mock_sleep: MagicMock) -> None:
        # First call fails with a 500, second call succeeds.
        server_error = MagicMock(status_code=500)
        success = MagicMock(status_code=200, text='{"data": "ok"}')
        mock_get.side_effect = [server_error, success]

        payload = get_via_requests(self.MEMBERS_URL)

        self.assertEqual(payload, {"data": "ok"})
        self.assertEqual(mock_get.call_count, 2)
        mock_sleep.assert_called_once_with(5)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.sleep')
    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get')
    def test_retry_on_exception(self, mock_get: MagicMock, mock_sleep: MagicMock) -> None:
        # First call raises, second call succeeds.
        success = MagicMock(status_code=200, text='{"data": "ok"}')
        mock_get.side_effect = [ConnectionError("Connection refused"), success]

        payload = get_via_requests(self.MEMBERS_URL)

        self.assertEqual(payload, {"data": "ok"})
        self.assertEqual(mock_get.call_count, 2)
        mock_sleep.assert_called_once_with(5)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.sleep')
    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get')
    def test_max_retries_exceeded(self, mock_get: MagicMock, mock_sleep: MagicMock) -> None:
        # Every attempt returns a 500, so the call must eventually give up.
        mock_get.return_value = MagicMock(status_code=500)

        with self.assertRaises(ConnectionError):
            get_via_requests(self.MEMBERS_URL)

        self.assertEqual(mock_get.call_count, 5)
        self.assertEqual(mock_sleep.call_count, 5)
class TestGetPublishers(unittest.TestCase):
    """Checks how ``get_publishers`` unpacks what ``get_via_requests`` returns."""

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_via_requests')
    def test_successful_response(self, mock_get_via_requests: MagicMock) -> None:
        members = [
            {"id": 1, "primary-name": "Publisher A", "prefix": [{"value": "10.1000"}]},
            {"id": 2, "primary-name": "Publisher B", "prefix": [{"value": "10.2000"}]},
        ]
        mock_get_via_requests.return_value = {
            "message": {"total-results": 15000, "items": members}
        }

        page = get_publishers(0)

        self.assertIsNotNone(page)
        # Expected shape: (items, next offset, total number of results).
        fetched, next_offset, total_results = page  # type: ignore[misc]
        self.assertEqual(len(fetched), 2)
        self.assertEqual(next_offset, 1000)
        self.assertEqual(total_results, 15000)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_via_requests')
    def test_none_response(self, mock_get_via_requests: MagicMock) -> None:
        # A ``None`` from the HTTP layer must propagate unchanged.
        mock_get_via_requests.return_value = None

        page = get_publishers(0)

        self.assertIsNone(page)
class TestStoreCSVOnFile(unittest.TestCase):
    """Checks that ``store_csv_on_file`` writes a quoted header and appends rows."""

    FIELDS = ("id", "name", "prefix")

    def test_create_new_file(self) -> None:
        row = {"id": "1", "name": "Test Publisher", "prefix": "10.1234"}
        with tempfile.TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "publishers.csv")
            store_csv_on_file(target, self.FIELDS, row)

            with open(target, "r", encoding="utf8") as stream:
                written = stream.read()

            # Both the header and the data row are expected fully quoted.
            self.assertIn('"id","name","prefix"', written)
            self.assertIn('"1","Test Publisher","10.1234"', written)

    def test_append_to_existing_file(self) -> None:
        first_row = {"id": "1", "name": "Publisher A", "prefix": "10.1000"}
        second_row = {"id": "2", "name": "Publisher B", "prefix": "10.2000"}
        with tempfile.TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "publishers.csv")
            store_csv_on_file(target, self.FIELDS, first_row)
            store_csv_on_file(target, self.FIELDS, second_row)

            with open(target, "r", encoding="utf8") as stream:
                written_lines = stream.readlines()

            # One header line followed by the two rows, in call order.
            self.assertEqual(len(written_lines), 3)
            self.assertIn('"id","name","prefix"', written_lines[0])
            self.assertIn('"1","Publisher A","10.1000"', written_lines[1])
            self.assertIn('"2","Publisher B","10.2000"', written_lines[2])
class TestProcess(unittest.TestCase):
    """Checks ``process`` with ``get_publishers`` patched out.

    Each test points ``process`` at a CSV path inside a fresh temporary
    directory and feeds it canned ``(items, offset, total)`` tuples (or
    ``None``) through the mock.
    """

    @staticmethod
    def _publisher(member_id: int, name: str, *prefixes: str) -> dict:
        """Build one member record in the shape used by the mocked pages."""
        return {
            "id": member_id,
            "primary-name": name,
            "prefix": [{"value": prefix} for prefix in prefixes],
        }

    @staticmethod
    def _seed_csv(filepath: str, name: str) -> None:
        """Write the header plus one row (id 1, prefix 10.1000) for ``name``."""
        with open(filepath, "w", encoding="utf8") as f:
            f.write('"id","name","prefix"\n')
            f.write(f'"1","{name}","10.1000"\n')

    @staticmethod
    def _read_lines(filepath: str) -> list:
        """Return the lines of ``filepath`` decoded as UTF-8."""
        with open(filepath, "r", encoding="utf8") as f:
            return f.readlines()

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_new_file(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = (
            [
                self._publisher(1, "Publisher A", "10.1000"),
                self._publisher(2, "Publisher B", "10.2000", "10.2001"),
            ],
            1000,
            2,
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "publishers.csv")
            result = process(filepath)

            self.assertTrue(result)
            lines = self._read_lines(filepath)

            # Header + one row per (publisher, prefix) pair.
            self.assertEqual(len(lines), 4)
            self.assertIn('"id","name","prefix"', lines[0])
            self.assertIn('"1","Publisher A","10.1000"', lines[1])
            self.assertIn('"2","Publisher B","10.2000"', lines[2])
            self.assertIn('"2","Publisher B","10.2001"', lines[3])

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_with_existing_data_deduplication(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = (
            [
                self._publisher(1, "Publisher A", "10.1000"),
                self._publisher(2, "Publisher B", "10.2000"),
            ],
            1000,
            2,
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "publishers.csv")
            # Publisher A is already on disk; it must not be written twice.
            self._seed_csv(filepath, "Publisher A")

            process(filepath, force=True)

            lines = self._read_lines(filepath)

            self.assertEqual(len(lines), 3)
            id_occurrences = sum(1 for line in lines if '"1"' in line and "Publisher A" in line)
            self.assertEqual(id_occurrences, 1)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_html_unescape(self, mock_get_publishers: MagicMock) -> None:
        # The name carries an HTML entity; the CSV is expected to hold the
        # decoded character and no trace of the entity itself.
        mock_get_publishers.return_value = (
            [self._publisher(1, "Publisher &amp; Co", "10.1000")],
            1000,
            1,
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "publishers.csv")
            process(filepath)

            with open(filepath, "r", encoding="utf8") as f:
                content = f.read()

            self.assertIn("Publisher & Co", content)
            self.assertNotIn("&amp;", content)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_api_failure_breaks_loop(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = None

        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "publishers.csv")
            process(filepath)

            # Checked while the directory still exists, so a missing file
            # really means nothing was written.
            self.assertFalse(os.path.exists(filepath))

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_multiple_pages(self, mock_get_publishers: MagicMock) -> None:
        # Two pages: offsets 1000 then 2000 against a total of 2000.
        mock_get_publishers.side_effect = [
            ([self._publisher(1, "Publisher A", "10.1000")], 1000, 2000),
            ([self._publisher(2, "Publisher B", "10.2000")], 2000, 2000),
        ]

        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "publishers.csv")
            process(filepath)

            lines = self._read_lines(filepath)

            self.assertEqual(len(lines), 3)
            self.assertEqual(mock_get_publishers.call_count, 2)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_duplicate_prefix_same_publisher(self, mock_get_publishers: MagicMock) -> None:
        # The same prefix listed twice must produce a single row.
        mock_get_publishers.return_value = (
            [self._publisher(1, "Publisher A", "10.1000", "10.1000")],
            1000,
            1,
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "publishers.csv")
            process(filepath)

            lines = self._read_lines(filepath)

            self.assertEqual(len(lines), 2)

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_skips_if_file_recent(self, mock_get_publishers: MagicMock) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "publishers.csv")
            self._seed_csv(filepath, "Existing Publisher")

            result = process(filepath, max_age_days=30, force=False)

            # A just-written file with force=False: no fetch, falsy result.
            self.assertFalse(result)
            mock_get_publishers.assert_not_called()

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_force_updates_recent_file(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = (
            [self._publisher(2, "New Publisher", "10.2000")],
            1000,
            1,
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "publishers.csv")
            self._seed_csv(filepath, "Existing Publisher")

            result = process(filepath, max_age_days=30, force=True)

            self.assertTrue(result)
            mock_get_publishers.assert_called_once()

    @patch('oc_ds_converter.crossref.extract_crossref_publishers.get_publishers')
    def test_process_updates_stale_file(self, mock_get_publishers: MagicMock) -> None:
        mock_get_publishers.return_value = (
            [self._publisher(2, "New Publisher", "10.2000")],
            1000,
            1,
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = os.path.join(tmpdir, "publishers.csv")
            self._seed_csv(filepath, "Existing Publisher")

            # Age the file to 40 days, beyond the 30-day limit passed below.
            old_time = time.time() - (40 * 24 * 60 * 60)
            os.utime(filepath, (old_time, old_time))

            result = process(filepath, max_age_days=30, force=False)

            self.assertTrue(result)
            mock_get_publishers.assert_called_once()
# Run the suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()