Coverage for test / idm_doi_test.py: 99%
199 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it>
2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
3#
4# SPDX-License-Identifier: ISC
6import json
7import unittest
8from os import makedirs
9from os.path import exists, join
11from oc_ds_converter.oc_idmanager.doi import DOIManager
class DOIIdentifierManagerTest(unittest.TestCase):
    """Test normalisation, validation and repair of DOIs through ``DOIManager``.

    NOTE(review): tests that build the manager with the API enabled (the
    default, or ``use_api_service=True``) presumably need network access --
    confirm before running them in an offline environment.
    """

    def setUp(self):
        """Load the ``glob.json`` fixture and define the sample DOIs."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_doi_1 = "10.1108/jd-12-2013-0166"
        self.valid_doi_2 = "10.1130/2015.2513(00)"
        self.invalid_doi_1 = "10.1108/12-2013-0166"
        self.invalid_doi_2 = "10.1371"

    def _seed_storage_from_fixture(self, manager):
        """Copy the validity of every ``doi:`` fixture entry into *manager*'s storage.

        The storage is scheduled for deletion when the test ends, so seeded
        values cannot leak into other tests should the backing store be shared
        between manager instances (NOTE(review): whether it is shared depends
        on the storage manager implementation -- confirm).
        """
        self.addCleanup(manager.storage_manager.delete_storage)
        for key, value in self.data.items():
            if key.startswith("doi:"):
                manager.storage_manager.set_value(key, value.get("valid", False))

    def test_exists(self):
        """``exists`` returns a bool, or a ``(bool, info)`` tuple when extra info is requested."""
        doi = "10.1007/s11192-022-04367-w"
        cases = [
            (
                "get_extra_info=True, allow_extra_api=None",
                {"get_extra_info": True, "allow_extra_api": None},
                (True, {"id": doi, "valid": True, "ra": "unknown"}),
            ),
            (
                "get_extra_info=False, allow_extra_api=None",
                {"get_extra_info": False, "allow_extra_api": None},
                True,
            ),
            (
                "get_extra_info=False, allow_extra_api='crossref'",
                {"get_extra_info": False, "allow_extra_api": "crossref"},
                True,
            ),
        ]
        for msg, kwargs, expected_output in cases:
            with self.subTest(msg=msg):
                # A fresh manager per case, as in the original hand-written cases.
                doi_manager = DOIManager()
                output = doi_manager.exists(doi, **kwargs)
                self.assertEqual(output, expected_output)

    def test_doi_normalise(self):
        """Upper-cased DOIs carrying a ``doi:`` or resolver URL prefix normalise to the bare DOI."""
        dm = DOIManager()
        upper_doi = self.valid_doi_1.upper()
        for prefix in ("doi: 10. ", "doi:10.", "https://doi.org/10."):
            with self.subTest(prefix=prefix):
                self.assertEqual(
                    self.valid_doi_1,
                    dm.normalise(upper_doi.replace("10.", prefix)),
                )

    def test_doi_is_valid(self):
        """``is_valid`` agrees with the API and with pre-seeded storage values."""
        dm_nofile = DOIManager()
        self.assertTrue(dm_nofile.is_valid(self.valid_doi_1))
        self.assertTrue(dm_nofile.is_valid(self.valid_doi_2))
        self.assertFalse(dm_nofile.is_valid(self.invalid_doi_1))
        self.assertFalse(dm_nofile.is_valid(self.invalid_doi_2))

        dm_file = DOIManager(testing=True, use_api_service=False)
        # Pre-seed storage with data from glob.json
        self._seed_storage_from_fixture(dm_file)
        self.assertIn(dm_file.normalise(self.valid_doi_1, include_prefix=True), self.data)
        self.assertIn(dm_file.normalise(self.invalid_doi_1, include_prefix=True), self.data)
        self.assertTrue(dm_file.is_valid(self.valid_doi_1))
        self.assertFalse(dm_file.is_valid(self.invalid_doi_1))

    def test_doi_default(self):
        """Validated ids end up in storage, and ``delete_storage`` empties it."""
        # Uses RedisStorageManager with testing=True (fakeredis)
        # uses API
        am_nofile = DOIManager(testing=True)
        self.assertTrue(am_nofile.is_valid(self.valid_doi_1))
        self.assertTrue(am_nofile.is_valid(self.valid_doi_2))
        self.assertFalse(am_nofile.is_valid(self.invalid_doi_2))
        self.assertFalse(am_nofile.is_valid(self.invalid_doi_1))
        validated_ids = [self.valid_doi_1, self.valid_doi_2, self.invalid_doi_1, self.invalid_doi_2]
        # check that all the validated ids are stored in redis
        all_ids_stored = am_nofile.storage_manager.get_all_keys()
        for doi in validated_ids:
            # assertIn (rather than assertTrue(all(...))) names the missing key on failure.
            self.assertIn(am_nofile.normalise(doi, include_prefix=True), all_ids_stored)
        am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(am_nofile.storage_manager.get_all_keys(), set())

    def test_doi_memory_file_noapi(self):
        """With seeded storage and no API, stored values win and unknown ids count as valid."""
        # Uses pre-seeded data (without updating it)
        # Uses RedisStorageManager storage manager
        # does not use API (so a syntactically correct id is considered to be valid)
        am_file = DOIManager(testing=True, use_api_service=False)
        # Pre-seed storage with data from glob.json
        self._seed_storage_from_fixture(am_file)
        norm_valid = am_file.normalise(self.valid_doi_1, include_prefix=True)
        norm_invalid = am_file.normalise(self.invalid_doi_1.strip().lower(), include_prefix=True)
        norm_fake = am_file.normalise("10.1109/5.771073FAKE_ID", include_prefix=True)
        # Bare asserts narrow the Optional results for the calls below.
        assert norm_valid is not None
        assert norm_invalid is not None
        assert norm_fake is not None
        self.assertIn(norm_valid, self.data)
        self.assertIn(norm_invalid, self.data)
        self.assertFalse(am_file.is_valid(self.invalid_doi_1))
        self.assertTrue(am_file.is_valid(norm_fake))

    def test_doi_memory_file_api(self):
        """With the API enabled, a syntactically correct but non-existent DOI is invalid."""
        # Uses RedisStorageManager storage manager
        # uses API (so a syntactically correct id which is not valid is considered to be invalid)
        am_file = DOIManager(testing=True, use_api_service=True)
        self.assertFalse(am_file.is_valid(self.invalid_doi_1))

    def test_doi_memory_nofile_noapi(self):
        """Without seeded data and without the API, any syntactically correct DOI is valid."""
        # Does not use support file
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = DOIManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_doi_1))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_doi_1))
        am_nofile_noapi.storage_manager.delete_storage()

    def test_doi_sqlite_nofile_api(self):
        """API validation results are stored, and the storage can be emptied.

        NOTE(review): the name says "sqlite" but the original comments describe
        a RedisStorageManager -- the name looks historical; confirm.
        """
        # No pre-existing data
        # storage manager : RedisStorageManager
        # uses API
        sql_am_nofile = DOIManager(testing=True)
        self.assertTrue(sql_am_nofile.is_valid(self.valid_doi_1))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_doi_2))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_doi_1))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_doi_2))
        # check that the redis storage contains all the validated ids
        validated_ids = [self.valid_doi_1, self.valid_doi_2, self.invalid_doi_1, self.invalid_doi_2]
        all_ids_stored = sql_am_nofile.storage_manager.get_all_keys()
        for doi in validated_ids:
            nid = sql_am_nofile.normalise(doi, include_prefix=True)
            # A None id used to be skipped silently, which let the check pass
            # vacuously; test_doi_default already checks the same ids strictly.
            self.assertIsNotNone(nid)
            self.assertIn(nid, all_ids_stored)
        sql_am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set())

    def test_doi_sqlite_file_api(self):
        """Values validated through the API are honoured by a manager running without it."""
        # Uses pre-existing data in Redis
        # Uses RedisStorageManager storage manager
        # tests validation behavior with pre-seeded data
        to_insert = [self.invalid_doi_1, self.valid_doi_1]
        sql_file = DOIManager(testing=True, use_api_service=True)
        for doi_id in to_insert:
            norm_id = sql_file.normalise(doi_id, include_prefix=True)
            assert norm_id is not None
            is_valid = sql_file.is_valid(norm_id)
            sql_file.storage_manager.set_value(norm_id, is_valid)

        sql_no_api = DOIManager(testing=True, use_api_service=False)
        # Copy values from the first manager to the second for testing
        for doi_id in to_insert:
            norm_id = sql_no_api.normalise(doi_id, include_prefix=True)
            value = sql_file.storage_manager.get_value(norm_id)
            if value is not None:
                sql_no_api.storage_manager.set_value(norm_id, value)
        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        for doi_id in to_insert:
            nid = sql_no_api.normalise(doi_id, include_prefix=True)
            # Strict check: a None id must fail instead of being skipped.
            self.assertIsNotNone(nid)
            self.assertIn(nid, all_db_keys)
        self.assertTrue(sql_no_api.is_valid(self.valid_doi_1))
        self.assertFalse(sql_no_api.is_valid(self.invalid_doi_1))
        norm_fake = sql_no_api.normalise("10.1109/5.771073FAKE_ID", include_prefix=True)
        assert norm_fake is not None
        self.assertTrue(sql_no_api.is_valid(norm_fake))
        sql_no_api.storage_manager.delete_storage()

    def test_doi_sqlite_nofile_noapi(self):
        """Without seeded data and without the API, any syntactically correct DOI is valid."""
        # Does not use support file
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = DOIManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_doi_1))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_doi_1))
        am_nofile_noapi.storage_manager.delete_storage()

    def test_attempt_repair_removes_backslash(self):
        """A trailing backslash is removed by ``attempt_repair``."""
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10.1108/jd-12-2013-0166\\")
        self.assertEqual(repaired, "10.1108/jd-12-2013-0166")

    def test_attempt_repair_removes_double_underscore(self):
        """A DOI containing a double underscore yields ``None`` from ``attempt_repair``.

        NOTE(review): the test name says "removes", yet the expected result is
        None -- presumably the repaired candidate is rejected; confirm.
        """
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10.1108/jd__12-2013-0166")
        self.assertIsNone(repaired)

    def test_attempt_repair_removes_double_dot(self):
        """A doubled dot in the prefix is collapsed by ``attempt_repair``."""
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10..1108/jd-12-2013-0166")
        self.assertEqual(repaired, "10.1108/jd-12-2013-0166")

    def test_attempt_repair_removes_html_tags(self):
        """A trailing ``<tag>content</tag>`` fragment is removed by ``attempt_repair``."""
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10.1108/jd-12-2013-0166<tag>content</tag>")
        self.assertEqual(repaired, "10.1108/jd-12-2013-0166")

    def test_attempt_repair_removes_self_closing_tags(self):
        """A trailing self-closing tag is removed by ``attempt_repair``."""
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10.1108/jd-12-2013-0166<br/>")
        self.assertEqual(repaired, "10.1108/jd-12-2013-0166")

    def test_attempt_repair_no_change_returns_none(self):
        """``attempt_repair`` returns ``None`` for a DOI that needs no repair."""
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10.1108/jd-12-2013-0166")
        self.assertIsNone(repaired)

    def test_attempt_repair_api_disabled_returns_none(self):
        """``attempt_repair`` returns ``None`` when the API service is disabled."""
        dm = DOIManager(use_api_service=False)
        repaired = dm.attempt_repair("10.1108/jd-12-2013-0166\\")
        self.assertIsNone(repaired)

    def test_is_valid_repairs_malformed_doi(self):
        """With the API enabled, a DOI with a trailing backslash is reported as valid."""
        dm = DOIManager(use_api_service=True)
        malformed_doi = "10.1108/jd-12-2013-0166\\"
        self.assertTrue(dm.is_valid(malformed_doi))

    def test_is_valid_repairs_malformed_doi_with_extra_info(self):
        """The extra info returned for a malformed DOI carries the repaired id."""
        dm = DOIManager(use_api_service=True)
        malformed_doi = "10.1108/jd-12-2013-0166\\"
        result = dm.is_valid(malformed_doi, get_extra_info=True)
        # Bare assert narrows the return type before indexing.
        assert isinstance(result, tuple)
        self.assertTrue(result[0])
        self.assertEqual(result[1]["id"], "10.1108/jd-12-2013-0166")

    def test_is_valid_no_repair_when_api_disabled(self):
        """With the API disabled, the malformed DOI is still reported as valid."""
        dm = DOIManager(use_api_service=False)
        malformed_doi = "10.1108/jd-12-2013-0166\\"
        self.assertTrue(dm.is_valid(malformed_doi))

    def test_is_valid_with_extra_info_valid_doi(self):
        """``is_valid`` with extra info returns ``(True, info)`` whose id is the input DOI."""
        dm = DOIManager(use_api_service=True)
        result = dm.is_valid(self.valid_doi_1, get_extra_info=True)
        # Bare assert narrows the return type before indexing.
        assert isinstance(result, tuple)
        self.assertTrue(result[0])
        self.assertEqual(result[1]["id"], self.valid_doi_1)

    def test_normalise_removes_dx_doi_prefix(self):
        """The ``http://dx.doi.org/`` resolver prefix is stripped by ``normalise``."""
        dm = DOIManager()
        doi_with_prefix = "http://dx.doi.org/10.1108/jd-12-2013-0166"
        self.assertEqual(dm.normalise(doi_with_prefix), "10.1108/jd-12-2013-0166")

    def test_normalise_removes_suffix_pmid(self):
        """A trailing ``.PMID:<number>`` suffix is stripped by ``normalise``."""
        dm = DOIManager()
        doi_with_suffix = "10.1108/jd-12-2013-0166.PMID:12345"
        self.assertEqual(dm.normalise(doi_with_suffix), "10.1108/jd-12-2013-0166")

    def test_normalise_invalid_string_returns_none(self):
        """``normalise`` returns ``None`` for a string that is not a DOI."""
        dm = DOIManager()
        self.assertIsNone(dm.normalise("not a doi"))

    def test_base_normalise_invalid_string_returns_none(self):
        """``base_normalise`` returns ``None`` for a string that is not a DOI."""
        dm = DOIManager()
        self.assertIsNone(dm.base_normalise("not a doi"))

    def test_is_valid_normalise_returns_none(self):
        """``is_valid`` is false for a string that is not a DOI."""
        dm = DOIManager()
        self.assertFalse(dm.is_valid("not a doi"))

    def test_syntax_ok_without_prefix(self):
        """``syntax_ok`` accepts a DOI given without the ``doi:`` prefix."""
        dm = DOIManager()
        self.assertTrue(dm.syntax_ok("10.1108/jd-12-2013-0166"))

    def test_normalise_removes_embedded_url_prefix(self):
        """A DOI with a resolver URL embedded after the registrant code normalises to ``10.1108``."""
        dm = DOIManager()
        doi_with_embedded_url = "10.1108http://dx.doi.org/jd-12-2013-0166"
        self.assertEqual(dm.normalise(doi_with_embedded_url), "10.1108")