Coverage for test / idm_crossref_test.py: 99%
164 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2024 Elia Rizzetto <elia.rizzetto2@unibo.it>
2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
3# SPDX-FileCopyrightText: 2026 Marta Soricetti <marta.soricetti@unibo.it>
4#
5# SPDX-License-Identifier: ISC
7import json
8import unittest
9from os import makedirs
10from os.path import exists, join
12import xmltodict
13from oc_ds_converter.oc_idmanager import *
14from oc_ds_converter.oc_idmanager.base import IdentifierManager
15from requests import ReadTimeout, get
16from requests.exceptions import ConnectionError
17from oc_ds_converter.oc_idmanager.oc_data_storage.storage_manager import StorageManager
18from oc_ds_converter.oc_idmanager.oc_data_storage.in_memory_manager import InMemoryStorageManager
19from oc_ds_converter.oc_idmanager.oc_data_storage.sqlite_manager import SqliteStorageManager
20from oc_ds_converter.oc_idmanager.oc_data_storage.redis_manager import RedisStorageManager
class CrossrefIdentifierManagerTest(unittest.TestCase):
    """Tests for ``CrossrefManager``, the manager of Crossref member ids.

    The tests exercise ``is_valid``, ``exists`` and ``normalise`` with and
    without the API service, and with empty or pre-seeded validity storage.

    Several tests build the manager with ``use_api_service`` enabled, so they
    presumably need network access to the Crossref API (TODO confirm).
    """

    def setUp(self):
        """Create the scratch directory, load the fixture and define sample ids."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            # Mapping of prefixed ids to dicts; the tests read the "valid" key.
            self.data = json.load(fp)

        self.valid_crmid1 = "297"
        self.valid_crmid2 = "4443"
        # Rejected when the API is used, accepted on syntax alone otherwise.
        self.invalid_crmid1 = "342427"
        self.invalid_crmid2 = "0123"

    def _seed_storage(self, manager):
        """Copy every ``crossref:`` entry of the fixture into *manager*'s storage.

        Entries without a ``"valid"`` key are stored as ``False``.
        """
        for key, value in self.data.items():
            if key.startswith("crossref:"):
                manager.storage_manager.set_value(key, value.get("valid", False))

    def _assert_all_stored(self, manager, ids):
        """Assert that each id in *ids* is a key of *manager*'s storage.

        Ids for which ``normalise`` returns a falsy value are skipped.
        """
        stored_keys = manager.storage_manager.get_all_keys()
        for raw_id in ids:
            norm_id = manager.normalise(raw_id, include_prefix=True)
            if norm_id:
                # assertIn reports the missing id, unlike assertTrue(all(...)).
                self.assertIn(norm_id, stored_keys)

    def test_crossref_is_valid(self):
        """Check ``is_valid`` with the API, with seeded storage, and with neither."""
        # Default manager: validity is decided through the API service.
        api_manager = CrossrefManager()
        self.assertTrue(api_manager.is_valid(self.valid_crmid1))
        self.assertTrue(api_manager.is_valid(self.valid_crmid2))
        self.assertFalse(api_manager.is_valid(self.invalid_crmid1))
        self.assertFalse(api_manager.is_valid(self.invalid_crmid2))

        # No API: the answers must come from the pre-seeded storage.
        seeded_manager = CrossrefManager(use_api_service=False, testing=True)
        self._seed_storage(seeded_manager)
        self.assertIn(
            seeded_manager.normalise(self.valid_crmid1, include_prefix=True), self.data
        )
        self.assertIn(
            seeded_manager.normalise(self.invalid_crmid1, include_prefix=True), self.data
        )
        self.assertTrue(seeded_manager.is_valid(self.valid_crmid1))
        self.assertFalse(seeded_manager.is_valid(self.invalid_crmid1))

        # No API and nothing seeded: a syntactically correct id is accepted.
        offline_manager = CrossrefManager(testing=True, use_api_service=False)
        self.assertTrue(offline_manager.is_valid(self.valid_crmid1))
        self.assertTrue(offline_manager.is_valid(self.valid_crmid2))

    def test_exists(self):
        """Check ``exists`` with and without the extra-info payload."""
        with self.subTest(msg="get_extra_info=True, allow_extra_api=None"):
            crmngr = CrossrefManager()
            output = crmngr.exists(
                self.valid_crmid1, get_extra_info=True, allow_extra_api=None
            )
            # Only the first element (the existence flag) is pinned here; the
            # content of the accompanying extra-info element is not asserted.
            self.assertEqual(True, output[0])

        with self.subTest(msg="get_extra_info=False, allow_extra_api=None"):
            crmngr = CrossrefManager()
            output = crmngr.exists(
                self.valid_crmid2, get_extra_info=False, allow_extra_api=None
            )
            self.assertEqual(output, True)

    def test_openalex_normalise(self):
        """Check ``CrossrefManager.normalise`` on several spellings of an id.

        NOTE(review): despite its name this test targets Crossref, not
        OpenAlex; the name is kept so the test id does not change.
        """
        crmngr = CrossrefManager()

        # Prefixed form.
        self.assertEqual(
            self.valid_crmid1, crmngr.normalise("crossref:" + self.valid_crmid1)
        )
        # replace("", " ") puts a space before, between and after every character.
        self.assertEqual(
            self.valid_crmid1, crmngr.normalise(self.valid_crmid1.replace("", " "))
        )
        # API URL form.
        self.assertEqual(
            self.valid_crmid1,
            crmngr.normalise("https://api.crossref.org/members/" + self.valid_crmid1),
        )
        # Leading whitespace.
        self.assertEqual(
            crmngr.normalise(self.valid_crmid1),
            crmngr.normalise(' ' + self.valid_crmid1),
        )
        self.assertEqual(
            crmngr.normalise(self.valid_crmid2),
            crmngr.normalise("https://api.crossref.org/members/" + self.valid_crmid2),
        )

        # Same checks against a manager whose storage is pre-seeded from the fixture.
        seeded_manager = CrossrefManager(testing=True, use_api_service=False)
        self._seed_storage(seeded_manager)
        self.assertIn(
            seeded_manager.normalise(self.valid_crmid1, include_prefix=True), self.data
        )
        self.assertIn(
            seeded_manager.normalise(self.invalid_crmid2, include_prefix=True), self.data
        )
        self.assertTrue(seeded_manager.is_valid(self.valid_crmid1))
        self.assertFalse(seeded_manager.is_valid(self.invalid_crmid2))

    def test_crossref_default(self):
        """Validate ids through the API and check they end up in the storage."""
        # Default storage with testing=True (fakeredis, per the original notes);
        # the API service is used.
        mngr = CrossrefManager(testing=True)
        self.assertTrue(mngr.is_valid(self.valid_crmid1))
        self.assertTrue(mngr.is_valid(self.valid_crmid2))
        self.assertFalse(mngr.is_valid(self.invalid_crmid2))
        self.assertFalse(mngr.is_valid(self.invalid_crmid1))

        # Every validated id must now be a key of the storage.
        validated_ids = [
            self.valid_crmid1,
            self.valid_crmid2,
            self.invalid_crmid1,
            self.invalid_crmid2,
        ]
        self._assert_all_stored(mngr, validated_ids)

        mngr.storage_manager.delete_storage()
        # The storage must be empty after deletion.
        self.assertEqual(mngr.storage_manager.get_all_keys(), set())

    #### IN MEMORY STORAGE MANAGER
    # (historical heading: these tests build the manager with its default storage)

    def test_crossref_memory_file_noapi(self):
        """Without the API, a stored ``invalid`` verdict wins over correct syntax."""
        seeded_manager = CrossrefManager(testing=True, use_api_service=False)
        self._seed_storage(seeded_manager)
        self.assertIn(
            seeded_manager.normalise(self.valid_crmid1, include_prefix=True), self.data
        )
        self.assertIn(
            seeded_manager.normalise(self.invalid_crmid2, include_prefix=True), self.data
        )
        # Stored as invalid in the fixture.
        self.assertFalse(seeded_manager.is_valid(self.invalid_crmid2))

    def test_crossref_memory_file_api(self):
        """With the API, a well-formed but unknown id is reported as invalid."""
        api_manager = CrossrefManager(testing=True, use_api_service=True)
        self.assertFalse(api_manager.is_valid(self.invalid_crmid1))

    def test_crossref_memory_nofile_noapi(self):
        """Without API and without stored data, correct syntax is enough."""
        offline_manager = CrossrefManager(testing=True, use_api_service=False)
        self.assertTrue(offline_manager.is_valid(self.valid_crmid1))
        self.assertTrue(offline_manager.is_valid(self.invalid_crmid1))
        offline_manager.storage_manager.delete_storage()

    #### SQLITE STORAGE MANAGER
    # (historical heading: these tests build the manager with its default storage)

    def test_crossref_sqlite_nofile_api(self):
        """Validate ids through the API starting from an empty storage."""
        api_manager = CrossrefManager(testing=True)
        self.assertTrue(api_manager.is_valid(self.valid_crmid1))
        self.assertTrue(api_manager.is_valid(self.valid_crmid2))
        self.assertFalse(api_manager.is_valid(self.invalid_crmid1))
        self.assertFalse(api_manager.is_valid(self.invalid_crmid2))

        # Every validated id must now be a key of the storage.
        validated_ids = [
            self.valid_crmid1,
            self.valid_crmid2,
            self.invalid_crmid1,
            self.invalid_crmid2,
        ]
        self._assert_all_stored(api_manager, validated_ids)

        api_manager.storage_manager.delete_storage()
        # The storage must be empty after deletion.
        self.assertEqual(api_manager.storage_manager.get_all_keys(), set())

    def test_crossref_sqlite_file_api(self):
        """Verdicts obtained with the API are reused by a manager without API."""
        to_insert = [self.invalid_crmid1, self.valid_crmid1]

        # First manager: obtain the verdicts through the API and store them.
        api_manager = CrossrefManager(testing=True, use_api_service=True)
        for crmid in to_insert:
            norm_id = api_manager.normalise(crmid, include_prefix=True)
            is_valid = api_manager.is_valid(norm_id)
            api_manager.storage_manager.set_value(norm_id, is_valid)

        # Second manager: no API; copy the stored verdicts over explicitly.
        offline_manager = CrossrefManager(testing=True, use_api_service=False)
        for crmid in to_insert:
            norm_id = offline_manager.normalise(crmid, include_prefix=True)
            value = api_manager.storage_manager.get_value(norm_id)
            if value is not None:
                offline_manager.storage_manager.set_value(norm_id, value)

        all_db_keys = offline_manager.storage_manager.get_all_keys()
        # Every normalised id of the list must have been inserted.
        for crmid in to_insert:
            self.assertIn(
                offline_manager.normalise(crmid, include_prefix=True), all_db_keys
            )
        # Stored as valid.
        self.assertTrue(offline_manager.is_valid(self.valid_crmid1))
        # Stored as invalid.
        self.assertFalse(offline_manager.is_valid(self.invalid_crmid1))
        # Not stored, but syntactically correct.
        self.assertTrue(
            offline_manager.is_valid(
                offline_manager.normalise(self.invalid_crmid2, include_prefix=True)
            )
        )
        offline_manager.storage_manager.delete_storage()

    def test_crossref_sqlite_nofile_noapi(self):
        """Without API and without stored data, correct syntax is enough."""
        offline_manager = CrossrefManager(testing=True, use_api_service=False)
        self.assertTrue(offline_manager.is_valid(self.valid_crmid1))
        self.assertTrue(offline_manager.is_valid(self.invalid_crmid2))
        offline_manager.storage_manager.delete_storage()

    #### REDIS STORAGE MANAGER

    def test_crossref_redis_nofile_api(self):
        """Validate ids through the API with an explicit ``RedisStorageManager``."""
        # NOTE(review): unlike test_crossref_redis_file_api, no testing=True is
        # passed to RedisStorageManager here; confirm that its default does not
        # point at a live Redis instance, since delete_storage() is called below.
        redis_cm_nofile = CrossrefManager(storage_manager=RedisStorageManager())
        self.assertTrue(redis_cm_nofile.is_valid(self.valid_crmid1))
        self.assertTrue(redis_cm_nofile.is_valid(self.valid_crmid2))
        self.assertFalse(redis_cm_nofile.is_valid(self.invalid_crmid1))
        self.assertFalse(redis_cm_nofile.is_valid(self.invalid_crmid2))

        # The storage must contain exactly the ids validated above.
        validated_ids = {
            redis_cm_nofile.normalise(crmid, include_prefix=True)
            for crmid in (
                self.valid_crmid1,
                self.valid_crmid2,
                self.invalid_crmid1,
                self.invalid_crmid2,
            )
        }
        all_ids_stored = redis_cm_nofile.storage_manager.get_all_keys()
        self.assertEqual(validated_ids, all_ids_stored)

        redis_cm_nofile.storage_manager.delete_storage()
        # The storage must be empty after deletion.
        self.assertEqual(redis_cm_nofile.storage_manager.get_all_keys(), set())

    def test_crossref_redis_file_api(self):
        """Two managers sharing one storage: fill it with the API, read it without."""
        to_insert = [self.invalid_crmid1, self.valid_crmid1]
        storage_manager = RedisStorageManager(testing=True)

        # First manager: obtain the verdicts through the API and store them.
        redis_file = CrossrefManager(storage_manager=storage_manager, use_api_service=True)
        for crmid in to_insert:
            norm_id = redis_file.normalise(crmid, include_prefix=True)
            is_valid = redis_file.is_valid(norm_id)
            redis_file.storage_manager.set_value(norm_id, is_valid)

        # Second manager: same storage object, API disabled.
        redis_no_api = CrossrefManager(storage_manager=storage_manager, use_api_service=False)
        all_db_keys = redis_no_api.storage_manager.get_all_keys()
        # Every normalised id of the list must have been inserted.
        for crmid in to_insert:
            self.assertIn(redis_no_api.normalise(crmid, include_prefix=True), all_db_keys)
        # Stored as valid.
        self.assertTrue(redis_no_api.is_valid(self.valid_crmid1))
        # Stored as invalid.
        self.assertFalse(redis_no_api.is_valid(self.invalid_crmid1))
        # Not stored; accepted because its syntax is correct and the API is off.
        self.assertTrue(redis_no_api.is_valid(self.invalid_crmid2))
        redis_no_api.storage_manager.delete_storage()

    def test_crossref_redis_nofile_noapi(self):
        """Without API and without stored data, correct syntax is enough."""
        # NOTE(review): the test name says "redis" but the storage passed is a
        # SqliteStorageManager; confirm which backend this test is meant to cover.
        cr_nofile_noapi = CrossrefManager(
            storage_manager=SqliteStorageManager(), use_api_service=False
        )
        self.assertTrue(cr_nofile_noapi.is_valid(self.valid_crmid1))
        self.assertTrue(cr_nofile_noapi.is_valid(self.invalid_crmid2))
        cr_nofile_noapi.storage_manager.delete_storage()