Coverage for test / idm_arxiv_test.py: 100%
125 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it>
2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
3# SPDX-FileCopyrightText: 2026 Marta Soricetti <marta.soricetti@unibo.it>
4#
5# SPDX-License-Identifier: ISC
8import json
9import unittest
10from os import makedirs
11from os.path import exists, join
13from oc_ds_converter.oc_idmanager.arxiv import ArXivManager
class ArxivIdentifierManagerTest(unittest.TestCase):
    """Test identifier validation and storage handling of ``ArXivManager``.

    Every manager in this suite is created with ``testing=True``. The
    "memory" / "sqlite" / "redis" section names are kept from the suite's
    layout; according to the per-test comments, all of them run against
    RedisStorageManager.
    """

    def setUp(self):
        """Create the scratch directory and load the fixture identifiers."""
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        # when several test processes start at the same time.
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_arxiv_1 = "arXiv:2109.05583"
        self.valid_arxiv_1v = "2109.05583v2"
        self.valid_arxiv_2 = "arXiv:2109.05582"
        # Upper-case version marker and trailing space: exercises normalisation.
        self.valid_arx_U_S = "2109.05583V2 "
        self.invalid_arxiv_1 = "1133.5582"
        self.invalid_arxiv_2v = "2109.05583v23"

    def _assert_all_stored(self, manager, raw_ids, stored_keys):
        """Assert that the normalised form of every id in *raw_ids* is in *stored_keys*.

        One ``assertIn`` per identifier is used so that a failure names the
        identifier that is missing instead of reporting a bare ``False``.
        """
        for raw_id in raw_ids:
            self.assertIn(manager.normalise(raw_id, include_prefix=True), stored_keys)

    def test_arxiv_default(self):
        """Validate ids with a default manager, check they are stored, then wipe storage."""
        # Uses RedisStorageManager with testing=True (fakeredis)
        # uses API
        am_nofile = ArXivManager(testing=True)
        self.assertTrue(am_nofile.is_valid(self.valid_arxiv_1))
        self.assertTrue(am_nofile.is_valid(self.valid_arxiv_2))
        self.assertTrue(am_nofile.is_valid(self.valid_arxiv_1v))
        self.assertFalse(am_nofile.is_valid(self.invalid_arxiv_1))
        self.assertFalse(am_nofile.is_valid(self.invalid_arxiv_2v))
        validated_ids = [
            self.valid_arxiv_1,
            self.valid_arxiv_2,
            self.valid_arxiv_1v,
            self.invalid_arxiv_1,
            self.invalid_arxiv_2v,
        ]
        # check that all the validated ids are stored in redis
        all_ids_stored = am_nofile.storage_manager.get_all_keys()
        self._assert_all_stored(am_nofile, validated_ids, all_ids_stored)
        am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(am_nofile.storage_manager.get_all_keys(), set())

    #### IN MEMORY STORAGE MANAGER
    # (section name only: per the comments below, these tests use RedisStorageManager)
    def test_arxiv_memory_file_noapi(self):
        """Validate against storage pre-seeded from glob.json, without the API."""
        # Uses pre-seeded data (without updating it)
        # Uses RedisStorageManager storage manager
        # does not use API (so a syntactically correct id is considered to be valid)
        am_file = ArXivManager(testing=True, use_api_service=False)
        # Pre-seed storage with data from glob.json
        for key, value in self.data.items():
            if key.startswith("arxiv:"):
                am_file.storage_manager.set_value(key, value.get("valid", False))
        self.assertIn(am_file.normalise(self.valid_arxiv_1.lower(), include_prefix=True), self.data)
        self.assertIn(am_file.normalise(self.valid_arx_U_S.strip().lower(), include_prefix=True), self.data)
        self.assertIn(am_file.normalise(self.invalid_arxiv_1.strip().lower(), include_prefix=True), self.data)
        self.assertTrue(am_file.is_valid(self.valid_arxiv_1))
        self.assertFalse(am_file.is_valid(self.invalid_arxiv_1))  # is stored as invalid
        # is not stored as invalid, does not exist but has correct syntax
        self.assertTrue(am_file.is_valid("arxiv:2229.00851"))

    def test_arxiv_memory_file_api(self):
        """Reject an invalid id when the API service is enabled."""
        # Nothing is pre-seeded in this test
        # Uses RedisStorageManager storage manager
        # uses API (so a syntactically correct id which is not valid is considered to be invalid)
        am_file = ArXivManager(testing=True, use_api_service=True)
        self.assertFalse(am_file.is_valid(self.invalid_arxiv_1))

    def test_arxiv_memory_nofile_noapi(self):
        """Accept syntactically correct ids when the API service is disabled."""
        # Nothing is pre-seeded in this test
        # Uses RedisStorageManager storage manager
        # Does not use the API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = ArXivManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_arxiv_1v))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_arxiv_1))
        am_nofile_noapi.storage_manager.delete_storage()

    #### SQLITE STORAGE MANAGER
    # (section name only: per the comments below, these tests use RedisStorageManager)
    def test_arxiv_sqlite_nofile_api(self):
        """Validate ids with the API, check they are stored, then wipe storage."""
        # No pre-existing data
        # storage manager : RedisStorageManager
        # uses API
        sql_am_nofile = ArXivManager(testing=True)
        self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_1))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_2))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_1v))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_arxiv_1))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_arxiv_2v))
        # check that the redis storage contains all the validated ids
        validated_ids = [
            self.valid_arxiv_1,
            self.valid_arxiv_2,
            self.valid_arxiv_1v,
            self.invalid_arxiv_1,
            self.invalid_arxiv_2v,
        ]
        all_ids_stored = sql_am_nofile.storage_manager.get_all_keys()
        self._assert_all_stored(sql_am_nofile, validated_ids, all_ids_stored)
        sql_am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set())

    def test_arxiv_sqlite_file_api(self):
        """Seed storage through an API-enabled manager, then validate without the API."""
        # Uses pre-existing data in Redis
        # Uses RedisStorageManager storage manager
        # tests validation behavior with pre-seeded data
        to_insert = [self.invalid_arxiv_1, self.valid_arxiv_1, self.valid_arx_U_S]
        sql_file = ArXivManager(testing=True, use_api_service=True)
        for arxiv_id in to_insert:
            norm_id = sql_file.normalise(arxiv_id, include_prefix=True)
            is_valid = sql_file.is_valid(norm_id)
            sql_file.storage_manager.set_value(norm_id, is_valid)

        sql_no_api = ArXivManager(testing=True, use_api_service=False)
        # Copy values from the first manager to the second for testing
        for arxiv_id in to_insert:
            norm_id = sql_no_api.normalise(arxiv_id, include_prefix=True)
            value = sql_file.storage_manager.get_value(norm_id)
            if value is not None:
                sql_no_api.storage_manager.set_value(norm_id, value)
        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        # check that all the normalised ids in the list were correctly inserted
        self._assert_all_stored(sql_no_api, to_insert, all_db_keys)
        self.assertTrue(sql_no_api.is_valid(self.valid_arxiv_1))  # is stored as valid
        self.assertTrue(sql_no_api.is_valid(self.valid_arx_U_S))  # is stored as valid
        self.assertFalse(sql_no_api.is_valid(self.invalid_arxiv_1))  # is stored as invalid
        # is not stored as invalid, does not exist but has correct syntax
        self.assertTrue(sql_no_api.is_valid("arxiv:2229.00851"))
        sql_no_api.storage_manager.delete_storage()

    def test_arxiv_sqlite_nofile_noapi(self):
        """Accept syntactically correct ids when the API service is disabled."""
        # Nothing is pre-seeded in this test
        # Uses RedisStorageManager storage manager
        # Does not use the API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = ArXivManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_arxiv_1v))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_arxiv_1))
        am_nofile_noapi.storage_manager.delete_storage()

    #### REDIS STORAGE MANAGER
    def test_arxiv_redis_nofile_api(self):
        """Validate ids with the API and check storage holds exactly those ids."""
        # No available data in redis db
        # Storage manager : RedisStorageManager
        # uses API
        sql_am_nofile = ArXivManager(testing=True)
        self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_1))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_2))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_1v))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_arxiv_1))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_arxiv_2v))
        # check that the redis db was correctly filled and that it contains all the validated ids

        validated_ids = {
            self.valid_arxiv_1,
            self.valid_arxiv_2,
            self.valid_arxiv_1v,
            self.invalid_arxiv_1,
            self.invalid_arxiv_2v,
        }
        validated_ids = {sql_am_nofile.normalise(x, include_prefix=True) for x in validated_ids}
        all_ids_stored = sql_am_nofile.storage_manager.get_all_keys()
        # check that the stored keys are exactly the validated ids
        self.assertEqual(validated_ids, all_ids_stored)
        sql_am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set())

    def test_arxiv_redis_file_api(self):
        """Seed storage through an API-enabled manager, then validate without the API."""
        # Uses data in redis db
        # Uses RedisStorageManager
        # fills db with a manager that uses the API, then validates with a manager
        # that does not (so a syntactically correct id is considered to be valid)

        to_insert = [self.invalid_arxiv_1, self.valid_arxiv_1, self.valid_arx_U_S]
        sql_file = ArXivManager(testing=True, use_api_service=True)
        # "arxiv_id" rather than "id", which would shadow the builtin
        for arxiv_id in to_insert:
            norm_id = sql_file.normalise(arxiv_id, include_prefix=True)
            is_valid = sql_file.is_valid(norm_id)
            sql_file.storage_manager.set_value(norm_id, is_valid)

        sql_no_api = ArXivManager(testing=True, use_api_service=False)
        # Copy values from the first manager to the second for testing
        for arxiv_id in to_insert:
            norm_id = sql_no_api.normalise(arxiv_id, include_prefix=True)
            value = sql_file.storage_manager.get_value(norm_id)
            if value is not None:
                sql_no_api.storage_manager.set_value(norm_id, value)
        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        # check that all the normalised ids in the list were correctly inserted in the db
        self._assert_all_stored(sql_no_api, to_insert, all_db_keys)
        self.assertTrue(sql_no_api.is_valid(self.valid_arxiv_1))  # is stored as valid
        self.assertTrue(sql_no_api.is_valid(self.valid_arx_U_S))  # is stored as valid
        self.assertFalse(sql_no_api.is_valid(self.invalid_arxiv_1))  # is stored as invalid
        # is not stored as invalid, does not exist but has correct syntax
        self.assertTrue(sql_no_api.is_valid("arxiv:2229.00851"))
        sql_no_api.storage_manager.delete_storage()

    def test_arxiv_redis_nofile_noapi(self):
        """Accept syntactically correct ids when the API service is disabled."""
        # No data in redis db
        # Uses RedisStorageManager
        # Does not use the API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = ArXivManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_arxiv_1v))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_arxiv_1))
        am_nofile_noapi.storage_manager.delete_storage()