Coverage for test / idm_openalex_test.py: 99%
124 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
# SPDX-FileCopyrightText: 2024 Elia Rizzetto <elia.rizzetto2@unibo.it>
# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
#
# SPDX-License-Identifier: ISC
6import json
7import unittest
8from os import makedirs
9from os.path import exists, join
11from oc_ds_converter.oc_idmanager.openalex import OpenAlexManager
class OpenAlexIdentifierManagerTest(unittest.TestCase):
    """Test the OpenAlex identifier manager (``OpenAlexManager``).

    The cases cover identifier normalisation, validation with and without the
    API service, and the content of the manager's storage.

    NOTE(review): managers created with ``use_api_service`` left at its default
    (or set to True) are described by the original comments as "using the API",
    so those tests presumably need network access -- confirm before running
    them in an isolated environment.
    """

    def setUp(self):
        """Load the fixture data and define the identifiers shared by the tests."""
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        # when several test processes start at the same time.
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        # W-prefixed and S-prefixed identifiers; the tests below expect the
        # "valid" ones to validate and the "invalid" ones to be rejected
        # whenever the API service is used.
        self.valid_wid = "W2013228336"
        self.valid_sid = "S4210229581"
        self.invalid_wid = "W7836728310"
        self.invalid_sid = "S4263287381"

    def _seed_storage(self, manager):
        """Copy every ``openalex:`` entry of the fixture into *manager*'s storage.

        The value stored for each key is the entry's ``valid`` flag, or False
        when the entry does not define one.
        """
        for key, value in self.data.items():
            if key.startswith("openalex:"):
                manager.storage_manager.set_value(key, value.get("valid", False))

    def test_openalex_is_valid(self):
        """Check ``is_valid`` with default settings, seeded storage and no API."""
        oalm_nofile = OpenAlexManager()
        self.assertTrue(oalm_nofile.is_valid(self.valid_wid))
        self.assertTrue(oalm_nofile.is_valid(self.valid_sid))
        self.assertFalse(oalm_nofile.is_valid(self.invalid_wid))
        self.assertFalse(oalm_nofile.is_valid(self.invalid_sid))

        oalm_file = OpenAlexManager(use_api_service=False, testing=True)
        # Pre-seed storage with data from glob.json
        self._seed_storage(oalm_file)
        self.assertIn(oalm_file.normalise(self.valid_wid, include_prefix=True), self.data)
        self.assertIn(oalm_file.normalise(self.invalid_wid, include_prefix=True), self.data)
        self.assertTrue(oalm_file.is_valid(self.valid_wid))
        self.assertFalse(oalm_file.is_valid(self.invalid_wid))

        oalm_nofile_noapi = OpenAlexManager(testing=True, use_api_service=False)
        self.assertTrue(oalm_nofile_noapi.is_valid(self.valid_wid))
        self.assertTrue(oalm_nofile_noapi.is_valid(self.valid_sid))

    def test_exists(self):
        """Check ``exists`` with and without the extra-information payload."""
        with self.subTest(msg="get_extra_info=True, allow_extra_api=None"):
            oalm = OpenAlexManager()
            output = oalm.exists('openalex:W748315831', get_extra_info=True, allow_extra_api=None)
            expected_output = (True, {'valid': True})
            # Only the boolean flag is compared; the extra-info dict returned
            # as second element is not checked.
            self.assertEqual(expected_output[0], output[0])

        with self.subTest(msg="get_extra_info=False, allow_extra_api=None"):
            oalm = OpenAlexManager()
            output = oalm.exists('S4210229581', get_extra_info=False, allow_extra_api=None)
            expected_output = True
            self.assertEqual(output, expected_output)

    def test_openalex_normalise(self):
        """Check that differently written forms of an id normalise to the same value."""
        oalm = OpenAlexManager()

        # Prefixed form.
        self.assertEqual(
            self.valid_wid, oalm.normalise("openalex:" + self.valid_wid)
        )
        # str.replace with an empty pattern puts a space around every character.
        self.assertEqual(
            self.valid_wid, oalm.normalise(self.valid_wid.replace("", " "))
        )
        # Landing-page URL.
        self.assertEqual(
            self.valid_wid,
            oalm.normalise("https://openalex.org/" + self.valid_wid),
        )
        # Leading whitespace.
        self.assertEqual(
            oalm.normalise(self.valid_wid),
            oalm.normalise(' ' + self.valid_wid),
        )
        # API URL of a source.
        self.assertEqual(
            oalm.normalise(self.valid_sid),
            oalm.normalise("https://api.openalex.org/sources/" + self.valid_sid),
        )

        dm_file = OpenAlexManager(testing=True, use_api_service=False)
        # Pre-seed storage with data from glob.json
        self._seed_storage(dm_file)
        self.assertIn(dm_file.normalise(self.valid_wid, include_prefix=True), self.data)
        self.assertIn(dm_file.normalise(self.invalid_sid, include_prefix=True), self.data)
        self.assertTrue(dm_file.is_valid(self.valid_wid))
        self.assertFalse(dm_file.is_valid(self.invalid_sid))

    def test_openalex_default(self):
        """Validate ids with ``testing=True`` and check they end up in the storage."""
        mngr = OpenAlexManager(testing=True)
        # Uses RedisStorageManager with testing=True (fakeredis)
        # uses API
        self.assertTrue(mngr.is_valid(self.valid_wid))
        self.assertTrue(mngr.is_valid(self.valid_sid))
        self.assertFalse(mngr.is_valid(self.invalid_sid))
        self.assertFalse(mngr.is_valid(self.invalid_wid))
        validated_ids = [self.valid_wid, self.valid_sid, self.invalid_wid, self.invalid_sid]
        # Normalise each id once and drop the ones that normalise to a falsy value.
        normalised = (mngr.normalise(x, include_prefix=True) for x in validated_ids)
        validated = [norm_id for norm_id in normalised if norm_id]
        # check that all the validated ids are stored in redis
        all_ids_stored = mngr.storage_manager.get_all_keys()
        for norm_id in validated:
            self.assertIn(norm_id, all_ids_stored)
        mngr.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(mngr.storage_manager.get_all_keys(), set())

    def test_openalex_memory_file_noapi(self):
        """Check that a seeded invalid id stays invalid when the API is disabled."""
        # Uses pre-seeded data (without updating it)
        # Uses RedisStorageManager storage manager
        # does not use API (so a syntactically correct id is considered to be valid)
        am_file = OpenAlexManager(testing=True, use_api_service=False)
        # Pre-seed storage with data from glob.json
        self._seed_storage(am_file)
        self.assertIn(am_file.normalise(self.valid_wid, include_prefix=True), self.data)
        self.assertIn(am_file.normalise(self.invalid_sid, include_prefix=True), self.data)
        self.assertFalse(am_file.is_valid(self.invalid_sid))  # is stored as invalid

    def test_openalex_memory_file_api(self):
        """Check that an invalid id is rejected when the API is enabled."""
        # No data is pre-seeded here, despite the "file" in the test name.
        # Uses RedisStorageManager storage manager
        # uses API (so a syntactically correct id which is not valid is considered to be invalid)
        am_file = OpenAlexManager(testing=True, use_api_service=True)
        self.assertFalse(am_file.is_valid(self.invalid_wid))

    def test_openalex_memory_nofile_noapi(self):
        """Check that any well-formed id is accepted without data and without API."""
        # No pre-seeded data
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = OpenAlexManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_wid))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_wid))
        am_nofile_noapi.storage_manager.delete_storage()

    def test_openalex_sqlite_nofile_api(self):
        """Validate ids through the API and check storage content and deletion.

        The "sqlite" in the name is kept for test-ID stability; the original
        comments state that the storage manager is RedisStorageManager.
        """
        # No pre-existing data
        # storage manager : RedisStorageManager
        # uses API
        sql_am_nofile = OpenAlexManager(testing=True)
        self.assertTrue(sql_am_nofile.is_valid(self.valid_wid))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_sid))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_wid))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_sid))
        # check that the redis storage contains all the validated ids
        validated_ids = [self.valid_wid, self.valid_sid, self.invalid_wid, self.invalid_sid]
        all_ids_stored = sql_am_nofile.storage_manager.get_all_keys()
        # Normalise each id once and drop the ones that normalise to a falsy value.
        normalised = (sql_am_nofile.normalise(x, include_prefix=True) for x in validated_ids)
        validated = [norm_id for norm_id in normalised if norm_id]
        for norm_id in validated:
            self.assertIn(norm_id, all_ids_stored)
        sql_am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set())

    def test_openalex_sqlite_file_api(self):
        """Check that verdicts copied into a no-API manager's storage are honoured.

        The "sqlite" in the name is kept for test-ID stability; the original
        comments state that the storage manager is RedisStorageManager.
        """
        # Uses pre-existing data in Redis
        # Uses RedisStorageManager storage manager
        # tests validation behavior with pre-seeded data
        to_insert = [self.invalid_wid, self.valid_wid]
        sql_file = OpenAlexManager(testing=True, use_api_service=True)
        for oalid in to_insert:
            norm_id = sql_file.normalise(oalid, include_prefix=True)
            is_valid = sql_file.is_valid(norm_id)
            sql_file.storage_manager.set_value(norm_id, is_valid)

        sql_no_api = OpenAlexManager(testing=True, use_api_service=False)
        # Copy values from the first manager to the second for testing
        for oalid in to_insert:
            norm_id = sql_no_api.normalise(oalid, include_prefix=True)
            value = sql_file.storage_manager.get_value(norm_id)
            if value is not None:
                sql_no_api.storage_manager.set_value(norm_id, value)
        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        # check that all the normalised ids in the list were correctly inserted
        for oalid in to_insert:
            self.assertIn(sql_no_api.normalise(oalid, include_prefix=True), all_db_keys)
        self.assertTrue(sql_no_api.is_valid(self.valid_wid))  # is stored as valid
        self.assertFalse(sql_no_api.is_valid(self.invalid_wid))  # is stored as invalid
        # not stored, has correct syntax
        self.assertTrue(
            sql_no_api.is_valid(sql_no_api.normalise(self.invalid_sid, include_prefix=True))
        )
        sql_no_api.storage_manager.delete_storage()

    def test_openalex_sqlite_nofile_noapi(self):
        """Check that any well-formed id is accepted without data and without API.

        The "sqlite" in the name is kept for test-ID stability; the original
        comments state that the storage manager is RedisStorageManager.
        """
        # No pre-seeded data
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = OpenAlexManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_wid))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_sid))
        am_nofile_noapi.storage_manager.delete_storage()