Coverage for test / idm_crossref_test.py: 99%

164 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2024 Elia Rizzetto <elia.rizzetto2@unibo.it> 

2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

3# SPDX-FileCopyrightText: 2026 Marta Soricetti <marta.soricetti@unibo.it> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7import json 

8import unittest 

9from os import makedirs 

10from os.path import exists, join 

11 

12import xmltodict 

13from oc_ds_converter.oc_idmanager import * 

14from oc_ds_converter.oc_idmanager.base import IdentifierManager 

15from requests import ReadTimeout, get 

16from requests.exceptions import ConnectionError 

17from oc_ds_converter.oc_idmanager.oc_data_storage.storage_manager import StorageManager 

18from oc_ds_converter.oc_idmanager.oc_data_storage.in_memory_manager import InMemoryStorageManager 

19from oc_ds_converter.oc_idmanager.oc_data_storage.sqlite_manager import SqliteStorageManager 

20from oc_ds_converter.oc_idmanager.oc_data_storage.redis_manager import RedisStorageManager 

21 

class CrossrefIdentifierManagerTest(unittest.TestCase):
    """Tests for ``CrossrefManager``, the manager of Crossref member identifiers.

    The tests exercise syntactic normalisation, validation with and without
    the API service, and the persistence of validation results in the
    manager's storage. Tests that enable the API service issue real requests.
    """

    def setUp(self):
        """Create the scratch directory and load the fixture identifiers."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_crmid1 = "297"
        self.valid_crmid2 = "4443"
        self.invalid_crmid1 = "342427"
        self.invalid_crmid2 = "0123"

    def _seed_storage_from_glob(self, manager):
        """Copy every ``crossref:`` entry of glob.json into ``manager``'s storage.

        Entries without a ``"valid"`` field are stored as invalid (``False``).
        """
        for key, value in self.data.items():
            if key.startswith("crossref:"):
                manager.storage_manager.set_value(key, value.get("valid", False))

    def test_crossref_is_valid(self):
        """Validate ids through the API, through seeded storage, and by syntax only."""
        crmngr_nofile = CrossrefManager()
        self.assertTrue(crmngr_nofile.is_valid(self.valid_crmid1))
        self.assertTrue(crmngr_nofile.is_valid(self.valid_crmid2))
        self.assertFalse(crmngr_nofile.is_valid(self.invalid_crmid1))
        self.assertFalse(crmngr_nofile.is_valid(self.invalid_crmid2))

        # Without the API, the verdict must come from the pre-seeded storage.
        crmngr_file = CrossrefManager(use_api_service=False, testing=True)
        self._seed_storage_from_glob(crmngr_file)
        self.assertIn(crmngr_file.normalise(self.valid_crmid1, include_prefix=True), self.data)
        self.assertIn(crmngr_file.normalise(self.invalid_crmid1, include_prefix=True), self.data)
        self.assertTrue(crmngr_file.is_valid(self.valid_crmid1))
        self.assertFalse(crmngr_file.is_valid(self.invalid_crmid1))

        crmngr_nofile_noapi = CrossrefManager(testing=True, use_api_service=False)
        self.assertTrue(crmngr_nofile_noapi.is_valid(self.valid_crmid1))
        self.assertTrue(crmngr_nofile_noapi.is_valid(self.valid_crmid2))

    def test_exists(self):
        """Check ``exists`` with and without the extra-info return value."""
        with self.subTest(msg="get_extra_info=True, allow_extra_api=None"):
            crmngr = CrossrefManager()
            output = crmngr.exists(self.valid_crmid1, get_extra_info=True, allow_extra_api=None)
            # Only the validity flag (first element) is asserted; the
            # accompanying extra-info value is not checked here.
            self.assertEqual(True, output[0])

        with self.subTest(msg="get_extra_info=False, allow_extra_api=None"):
            crmngr = CrossrefManager()
            output = crmngr.exists(self.valid_crmid2, get_extra_info=False, allow_extra_api=None)
            self.assertEqual(output, True)

    def test_openalex_normalise(self):
        """Check normalisation of Crossref member ids.

        NOTE(review): despite its name, this test only exercises
        ``CrossrefManager``; the name is kept so existing test ids stay stable.
        """
        crmngr = CrossrefManager()

        self.assertEqual(
            self.valid_crmid1, crmngr.normalise("crossref:" + self.valid_crmid1)
        )
        # replace("", " ") inserts a space around every character ("297" -> " 2 9 7 ").
        self.assertEqual(
            self.valid_crmid1, crmngr.normalise(self.valid_crmid1.replace("", " "))
        )
        self.assertEqual(
            self.valid_crmid1,
            crmngr.normalise("https://api.crossref.org/members/" + self.valid_crmid1),
        )
        self.assertEqual(
            crmngr.normalise(self.valid_crmid1),
            crmngr.normalise(" " + self.valid_crmid1),
        )
        self.assertEqual(
            crmngr.normalise(self.valid_crmid2),
            crmngr.normalise("https://api.crossref.org/members/" + self.valid_crmid2),
        )

        crmngr_file = CrossrefManager(testing=True, use_api_service=False)
        self._seed_storage_from_glob(crmngr_file)
        self.assertIn(crmngr_file.normalise(self.valid_crmid1, include_prefix=True), self.data)
        self.assertIn(crmngr_file.normalise(self.invalid_crmid2, include_prefix=True), self.data)
        self.assertTrue(crmngr_file.is_valid(self.valid_crmid1))
        self.assertFalse(crmngr_file.is_valid(self.invalid_crmid2))

    def test_crossref_default(self):
        """Validate through the API and check the results land in the default storage."""
        # CrossrefManager(testing=True) uses RedisStorageManager backed by fakeredis.
        mngr = CrossrefManager(testing=True)
        self.assertTrue(mngr.is_valid(self.valid_crmid1))
        self.assertTrue(mngr.is_valid(self.valid_crmid2))
        self.assertFalse(mngr.is_valid(self.invalid_crmid2))
        self.assertFalse(mngr.is_valid(self.invalid_crmid1))

        validated_ids = [self.valid_crmid1, self.valid_crmid2, self.invalid_crmid1, self.invalid_crmid2]
        # Normalise each id once and drop those that do not normalise to a value.
        normalised = (mngr.normalise(x, include_prefix=True) for x in validated_ids)
        validated = [norm_id for norm_id in normalised if norm_id]

        # Every validated id must have been stored.
        all_ids_stored = mngr.storage_manager.get_all_keys()
        for norm_id in validated:
            self.assertIn(norm_id, all_ids_stored)

        mngr.storage_manager.delete_storage()
        # The storage must be empty after deletion.
        self.assertEqual(mngr.storage_manager.get_all_keys(), set())

    #### Tests named "memory": they use the default storage of CrossrefManager(testing=True)

    def test_crossref_memory_file_noapi(self):
        """Without the API, an id stored as invalid stays invalid."""
        # Pre-seeded data, default (Redis) storage, no API.
        am_file = CrossrefManager(testing=True, use_api_service=False)
        self._seed_storage_from_glob(am_file)
        self.assertIn(am_file.normalise(self.valid_crmid1, include_prefix=True), self.data)
        self.assertIn(am_file.normalise(self.invalid_crmid2, include_prefix=True), self.data)
        self.assertFalse(am_file.is_valid(self.invalid_crmid2))  # is stored as invalid

    def test_crossref_memory_file_api(self):
        """With the API, a well-formed but non-existent id is invalid."""
        am_file = CrossrefManager(testing=True, use_api_service=True)
        self.assertFalse(am_file.is_valid(self.invalid_crmid1))

    def test_crossref_memory_nofile_noapi(self):
        """Without stored data or API, any syntactically correct id is valid."""
        am_nofile_noapi = CrossrefManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_crmid1))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_crmid1))
        am_nofile_noapi.storage_manager.delete_storage()

    #### Tests named "sqlite": they also use the default storage of CrossrefManager(testing=True)

    def test_crossref_sqlite_nofile_api(self):
        """Start from empty storage, validate through the API, check persistence."""
        sql_am_nofile = CrossrefManager(testing=True)
        self.assertTrue(sql_am_nofile.is_valid(self.valid_crmid1))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_crmid2))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_crmid1))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_crmid2))

        # The storage must contain all the validated ids.
        validated_ids = [self.valid_crmid1, self.valid_crmid2, self.invalid_crmid1, self.invalid_crmid2]
        all_ids_stored = sql_am_nofile.storage_manager.get_all_keys()
        normalised = (sql_am_nofile.normalise(x, include_prefix=True) for x in validated_ids)
        validated = [norm_id for norm_id in normalised if norm_id]
        for norm_id in validated:
            self.assertIn(norm_id, all_ids_stored)

        sql_am_nofile.storage_manager.delete_storage()
        # The storage must be empty after deletion.
        self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set())

    def test_crossref_sqlite_file_api(self):
        """Stored verdicts win over the syntax-only check when the API is off."""
        to_insert = [self.invalid_crmid1, self.valid_crmid1]

        # First manager: validate through the API and store the verdicts.
        sql_file = CrossrefManager(testing=True, use_api_service=True)
        for crmid in to_insert:
            norm_id = sql_file.normalise(crmid, include_prefix=True)
            is_valid = sql_file.is_valid(norm_id)
            sql_file.storage_manager.set_value(norm_id, is_valid)

        # Second manager: no API; copy the stored verdicts over from the first.
        sql_no_api = CrossrefManager(testing=True, use_api_service=False)
        for crmid in to_insert:
            norm_id = sql_no_api.normalise(crmid, include_prefix=True)
            value = sql_file.storage_manager.get_value(norm_id)
            if value is not None:
                sql_no_api.storage_manager.set_value(norm_id, value)

        # All the normalised ids in the list must have been inserted.
        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        for crmid in to_insert:
            self.assertIn(sql_no_api.normalise(crmid, include_prefix=True), all_db_keys)

        self.assertTrue(sql_no_api.is_valid(self.valid_crmid1))  # is stored as valid
        self.assertFalse(sql_no_api.is_valid(self.invalid_crmid1))  # is stored as invalid
        # Not stored, but has correct syntax.
        self.assertTrue(sql_no_api.is_valid(sql_no_api.normalise(self.invalid_crmid2, include_prefix=True)))
        sql_no_api.storage_manager.delete_storage()

    def test_crossref_sqlite_nofile_noapi(self):
        """Without stored data or API, any syntactically correct id is valid."""
        am_nofile_noapi = CrossrefManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_crmid1))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_crmid2))
        am_nofile_noapi.storage_manager.delete_storage()

    #### REDIS STORAGE MANAGER (explicitly injected)

    def test_crossref_redis_nofile_api(self):
        """Start from an empty Redis storage, validate through the API, check persistence."""
        # testing=True is passed explicitly, as in test_crossref_redis_file_api:
        # this test asserts the exact key set and then deletes the storage, so
        # it must never run against a shared Redis instance.
        redis_cm_nofile = CrossrefManager(storage_manager=RedisStorageManager(testing=True))
        self.assertTrue(redis_cm_nofile.is_valid(self.valid_crmid1))
        self.assertTrue(redis_cm_nofile.is_valid(self.valid_crmid2))
        self.assertFalse(redis_cm_nofile.is_valid(self.invalid_crmid1))
        self.assertFalse(redis_cm_nofile.is_valid(self.invalid_crmid2))

        # The Redis db must contain exactly the validated ids.
        validated_ids = {self.valid_crmid1, self.valid_crmid2, self.invalid_crmid1, self.invalid_crmid2}
        validated_ids = {redis_cm_nofile.normalise(x, include_prefix=True) for x in validated_ids}
        all_ids_stored = redis_cm_nofile.storage_manager.get_all_keys()
        self.assertEqual(validated_ids, all_ids_stored)

        redis_cm_nofile.storage_manager.delete_storage()
        # The storage must be empty after deletion.
        self.assertEqual(redis_cm_nofile.storage_manager.get_all_keys(), set())

    def test_crossref_redis_file_api(self):
        """Two managers sharing one Redis storage: API fills it, no-API reads it."""
        to_insert = [self.invalid_crmid1, self.valid_crmid1]
        storage_manager = RedisStorageManager(testing=True)

        # First manager uses the API and fills the shared db.
        redis_file = CrossrefManager(storage_manager=storage_manager, use_api_service=True)
        for crmid in to_insert:
            norm_id = redis_file.normalise(crmid, include_prefix=True)
            is_valid = redis_file.is_valid(norm_id)
            redis_file.storage_manager.set_value(norm_id, is_valid)

        # Second manager shares the storage but does not use the API.
        redis_no_api = CrossrefManager(storage_manager=storage_manager, use_api_service=False)
        all_db_keys = redis_no_api.storage_manager.get_all_keys()
        # All the normalised ids in the list must have been inserted in the db.
        for crmid in to_insert:
            self.assertIn(redis_no_api.normalise(crmid, include_prefix=True), all_db_keys)

        self.assertTrue(redis_no_api.is_valid(self.valid_crmid1))  # is stored as valid
        self.assertFalse(redis_no_api.is_valid(self.invalid_crmid1))  # is stored as invalid
        # Not stored: does not exist, but has correct syntax.
        self.assertTrue(redis_no_api.is_valid(self.invalid_crmid2))
        redis_no_api.storage_manager.delete_storage()

    def test_crossref_redis_nofile_noapi(self):
        """Without stored data or API, any syntactically correct id is valid."""
        # Uses the Redis storage manager the test is named after.
        cr_nofile_noapi = CrossrefManager(storage_manager=RedisStorageManager(testing=True), use_api_service=False)
        self.assertTrue(cr_nofile_noapi.is_valid(self.valid_crmid1))
        self.assertTrue(cr_nofile_noapi.is_valid(self.invalid_crmid2))
        cr_nofile_noapi.storage_manager.delete_storage()