Coverage for test / idm_openalex_test.py: 99%

124 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2024 Elia Rizzetto <elia.rizzetto2@unibo.it> 

2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

3# 

4# SPDX-License-Identifier: ISC 

5 

6import json 

7import unittest 

8from os import makedirs 

9from os.path import exists, join 

10 

11from oc_ds_converter.oc_idmanager.openalex import OpenAlexManager 

12 

class OpenAlexIdentifierManagerTest(unittest.TestCase):
    """Tests for the OpenAlex identifier manager (``OpenAlexManager``).

    The tests cover normalisation, validation and existence checks, with
    the API service switched on and off and with storage either empty or
    pre-seeded from the ``test/data/glob.json`` fixture.
    """

    def setUp(self):
        # exist_ok replaces the former exists() + makedirs() pair, which
        # could race and left one statement uncovered when "tmp" was present.
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        # "W..." and "S..." identifiers; the tests below expect the two
        # "invalid" ones to be rejected whenever the API service is used.
        self.valid_wid = "W2013228336"
        self.valid_sid = "S4210229581"
        self.invalid_wid = "W7836728310"
        self.invalid_sid = "S4263287381"

    def _seed_storage(self, manager):
        """Copy every ``openalex:`` fixture entry into *manager*'s storage.

        The value stored for each key is the entry's ``"valid"`` flag,
        falling back to ``False`` when the entry has no such flag.
        """
        for key, value in self.data.items():
            if key.startswith("openalex:"):
                manager.storage_manager.set_value(key, value.get("valid", False))

    @staticmethod
    def _normalised_ids(manager, identifiers):
        """Return the prefixed normal form of *identifiers*, dropping falsy results."""
        candidates = (
            manager.normalise(identifier, include_prefix=True)
            for identifier in identifiers
        )
        return [norm_id for norm_id in candidates if norm_id]

    def test_openalex_is_valid(self):
        # Manager built with default settings.
        oalm_nofile = OpenAlexManager()
        self.assertTrue(oalm_nofile.is_valid(self.valid_wid))
        self.assertTrue(oalm_nofile.is_valid(self.valid_sid))
        self.assertFalse(oalm_nofile.is_valid(self.invalid_wid))
        self.assertFalse(oalm_nofile.is_valid(self.invalid_sid))

        # API disabled, storage pre-seeded with data from glob.json.
        oalm_file = OpenAlexManager(use_api_service=False, testing=True)
        self._seed_storage(oalm_file)
        self.assertIn(oalm_file.normalise(self.valid_wid, include_prefix=True), self.data)
        self.assertIn(oalm_file.normalise(self.invalid_wid, include_prefix=True), self.data)
        self.assertTrue(oalm_file.is_valid(self.valid_wid))
        self.assertFalse(oalm_file.is_valid(self.invalid_wid))

        # API disabled, nothing seeded through this manager.
        oalm_nofile_noapi = OpenAlexManager(testing=True, use_api_service=False)
        self.assertTrue(oalm_nofile_noapi.is_valid(self.valid_wid))
        self.assertTrue(oalm_nofile_noapi.is_valid(self.valid_sid))

    def test_exists(self):
        with self.subTest(msg="get_extra_info=True, allow_extra_api=None"):
            oalm = OpenAlexManager()
            output = oalm.exists('openalex:W748315831', get_extra_info=True, allow_extra_api=None)
            expected_output = (True, {'valid': True})
            # Only the existence flag is asserted; the extra-info payload
            # (second element of the result) is not compared.
            self.assertEqual(expected_output[0], output[0])

        with self.subTest(msg="get_extra_info=False, allow_extra_api=None"):
            oalm = OpenAlexManager()
            output = oalm.exists('S4210229581', get_extra_info=False, allow_extra_api=None)
            expected_output = True
            self.assertEqual(output, expected_output)

    def test_openalex_normalise(self):
        oalm = OpenAlexManager()

        self.assertEqual(
            self.valid_wid, oalm.normalise("openalex:" + self.valid_wid)
        )
        # str.replace("", " ") puts a space before, between and after every
        # character, so this checks that whitespace is removed everywhere.
        self.assertEqual(
            self.valid_wid, oalm.normalise(self.valid_wid.replace("", " "))
        )
        self.assertEqual(
            self.valid_wid,
            oalm.normalise("https://openalex.org/" + self.valid_wid),
        )
        self.assertEqual(
            oalm.normalise(self.valid_wid),
            oalm.normalise(' ' + self.valid_wid),
        )
        self.assertEqual(
            oalm.normalise(self.valid_sid),
            oalm.normalise("https://api.openalex.org/sources/" + self.valid_sid),
        )

        # API disabled, storage pre-seeded with data from glob.json.
        dm_file = OpenAlexManager(testing=True, use_api_service=False)
        self._seed_storage(dm_file)
        self.assertIn(dm_file.normalise(self.valid_wid, include_prefix=True), self.data)
        self.assertIn(dm_file.normalise(self.invalid_sid, include_prefix=True), self.data)
        self.assertTrue(dm_file.is_valid(self.valid_wid))
        self.assertFalse(dm_file.is_valid(self.invalid_sid))

    def test_openalex_default(self):
        # Uses RedisStorageManager with testing=True (fakeredis)
        # uses API
        mngr = OpenAlexManager(testing=True)
        self.assertTrue(mngr.is_valid(self.valid_wid))
        self.assertTrue(mngr.is_valid(self.valid_sid))
        self.assertFalse(mngr.is_valid(self.invalid_sid))
        self.assertFalse(mngr.is_valid(self.invalid_wid))

        validated_ids = [self.valid_wid, self.valid_sid, self.invalid_wid, self.invalid_sid]
        validated = self._normalised_ids(mngr, validated_ids)
        # check that all the validated ids are stored in redis
        all_ids_stored = mngr.storage_manager.get_all_keys()
        for norm_id in validated:
            self.assertIn(norm_id, all_ids_stored)

        mngr.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(mngr.storage_manager.get_all_keys(), set())

    def test_openalex_memory_file_noapi(self):
        # Uses pre-seeded data (without updating it)
        # Uses RedisStorageManager storage manager
        # does not use API (so a syntactically correct id is considered to be valid)
        am_file = OpenAlexManager(testing=True, use_api_service=False)
        self._seed_storage(am_file)
        self.assertIn(am_file.normalise(self.valid_wid, include_prefix=True), self.data)
        self.assertIn(am_file.normalise(self.invalid_sid, include_prefix=True), self.data)
        self.assertFalse(am_file.is_valid(self.invalid_sid))  # is stored as invalid

    def test_openalex_memory_file_api(self):
        # Uses RedisStorageManager storage manager
        # uses API (so a syntactically correct id which is not valid is considered to be invalid)
        am_file = OpenAlexManager(testing=True, use_api_service=True)
        self.assertFalse(am_file.is_valid(self.invalid_wid))

    def test_openalex_memory_nofile_noapi(self):
        # Does not use support file
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = OpenAlexManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_wid))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_wid))
        am_nofile_noapi.storage_manager.delete_storage()

    def test_openalex_sqlite_nofile_api(self):
        # NOTE: "sqlite" survives only in the test name; the manager is
        # created exactly as in the other Redis-backed tests.
        # No pre-existing data
        # storage manager : RedisStorageManager
        # uses API
        sql_am_nofile = OpenAlexManager(testing=True)
        self.assertTrue(sql_am_nofile.is_valid(self.valid_wid))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_sid))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_wid))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_sid))

        # check that the redis storage contains all the validated ids
        validated_ids = [self.valid_wid, self.valid_sid, self.invalid_wid, self.invalid_sid]
        all_ids_stored = sql_am_nofile.storage_manager.get_all_keys()
        validated = self._normalised_ids(sql_am_nofile, validated_ids)
        for norm_id in validated:
            self.assertIn(norm_id, all_ids_stored)

        sql_am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set())

    def test_openalex_sqlite_file_api(self):
        # NOTE: "sqlite" survives only in the test name; see the comments
        # below for the storage actually involved.
        # Uses pre-existing data in Redis
        # Uses RedisStorageManager storage manager
        # tests validation behavior with pre-seeded data
        to_insert = [self.invalid_wid, self.valid_wid]

        # First manager: validate with the API and record each outcome.
        sql_file = OpenAlexManager(testing=True, use_api_service=True)
        for oalid in to_insert:
            norm_id = sql_file.normalise(oalid, include_prefix=True)
            is_valid = sql_file.is_valid(norm_id)
            sql_file.storage_manager.set_value(norm_id, is_valid)

        # Second manager: API disabled; copy the recorded outcomes into it.
        sql_no_api = OpenAlexManager(testing=True, use_api_service=False)
        for oalid in to_insert:
            norm_id = sql_no_api.normalise(oalid, include_prefix=True)
            value = sql_file.storage_manager.get_value(norm_id)
            if value is not None:
                sql_no_api.storage_manager.set_value(norm_id, value)

        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        # check that all the normalised ids in the list were correctly inserted
        for oalid in to_insert:
            self.assertIn(sql_no_api.normalise(oalid, include_prefix=True), all_db_keys)

        self.assertTrue(sql_no_api.is_valid(self.valid_wid))  # is stored as valid
        self.assertFalse(sql_no_api.is_valid(self.invalid_wid))  # is stored as invalid
        # not stored, has correct syntax
        self.assertTrue(sql_no_api.is_valid(sql_no_api.normalise(self.invalid_sid, include_prefix=True)))
        sql_no_api.storage_manager.delete_storage()

    def test_openalex_sqlite_nofile_noapi(self):
        # NOTE: "sqlite" survives only in the test name.
        # Does not use support file
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = OpenAlexManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_wid))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_sid))
        am_nofile_noapi.storage_manager.delete_storage()

196 am_nofile_noapi.storage_manager.delete_storage()