Coverage for test / idm_arxiv_test.py: 100%

125 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it> 

2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

3# SPDX-FileCopyrightText: 2026 Marta Soricetti <marta.soricetti@unibo.it> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7 

8import json 

9import unittest 

10from os import makedirs 

11from os.path import exists, join 

12 

13from oc_ds_converter.oc_idmanager.arxiv import ArXivManager 

14 

15 

16class ArxivIdentifierManagerTest(unittest.TestCase): 

17 """This class aim at testing identifiers manager.""" 

18 

19 def setUp(self): 

20 if not exists("tmp"): 

21 makedirs("tmp") 

22 

23 self.test_dir = join("test","data") 

24 self.test_json_path = join(self.test_dir, "glob.json") 

25 with open(self.test_json_path, encoding="utf-8") as fp: 

26 self.data = json.load(fp) 

27 

28 self.valid_arxiv_1 = "arXiv:2109.05583" 

29 self.valid_arxiv_1v = "2109.05583v2" 

30 self.valid_arxiv_2 = "arXiv:2109.05582" 

31 self.valid_arx_U_S = "2109.05583V2 " 

32 self.invalid_arxiv_1 = "1133.5582" 

33 self.invalid_arxiv_2v = "2109.05583v23" 

34 

35 

36 def test_arxiv_default(self): 

37 am_nofile = ArXivManager(testing=True) 

38 # Uses RedisStorageManager with testing=True (fakeredis) 

39 # uses API 

40 self.assertTrue(am_nofile.is_valid(self.valid_arxiv_1)) 

41 self.assertTrue(am_nofile.is_valid(self.valid_arxiv_2)) 

42 self.assertTrue(am_nofile.is_valid(self.valid_arxiv_1v)) 

43 self.assertFalse(am_nofile.is_valid(self.invalid_arxiv_1)) 

44 self.assertFalse(am_nofile.is_valid(self.invalid_arxiv_2v)) 

45 validated_ids = [self.valid_arxiv_1, self.valid_arxiv_2, self.valid_arxiv_1v, self.invalid_arxiv_1, self.invalid_arxiv_2v] 

46 # check that all the validated ids are stored in redis 

47 all_ids_stored = am_nofile.storage_manager.get_all_keys() 

48 self.assertTrue(all(am_nofile.normalise(x, include_prefix=True) in all_ids_stored for x in validated_ids)) 

49 am_nofile.storage_manager.delete_storage() 

50 # check that the storage was correctly deleted 

51 self.assertEqual(am_nofile.storage_manager.get_all_keys(), set()) 

52 

53 #### IN MEMORY STORAGE MANAGER 

54 def test_arxiv_memory_file_noapi(self): 

55 # Uses pre-seeded data (without updating it) 

56 # Uses RedisStorageManager storage manager 

57 # does not use API (so a syntactically correct id is considered to be valid) 

58 am_file = ArXivManager(testing=True, use_api_service=False) 

59 # Pre-seed storage with data from glob.json 

60 for key, value in self.data.items(): 

61 if key.startswith("arxiv:"): 

62 am_file.storage_manager.set_value(key, value.get("valid", False)) 

63 self.assertTrue(am_file.normalise(self.valid_arxiv_1.lower(), include_prefix=True) in self.data) 

64 self.assertTrue(am_file.normalise(self.valid_arx_U_S.strip().lower(), include_prefix=True) in self.data) 

65 self.assertTrue(am_file.normalise(self.invalid_arxiv_1.strip().lower(), include_prefix=True) in self.data) 

66 self.assertTrue(am_file.is_valid(self.valid_arxiv_1)) 

67 self.assertFalse(am_file.is_valid(self.invalid_arxiv_1)) # is stored as invalid 

68 self.assertTrue(am_file.is_valid("arxiv:2229.00851")) # is not stored as invalid, does not exist but has correct syntax 

69 

70 

71 def test_arxiv_memory_file_api(self): 

72 # Uses support file (without updating it) 

73 # Uses RedisStorageManager storage manager 

74 # uses API (so a syntactically correct id which is not valid is considered to be invalid) 

75 am_file = ArXivManager(testing=True, use_api_service=True) 

76 self.assertFalse(am_file.is_valid(self.invalid_arxiv_1)) 

77 

78 def test_arxiv_memory_nofile_noapi(self): 

79 # Does not use support file 

80 # Uses RedisStorageManager storage manager 

81 # Does not API (so a syntactically correct id which is not valid is considered to be valid) 

82 am_nofile_noapi = ArXivManager(testing=True, use_api_service=False) 

83 self.assertTrue(am_nofile_noapi.is_valid(self.valid_arxiv_1v)) 

84 self.assertTrue(am_nofile_noapi.is_valid(self.invalid_arxiv_1)) 

85 am_nofile_noapi.storage_manager.delete_storage() 

86 

87 

88 #### SQLITE STORAGE MANAGER 

89 def test_arxiv_sqlite_nofile_api(self): 

90 # No pre-existing data 

91 # storage manager : RedisStorageManager 

92 # uses API 

93 sql_am_nofile = ArXivManager(testing=True) 

94 self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_1)) 

95 self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_2)) 

96 self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_1v)) 

97 self.assertFalse(sql_am_nofile.is_valid(self.invalid_arxiv_1)) 

98 self.assertFalse(sql_am_nofile.is_valid(self.invalid_arxiv_2v)) 

99 # check that the redis storage contains all the validated ids 

100 validated_ids = [self.valid_arxiv_1, self.valid_arxiv_2, self.valid_arxiv_1v, self.invalid_arxiv_1, self.invalid_arxiv_2v] 

101 all_ids_stored = sql_am_nofile.storage_manager.get_all_keys() 

102 self.assertTrue(all(sql_am_nofile.normalise(x, include_prefix=True) in all_ids_stored for x in validated_ids)) 

103 sql_am_nofile.storage_manager.delete_storage() 

104 # check that the storage was correctly deleted 

105 self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set()) 

106 

107 def test_arxiv_sqlite_file_api(self): 

108 # Uses pre-existing data in Redis 

109 # Uses RedisStorageManager storage manager 

110 # tests validation behavior with pre-seeded data 

111 to_insert = [self.invalid_arxiv_1, self.valid_arxiv_1, self.valid_arx_U_S] 

112 sql_file = ArXivManager(testing=True, use_api_service=True) 

113 for arxiv_id in to_insert: 

114 norm_id = sql_file.normalise(arxiv_id, include_prefix=True) 

115 is_valid = sql_file.is_valid(norm_id) 

116 sql_file.storage_manager.set_value(norm_id, is_valid) 

117 

118 sql_no_api = ArXivManager(testing=True, use_api_service=False) 

119 # Copy values from the first manager to the second for testing 

120 for arxiv_id in to_insert: 

121 norm_id = sql_no_api.normalise(arxiv_id, include_prefix=True) 

122 value = sql_file.storage_manager.get_value(norm_id) 

123 if value is not None: 

124 sql_no_api.storage_manager.set_value(norm_id, value) 

125 all_db_keys = sql_no_api.storage_manager.get_all_keys() 

126 # check that all the normalised ids in the list were correctly inserted 

127 self.assertTrue(all(sql_no_api.normalise(x, include_prefix=True) in all_db_keys for x in to_insert)) 

128 self.assertTrue(sql_no_api.is_valid(self.valid_arxiv_1)) # is stored as valid 

129 self.assertTrue(sql_no_api.is_valid(self.valid_arx_U_S)) # is stored as valid 

130 self.assertFalse(sql_no_api.is_valid(self.invalid_arxiv_1)) # is stored as invalid 

131 self.assertTrue(sql_no_api.is_valid("arxiv:2229.00851")) # is not stored as invalid, does not exist but has correct syntax 

132 sql_no_api.storage_manager.delete_storage() 

133 

134 def test_arxiv_sqlite_nofile_noapi(self): 

135 # Does not use support file 

136 # Uses RedisStorageManager storage manager 

137 # Does not API (so a syntactically correct id which is not valid is considered to be valid) 

138 am_nofile_noapi = ArXivManager(testing=True, use_api_service=False) 

139 self.assertTrue(am_nofile_noapi.is_valid(self.valid_arxiv_1v)) 

140 self.assertTrue(am_nofile_noapi.is_valid(self.invalid_arxiv_1)) 

141 am_nofile_noapi.storage_manager.delete_storage() 

142 

143 

144 #### REDIS STORAGE MANAGER 

145 def test_arxiv_redis_nofile_api(self): 

146 # No available data in redis db 

147 # Storage manager : RedisStorageManager 

148 # uses API 

149 sql_am_nofile = ArXivManager(testing=True) 

150 self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_1)) 

151 self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_2)) 

152 self.assertTrue(sql_am_nofile.is_valid(self.valid_arxiv_1v)) 

153 self.assertFalse(sql_am_nofile.is_valid(self.invalid_arxiv_1)) 

154 self.assertFalse(sql_am_nofile.is_valid(self.invalid_arxiv_2v)) 

155 # check that the redis db was correctly filled and that it contains all the validated ids 

156 

157 validated_ids = {self.valid_arxiv_1, self.valid_arxiv_2, self.valid_arxiv_1v, self.invalid_arxiv_1, self.invalid_arxiv_2v} 

158 validated_ids = {sql_am_nofile.normalise(x, include_prefix=True) for x in validated_ids} 

159 all_ids_stored = sql_am_nofile.storage_manager.get_all_keys() 

160 # check that all the validated ids are stored in the json file 

161 self.assertEqual(validated_ids, all_ids_stored) 

162 sql_am_nofile.storage_manager.delete_storage() 

163 # check that the support file was correctly deleted 

164 self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set()) 

165 

166 def test_arxiv_redis_file_api(self): 

167 # Uses data in redis db 

168 # Uses RedisStorageManager 

169 # does not use API (so a syntactically correct id is considered to be valid) 

170 # fills db 

171 

172 to_insert = [self.invalid_arxiv_1, self.valid_arxiv_1, self.valid_arx_U_S] 

173 sql_file = ArXivManager(testing=True, use_api_service=True) 

174 for id in to_insert: 

175 norm_id = sql_file.normalise(id, include_prefix=True) 

176 is_valid = sql_file.is_valid(norm_id) 

177 sql_file.storage_manager.set_value(norm_id,is_valid) 

178 

179 sql_no_api = ArXivManager(testing=True, use_api_service=False) 

180 # Copy values from the first manager to the second for testing 

181 for id in to_insert: 

182 norm_id = sql_no_api.normalise(id, include_prefix=True) 

183 value = sql_file.storage_manager.get_value(norm_id) 

184 if value is not None: 

185 sql_no_api.storage_manager.set_value(norm_id, value) 

186 all_db_keys = sql_no_api.storage_manager.get_all_keys() 

187 #check that all the normalised ids in the list were correctly inserted in the db 

188 self.assertTrue(all(sql_no_api.normalise(x,include_prefix=True) in all_db_keys for x in to_insert)) 

189 self.assertTrue(sql_no_api.is_valid(self.valid_arxiv_1)) # is stored in support file as valid 

190 self.assertTrue(sql_no_api.is_valid(self.valid_arx_U_S)) # is stored in support file as valid 

191 self.assertFalse(sql_no_api.is_valid(self.invalid_arxiv_1)) # is stored in support file as invalid 

192 self.assertTrue(sql_no_api.is_valid("arxiv:2229.00851")) # is not stored in support file as invalid, does not exist but has correct syntax 

193 sql_no_api.storage_manager.delete_storage() 

194 

195 def test_arxiv_redis_nofile_noapi(self): 

196 # No data in redis db 

197 # Uses RedisStorageManager 

198 # Does not API (so a syntactically correct id which is not valid is considered to be valid) 

199 am_nofile_noapi = ArXivManager(testing=True, use_api_service=False) 

200 self.assertTrue(am_nofile_noapi.is_valid(self.valid_arxiv_1v)) 

201 self.assertTrue(am_nofile_noapi.is_valid(self.invalid_arxiv_1)) 

202 am_nofile_noapi.storage_manager.delete_storage() 

203 

204 

205