Coverage for test / idm_doi_test.py: 99%

199 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it> 

2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

3# 

4# SPDX-License-Identifier: ISC 

5 

6import json 

7import unittest 

8from os import makedirs 

9from os.path import exists, join 

10 

11from oc_ds_converter.oc_idmanager.doi import DOIManager 

12 

class DOIIdentifierManagerTest(unittest.TestCase):
    """Tests for DOIManager: normalisation, validity checks, repair and storage."""

    def setUp(self):
        """Create the ``tmp`` directory, load the JSON fixture and define sample DOIs."""
        # exist_ok=True replaces the former exists()/makedirs() pair, which
        # could race when several test processes start at the same time.
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_doi_1 = "10.1108/jd-12-2013-0166"
        self.valid_doi_2 = "10.1130/2015.2513(00)"
        self.invalid_doi_1 = "10.1108/12-2013-0166"
        self.invalid_doi_2 = "10.1371"

    def _seed_storage_from_fixture(self, manager):
        """Copy every ``doi:`` entry of the loaded fixture into *manager*'s storage.

        The stored value is the entry's ``"valid"`` flag, defaulting to False
        when the entry has no such key.
        """
        for key, value in self.data.items():
            if key.startswith("doi:"):
                manager.storage_manager.set_value(key, value.get("valid", False))

    def test_exists(self):
        doi = '10.1007/s11192-022-04367-w'
        with self.subTest(msg="get_extra_info=True, allow_extra_api=None"):
            doi_manager = DOIManager()
            output = doi_manager.exists(doi, get_extra_info=True, allow_extra_api=None)
            expected_output = (True, {'id': '10.1007/s11192-022-04367-w', 'valid': True, 'ra': 'unknown'})
            self.assertEqual(output, expected_output)
        with self.subTest(msg="get_extra_info=False, allow_extra_api=None"):
            doi_manager = DOIManager()
            output = doi_manager.exists(doi, get_extra_info=False, allow_extra_api=None)
            self.assertEqual(output, True)
        with self.subTest(msg="get_extra_info=False, allow_extra_api='crossref'"):
            doi_manager = DOIManager()
            output = doi_manager.exists(doi, get_extra_info=False, allow_extra_api='crossref')
            self.assertEqual(output, True)

    def test_doi_normalise(self):
        dm = DOIManager()
        # Each variant upper-cases the DOI and decorates its leading "10."
        # with a different prefix; all must normalise back to the plain DOI.
        for decorated_prefix in ("doi: 10. ", "doi:10.", "https://doi.org/10."):
            with self.subTest(decorated_prefix=decorated_prefix):
                raw = self.valid_doi_1.upper().replace("10.", decorated_prefix)
                self.assertEqual(self.valid_doi_1, dm.normalise(raw))

    def test_doi_is_valid(self):
        dm_nofile = DOIManager()
        self.assertTrue(dm_nofile.is_valid(self.valid_doi_1))
        self.assertTrue(dm_nofile.is_valid(self.valid_doi_2))
        self.assertFalse(dm_nofile.is_valid(self.invalid_doi_1))
        self.assertFalse(dm_nofile.is_valid(self.invalid_doi_2))

        dm_file = DOIManager(testing=True, use_api_service=False)
        # Pre-seed storage with data from glob.json
        self._seed_storage_from_fixture(dm_file)
        # Both sample DOIs must be present in the fixture for the checks
        # below to exercise the stored values.
        self.assertIn(dm_file.normalise(self.valid_doi_1, include_prefix=True), self.data)
        self.assertIn(dm_file.normalise(self.invalid_doi_1, include_prefix=True), self.data)
        self.assertTrue(dm_file.is_valid(self.valid_doi_1))
        self.assertFalse(dm_file.is_valid(self.invalid_doi_1))

    def test_doi_default(self):
        # Uses RedisStorageManager with testing=True (fakeredis)
        # uses API
        am_nofile = DOIManager(testing=True)
        self.assertTrue(am_nofile.is_valid(self.valid_doi_1))
        self.assertTrue(am_nofile.is_valid(self.valid_doi_2))
        self.assertFalse(am_nofile.is_valid(self.invalid_doi_2))
        self.assertFalse(am_nofile.is_valid(self.invalid_doi_1))
        validated_ids = [self.valid_doi_1, self.valid_doi_2, self.invalid_doi_1, self.invalid_doi_2]
        # check that all the validated ids are stored in redis
        all_ids_stored = am_nofile.storage_manager.get_all_keys()
        for doi in validated_ids:
            # assertIn names the missing key on failure, unlike assertTrue(all(...)).
            self.assertIn(am_nofile.normalise(doi, include_prefix=True), all_ids_stored)
        am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(am_nofile.storage_manager.get_all_keys(), set())

    def test_doi_memory_file_noapi(self):
        # Uses pre-seeded data (without updating it)
        # Uses RedisStorageManager storage manager
        # does not use API (so a syntactically correct id is considered to be valid)
        am_file = DOIManager(testing=True, use_api_service=False)
        # Pre-seed storage with data from glob.json
        self._seed_storage_from_fixture(am_file)
        norm_valid = am_file.normalise(self.valid_doi_1, include_prefix=True)
        norm_invalid = am_file.normalise(self.invalid_doi_1.strip().lower(), include_prefix=True)
        norm_fake = am_file.normalise("10.1109/5.771073FAKE_ID", include_prefix=True)
        # Bare asserts kept as in the original; presumably they also narrow
        # the values to non-None for static type checkers.
        assert norm_valid is not None
        assert norm_invalid is not None
        assert norm_fake is not None
        self.assertIn(norm_valid, self.data)
        self.assertIn(norm_invalid, self.data)
        self.assertFalse(am_file.is_valid(self.invalid_doi_1))
        self.assertTrue(am_file.is_valid(norm_fake))

    def test_doi_memory_file_api(self):
        # No data is pre-seeded in this test
        # Uses RedisStorageManager storage manager
        # uses API (so a syntactically correct id which is not valid is considered to be invalid)
        am_file = DOIManager(testing=True, use_api_service=True)
        self.assertFalse(am_file.is_valid(self.invalid_doi_1))

    def test_doi_memory_nofile_noapi(self):
        # Does not use support file
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = DOIManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_doi_1))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_doi_1))
        am_nofile_noapi.storage_manager.delete_storage()

    def test_doi_sqlite_nofile_api(self):
        # NOTE(review): the "sqlite" in the test name looks historical; the
        # original comments describe the storage as RedisStorageManager.
        # No pre-existing data
        # uses API
        sql_am_nofile = DOIManager(testing=True)
        self.assertTrue(sql_am_nofile.is_valid(self.valid_doi_1))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_doi_2))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_doi_1))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_doi_2))
        # check that the redis storage contains all the validated ids
        validated_ids = [self.valid_doi_1, self.valid_doi_2, self.invalid_doi_1, self.invalid_doi_2]
        all_ids_stored = sql_am_nofile.storage_manager.get_all_keys()
        for doi in validated_ids:
            nid = sql_am_nofile.normalise(doi, include_prefix=True)
            # Ids that normalise to None are skipped, as in the original check.
            if nid is not None:
                self.assertIn(nid, all_ids_stored)
        sql_am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set())

    def test_doi_sqlite_file_api(self):
        # Uses pre-existing data in Redis
        # Uses RedisStorageManager storage manager
        # tests validation behavior with pre-seeded data
        to_insert = [self.invalid_doi_1, self.valid_doi_1]
        sql_file = DOIManager(testing=True, use_api_service=True)
        for doi_id in to_insert:
            norm_id = sql_file.normalise(doi_id, include_prefix=True)
            assert norm_id is not None
            is_valid = sql_file.is_valid(norm_id)
            sql_file.storage_manager.set_value(norm_id, is_valid)

        sql_no_api = DOIManager(testing=True, use_api_service=False)
        # Copy values from the first manager to the second for testing
        for doi_id in to_insert:
            norm_id = sql_no_api.normalise(doi_id, include_prefix=True)
            value = sql_file.storage_manager.get_value(norm_id)
            if value is not None:
                sql_no_api.storage_manager.set_value(norm_id, value)
        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        for doi_id in to_insert:
            nid = sql_no_api.normalise(doi_id, include_prefix=True)
            # Ids that normalise to None are skipped, as in the original check.
            if nid is not None:
                self.assertIn(nid, all_db_keys)
        self.assertTrue(sql_no_api.is_valid(self.valid_doi_1))
        self.assertFalse(sql_no_api.is_valid(self.invalid_doi_1))
        norm_fake = sql_no_api.normalise("10.1109/5.771073FAKE_ID", include_prefix=True)
        assert norm_fake is not None
        self.assertTrue(sql_no_api.is_valid(norm_fake))
        sql_no_api.storage_manager.delete_storage()

    def test_doi_sqlite_nofile_noapi(self):
        # Does not use support file
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = DOIManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_doi_1))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_doi_1))
        am_nofile_noapi.storage_manager.delete_storage()

    def test_attempt_repair_removes_backslash(self):
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10.1108/jd-12-2013-0166\\")
        self.assertEqual(repaired, "10.1108/jd-12-2013-0166")

    def test_attempt_repair_removes_double_underscore(self):
        # NOTE(review): despite the test name, the expected outcome here is
        # None rather than a repaired DOI string.
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10.1108/jd__12-2013-0166")
        self.assertIsNone(repaired)

    def test_attempt_repair_removes_double_dot(self):
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10..1108/jd-12-2013-0166")
        self.assertEqual(repaired, "10.1108/jd-12-2013-0166")

    def test_attempt_repair_removes_html_tags(self):
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10.1108/jd-12-2013-0166<tag>content</tag>")
        self.assertEqual(repaired, "10.1108/jd-12-2013-0166")

    def test_attempt_repair_removes_self_closing_tags(self):
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10.1108/jd-12-2013-0166<br/>")
        self.assertEqual(repaired, "10.1108/jd-12-2013-0166")

    def test_attempt_repair_no_change_returns_none(self):
        # An already clean DOI yields None.
        dm = DOIManager(use_api_service=True)
        repaired = dm.attempt_repair("10.1108/jd-12-2013-0166")
        self.assertIsNone(repaired)

    def test_attempt_repair_api_disabled_returns_none(self):
        dm = DOIManager(use_api_service=False)
        repaired = dm.attempt_repair("10.1108/jd-12-2013-0166\\")
        self.assertIsNone(repaired)

    def test_is_valid_repairs_malformed_doi(self):
        dm = DOIManager(use_api_service=True)
        malformed_doi = "10.1108/jd-12-2013-0166\\"
        self.assertTrue(dm.is_valid(malformed_doi))

    def test_is_valid_repairs_malformed_doi_with_extra_info(self):
        dm = DOIManager(use_api_service=True)
        malformed_doi = "10.1108/jd-12-2013-0166\\"
        result = dm.is_valid(malformed_doi, get_extra_info=True)
        assert isinstance(result, tuple)
        self.assertTrue(result[0])
        # The reported id is the DOI without the trailing backslash.
        self.assertEqual(result[1]["id"], "10.1108/jd-12-2013-0166")

    def test_is_valid_no_repair_when_api_disabled(self):
        # With the API disabled the backslash-terminated DOI is still
        # reported as valid.
        dm = DOIManager(use_api_service=False)
        malformed_doi = "10.1108/jd-12-2013-0166\\"
        self.assertTrue(dm.is_valid(malformed_doi))

    def test_is_valid_with_extra_info_valid_doi(self):
        dm = DOIManager(use_api_service=True)
        result = dm.is_valid(self.valid_doi_1, get_extra_info=True)
        assert isinstance(result, tuple)
        self.assertTrue(result[0])
        self.assertEqual(result[1]["id"], self.valid_doi_1)

    def test_normalise_removes_dx_doi_prefix(self):
        dm = DOIManager()
        doi_with_prefix = "http://dx.doi.org/10.1108/jd-12-2013-0166"
        self.assertEqual(dm.normalise(doi_with_prefix), "10.1108/jd-12-2013-0166")

    def test_normalise_removes_suffix_pmid(self):
        dm = DOIManager()
        doi_with_suffix = "10.1108/jd-12-2013-0166.PMID:12345"
        self.assertEqual(dm.normalise(doi_with_suffix), "10.1108/jd-12-2013-0166")

    def test_normalise_invalid_string_returns_none(self):
        dm = DOIManager()
        self.assertIsNone(dm.normalise("not a doi"))

    def test_base_normalise_invalid_string_returns_none(self):
        dm = DOIManager()
        self.assertIsNone(dm.base_normalise("not a doi"))

    def test_is_valid_normalise_returns_none(self):
        dm = DOIManager()
        self.assertFalse(dm.is_valid("not a doi"))

    def test_syntax_ok_without_prefix(self):
        dm = DOIManager()
        self.assertTrue(dm.syntax_ok("10.1108/jd-12-2013-0166"))

    def test_normalise_removes_embedded_url_prefix(self):
        # The expected result keeps only the part before the embedded URL.
        dm = DOIManager()
        doi_with_embedded_url = "10.1108http://dx.doi.org/jd-12-2013-0166"
        self.assertEqual(dm.normalise(doi_with_embedded_url), "10.1108")