Coverage for test / idm_wikidata_test.py: 99%

177 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2026 Marta Soricetti <marta.soricetti@unibo.it> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5import json 

6import sqlite3 

7import os.path 

8import unittest 

9from os import makedirs 

10from os.path import exists, join 

11 

12import xmltodict 

13from oc_ds_converter.oc_idmanager import * 

14from oc_ds_converter.oc_idmanager.base import IdentifierManager 

15from requests import ReadTimeout, get 

16from requests.exceptions import ConnectionError 

17from oc_ds_converter.oc_idmanager.oc_data_storage.storage_manager import StorageManager 

18from oc_ds_converter.oc_idmanager.oc_data_storage.in_memory_manager import InMemoryStorageManager 

19from oc_ds_converter.oc_idmanager.oc_data_storage.sqlite_manager import SqliteStorageManager 

20from oc_ds_converter.oc_idmanager.oc_data_storage.redis_manager import RedisStorageManager 

21 

class WikidataIdentifierManagerTest(unittest.TestCase):
    """Tests for the Wikidata identifier manager and its storage back ends."""

    def setUp(self):
        """Create the scratch directory, load the JSON fixture and define sample ids."""
        if not exists("tmp"):
            makedirs("tmp")

        self.test_dir = os.path.join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_wikidata_1 = "Q34433"
        self.valid_wikidata_2 = "Q24698708"
        self.valid_wikidata_3 = "Q15767074"
        self.invalid_wikidata_1 = "Q34433Q345"
        self.invalid_wikidata_2 = "Q24698722"  # valid format but not existing
        self.invalid_wikidata_3 = "Q12"  # not existing yet

    def test_wikidata_normalise(self):
        """Check that differently formatted inputs normalise to the bare identifier."""
        wdm = WikidataManager()
        # assertEqual (not assertTrue) is required here: with assertTrue the
        # second argument is only the failure message, so nothing is compared.
        self.assertEqual(
            self.valid_wikidata_1,
            wdm.normalise(self.valid_wikidata_1.replace("Q", "https://www.wikidata.org/wiki/Q")),
        )
        self.assertEqual(
            self.valid_wikidata_2,
            wdm.normalise(self.valid_wikidata_2),
        )
        self.assertEqual(
            self.valid_wikidata_2,
            wdm.normalise(self.valid_wikidata_2.replace("Q", "wikidata: Q")),
        )
        self.assertEqual(
            self.valid_wikidata_3,
            wdm.normalise(self.valid_wikidata_3.replace("Q", "Q ")),
        )

    def test_wikidata_is_valid(self):
        """Check is_valid with the default manager and with a file-backed in-memory storage."""
        wdm = WikidataManager()
        self.assertTrue(wdm.is_valid(self.valid_wikidata_1))
        self.assertTrue(wdm.is_valid(self.valid_wikidata_2))
        self.assertTrue(wdm.is_valid(self.valid_wikidata_3))
        self.assertFalse(wdm.is_valid(self.invalid_wikidata_1))
        self.assertFalse(wdm.is_valid(self.invalid_wikidata_3))

        wdm_file = WikidataManager(storage_manager=InMemoryStorageManager(self.test_json_path))
        # The fixture is expected to contain these ids (the invalid one included).
        self.assertIn(wdm_file.normalise(self.valid_wikidata_1, include_prefix=True), self.data)
        self.assertIn(wdm_file.normalise(self.valid_wikidata_2, include_prefix=True), self.data)
        self.assertIn(wdm_file.normalise(self.invalid_wikidata_3, include_prefix=True), self.data)
        self.assertTrue(wdm_file.is_valid(wdm_file.normalise(self.valid_wikidata_1, include_prefix=True)))
        self.assertTrue(wdm_file.is_valid(wdm_file.normalise(self.valid_wikidata_2, include_prefix=True)))
        self.assertFalse(wdm_file.is_valid(wdm_file.normalise(self.invalid_wikidata_3, include_prefix=True)))

        wdm_nofile_noapi = WikidataManager(
            storage_manager=InMemoryStorageManager(self.test_json_path),
            use_api_service=False,
        )
        self.assertTrue(wdm_nofile_noapi.is_valid(self.valid_wikidata_1))
        self.assertTrue(wdm_nofile_noapi.is_valid(self.valid_wikidata_2))

    def test_wikidata_exists(self):
        """Check the return value of exists() with and without extra info."""
        # (identifier, get_extra_info, expected return value of exists())
        cases = (
            (self.valid_wikidata_1, True, (True, {'valid': True})),
            (self.valid_wikidata_1, False, True),
            (self.valid_wikidata_2, False, True),
            (self.invalid_wikidata_1, False, False),
            (self.invalid_wikidata_1, True, (False, {'valid': False})),
        )
        for wikidata_id, get_extra_info, expected_output in cases:
            # Keyword labels make every sub-test uniquely identifiable on failure.
            with self.subTest(wikidata_id=wikidata_id, get_extra_info=get_extra_info, allow_extra_api=None):
                # A fresh manager per case, so no state is shared between sub-tests.
                wikidata_manager = WikidataManager()
                output = wikidata_manager.exists(
                    wikidata_id, get_extra_info=get_extra_info, allow_extra_api=None
                )
                self.assertEqual(output, expected_output)

    def test_wikidata_default(self):
        """Check the default manager: validation, support-file creation and deletion."""
        wm_nofile = WikidataManager()
        # No support files (it generates it)
        # Default storage manager : in Memory + generates file on method call (not automatically)
        # uses API
        self.assertTrue(wm_nofile.is_valid(self.valid_wikidata_1))
        self.assertTrue(wm_nofile.is_valid(self.valid_wikidata_2))
        self.assertFalse(wm_nofile.is_valid(self.invalid_wikidata_3))
        self.assertFalse(wm_nofile.is_valid(self.invalid_wikidata_1))
        wm_nofile.storage_manager.store_file()
        validated_ids = [
            self.valid_wikidata_1,
            self.valid_wikidata_2,
            self.invalid_wikidata_1,
            self.invalid_wikidata_3,
        ]
        # check that the support file was correctly created
        self.assertTrue(os.path.exists("storage/id_value.json"))
        # Context manager guarantees the handle is closed even if parsing fails.
        with open("storage/id_value.json") as lj:
            load_dict = json.load(lj)
        # check that all the validated ids are stored in the json file
        self.assertTrue(all(wm_nofile.normalise(x, include_prefix=True) in load_dict for x in validated_ids))
        wm_nofile.storage_manager.delete_storage()
        # check that the support file was correctly deleted
        self.assertFalse(os.path.exists("storage/id_value.json"))

    ##### IN-MEMORY STORAGE MANAGER

    def test_wikidata_memory_file_noapi(self):
        """In-memory storage loaded from the support file, API disabled."""
        # Uses support file (without updating it)
        # Uses InMemoryStorageManager storage manager
        # does not use API (so a syntactically correct id is considered to be valid)
        wm_file = WikidataManager(
            storage_manager=InMemoryStorageManager(self.test_json_path),
            use_api_service=False,
        )
        self.assertIn(wm_file.normalise(self.valid_wikidata_1, include_prefix=True), self.data)
        self.assertIn(wm_file.normalise(self.valid_wikidata_2, include_prefix=True), self.data)
        # is stored in support file as invalid
        self.assertFalse(wm_file.is_valid(self.invalid_wikidata_3))
        # is not stored in support file as invalid, does not exist but has correct syntax
        self.assertTrue(wm_file.is_valid(wm_file.normalise(self.invalid_wikidata_2, include_prefix=True)))

    def test_wikidata_memory_file_api(self):
        """In-memory storage loaded from the support file, API enabled."""
        # Uses support file (without updating it)
        # Uses InMemoryStorageManager storage manager
        # uses API (so a syntactically correct id which is not valid is considered to be invalid)
        wm_file = WikidataManager(
            storage_manager=InMemoryStorageManager(self.test_json_path),
            use_api_service=True,
        )
        self.assertFalse(wm_file.is_valid(self.invalid_wikidata_2))

    def test_wikidata_memory_nofile_noapi(self):
        """In-memory storage without a support file, API disabled."""
        # Does not use support file
        # Uses InMemoryStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        wm_nofile_noapi = WikidataManager(storage_manager=InMemoryStorageManager(), use_api_service=False)
        self.assertTrue(wm_nofile_noapi.is_valid(self.valid_wikidata_1))
        self.assertTrue(wm_nofile_noapi.is_valid(self.invalid_wikidata_2))
        wm_nofile_noapi.storage_manager.delete_storage()

    ##### SQLITE STORAGE MANAGER

    def test_wikidata_sqlite_nofile_api(self):
        """SQLite storage created from scratch, API enabled."""
        # No support files (it generates it)
        # storage manager : SqliteStorageManager
        # uses API
        sql_wm_nofile = WikidataManager(storage_manager=SqliteStorageManager())
        self.assertTrue(sql_wm_nofile.is_valid(self.valid_wikidata_1))
        self.assertTrue(sql_wm_nofile.is_valid(self.valid_wikidata_2))
        self.assertFalse(sql_wm_nofile.is_valid(self.invalid_wikidata_2))
        self.assertFalse(sql_wm_nofile.is_valid(self.invalid_wikidata_3))
        # check that the support db was correctly created and that it contains all the validated ids
        self.assertTrue(os.path.exists("storage/id_valid_dict.db"))
        validated_ids = [
            self.valid_wikidata_1,
            self.valid_wikidata_2,
            self.invalid_wikidata_2,
            self.invalid_wikidata_3,
        ]
        all_ids_stored = sql_wm_nofile.storage_manager.get_all_keys()
        # check that all the validated ids are stored in the db
        self.assertTrue(all(sql_wm_nofile.normalise(x, include_prefix=True) in all_ids_stored for x in validated_ids))

        sql_wm_nofile.storage_manager.delete_storage()
        # check that the support db was correctly deleted
        self.assertFalse(os.path.exists("storage/id_valid_dict.db"))

    def test_wikidata_sqlite_file_api(self):
        """SQLite storage pre-filled through the API, then read back with the API disabled."""
        # Uses support file
        # Uses SqliteStorageManager storage manager
        # The db is first filled by a manager that uses the API; a second manager
        # then reads it without the API (so an unknown but syntactically correct
        # id is considered to be valid).
        # db creation
        test_sqlite_db = os.path.join(self.test_dir, "database.db")
        if os.path.exists(test_sqlite_db):
            os.remove(test_sqlite_db)
        to_insert = [self.invalid_wikidata_3, self.valid_wikidata_1]
        sql_file = WikidataManager(storage_manager=SqliteStorageManager(test_sqlite_db), use_api_service=True)
        for raw_id in to_insert:
            norm_id = sql_file.normalise(raw_id, include_prefix=True)
            is_valid = 1 if sql_file.is_valid(norm_id) else 0
            insert_tup = (norm_id, is_valid)
            sql_file.storage_manager.cur.execute("INSERT OR REPLACE INTO info VALUES (?,?)", insert_tup)
            sql_file.storage_manager.con.commit()
        sql_file.storage_manager.con.close()

        sql_no_api = WikidataManager(storage_manager=SqliteStorageManager(test_sqlite_db), use_api_service=False)
        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        # check that all the normalised ids in the list were correctly inserted in the db
        self.assertTrue(all(sql_no_api.normalise(x, include_prefix=True) in all_db_keys for x in to_insert))
        # is stored in support file as valid
        self.assertTrue(sql_no_api.is_valid(self.valid_wikidata_1))
        # is stored in support file as invalid
        self.assertFalse(sql_no_api.is_valid(self.invalid_wikidata_3))
        # is not stored in support file as invalid, does not exist but has correct syntax
        self.assertTrue(sql_no_api.is_valid(sql_no_api.normalise(self.invalid_wikidata_2, include_prefix=True)))
        sql_no_api.storage_manager.delete_storage()

    def test_wikidata_sqlite_nofile_noapi(self):
        """SQLite storage without a support file, API disabled."""
        # Does not use support file
        # Uses SqliteStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        wm_nofile_noapi = WikidataManager(storage_manager=SqliteStorageManager(), use_api_service=False)
        self.assertTrue(wm_nofile_noapi.is_valid(self.valid_wikidata_1))
        self.assertTrue(wm_nofile_noapi.is_valid(self.invalid_wikidata_2))
        wm_nofile_noapi.storage_manager.delete_storage()

    ##### REDIS STORAGE MANAGER

    def test_wikidata_redis_nofile_api(self):
        """Redis storage starting empty, API enabled."""
        # No available data in redis db
        # Storage manager : RedisStorageManager
        # uses API
        wm_nofile = WikidataManager(storage_manager=RedisStorageManager(testing=True))
        self.assertTrue(wm_nofile.is_valid(self.valid_wikidata_1))
        self.assertTrue(wm_nofile.is_valid(self.valid_wikidata_2))

        self.assertFalse(wm_nofile.is_valid(self.invalid_wikidata_2))
        self.assertFalse(wm_nofile.is_valid(self.invalid_wikidata_3))
        # check that the redis db was correctly filled and that it contains all the validated ids

        validated_ids = {
            self.valid_wikidata_1,
            self.valid_wikidata_2,
            self.invalid_wikidata_2,
            self.invalid_wikidata_3,
        }
        validated_ids = {wm_nofile.normalise(x, include_prefix=True) for x in validated_ids}
        all_ids_stored = wm_nofile.storage_manager.get_all_keys()
        # check that exactly the validated ids are stored in the redis db
        self.assertEqual(validated_ids, all_ids_stored)
        wm_nofile.storage_manager.delete_storage()
        # check that the redis db was correctly emptied
        self.assertEqual(wm_nofile.storage_manager.get_all_keys(), set())

    def test_wikidata_redis_file_api(self):
        """Redis storage pre-filled through the API, then read back with the API disabled."""
        # Uses data in redis db
        # Uses RedisStorageManager
        # fills db

        # use API to save validity values
        to_insert = [self.invalid_wikidata_3, self.valid_wikidata_3, self.valid_wikidata_1]
        storage_manager = RedisStorageManager(testing=True)
        redis_file = WikidataManager(storage_manager=storage_manager, use_api_service=True)
        for raw_id in to_insert:
            norm_id = redis_file.normalise(raw_id, include_prefix=True)
            is_valid = redis_file.is_valid(norm_id)
            redis_file.storage_manager.set_value(norm_id, is_valid)

        # does not use API, retrieve values from DB
        redis_no_api = WikidataManager(storage_manager=storage_manager, use_api_service=False)
        all_db_keys = redis_no_api.storage_manager.get_all_keys()
        # check that all the normalised ids in the list were correctly inserted in the db
        self.assertTrue(all(redis_no_api.normalise(x, include_prefix=True) in all_db_keys for x in to_insert))
        # is stored in the db as valid
        self.assertTrue(redis_no_api.is_valid(self.valid_wikidata_1))
        # is stored in the db as valid
        self.assertTrue(redis_no_api.is_valid(self.valid_wikidata_3))
        # is stored in the db as invalid
        self.assertFalse(redis_no_api.is_valid(self.invalid_wikidata_3))
        # is not stored in the db as invalid, does not exist but has correct syntax
        self.assertTrue(redis_no_api.is_valid(self.invalid_wikidata_2))
        redis_no_api.storage_manager.delete_storage()

    def test_wikidata_redis_nofile_noapi(self):
        """Redis storage starting empty, API disabled."""
        # No data in redis db
        # Uses RedisStorageManager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        wm_nofile_noapi = WikidataManager(storage_manager=RedisStorageManager(testing=True), use_api_service=False)
        self.assertTrue(wm_nofile_noapi.is_valid(self.valid_wikidata_2))
        self.assertTrue(wm_nofile_noapi.is_valid(self.invalid_wikidata_2))

        wm_nofile_noapi.storage_manager.delete_storage()