Coverage for test / idm_wikidata_test.py: 99%
177 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2026 Marta Soricetti <marta.soricetti@unibo.it>
2#
3# SPDX-License-Identifier: ISC
5import json
6import sqlite3
7import os.path
8import unittest
9from os import makedirs
10from os.path import exists, join
12import xmltodict
13from oc_ds_converter.oc_idmanager import *
14from oc_ds_converter.oc_idmanager.base import IdentifierManager
15from requests import ReadTimeout, get
16from requests.exceptions import ConnectionError
17from oc_ds_converter.oc_idmanager.oc_data_storage.storage_manager import StorageManager
18from oc_ds_converter.oc_idmanager.oc_data_storage.in_memory_manager import InMemoryStorageManager
19from oc_ds_converter.oc_idmanager.oc_data_storage.sqlite_manager import SqliteStorageManager
20from oc_ds_converter.oc_idmanager.oc_data_storage.redis_manager import RedisStorageManager
class WikidataIdentifierManagerTest(unittest.TestCase):
    """Tests for ``WikidataManager``, the Wikidata identifier manager.

    The tests cover normalisation, validation and existence checks, combined
    with the in-memory, SQLite and Redis storage managers, both with and
    without the API service enabled (``use_api_service``).
    """

    def setUp(self):
        """Create the scratch directory and load the fixtures shared by all tests."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        makedirs("tmp", exist_ok=True)

        self.test_dir = os.path.join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_wikidata_1 = "Q34433"
        self.valid_wikidata_2 = "Q24698708"
        self.valid_wikidata_3 = "Q15767074"
        self.invalid_wikidata_1 = "Q34433Q345"
        self.invalid_wikidata_2 = "Q24698722"  # valid format but not existing
        self.invalid_wikidata_3 = "Q12"  # not existing yet

    def test_wikidata_normalise(self):
        """Differently formatted inputs must normalise to the bare identifier."""
        wdm = WikidataManager()
        # (expected normalised value, raw input)
        cases = [
            (
                self.valid_wikidata_1,
                self.valid_wikidata_1.replace("Q", "https://www.wikidata.org/wiki/Q"),
            ),
            (self.valid_wikidata_2, self.valid_wikidata_2),
            (self.valid_wikidata_2, self.valid_wikidata_2.replace("Q", "wikidata: Q")),
            (self.valid_wikidata_3, self.valid_wikidata_3.replace("Q", "Q ")),
        ]
        for expected, raw in cases:
            with self.subTest(raw=raw):
                # assertEqual, not assertTrue: assertTrue(a, b) would treat b as
                # the failure message and only check that a is truthy.
                self.assertEqual(expected, wdm.normalise(raw))

    def test_wikidata_is_valid(self):
        """Validate ids with the default manager and with a file-backed in-memory storage."""
        wdm = WikidataManager()
        self.assertTrue(wdm.is_valid(self.valid_wikidata_1))
        self.assertTrue(wdm.is_valid(self.valid_wikidata_2))
        self.assertTrue(wdm.is_valid(self.valid_wikidata_3))
        self.assertFalse(wdm.is_valid(self.invalid_wikidata_1))
        self.assertFalse(wdm.is_valid(self.invalid_wikidata_3))

        # Manager backed by the support file loaded in setUp.
        wdm_file = WikidataManager(storage_manager=InMemoryStorageManager(self.test_json_path))
        self.assertIn(wdm_file.normalise(self.valid_wikidata_1, include_prefix=True), self.data)
        self.assertIn(wdm_file.normalise(self.valid_wikidata_2, include_prefix=True), self.data)
        self.assertIn(wdm_file.normalise(self.invalid_wikidata_3, include_prefix=True), self.data)
        self.assertTrue(wdm_file.is_valid(wdm_file.normalise(self.valid_wikidata_1, include_prefix=True)))
        self.assertTrue(wdm_file.is_valid(wdm_file.normalise(self.valid_wikidata_2, include_prefix=True)))
        self.assertFalse(wdm_file.is_valid(wdm_file.normalise(self.invalid_wikidata_3, include_prefix=True)))

        # Same support file, but with the API service disabled.
        wdm_file_noapi = WikidataManager(
            storage_manager=InMemoryStorageManager(self.test_json_path),
            use_api_service=False,
        )
        self.assertTrue(wdm_file_noapi.is_valid(self.valid_wikidata_1))
        self.assertTrue(wdm_file_noapi.is_valid(self.valid_wikidata_2))

    def test_wikidata_exists(self):
        """Check the return shape of ``exists`` with and without extra info."""
        # (identifier, get_extra_info, expected output); allow_extra_api is always None.
        cases = [
            (self.valid_wikidata_1, True, (True, {'valid': True})),
            (self.valid_wikidata_1, False, True),
            (self.valid_wikidata_2, False, True),
            (self.invalid_wikidata_1, False, False),
            (self.invalid_wikidata_1, True, (False, {'valid': False})),
        ]
        for wikidata_id, get_extra_info, expected_output in cases:
            # The sub-test parameters identify each case unambiguously in the report.
            with self.subTest(wikidata_id=wikidata_id, get_extra_info=get_extra_info, allow_extra_api=None):
                wikidata_manager = WikidataManager()
                output = wikidata_manager.exists(
                    wikidata_id, get_extra_info=get_extra_info, allow_extra_api=None
                )
                self.assertEqual(output, expected_output)

    def test_wikidata_default(self):
        """Default manager: validate ids, store the support file, then delete it."""
        wm_nofile = WikidataManager()
        # No support files (it generates it)
        # Default storage manager : in Memory + generates file on method call (not automatically)
        # uses API
        self.assertTrue(wm_nofile.is_valid(self.valid_wikidata_1))
        self.assertTrue(wm_nofile.is_valid(self.valid_wikidata_2))
        self.assertFalse(wm_nofile.is_valid(self.invalid_wikidata_3))
        self.assertFalse(wm_nofile.is_valid(self.invalid_wikidata_1))
        wm_nofile.storage_manager.store_file()
        validated_ids = [
            self.valid_wikidata_1,
            self.valid_wikidata_2,
            self.invalid_wikidata_1,
            self.invalid_wikidata_3,
        ]
        # check that the support file was correctly created
        self.assertTrue(os.path.exists("storage/id_value.json"))
        # Context manager guarantees the handle is closed even if json.load raises.
        with open("storage/id_value.json", encoding="utf-8") as lj:
            load_dict = json.load(lj)
        # check that all the validated ids are stored in the json file
        for validated_id in validated_ids:
            self.assertIn(wm_nofile.normalise(validated_id, include_prefix=True), load_dict)
        wm_nofile.storage_manager.delete_storage()
        # check that the support file was correctly deleted
        self.assertFalse(os.path.exists("storage/id_value.json"))

    ##### IN-MEMORY STORAGE MANAGER

    def test_wikidata_memory_file_noapi(self):
        """In-memory storage with support file, API disabled."""
        # Uses support file (without updating it)
        # Uses InMemoryStorageManager storage manager
        # does not use API (so a syntactically correct id is considered to be valid)
        wm_file = WikidataManager(
            storage_manager=InMemoryStorageManager(self.test_json_path),
            use_api_service=False,
        )
        self.assertIn(wm_file.normalise(self.valid_wikidata_1, include_prefix=True), self.data)
        self.assertIn(wm_file.normalise(self.valid_wikidata_2, include_prefix=True), self.data)
        # is stored in support file as invalid
        self.assertFalse(wm_file.is_valid(self.invalid_wikidata_3))
        # is not stored in support file as invalid, does not exist but has correct syntax
        self.assertTrue(wm_file.is_valid(wm_file.normalise(self.invalid_wikidata_2, include_prefix=True)))

    def test_wikidata_memory_file_api(self):
        """In-memory storage with support file, API enabled."""
        # Uses support file (without updating it)
        # Uses InMemoryStorageManager storage manager
        # uses API (so a syntactically correct id which is not valid is considered to be invalid)
        wm_file = WikidataManager(
            storage_manager=InMemoryStorageManager(self.test_json_path),
            use_api_service=True,
        )
        self.assertFalse(wm_file.is_valid(self.invalid_wikidata_2))

    def test_wikidata_memory_nofile_noapi(self):
        """In-memory storage without support file, API disabled."""
        # Does not use support file
        # Uses InMemoryStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        wm_nofile_noapi = WikidataManager(storage_manager=InMemoryStorageManager(), use_api_service=False)
        self.assertTrue(wm_nofile_noapi.is_valid(self.valid_wikidata_1))
        self.assertTrue(wm_nofile_noapi.is_valid(self.invalid_wikidata_2))
        wm_nofile_noapi.storage_manager.delete_storage()

    ##### SQLITE STORAGE MANAGER

    def test_wikidata_sqlite_nofile_api(self):
        """SQLite storage without a pre-existing db, API enabled."""
        # No support files (it generates it)
        # storage manager : SqliteStorageManager
        # uses API
        sql_wm_nofile = WikidataManager(storage_manager=SqliteStorageManager())
        self.assertTrue(sql_wm_nofile.is_valid(self.valid_wikidata_1))
        self.assertTrue(sql_wm_nofile.is_valid(self.valid_wikidata_2))
        self.assertFalse(sql_wm_nofile.is_valid(self.invalid_wikidata_2))
        self.assertFalse(sql_wm_nofile.is_valid(self.invalid_wikidata_3))
        # check that the support db was correctly created and that it contains all the validated ids
        self.assertTrue(os.path.exists("storage/id_valid_dict.db"))
        validated_ids = [
            self.valid_wikidata_1,
            self.valid_wikidata_2,
            self.invalid_wikidata_2,
            self.invalid_wikidata_3,
        ]
        all_ids_stored = sql_wm_nofile.storage_manager.get_all_keys()
        # check that all the validated ids are stored in the db
        for validated_id in validated_ids:
            self.assertIn(sql_wm_nofile.normalise(validated_id, include_prefix=True), all_ids_stored)

        sql_wm_nofile.storage_manager.delete_storage()
        # check that the support db was correctly deleted
        self.assertFalse(os.path.exists("storage/id_valid_dict.db"))

    def test_wikidata_sqlite_file_api(self):
        """SQLite storage: fill a db using the API, then read it back with the API disabled."""
        # Uses support db
        # Uses SqliteStorageManager storage manager
        # First manager uses the API to populate the db; the second one does not
        # use the API (so a syntactically correct id is considered to be valid)
        # db creation
        test_sqlite_db = os.path.join(self.test_dir, "database.db")
        if os.path.exists(test_sqlite_db):
            os.remove(test_sqlite_db)
        to_insert = [self.invalid_wikidata_3, self.valid_wikidata_1]
        sql_file = WikidataManager(storage_manager=SqliteStorageManager(test_sqlite_db), use_api_service=True)
        for wikidata_id in to_insert:
            norm_id = sql_file.normalise(wikidata_id, include_prefix=True)
            is_valid = 1 if sql_file.is_valid(norm_id) else 0
            insert_tup = (norm_id, is_valid)
            sql_file.storage_manager.cur.execute("INSERT OR REPLACE INTO info VALUES (?,?)", insert_tup)
            sql_file.storage_manager.con.commit()
        sql_file.storage_manager.con.close()

        sql_no_api = WikidataManager(storage_manager=SqliteStorageManager(test_sqlite_db), use_api_service=False)
        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        # check that all the normalised ids in the list were correctly inserted in the db
        for wikidata_id in to_insert:
            self.assertIn(sql_no_api.normalise(wikidata_id, include_prefix=True), all_db_keys)
        # is stored in support db as valid
        self.assertTrue(sql_no_api.is_valid(self.valid_wikidata_1))
        # is stored in support db as invalid
        self.assertFalse(sql_no_api.is_valid(self.invalid_wikidata_3))
        # is not stored in support db as invalid, does not exist but has correct syntax
        self.assertTrue(sql_no_api.is_valid(sql_no_api.normalise(self.invalid_wikidata_2, include_prefix=True)))
        sql_no_api.storage_manager.delete_storage()

    def test_wikidata_sqlite_nofile_noapi(self):
        """SQLite storage without a pre-existing db, API disabled."""
        # Does not use support file
        # Uses SqliteStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        wm_nofile_noapi = WikidataManager(storage_manager=SqliteStorageManager(), use_api_service=False)
        self.assertTrue(wm_nofile_noapi.is_valid(self.valid_wikidata_1))
        self.assertTrue(wm_nofile_noapi.is_valid(self.invalid_wikidata_2))
        wm_nofile_noapi.storage_manager.delete_storage()

    ##### REDIS STORAGE MANAGER

    def test_wikidata_redis_nofile_api(self):
        """Redis storage with no pre-loaded data, API enabled."""
        # No available data in redis db
        # Storage manager : RedisStorageManager
        # uses API
        wm_nofile = WikidataManager(storage_manager=RedisStorageManager(testing=True))
        self.assertTrue(wm_nofile.is_valid(self.valid_wikidata_1))
        self.assertTrue(wm_nofile.is_valid(self.valid_wikidata_2))

        self.assertFalse(wm_nofile.is_valid(self.invalid_wikidata_2))
        self.assertFalse(wm_nofile.is_valid(self.invalid_wikidata_3))
        # check that the redis db was correctly filled and that it contains all the validated ids

        validated_ids = {
            self.valid_wikidata_1,
            self.valid_wikidata_2,
            self.invalid_wikidata_2,
            self.invalid_wikidata_3,
        }
        validated_ids = {wm_nofile.normalise(x, include_prefix=True) for x in validated_ids}
        all_ids_stored = wm_nofile.storage_manager.get_all_keys()
        # check that exactly the validated ids are stored in the redis db
        self.assertEqual(validated_ids, all_ids_stored)
        wm_nofile.storage_manager.delete_storage()
        # check that the redis db was correctly emptied
        self.assertEqual(wm_nofile.storage_manager.get_all_keys(), set())

    def test_wikidata_redis_file_api(self):
        """Redis storage: fill the db using the API, then read it back with the API disabled."""
        # Uses data in redis db
        # Uses RedisStorageManager
        # fills db

        # use API to save validity values
        to_insert = [self.invalid_wikidata_3, self.valid_wikidata_3, self.valid_wikidata_1]
        storage_manager = RedisStorageManager(testing=True)
        redis_file = WikidataManager(storage_manager=storage_manager, use_api_service=True)
        for wikidata_id in to_insert:
            norm_id = redis_file.normalise(wikidata_id, include_prefix=True)
            is_valid = redis_file.is_valid(norm_id)
            redis_file.storage_manager.set_value(norm_id, is_valid)

        # does not use API, retrieve values from DB
        redis_no_api = WikidataManager(storage_manager=storage_manager, use_api_service=False)
        all_db_keys = redis_no_api.storage_manager.get_all_keys()
        # check that all the normalised ids in the list were correctly inserted in the db
        for wikidata_id in to_insert:
            self.assertIn(redis_no_api.normalise(wikidata_id, include_prefix=True), all_db_keys)
        # is stored in the db as valid
        self.assertTrue(redis_no_api.is_valid(self.valid_wikidata_1))
        # is stored in the db as valid
        self.assertTrue(redis_no_api.is_valid(self.valid_wikidata_3))
        # is stored in the db as invalid
        self.assertFalse(redis_no_api.is_valid(self.invalid_wikidata_3))
        # is not stored in the db as invalid, does not exist but has correct syntax
        self.assertTrue(redis_no_api.is_valid(self.invalid_wikidata_2))
        redis_no_api.storage_manager.delete_storage()

    def test_wikidata_redis_nofile_noapi(self):
        """Redis storage with no pre-loaded data, API disabled."""
        # No data in redis db
        # Uses RedisStorageManager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        wm_nofile_noapi = WikidataManager(storage_manager=RedisStorageManager(testing=True), use_api_service=False)
        self.assertTrue(wm_nofile_noapi.is_valid(self.valid_wikidata_2))
        self.assertTrue(wm_nofile_noapi.is_valid(self.invalid_wikidata_2))

        wm_nofile_noapi.storage_manager.delete_storage()