Coverage for test / datacite_processing_test.py: 100%
876 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2023-2026 Arcangelo Massari <arcangelo.massari@unibo.it>
2# SPDX-FileCopyrightText: 2024-2025 Arianna Moretti <arianna.moretti4@unibo.it>
3# SPDX-FileCopyrightText: 2024-2026 Marta Soricetti <marta.soricetti@unibo.it>
5#
6# SPDX-License-Identifier: ISC
8import unittest
9import json
10from oc_ds_converter.lib.csvmanager import CSVManager
11from oc_ds_converter.lib.jsonmanager import *
12from oc_ds_converter.datacite.datacite_processing import DataciteProcessing
14from oc_ds_converter.oc_idmanager.oc_data_storage.redis_manager import RedisStorageManager
15from oc_ds_converter.oc_idmanager.oc_data_storage.in_memory_manager import InMemoryStorageManager
16from oc_ds_converter.oc_idmanager.oc_data_storage.sqlite_manager import SqliteStorageManager
# Root directory holding the fixture material for this test suite.
TEST_DIR = os.path.join("test","datacite_processing")
# Scratch directory where tests create temporary sqlite/json databases.
TMP_SUPPORT_MATERIAL = os.path.join(TEST_DIR, "tmp_support")
# Support directory — presumably ID/ORCID mapping material; TODO confirm usage.
IOD = os.path.join(TEST_DIR, 'iod')
# Support directory — presumably a "wanted DOIs" filter; TODO confirm usage.
WANTED_DOIS = os.path.join(TEST_DIR, 'wanted_dois')
# CSV mapping used for publisher data (see CSVManager import above).
PUBLISHERS_MAPPING = os.path.join(TEST_DIR, 'publishers.csv')
# Golden JSON dump of DataCite records used as input by most tests.
DATA = os.path.join(TEST_DIR, 'jSonFile_1_new_dump.json')
25class TestDataciteProcessing(unittest.TestCase):
27 def setUp(self):
28 # Create dirs
29 for d in [TMP_SUPPORT_MATERIAL, IOD, WANTED_DOIS]:
30 makedirs(d, exist_ok=True)
32 # Load golden data
33 with open(DATA, 'r', encoding='utf-8') as f:
34 self.expected_entities = json.load(f)["data"]
35 self.expected_count = len(self.expected_entities)
37 def test_get_all_ids_citing(self):
38 all_br = set()
39 all_ra = set()
40 dcp = DataciteProcessing()
41 for entity in self.expected_entities:
42 allids = dcp.extract_all_ids(entity, is_citing=True)
43 all_br.update(set(allids[0]))
44 all_ra.update(set(allids[1]))
46 self.assertEqual(all_br, set())
47 self.assertTrue({"orcid:0000-0002-8013-9947", "orcid:0000-0001-7392-1415",
48 "orcid:0000-0003-2328-5769", "orcid:0000-0002-6715-3533", "orcid:0000-0002-0801-0890",
49 "orcid:0000-0001-7543-3466", "orcid:0000-0002-6210-8370", "orcid:0000-0002-9747-4928",
50 "ror:03ztgj037"} == all_ra)
52 def test_get_all_ids_cited(self):
53 all_br = set()
54 all_ra = set()
55 dcp = DataciteProcessing()
56 for entity in self.expected_entities:
57 allids = dcp.extract_all_ids(entity, is_citing=False)
59 all_br.update(set(allids[0]))
60 all_ra.update(set(allids[1]))
61 self.assertTrue({"doi:10.5281/zenodo.8249952", "doi:10.5281/zenodo.8249970", "doi:10.1017/9781009157896",
62 "doi:10.1017/9781009157896.005"} == all_br)
64 def test_get_redis_validity_list_br(self):
65 dcp = DataciteProcessing()
66 br = {"doi:10.5281/zenodo.8249952", "doi:10.5281/zenodo.8249970", "doi:10.1017/9781009157896", "doi:10.1017/9781009157896.005"}
67 br_valid_list = dcp.get_reids_validity_list(br, "br")
68 exp_br_valid_list = []
69 self.assertEqual(br_valid_list, exp_br_valid_list)
70 dcp.storage_manager.delete_storage()
72 def test_get_redis_validity_list_ra(self):
73 dcp = DataciteProcessing()
74 ra = {"orcid:0000-0002-8013-9947", "orcid:0000-0001-7392-1415",
75 "orcid:0000-0003-2328-5769", "orcid:0000-0002-6715-3533", "orcid:0000-0002-0801-0890",
76 "orcid:0000-0001-7543-3466", "orcid:0000-0002-6210-8370", "orcid:0000-0002-9747-4928",
77 "ror:03ztgj037"}
78 ra_valid_list = dcp.get_reids_validity_list(ra, "ra")
79 exp_ra_valid_list = []
80 self.assertEqual(ra_valid_list, exp_ra_valid_list)
81 dcp.storage_manager.delete_storage()
83 def test_get_redis_validity_list_br_redis(self):
84 dcp = DataciteProcessing(storage_manager=RedisStorageManager(testing=True))
85 br = {"doi:10.5281/zenodo.8249952", "doi:10.5281/zenodo.8249970", "doi:10.1017/9781009157896", "doi:10.1017/9781009157896.005"}
86 br_valid_list = dcp.get_reids_validity_list(br, "br")
87 exp_br_valid_list = []
88 self.assertEqual(br_valid_list, exp_br_valid_list)
89 dcp.storage_manager.delete_storage()
91 def test_get_redis_validity_dict_w_fakeredis_db_values_sqlite(self):
92 dcp = DataciteProcessing()
93 dcp.BR_redis.sadd("doi:10.5281/zenodo.8249952", "omid:1")
94 dcp.RA_redis.sadd("orcid:0000-0002-8013-9947", "omid:2")
95 dcp.RA_redis.sadd("ror:03ztgj039", "omid:3") # invalid ror
97 br = {"doi:10.5281/zenodo.8249952", "doi:10.5281/zenodo.8249970", "doi:10.1017/9781009157896", "doi:10.1017/9781009157896.005"}
99 ra = {"orcid:0000-0002-8013-9947", "orcid:0000-0001-7392-1415",
100 "orcid:0000-0003-2328-5769", "orcid:0000-0002-6715-3533", "orcid:0000-0002-0801-0890",
101 "orcid:0000-0001-7543-3466", "orcid:0000-0002-6210-8370", "orcid:0000-0002-9747-4928", "ror:03ztgj039"}
103 br_validity_dict = dcp.get_reids_validity_list(br, "br")
104 exp_br_valid_list = ["doi:10.5281/zenodo.8249952"]
105 ra_validity_dict = dcp.get_reids_validity_list(ra, "ra")
106 exp_ra_valid_list = ["orcid:0000-0002-8013-9947", "ror:03ztgj039"]
107 self.assertEqual(set(br_validity_dict), set(exp_br_valid_list))
108 self.assertEqual(set(ra_validity_dict), set(exp_ra_valid_list))
110 dcp.storage_manager.delete_storage()
112 dcp.BR_redis.delete("doi:10.5281/zenodo.8249952")
113 dcp.RA_redis.delete("orcid:0000-0002-8013-9947")
114 dcp.RA_redis.delete("ror:03ztgj039")
116 def test_get_redis_validity_dict_w_fakeredis_db_values_redis(self):
117 dcp = DataciteProcessing(storage_manager=RedisStorageManager())
118 dcp.BR_redis.sadd("doi:10.5281/zenodo.8249970", "omid:1")
119 dcp.RA_redis.sadd("orcid:0000-0002-6210-8370", "omid:2")
120 dcp.RA_redis.sadd("ror:03ztgj039", "omid:3") # invalid ror
122 br = {"doi:10.5281/zenodo.8249952", "doi:10.5281/zenodo.8249970", "doi:10.1017/9781009157896", "doi:10.1017/9781009157896.005"}
124 ra = {"orcid:0000-0002-8013-9947", "orcid:0000-0001-7392-1415",
125 "orcid:0000-0003-2328-5769", "orcid:0000-0002-6715-3533", "orcid:0000-0002-0801-0890",
126 "orcid:0000-0001-7543-3466", "orcid:0000-0002-6210-8370", "orcid:0000-0002-9747-4928", "ror:03ztgj039"}
128 br_validity_dict = dcp.get_reids_validity_list(br, "br")
129 exp_br_valid_list = ["doi:10.5281/zenodo.8249970"]
130 ra_validity_dict = dcp.get_reids_validity_list(ra, "ra")
131 exp_ra_valid_list = ["orcid:0000-0002-6210-8370", "ror:03ztgj039"]
132 self.assertEqual(set(br_validity_dict), set(exp_br_valid_list))
133 self.assertEqual(set(ra_validity_dict), set(exp_ra_valid_list))
135 dcp.storage_manager.delete_storage()
137 dcp.BR_redis.delete("doi:10.5281/zenodo.8249970")
138 dcp.RA_redis.delete("orcid:0000-0002-6210-8370")
139 dcp.RA_redis.delete("ror:03ztgj039")
141 def test_validated_as_default(self):
142 """
143 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value:
144 string of the identifier, the method "validated_as" returns:
145 - True if the id was already validated as valid
146 - False if the id was already validated as invalid
147 - None if the id was not validated before
148 The procedure is tested
149 - With default storage manager (sqlite) without a pre-existent db associated
150 """
152 dcp = DataciteProcessing()
153 validate_as_none_doi = dcp.validated_as({"schema": "doi", "identifier": "doi:10.11578/1480643"})
154 validated_as_none_orcid = dcp.validated_as({"schema": "orcid", "identifier": "orcid:0000-0001-8513-8700"})
155 validated_as_none_ror = dcp.validated_as({"schema": "ror", "identifier": "ror:03ztgj037"})
156 self.assertEqual(validate_as_none_doi, None)
157 self.assertEqual(validated_as_none_orcid, None)
158 self.assertEqual(validated_as_none_ror, None)
160 dcp.storage_manager.delete_storage()
162 def test_validated_as_default_redis(self):
163 """
164 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value:
165 string of the identifier, the method "validated_as" returns:
166 - True if the id was already validated as valid
167 - False if the id was already validated as invalid
168 - None if the id was not validated before
169 The procedure is tested
170 - With redis storage manager without a pre-existent db associated
171 """
173 dcp = DataciteProcessing(storage_manager=RedisStorageManager(testing=True))
174 validate_as_none_doi = dcp.validated_as({"schema": "doi", "identifier": "doi:10.11578/1480643"})
175 validated_as_none_orcid = dcp.validated_as({"schema": "orcid", "identifier": "orcid:0000-0001-8513-8700"})
176 validated_as_none_ror = dcp.validated_as({"schema": "ror", "identifier": "ror:03ztgj037"})
177 self.assertEqual(validate_as_none_doi, None)
178 self.assertEqual(validated_as_none_orcid, None)
179 self.assertEqual(validated_as_none_ror, None)
180 dcp.storage_manager.delete_storage()
    def test_validated_as_redis_with_preexistent_data(self):
        """
        Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value:
        string of the identifier, the method "validated_as" returns:
        - True if the id was already validated as valid
        - False if the id was already validated as invalid
        - None if the id was not validated before
        The procedure is tested
        - With a SQLite storage manager and pre-existent data associated.
          NOTE(review): despite the method name mentioning "redis", the
          storage used here is a SqliteStorageManager backed by a file —
          consider renaming the test.
        """
        db_path = os.path.join(TMP_SUPPORT_MATERIAL, "db_path.db")
        sqlite_man = SqliteStorageManager(db_path)
        # One id never stored, one stored as valid, one stored as invalid,
        # for each supported schema.
        valid_doi_not_in_db = {"identifier": "doi:10.11578/1480643", "schema": "doi"}
        valid_doi_in_db = {"identifier": "doi:10.15407/scin11.06.057", "schema": "doi"}
        invalid_doi_in_db = {"identifier": "doi:10.1066/1741-4326/aa6b", "schema": "doi"}
        valid_orcid_not_in_db = {"schema": "orcid", "identifier": "orcid:0000-0001-8513-8700"}
        valid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-2630"}
        invalid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-26XX"}
        valid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj037"}
        valid_ror_not_in_db = {"schema": "ror", "identifier": "ror:01111rn36"}
        invalid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj039"}
        valid_viaf_not_in_db = {"identifier": "viaf:102333412", "schema": "viaf"}
        valid_viaf_in_db = {"identifier": "viaf:108389263", "schema": "viaf"}
        invalid_viaf_in_db = {"identifier": "viaf:12345ABC", "schema": "viaf"}
        valid_wikidata_not_in_db = {"identifier": "wikidata:Q2330656", "schema": "wikidata"}
        valid_wikidata_in_db = {"identifier": "wikidata:Q42", "schema": "wikidata"}
        invalid_wikidata_in_db = {"identifier": "wikidata:Q_invalid_123", "schema": "wikidata"}
        # --- SQLITE DATABASE POPULATION ---
        sqlite_man.set_value(valid_doi_in_db["identifier"], True)
        sqlite_man.set_value(invalid_doi_in_db["identifier"], False)
        sqlite_man.set_value(valid_orcid_in_db["identifier"], True)
        sqlite_man.set_value(invalid_orcid_in_db["identifier"], False)
        sqlite_man.set_value(valid_ror_in_db["identifier"], True)
        sqlite_man.set_value(invalid_ror_in_db["identifier"], False)
        sqlite_man.set_value(valid_viaf_in_db["identifier"], True)
        sqlite_man.set_value(invalid_viaf_in_db["identifier"], False)
        sqlite_man.set_value(valid_wikidata_in_db["identifier"], True)
        sqlite_man.set_value(invalid_wikidata_in_db["identifier"], False)
        # --- METHOD EXECUTION ---
        # New class instance to check the correct task management with a sqlite db in input
        d_processing_sql = DataciteProcessing(storage_manager=sqlite_man)
        doi_validated_as_True = d_processing_sql.validated_as(valid_doi_in_db)
        doi_validated_as_False = d_processing_sql.validated_as(invalid_doi_in_db)
        doi_not_validated = d_processing_sql.validated_as(valid_doi_not_in_db)
        orcid_validated_as_True = d_processing_sql.validated_as(valid_orcid_in_db)
        orcid_validated_as_False = d_processing_sql.validated_as(invalid_orcid_in_db)
        orcid_not_validated = d_processing_sql.validated_as(valid_orcid_not_in_db)
        ror_validated_as_True = d_processing_sql.validated_as(valid_ror_in_db)
        ror_validated_as_False = d_processing_sql.validated_as(invalid_ror_in_db)
        ror_not_validated = d_processing_sql.validated_as(valid_ror_not_in_db)
        viaf_validated_as_True = d_processing_sql.validated_as(valid_viaf_in_db)
        viaf_validated_as_False = d_processing_sql.validated_as(invalid_viaf_in_db)
        viaf_not_validated = d_processing_sql.validated_as(valid_viaf_not_in_db)
        wikidata_validated_as_True = d_processing_sql.validated_as(valid_wikidata_in_db)
        wikidata_validated_as_False = d_processing_sql.validated_as(invalid_wikidata_in_db)
        wikidata_not_validated = d_processing_sql.validated_as(valid_wikidata_not_in_db)
        # --- ASSERTIONS ---
        self.assertEqual(doi_validated_as_True, True)
        self.assertEqual(doi_validated_as_False, False)
        self.assertEqual(doi_not_validated, None)
        self.assertEqual(orcid_validated_as_True, True)
        self.assertEqual(orcid_validated_as_False, False)
        self.assertEqual(orcid_not_validated, None)
        self.assertEqual(ror_validated_as_True, True)
        self.assertEqual(ror_validated_as_False, False)
        self.assertEqual(ror_not_validated, None)
        self.assertEqual(viaf_validated_as_True, True)
        self.assertEqual(viaf_validated_as_False, False)
        self.assertEqual(viaf_not_validated, None)
        self.assertEqual(wikidata_validated_as_True, True)
        self.assertEqual(wikidata_validated_as_False, False)
        self.assertEqual(wikidata_not_validated, None)
        d_processing_sql.storage_manager.delete_storage()
    def test_validated_as_inmemory(self):
        '''
        Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value:
        string of the identifier, the method "validated_as" returns:
        - True if the id was already validated as valid
        - False if the id was already validated as invalid
        - None if the id was not validated before
        The procedure is tested
        - With in Memory + Json storage manager and a pre-existent db associated
        '''
        db_json_path = os.path.join(TMP_SUPPORT_MATERIAL, "db_path.json")
        inmemory_man = InMemoryStorageManager(db_json_path)
        # For each schema: one id never stored, one stored as valid,
        # one stored as invalid.
        valid_doi_not_in_db = {"identifier": "doi:10.11578/1480643", "schema": "doi"}
        valid_doi_in_db = {"identifier": "doi:10.15407/scin11.06.057", "schema": "doi"}
        invalid_doi_in_db = {"identifier": "doi:10.1066/1741-4326/aa6b", "schema": "doi"}
        valid_orcid_not_in_db = {"schema": "orcid", "identifier": "orcid:0000-0001-8513-8700"}
        valid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-2630"}
        invalid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-26XX"}
        valid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj037"}
        valid_ror_not_in_db = {"schema": "ror", "identifier": "ror:01111rn36"}
        invalid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj039"}
        valid_viaf_not_in_db = {"identifier": "viaf:102333412", "schema": "viaf"}
        valid_viaf_in_db = {"identifier": "viaf:108389263", "schema": "viaf"}
        invalid_viaf_in_db = {"identifier": "viaf:12345ABC", "schema": "viaf"}
        valid_wikidata_not_in_db = {"identifier": "wikidata:Q2330656", "schema": "wikidata"}
        valid_wikidata_in_db = {"identifier": "wikidata:Q42", "schema": "wikidata"}
        invalid_wikidata_in_db = {"identifier": "wikidata:Q_invalid_123", "schema": "wikidata"}
        # Populate the in-memory db with the known validation outcomes.
        inmemory_man.set_value(valid_doi_in_db["identifier"], True)
        inmemory_man.set_value(invalid_doi_in_db["identifier"], False)
        inmemory_man.set_value(valid_orcid_in_db["identifier"], True)
        inmemory_man.set_value(invalid_orcid_in_db["identifier"], False)
        inmemory_man.set_value(valid_ror_in_db["identifier"], True)
        inmemory_man.set_value(invalid_ror_in_db["identifier"], False)
        inmemory_man.set_value(valid_viaf_in_db["identifier"], True)
        inmemory_man.set_value(invalid_viaf_in_db["identifier"], False)
        inmemory_man.set_value(valid_wikidata_in_db["identifier"], True)
        inmemory_man.set_value(invalid_wikidata_in_db["identifier"], False)
        # New class instance to check the correct task management with an
        # in-memory (JSON-backed) db in input
        d_processing = DataciteProcessing(storage_manager=inmemory_man)
        doi_validated_as_True = d_processing.validated_as(valid_doi_in_db)
        doi_validated_as_False = d_processing.validated_as(invalid_doi_in_db)
        doi_not_validated = d_processing.validated_as(valid_doi_not_in_db)
        orcid_validated_as_True = d_processing.validated_as(valid_orcid_in_db)
        orcid_validated_as_False = d_processing.validated_as(invalid_orcid_in_db)
        orcid_not_validated = d_processing.validated_as(valid_orcid_not_in_db)
        ror_validated_as_True = d_processing.validated_as(valid_ror_in_db)
        ror_validated_as_False = d_processing.validated_as(invalid_ror_in_db)
        ror_not_validated = d_processing.validated_as(valid_ror_not_in_db)
        viaf_validated_as_True = d_processing.validated_as(valid_viaf_in_db)
        viaf_validated_as_False = d_processing.validated_as(invalid_viaf_in_db)
        viaf_not_validated = d_processing.validated_as(valid_viaf_not_in_db)
        wikidata_validated_as_True = d_processing.validated_as(valid_wikidata_in_db)
        wikidata_validated_as_False = d_processing.validated_as(invalid_wikidata_in_db)
        wikidata_not_validated = d_processing.validated_as(valid_wikidata_not_in_db)
        self.assertEqual(doi_validated_as_True, True)
        self.assertEqual(doi_validated_as_False, False)
        self.assertEqual(doi_not_validated, None)
        self.assertEqual(orcid_validated_as_True, True)
        self.assertEqual(orcid_validated_as_False, False)
        self.assertEqual(orcid_not_validated, None)
        self.assertEqual(ror_validated_as_True, True)
        self.assertEqual(ror_validated_as_False, False)
        self.assertEqual(ror_not_validated, None)
        self.assertEqual(viaf_validated_as_True, True)
        self.assertEqual(viaf_validated_as_False, False)
        self.assertEqual(viaf_not_validated, None)
        self.assertEqual(wikidata_validated_as_True, True)
        self.assertEqual(wikidata_validated_as_False, False)
        self.assertEqual(wikidata_not_validated, None)
        d_processing.storage_manager.delete_storage()
374 def test_validated_as_redis(self):
375 """
376 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value:
377 string of the identifier, the method "validated_as" returns:
378 - True if the id was already validated as valid
379 - False if the id was already validated as invalid
380 - None if the id was not validated before
381 The procedure is tested
382 - With REDIS storage manager and a pre-existent db associated
383 """
384 redis_man = RedisStorageManager(testing=True)
386 valid_doi_not_in_db = {"identifier": "doi:10.11578/1480643", "schema": "doi"}
387 valid_doi_in_db = {"identifier": "doi:10.15407/scin11.06.057", "schema": "doi"}
388 invalid_doi_in_db = {"identifier": "doi:10.1066/1741-4326/aa6b", "schema": "doi"}
390 valid_orcid_not_in_db = {"schema": "orcid", "identifier": "orcid:0000-0001-8513-8700"}
391 valid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-2630"}
392 invalid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-26XX"}
394 valid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj037"}
395 valid_ror_not_in_db = {"schema": "ror", "identifier": "ror:01111rn36"}
396 invalid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj039"}
398 valid_viaf_not_in_db = {"identifier": "viaf:102333412", "schema": "viaf"}
399 valid_viaf_in_db = {"identifier": "viaf:108389263", "schema": "viaf"}
400 invalid_viaf_in_db = {"identifier": "viaf:12345ABC", "schema": "viaf"}
402 valid_wikidata_not_in_db = {"identifier": "wikidata:Q2330656", "schema": "wikidata"}
403 valid_wikidata_in_db = {"identifier": "wikidata:Q42", "schema": "wikidata"}
404 invalid_wikidata_in_db = {"identifier": "wikidata:Q_invalid_123", "schema": "wikidata"}
406 redis_man.set_value(valid_doi_in_db["identifier"], True)
407 redis_man.set_value(invalid_doi_in_db["identifier"], False)
409 redis_man.set_value(valid_orcid_in_db["identifier"], True)
410 redis_man.set_value(invalid_orcid_in_db["identifier"], False)
412 redis_man.set_value(valid_ror_in_db["identifier"], True)
413 redis_man.set_value(invalid_ror_in_db["identifier"], False)
415 redis_man.set_value(valid_viaf_in_db["identifier"], True)
416 redis_man.set_value(invalid_viaf_in_db["identifier"], False)
418 redis_man.set_value(valid_wikidata_in_db["identifier"], True)
419 redis_man.set_value(invalid_wikidata_in_db["identifier"], False)
421 d_processing_redis = DataciteProcessing(storage_manager=redis_man)
423 doi_validated_as_True = d_processing_redis.validated_as(valid_doi_in_db)
424 doi_validated_as_False = d_processing_redis.validated_as(invalid_doi_in_db)
425 doi_not_validated = d_processing_redis.validated_as(valid_doi_not_in_db)
427 orcid_validated_as_True = d_processing_redis.validated_as(valid_orcid_in_db)
428 orcid_validated_as_False = d_processing_redis.validated_as(invalid_orcid_in_db)
429 orcid_not_validated = d_processing_redis.validated_as(valid_orcid_not_in_db)
431 ror_validated_as_True = d_processing_redis.validated_as(valid_ror_in_db)
432 ror_validated_as_False = d_processing_redis.validated_as(invalid_ror_in_db)
433 ror_not_validated = d_processing_redis.validated_as(valid_ror_not_in_db)
435 viaf_validated_as_True = d_processing_redis.validated_as(valid_viaf_in_db)
436 viaf_validated_as_False = d_processing_redis.validated_as(invalid_viaf_in_db)
437 viaf_not_validated = d_processing_redis.validated_as(valid_viaf_not_in_db)
439 wikidata_validated_as_True = d_processing_redis.validated_as(valid_wikidata_in_db)
440 wikidata_validated_as_False = d_processing_redis.validated_as(invalid_wikidata_in_db)
441 wikidata_not_validated = d_processing_redis.validated_as(valid_wikidata_not_in_db)
443 self.assertEqual(doi_validated_as_True, True)
444 self.assertEqual(doi_validated_as_False, False)
445 self.assertEqual(doi_not_validated, None)
447 self.assertEqual(orcid_validated_as_True, True)
448 self.assertEqual(orcid_validated_as_False, False)
449 self.assertEqual(orcid_not_validated, None)
451 self.assertEqual(ror_validated_as_True, True)
452 self.assertEqual(ror_validated_as_False, False)
453 self.assertEqual(ror_not_validated, None)
455 self.assertEqual(viaf_validated_as_True, True)
456 self.assertEqual(viaf_validated_as_False, False)
457 self.assertEqual(viaf_not_validated, None)
459 self.assertEqual(wikidata_validated_as_True, True)
460 self.assertEqual(wikidata_validated_as_False, False)
461 self.assertEqual(wikidata_not_validated, None)
463 d_processing_redis.storage_manager.delete_storage()
465 def test_get_id_manager(self):
466 """Check that, given in input the string of a schema (e.g.:'pmid') or an id with a prefix (e.g.: 'pmid:12334')
467 and a dictionary mapping the strings of the schemas to their id managers, the method returns the correct
468 id manager. Note that each instance of the Preprocessing class needs its own instances of the id managers,
469 in order to avoid conflicts while validating data"""
471 d_processing = DataciteProcessing()
473 id_man_dict = d_processing.venue_id_man_dict
474 ra_man_dict = d_processing.ra_man_dict
476 issn_id = "issn:0003-987X"
477 issn_string = "issn"
479 isbn_id = "isbn:978-88-98719-08-2"
480 isbn_string = "isbn"
482 orcid_id = "orcid:0000-0001-8513-8700"
483 orcid_string = "orcid"
485 ror_id = "ror:03ztgj037"
486 ror_string = "ror"
488 viaf_id = "viaf:102333412"
489 viaf_string = "viaf"
491 wikidata_id = "wikidata:Q42"
492 wikidata_string = "wikidata"
494 issn_man_exp = d_processing.get_id_manager(issn_id, id_man_dict)
495 issn_man_exp_2 = d_processing.get_id_manager(issn_string, id_man_dict)
497 isbn_man_exp = d_processing.get_id_manager(isbn_id, id_man_dict)
498 isbn_man_exp_2 = d_processing.get_id_manager(isbn_string, id_man_dict)
500 orcid_man_exp = d_processing.get_id_manager(orcid_id, ra_man_dict)
501 orcid_man_exp_2 = d_processing.get_id_manager(orcid_string, ra_man_dict)
503 ror_man_exp = d_processing.get_id_manager(ror_id, ra_man_dict)
504 ror_man_exp_2 = d_processing.get_id_manager(ror_string, ra_man_dict)
506 viaf_man_exp = d_processing.get_id_manager(viaf_id, ra_man_dict)
507 viaf_man_exp_2 = d_processing.get_id_manager(viaf_string, ra_man_dict)
509 wikidata_man_exp = d_processing.get_id_manager(wikidata_id, ra_man_dict)
510 wikidata_man_exp_2 = d_processing.get_id_manager(wikidata_string, ra_man_dict)
512 # check that the idmanager for the issn was returned and that it works as expected
513 self.assertTrue(issn_man_exp.is_valid(issn_id))
514 self.assertTrue(issn_man_exp_2.is_valid(issn_id))
516 # check that the idmanager for the isbn was returned and that it works as expected
517 self.assertTrue(isbn_man_exp.is_valid(isbn_id))
518 self.assertTrue(isbn_man_exp_2.is_valid(isbn_id))
520 # check that the idmanager for the orcid was returned and that it works as expected
521 self.assertTrue(orcid_man_exp.is_valid(orcid_id))
522 self.assertTrue(orcid_man_exp_2.is_valid(orcid_id))
524 # check that the idmanager for the ror was returned and that it works as expected
525 self.assertTrue(ror_man_exp.is_valid(ror_id))
526 self.assertTrue(ror_man_exp_2.is_valid(ror_id))
528 # check that the idmanager for the viaf was returned and that it works as expected
529 self.assertTrue(viaf_man_exp.is_valid(viaf_id))
530 self.assertTrue(viaf_man_exp_2.is_valid(viaf_id))
532 # check that the idmanager for the wikidata was returned and that it works as expected
533 self.assertTrue(wikidata_man_exp.is_valid(wikidata_id))
534 self.assertTrue(wikidata_man_exp_2.is_valid(wikidata_id))
536 d_processing.storage_manager.delete_storage()
    def test_csv_creator(self):
        '''Add a test with all the data.

        Feed csv_creator a full DataCite record (container, creators with
        ORCID/ROR identifiers, related identifiers, ROR-identified publisher)
        and check the produced tabular row field by field.
        '''
        datacite_processor = DataciteProcessing()
        # A complete DataCite API record for DOI 10.34780/7510-t906.
        data = {
            'id': '10.34780/7510-t906',
            'type': 'dois',
            'attributes': {
                # Journal-level container: maps to the expected 'venue' field.
                'container': {
                    'identifier': '2701-5572',
                    'firstPage': '2021',
                    'identifierType': 'ISSN',
                    'type': 'Series',
                    'title': 'Journal of Global Archaeology'
                },
                'reason': None,
                'prefix': '10.34780',
                'citationsOverTime': [],
                'registered': '2021-06-07T10:39:06Z',
                'language': 'en',
                'source': 'fabricaForm',
                'suffix': '7510-t906',
                'relatedItems': [],
                'descriptions': [
                    {'descriptionType': 'SeriesInformation', 'description': 'Journal of Global Archaeology, 2021'},
                    {'descriptionType': 'SeriesInformation', 'description': 'Journal of Global Archaeology, 2021'},
                    {'descriptionType': 'Abstract',
                     'description': 'The kingdom of Eswatini provides a rich archaeological sequence covering all time periods from the Early Stone Age to the Iron Age. For over 27 years though, no or very little archaeological research was conducted in the country. In the scope of a new project funded by the German Research Foundation (DFG) we aim to re-excavate and re-date Lion Cavern, the potentially oldest ochre mine in the world. In addition, we conduct a largescale geological survey for outcrops of ochre and test their geochemical signatures for comparative studies with archaeological ochre pieces from MSA and LSA assemblages in Eswatini. Here we present a review of the research history of the kingdom and some preliminary results from our ongoing project.',
                     'lang': 'en'}],
                'sizes': ['§ 1–12'],
                'versionOfCount': 0,
                'relatedIdentifiers': [
                    {'relationType': 'IsPartOf', 'relatedIdentifier': '2701-5572', 'relatedIdentifierType': 'ISSN'},
                    # Incomplete entry (no relatedIdentifier): must be ignored.
                    {'relationType': 'IsPartOf', 'relatedIdentifierType': 'DOI'},
                    {'relationType': 'HasMetadata', 'relatedIdentifier': 'https://zenon.dainst.org/Record/002035353',
                     'relatedIdentifierType': 'URL'},
                    {'relationType': 'References', 'relatedIdentifier': '10.2307/3888317',
                     'relatedIdentifierType': 'DOI'},
                    {'relationType': 'References', 'relatedIdentifier': '10.1086/204793',
                     'relatedIdentifierType': 'DOI'},
                    {'relationType': 'References', 'relatedIdentifier': '10.1086/338292',
                     'relatedIdentifierType': 'DOI'},
                    {'relationType': 'References', 'relatedIdentifier': '10.1111/arcm.12202',
                     'relatedIdentifierType': 'DOI'},
                    {'relationType': 'References', 'relatedIdentifier': '10.1006/jasc.2000.0638',
                     'relatedIdentifierType': 'DOI'},
                    {'relationType': 'References', 'relatedIdentifier': '10.2307/3888015',
                     'relatedIdentifierType': 'DOI'},
                    {'relationType': 'References', 'relatedIdentifier': '10.3213/2191-5784-10199',
                     'relatedIdentifierType': 'DOI'},
                    {'relationType': 'References', 'relatedIdentifier': '10.1016/j.jhevol.2005.06.007',
                     'relatedIdentifierType': 'DOI'},
                    {'relationType': 'References', 'relatedIdentifier': '10.1017/s0003598x00113298',
                     'relatedIdentifierType': 'DOI'}], 'created': '2021-05-11T13:11:58Z',
                'dates': [{'date': '2021', 'dateType': 'Issued'}],
                'published': '2021',
                'geoLocations': [],
                'partCount': 0,
                'publicationYear': 2021,
                'partOfCount': 0,
                'updated': '2021-07-30T12:39:50Z',
                'formats': [],
                'fundingReferences': [],
                # Four creators; the first and last carry ORCID identifiers,
                # which must appear in the expected 'author' field.
                'creators': [
                    {
                        'nameType': 'Personal',
                        'affiliation': [
                            {'affiliationIdentifier': 'https://ror.org/03a1kwz48',
                             'name': 'University of Tübingen, Senckenberg Centre for Human Evolution and Palaeoenvironment',
                             'affiliationIdentifierScheme': 'ROR'}],
                        'givenName': 'Gregor D.',
                        'familyName': 'Bader',
                        'name': 'Bader, Gregor D.',
                        'nameIdentifiers': [
                            {'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org',
                             'nameIdentifier': 'https://orcid.org/0000-0003-0621-9209'}]
                    },
                    {
                        'nameType': 'Personal',
                        'affiliation': [
                            {
                                'affiliationIdentifier': 'https://ror.org/02vrphe47',
                                'name': 'Swaziland National Trust Commission',
                                'affiliationIdentifierScheme': 'ROR'}
                        ],
                        'givenName': 'Bob',
                        'familyName': 'Forrester',
                        'name': 'Forrester, Bob'
                    },
                    {
                        'nameType': 'Personal',
                        'affiliation': [
                            {
                                'affiliationIdentifier': 'https://ror.org/041qv0h25',
                                'name': 'Deutsches Archäologisches Institut, Kommission für Archäologie Außereuropäischer Kulturen',
                                'affiliationIdentifierScheme': 'ROR'}
                        ],
                        'givenName': 'Lisa',
                        'familyName': 'Ehlers',
                        'name': 'Ehlers, Lisa'
                    },
                    {
                        'nameType': 'Personal',
                        'affiliation': [
                            {
                                'affiliationIdentifier': 'https://ror.org/03zga2b32',
                                'name': 'University of Bergen, SFF Centre for Early Sapiens Behaviour',
                                'affiliationIdentifierScheme': 'ROR'}
                        ],
                        'givenName': 'Elizabeth',
                        'familyName': 'Velliky',
                        'name': 'Velliky, Elizabeth',
                        'nameIdentifiers': [
                            {
                                'nameIdentifierScheme': 'ORCID',
                                'schemeUri': 'https://orcid.org',
                                'nameIdentifier': 'https://orcid.org/0000-0002-3019-5377'}
                        ]
                    }],
                'schemaVersion': 'http://datacite.org/schema/kernel-4', 'versionCount': 0, 'metadataVersion': 2,
                'citationCount': 0,
                'types': {'schemaOrg': 'ScholarlyArticle', 'resourceTypeGeneral': 'Text', 'citeproc': 'article-journal',
                          'bibtex': 'article', 'ris': 'RPRT', 'resourceType': 'Article'}, 'isActive': True,
                'viewsOverTime': [], 'identifiers': [],
                'subjects': [{'subject': 'Eswatini'}, {'subject': 'Lion Cavern'}, {'subject': 'Ochre'},
                             {'subject': 'Provenance tracing'}], 'titles': [
                    {'lang': 'en', 'title': 'The Forgotten Kingdom. New investigations in the prehistory of Eswatini'}],
                'url': 'https://publications.dainst.org/journals/index.php/joga/article/view/3559', 'downloadCount': 0,
                'rightsList': [], 'contentUrl': None, 'contributors': [], 'referenceCount': 9, 'viewCount': 0,
                'downloadsOverTime': [], 'doi': '10.34780/7510-t906',
                # ROR-identified publisher: maps to the expected 'publisher' field.
                'publisher': {
                    'publisherIdentifierScheme': 'ROR',
                    'schemeUri': 'https://ror.org',
                    'name': 'Deutsches Archäologisches Institut',
                    'publisherIdentifier': 'https://ror.org/041qv0h25'
                },
                'version': None,
                'state': 'findable',
                'alternateIdentifiers': []
            },
            'relationships': {'client': {'data': {'id': 'dai.avnrkz', 'type': 'clients'}},
                              'provider': {'data': {'id': 'dai', 'type': 'providers'}}, 'media': {'data': []},
                              'references': {'data': [{'id': '10.2307/3888317', 'type': 'dois'},
                                                      {'id': '10.1086/204793', 'type': 'dois'},
                                                      {'id': '10.1086/338292', 'type': 'dois'},
                                                      {'id': '10.1111/arcm.12202', 'type': 'dois'},
                                                      {'id': '10.1006/jasc.2000.0638', 'type': 'dois'},
                                                      {'id': '10.2307/3888015', 'type': 'dois'},
                                                      {'id': '10.3213/2191-5784-10199', 'type': 'dois'},
                                                      {'id': '10.1016/j.jhevol.2005.06.007', 'type': 'dois'},
                                                      {'id': '10.1017/s0003598x00113298', 'type': 'dois'}]},
                              'citations': {'data': []}, 'parts': {'data': []}, 'partOf': {'data': []},
                              'versions': {'data': []}, 'versionOf': {'data': []}}}
        output = list()
        tabular_data = datacite_processor.csv_creator(data)
        if tabular_data:
            output.append(tabular_data)
        # Expected flat row. Note the 'page' value '2021-2021' — derived from
        # the container's firstPage '2021'; presumably firstPage doubles as
        # lastPage when the latter is absent — TODO confirm in csv_creator.
        expected_output = [
            {
                'id': 'doi:10.34780/7510-t906',
                'title': 'The Forgotten Kingdom. New investigations in the prehistory of Eswatini',
                'author': 'Bader, Gregor D. [orcid:0000-0003-0621-9209]; Forrester, Bob; Ehlers, Lisa; Velliky, Elizabeth [orcid:0000-0002-3019-5377]',
                'pub_date': '2021',
                'venue': 'journal of global archaeology [issn:2701-5572]',
                'volume': '',
                'issue': '',
                'page': '2021-2021',
                'type': 'journal article',
                'publisher': 'Deutsches Archäologisches Institut [ror:041qv0h25]',
                'editor': ''
            }
        ]
        self.assertEqual(output,expected_output)
    def test_csv_creator2(self):
        """Run csv_creator over every entity of the golden JSON dump and
        compare the collected tabular rows against the expected rows.

        Entities for which csv_creator returns a falsy value produce no row,
        so the expected output may contain fewer rows than the dump has
        entities.
        """
        datacite_processor = DataciteProcessing()
        data = load_json(DATA, None)
        output = list()
        for item in data['data']:
            tabular_data = datacite_processor.csv_creator(item)
            if tabular_data:  # skip entities the processor discards
                output.append(tabular_data)

        expected_output = [
            {'id': 'doi:10.5281/zenodo.8244010',
             'title': 'FIGURE 1A, B in Meeting the southern brothers: a revision of the Neotropical spider genus Hexapopha Platnick, Berniker & Víquez, 2014 (Araneae, Oonopidae)',
             'author': 'Feitosa, Níthomas M. [orcid:0000-0002-8013-9947]; Ott, Ricardo [orcid:0000-0001-7392-1415]; Bonaldo, Alexandre B. [orcid:0000-0002-8013-9947]',
             'pub_date': '2023-08-11',
             'venue': '',
             'volume': '',
             'issue': '',
             'page': '',
             'type': 'other',
             'publisher': 'Zenodo',
             'editor': ''},
            {'id': 'doi:10.26050/wdcc/ar6.c6gmipicl',
             'title': 'IPCC DDC: IPSL IPSL-CM6A-LR model output prepared for CMIP6 GMMIP',
             'author': 'Boucher, Olivier [orcid:0000-0003-2328-5769]; Denvil, Sébastien [orcid:0000-0002-6715-3533]; Levavasseur, Guillaume [orcid:0000-0002-0801-0890]; Cozic, Anne [orcid:0000-0001-7543-3466]; Caubel, Arnaud [orcid:0000-0002-6210-8370]; Foujols, Marie-Alice [orcid:0000-0002-9747-4928]; Meurdesoif, Yann; Mellul, Lidia',
             'pub_date': '2023',
             'venue': '',
             'volume': '',
             'issue': '',
             'page': '',
             'type': 'dataset',
             'publisher': 'World Data Center for Climate (WDCC) at DKRZ [ror:03ztgj037]',
             'editor': 'Boucher, Olivier [orcid:0000-0003-2328-5769]; Denvil, Sébastien [orcid:0000-0002-6715-3533]; Levavasseur, Guillaume [orcid:0000-0002-0801-0890]; Cozic, Anne [orcid:0000-0001-7543-3466]; Caubel, Arnaud [orcid:0000-0002-6210-8370]; Foujols, Marie-Alice [orcid:0000-0002-9747-4928]; Meurdesoif, Yann; Mellul, Lidia'},
        ]

        self.assertEqual(output, expected_output)
748 def test_csv_creator_object(self):
749 dcp = DataciteProcessing()
750 doi_obj = "doi:10.1021/acs.jpclett.7b01097"
751 expected_output = {
752 'id': 'doi:10.1021/acs.jpclett.7b01097',
753 'title': '',
754 'author': '',
755 'pub_date': '',
756 'venue': '',
757 'volume': '',
758 'issue': '',
759 'page': '',
760 'type': '',
761 'publisher': '',
762 'editor': ''}
764 out = dcp.csv_creator({"id": doi_obj, "type": "dois", "attributes": {"doi": doi_obj}})
765 self.assertEqual(out, expected_output)
767 def test_get_publisher_name_invalid_publishers(self):
768 dcp = DataciteProcessing()
769 item1 = {"publisher": {
770 "name":"(:unav)"}
771 }
772 item2 = {"publisher": {
773 "name":":unav"}
774 }
775 item3 = {"publisher": {
776 "name":":unkn"}}
777 item4 = {"publisher": {
778 "name":"(:unkn)"}}
779 item5 = {"publisher": {
780 "name":"Edo : [publisher not identified]mon han"}}
781 item6 = {"publisher": {
782 "name":"[place of publication not identified]: [pubisher not identified]"
783 }}
784 item7 = {"publisher": {
785 "name":"unknown unknown"
786 }}
787 item8 = {"publisher": {
788 "name":"[unknown] : [unknown]"
789 }}
790 item9 = {"publisher": {
791 "name":"[unknown] : College of Pharmacists of British Columbia"
792 }}
793 item10 = {"publisher": {
794 "name":"[Edinburgh]: [Unknown]"
795 }}
796 item11 = {"publisher": {
797 "name":"Unknown, National University of Singapore"
798 }}
799 item12 = {"publisher": {
800 "name":"Not provided."
801 }}
802 item13 = {"publisher": {
803 "name":"Soleure, s.n."
804 }}
805 item14 = {"publisher": {
806 "name":"[s.l. , s.n]"
807 }}
808 item15 = {"publisher": {
809 "name":"[ s.l. : s.n.]"
810 }}
811 item16 = {"publisher": {
812 "name":"s.n.]"
813 }}
814 item17 = {"publisher": {
815 "name":"Information not available, contact SND for more information"
816 }}
817 item18 = {"publisher": {
818 "name":"Publisher Not Specified"
819 }}
820 result1 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item1['publisher'])
821 result2 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item2['publisher'])
822 result3 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item3['publisher'])
823 result4 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item4['publisher'])
824 result5 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item5['publisher'])
825 result6 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item6['publisher'])
826 result7 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item7['publisher'])
827 result8 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item8['publisher'])
828 result9 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item9['publisher'])
829 result10 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item10['publisher'])
830 result11 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item11['publisher'])
831 result12 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item12['publisher'])
832 result13 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item13['publisher'])
833 result14 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item14['publisher'])
834 result15 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item15['publisher'])
835 result16 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item16['publisher'])
836 result17 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item17['publisher'])
837 result18 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item18['publisher'])
838 expected_res = ""
839 expected_res9 = "[unknown] : College of Pharmacists of British Columbia"
840 expected_res10 = "[Edinburgh]: [Unknown]"
841 expected_res11 = "Unknown, National University of Singapore"
842 expected_res13 = "Soleure, s.n."
844 self.assertEqual(result1, expected_res)
845 self.assertEqual(result2, expected_res)
846 self.assertEqual(result3, expected_res)
847 self.assertEqual(result4, expected_res)
848 self.assertEqual(result5, expected_res)
849 self.assertEqual(result6, expected_res)
850 self.assertEqual(result7, expected_res)
851 self.assertEqual(result8, expected_res)
852 self.assertEqual(result9, expected_res9)
853 self.assertEqual(result10, expected_res10)
854 self.assertEqual(result11, expected_res11)
855 self.assertEqual(result12, expected_res)
856 self.assertEqual(result13, expected_res13)
857 self.assertEqual(result14, expected_res)
858 self.assertEqual(result15, expected_res)
859 self.assertEqual(result16, expected_res)
860 self.assertEqual(result17, expected_res)
861 self.assertEqual(result18, expected_res)
863 def test_get_publisher_name_publisher_mapping(self):
865 item = {
866 "doi": "10.1594/pangaea.777220",
867 "publisher": {"name":"PANGAEA - Data Publisher for Earth & Environmental Science"}
868 }
869 doi = '10.1594/pangaea.777220'
870 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None,
871 publishers_filepath_dc=PUBLISHERS_MAPPING)
872 publisher_name = datacite_processor.get_publisher(doi, item)
873 self.assertEqual(publisher_name, 'PANGAEA - Data Publisher for Earth & Environmental Science [datacite:2]')
875 def test_get_publisher_name_from_prefix(self):
876 # The item has no declared publisher, but the DOI prefix is in the publishers' mapping
877 item = {
878 'publisher': '',
879 'doi': '10.12753/sample_test_doi_with_known_prefix',
880 }
881 doi = '10.12753/sample_test_doi_with_known_prefix'
882 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None,
883 publishers_filepath_dc=PUBLISHERS_MAPPING)
884 publisher_name = datacite_processor.get_publisher(doi, item)
885 self.assertEqual(publisher_name, 'ADLRO [datacite:3]')
887 def test_to_validated_id_list(self):
888 dcp = DataciteProcessing()
889 # CASE_1: is valid
890 inp_1 = {'id': 'doi:10.11578/1367552', 'schema': 'doi'}
891 out_1 = dcp.to_validated_id_list(inp_1)
892 exp_1 = ['doi:10.11578/1367552']
893 self.assertEqual(out_1, exp_1)
894 dcp.storage_manager.delete_storage()
896 dcp = DataciteProcessing()
897 # CASE_2: is invalid
898 inp_2 = {'id': 'doi:10.11578/136755', 'schema': 'doi'}
899 out_2 = dcp.to_validated_id_list(inp_2)
900 exp_2 = []
901 self.assertEqual(out_2, exp_2)
903 dcp = DataciteProcessing()
904 # CASE_3: valid orcid
905 inp_3 = {'id': 'orcid:0000-0002-9286-2630', 'schema': 'orcid'}
906 out_3 = dcp.to_validated_id_list(inp_3)
907 exp_3 = ['orcid:0000-0002-9286-2630']
908 self.assertEqual(out_3, exp_3)
909 dcp.storage_manager.delete_storage()
911 dcp = DataciteProcessing()
912 # CASE_4: invalid doi in self._redis_values_br
913 inp_4 = {'id': 'doi:10.1089/bsp.2008.002', 'schema': 'doi'}
914 dcp._redis_values_br.append(inp_4['id'])
915 out_4 = dcp.to_validated_id_list(inp_4)
916 exp_4 = ['doi:10.1089/bsp.2008.002']
917 self.assertEqual(out_4, exp_4)
918 value = dcp.tmp_doi_m.storage_manager.get_value('doi:10.1089/bsp.2008.002')
919 self.assertEqual(value, True)
920 dcp.storage_manager.delete_storage()
922 def test_to_validated_id_list_redis(self):
923 dcp = DataciteProcessing(testing=True)
924 # CASE_1: is valid
925 inp_1 = {'id': 'doi:10.11578/1367552', 'schema': 'doi'}
926 out_1 = dcp.to_validated_id_list(inp_1)
927 exp_1 = ['doi:10.11578/1367552']
928 self.assertEqual(out_1, exp_1)
929 dcp.storage_manager.delete_storage()
931 dcp = DataciteProcessing(testing=True)
932 # CASE_2: is invalid
933 inp_2 = {'id': 'doi:10.11578/136755', 'schema': 'doi'}
934 out_2 = dcp.to_validated_id_list(inp_2)
935 exp_2 = []
936 self.assertEqual(out_2, exp_2)
938 dcp = DataciteProcessing(testing=True)
939 # CASE_3: valid orcid
940 inp_3 = {'id': 'orcid:0000-0002-9286-2630', 'schema': 'orcid'}
941 out_3 = dcp.to_validated_id_list(inp_3)
942 exp_3 = ['orcid:0000-0002-9286-2630']
943 self.assertEqual(out_3, exp_3)
944 dcp.storage_manager.delete_storage()
946 dcp = DataciteProcessing(testing=True)
947 # CASE_4: invalid doi in self._redis_values_br
948 inp_4 = {'id': 'doi:10.1089/bsp.2008.002', 'schema': 'doi'}
949 dcp._redis_values_br.append(inp_4['id'])
950 out_4 = dcp.to_validated_id_list(inp_4)
951 exp_4 = ['doi:10.1089/bsp.2008.002']
952 self.assertEqual(out_4, exp_4)
953 value = dcp.tmp_doi_m.storage_manager.get_value('doi:10.1089/bsp.2008.002')
954 self.assertEqual(value, True)
955 dcp.storage_manager.delete_storage()
957 def test_find_datacite_orcid(self):
958 dcp = DataciteProcessing(testing=True)
959 inp = ["https://orcid.org/0000-0002-9286-2630"]
960 out = dcp.find_datacite_orcid(inp)
961 exp_out = "orcid:0000-0002-9286-2630"
962 self.assertEqual(out, exp_out)
964 inp_invalid_id = ["https://orcid.org/0000-0002-9286-2631"]
965 out_invalid_id = dcp.find_datacite_orcid(inp_invalid_id)
966 exp_invalid_id = ""
967 self.assertEqual(out_invalid_id, exp_invalid_id)
969 dcp.orcid_m.storage_manager.delete_storage()
971 # set a valid id as invalid in storage, so to check that the api check is
972 # avoided if the info is already in storage
973 dcp = DataciteProcessing(testing=True)
974 dcp.orcid_m.storage_manager.set_value("orcid:0000-0002-9286-2630", False)
975 inp = ["https://orcid.org/0000-0002-9286-2630"]
976 out = dcp.find_datacite_orcid(inp)
977 exp_out = ""
978 self.assertEqual(out, exp_out)
979 dcp.orcid_m.storage_manager.delete_storage()
981 dcp = DataciteProcessing(testing=True)
982 dcp.orcid_m.storage_manager.set_value("orcid:0000-0002-9286-2631", True)
983 inp = ["https://orcid.org/0000-0002-9286-2631"]
984 out = dcp.find_datacite_orcid(inp)
985 exp_out = "orcid:0000-0002-9286-2631"
986 self.assertEqual(out, exp_out)
987 dcp.orcid_m.storage_manager.delete_storage()
989 def test_find_datacite_orcid_api_disabled_not_in_index(self):
990 """Se l'API è OFF e l'ORCID non è nell'indice, non deve essere risolto."""
991 dp = DataciteProcessing(use_orcid_api=False)
992 test_doi = "10.9999/noindex"
993 candidate = "0000-0003-4082-1500" # valido sintatticamente
995 out = dp.find_datacite_orcid([candidate], test_doi)
997 self.assertEqual(out, "")
998 # Non deve essere stato scritto in tmp storage
999 self.assertIsNone(dp.tmp_orcid_m.storage_manager.get_value(f"orcid:{candidate}"))
1001 dp.storage_manager.delete_storage()
1003 def test_find_datacite_orcid_api_disabled_from_index(self):
1004 """Se l'API è OFF ma l'ORCID è nell'indice DOI→ORCID, deve essere risolto e salvato in tmp storage."""
1005 dp = DataciteProcessing(use_orcid_api=False)
1006 test_doi = "10.1234/test"
1007 test_orcid = "0000-0002-1234-5678"
1008 test_name = "Smith, John"
1010 # l'indice DOI→ORCID viene popolato
1011 dp.orcid_index.data = {test_doi: {f"{test_name} [orcid:{test_orcid}]"}}
1013 out = dp.find_datacite_orcid([test_orcid], test_doi)
1015 self.assertEqual(out, f"orcid:{test_orcid}")
1016 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(f"orcid:{test_orcid}"))
1018 dp.storage_manager.delete_storage()
1020 def test_get_venue_container(self):
1021 item={'container': {'type': 'DataRepository', 'title': 'GEM Datasets'}, 'reason': None, 'prefix': '10.13117', 'citationsOverTime': [], 'registered': '2014-03-24T10:51:17Z', 'language': 'en', 'source': None, 'suffix': 'gem.dataset.ghea-v1.0', 'relatedItems': [], 'descriptions': [{'descriptionType': 'SeriesInformation', 'description': 'GEM Datasets'}, {'descriptionType': 'SeriesInformation', 'description': 'GEM Catalogues'}], 'sizes': ['1011 records'], 'versionOfCount': 0, 'relatedIdentifiers': [{'relationType': 'IsIdenticalTo', 'relatedIdentifier': 'http://emidius.eu/GEH/', 'relatedIdentifierType': 'URL'}, {'relationType': 'IsDocumentedBy', 'relatedIdentifier': '10.13117/gem.gegd.tr2013.01', 'relatedIdentifierType': 'DOI'}, {'relationType': 'Compiles', 'relatedIdentifier': '10.13117/gem.dataset.ghec-v1.0', 'relatedIdentifierType': 'DOI'}, {'relationType': 'References', 'relatedIdentifier': '10.6092/ingv.it-ahead', 'relatedIdentifierType': 'DOI'}], 'created': '2014-03-24T10:51:17Z', 'dates': [{'date': '1008-04-27/1903-12-28', 'dateType': 'Collected'}, {'date': '2013-06-01', 'dateType': 'Available'}, {'date': '2010-11-01/2013-03-31', 'dateType': 'Created'}, {'date': '2013', 'dateType': 'Issued'}], 'published': '2013', 'geoLocations': [], 'partCount': 0, 'publicationYear': 2013, 'partOfCount': 0, 'updated': '2020-07-26T16:07:36Z', 'formats': ['text/html', 'image/svg+xml', 'application/pdf'], 'fundingReferences': [], 'creators': [{'nameType': 'Personal', 'affiliation': [], 'givenName': 'Paola', 'familyName': 'Albini', 'name': 'Albini, Paola', 'nameIdentifiers': [{'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 'nameIdentifier': 'https://orcid.org/0000-0003-4149-9760'}]}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Roger M.W.', 'familyName': 'Musson', 'name': 'Musson, Roger M.W.', 'nameIdentifiers': [{'nameIdentifierScheme': 'ISNI', 'nameIdentifier': '0000 0000 5424 2727'}]}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 
'Antonio A.', 'familyName': 'Gomez Capera', 'name': 'Gomez Capera, Antonio A.', 'nameIdentifiers': []}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Mario', 'familyName': 'Locati', 'name': 'Locati, Mario', 'nameIdentifiers': [{'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 'nameIdentifier': 'https://orcid.org/0000-0003-2185-3267'}]}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Andrea', 'familyName': 'Rovida', 'name': 'Rovida, Andrea', 'nameIdentifiers': [{'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 'nameIdentifier': 'https://orcid.org/0000-0001-6147-9981'}]}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Massimiliano', 'familyName': 'Stucchi', 'name': 'Stucchi, Massimiliano', 'nameIdentifiers': [{'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 'nameIdentifier': 'https://orcid.org/0000-0002-5870-1542'}]}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Daniele', 'familyName': 'Viganò', 'name': 'Viganò, Daniele', 'nameIdentifiers': [{'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 'nameIdentifier': 'https://orcid.org/0000-0003-2713-8387'}]}], 'schemaVersion': 'http://datacite.org/schema/kernel-3', 'versionCount': 0, 'metadataVersion': 3, 'citationCount': 0, 'types': {'schemaOrg': 'Dataset', 'resourceTypeGeneral': 'Dataset', 'citeproc': 'dataset', 'bibtex': 'misc', 'ris': 'DATA', 'resourceType': 'Dataset/Earthquakes'}, 'isActive': True, 'viewsOverTime': [], 'identifiers': [], 'subjects': [{'subject': 'Earthquake history'}, {'subject': 'Historical seismology'}, {'subject': 'Catalogue'}, {'subject': 'Archive'}, {'subject': 'Macroseismic data'}, {'subject': 'GEM'}], 'titles': [{'title': 'GEM Global Historical Earthquake Archive'}], 'url': 'https://www.emidius.eu/GEH/', 'downloadCount': 0, 'rightsList': [{'rights': 'Copyright © 2013 GEM Foundation, Albini, P., R.M.W. Musson, A.A. Gomez Capera, M. Locati, A. Rovida, M. Stucchi, and D. 
Viganò'}, {'rightsUri': 'http://creativecommons.org/licenses/by-nc-sa/4.0', 'rights': 'Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International'}], 'contentUrl': None, 'contributors': [{'affiliation': [], 'name': 'Istituto Nazionale Di Geofisica E Vulcanologia (INGV)', 'nameIdentifiers': [], 'contributorType': 'DataCollector'}, {'affiliation': [], 'name': 'British Geological Survey (BGS)', 'nameIdentifiers': [], 'contributorType': 'DataCollector'}], 'referenceCount': 1, 'viewCount': 0, 'downloadsOverTime': [], 'doi': '10.13117/gem.dataset.ghea-v1.0', 'publisher': {'name': 'GEM Foundation, Pavia, Italy'}, 'version': '1.0', 'state': 'findable', 'alternateIdentifiers': []}
1022 row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '',
1023 'type': 'dataset', 'publisher': '', 'editor': ''}
1024 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None,
1025 publishers_filepath_dc=PUBLISHERS_MAPPING)
1026 venue_name = datacite_processor.get_venue_name(item, row)
1027 self.assertEqual(venue_name, 'gem datasets')
1029 def test_get_venue_name_no_container(self):
1030 item = {
1031 "container": {},
1032 "relatedIdentifiers": [
1033 {
1034 "relationType": "IsSupplementTo",
1035 "resourceTypeGeneral": "Text",
1036 "relatedIdentifier": "10.4230/LIPIcs.ECOOP.2023.39",
1037 "relatedIdentifierType": "DOI"
1038 },
1039 {
1040 "relationType": "IsPartOf",
1041 "relatedIdentifier": "2509-8195",
1042 "relatedIdentifierType": "ISSN"
1043 },
1044 ]
1045 }
1046 row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '',
1047 'type': 'journal article', 'publisher': '', 'editor': ''}
1048 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None,
1049 publishers_filepath_dc=PUBLISHERS_MAPPING)
1050 venue_name = datacite_processor.get_venue_name(item, row)
1051 self.assertEqual(venue_name, '[issn:2509-8195]')
1053 def test_get_venue_name_with_ISSN(self):
1054 item = {
1055 "container": {"type": "Series", "identifier": "2509-8195", "identifierType": "ISSN", "title": "DARTS",
1056 "volume": "Vol. 9", "firstPage": "pages 25:1", "lastPage": "25:2"}
1057 }
1058 row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '',
1059 'type': 'journal article', 'publisher': '', 'editor': ''}
1060 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None,
1061 publishers_filepath_dc=PUBLISHERS_MAPPING)
1062 venue_name = datacite_processor.get_venue_name(item, row)
1063 self.assertEqual(venue_name,
1064 "darts [issn:2509-8195]")
1065 # ISSN with wrong number of digits
1066 item1 = {
1067 "container": {"type": "Journal", "issue": "18", "title": "Geophysical Research Letters", "volume": "41",
1068 "lastPage": "6451", "firstPage": "6443", "identifier": "00948276", "identifierType": "ISSN"}
1069 }
1070 row1 = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '',
1071 'type': 'journal article', 'publisher': '', 'editor': ''}
1072 venue_name1 = datacite_processor.get_venue_name(item1, row1)
1073 self.assertEqual(venue_name1,
1074 "geophysical research letters [issn:0094-8276]")
1076 def test_get_pages(self):
1077 item = {
1078 "container": {"type": "Journal", "issue": "7", "title": "Global Biogeochemical Cycles", "volume": "29",
1079 "lastPage": "1013", "firstPage": "994", "identifier": "08866236",
1080 "identifierType": "ISSN"}
1081 }
1082 datacite_processor = DataciteProcessing(orcid_index=None,
1083 publishers_filepath_dc=PUBLISHERS_MAPPING)
1084 pages = datacite_processor.get_datacite_pages(item)
1085 self.assertEqual(pages, '994-1013')
1087 def test_get_pages_right_letter(self):
1088 item = {
1089 "container": {"type": "Journal", "issue": "4", "title": "Ecosphere", "volume": "10",
1090 "firstPage": "e02701", "identifier": "2150-8925", "identifierType": "ISSN"}
1091 }
1092 datacite_processor = DataciteProcessing(orcid_index=None,
1093 publishers_filepath_dc=PUBLISHERS_MAPPING)
1094 pages = datacite_processor.get_datacite_pages(item)
1095 self.assertEqual(pages, 'e02701-e02701')
1097 def test_get_pages_wrong_letter(self):
1098 item = {
1099 "relatedIdentifiers": [
1100 {"relationType": "IsPartOf",
1101 "relatedIdentifier": "0094-2405",
1102 "relatedIdentifierType": "ISSN",
1103 "firstPage": "583b",
1104 "lastPage": "584"},
1105 {"relationType": "References",
1106 "relatedIdentifier": "10.1016/j.ecl.2014.08.007",
1107 "relatedIdentifierType": "DOI"}
1108 ]
1109 }
1110 datacite_processor = DataciteProcessing(orcid_index=None,
1111 publishers_filepath_dc=PUBLISHERS_MAPPING)
1112 pages = datacite_processor.get_datacite_pages(item)
1113 self.assertEqual(pages, '583-584')
1115 def test_get_pages_roman_letters(self):
1116 item = {
1117 "relatedIdentifiers": [
1118 {"relationType": "IsPartOf",
1119 "relatedIdentifier": "0094-2405",
1120 "relatedIdentifierType": "ISSN",
1121 "firstPage": "iv",
1122 "lastPage": "l"},
1123 {"relationType": "References",
1124 "relatedIdentifier": "10.1016/j.ecl.2014.08.007",
1125 "relatedIdentifierType": "DOI"}
1126 ]
1127 }
1128 datacite_processor = DataciteProcessing(orcid_index=None,
1129 publishers_filepath_dc=PUBLISHERS_MAPPING)
1130 pages = datacite_processor.get_datacite_pages(item)
1131 self.assertEqual(pages, 'iv-l')
1133 def test_get_pages_non_roman_letters(self):
1134 item = {
1135 "relatedIdentifiers": [
1136 {"relationType": "IsPartOf",
1137 "relatedIdentifier": "0094-2405",
1138 "relatedIdentifierType": "ISSN",
1139 "firstPage": "kj",
1140 "lastPage": "hh"},
1141 {"relationType": "References",
1142 "relatedIdentifier": "10.1016/j.ecl.2014.08.007",
1143 "relatedIdentifierType": "DOI"}
1144 ]
1145 }
1146 datacite_processor = DataciteProcessing(orcid_index=None,
1147 publishers_filepath_dc=PUBLISHERS_MAPPING)
1148 pages = datacite_processor.get_datacite_pages(item)
1149 self.assertEqual(pages, '')
1151 def test_get_pages_with_strings_no_venue_id(self):
1152 item = {'container': {
1153 'firstPage': '13. Studi umanistici. Serie Antichistica',
1154 'type': 'Series',
1155 'title': 'Collana Studi e Ricerche'
1156 }}
1157 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None,
1158 publishers_filepath_dc=PUBLISHERS_MAPPING)
1159 pages = datacite_processor.get_datacite_pages(item)
1160 self.assertEqual(pages, '')
    def test_venue_id_cont_and_rel_id(self):
        """The same ISSN appears both in 'container' and in an 'IsPartOf'
        related identifier: csv_creator must output the venue once, with the
        unhyphenated identifier ('2169897X') normalised to the standard
        hyphenated ISSN form. The declared publisher is the '(:unav)'
        placeholder, so the name is resolved from the DOI prefix through the
        publishers' mapping ('Wiley [datacite:1]')."""
        items = {'data': [
            {
                "id": "10.1002/2014jd022411",
                "type": "dois",
                "attributes": {
                    "doi": "10.1002/2014jd022411",
                    "identifiers": [],
                    # Title with stray newlines/whitespace that the processor
                    # must collapse into single spaces
                    "titles": [{
                        "title": "\n Assessing the magnitude of CO\n \n flux uncertainty in atmospheric CO\n \n records using products from NASA's Carbon Monitoring Flux Pilot Project\n "}],
                    "publisher": {
                        "name":"(:unav)"},
                    "container": {"type": "Journal", "issue": "2",
                                  "title": "Journal of Geophysical Research: Atmospheres", "volume": "120",
                                  "lastPage": "765", "firstPage": "734", "identifier": "2169897X",
                                  "identifierType": "ISSN"},
                    "types": {"ris": "JOUR", "bibtex": "article", "citeproc": "article-journal",
                              "schemaOrg": "ScholarlyArticle", "resourceType": "JournalArticle",
                              "resourceTypeGeneral": "Text"},
                    "relatedIdentifiers": [{"relationType": "IsPartOf", "relatedIdentifier": "2169897X",
                                            "resourceTypeGeneral": "Collection", "relatedIdentifierType": "ISSN"}]
                }
            }
        ]}
        datacite_processor = DataciteProcessing(orcid_index=None,
                                                publishers_filepath_dc=PUBLISHERS_MAPPING)
        output = list()
        for item in items['data']:
            output.append(datacite_processor.csv_creator(item))
        expected_output = [{'id': 'doi:10.1002/2014jd022411',
                            'title': "Assessing the magnitude of CO flux uncertainty in atmospheric CO records using products from NASA's Carbon Monitoring Flux Pilot Project",
                            'author': '', 'pub_date': '',
                            'venue': 'journal of geophysical research: atmospheres [issn:2169-897X]',
                            'volume': '120', 'issue': '2', 'page': '734-765', 'type': 'journal article',
                            'publisher': 'Wiley [datacite:1]', 'editor': ''}]
        self.assertEqual(output, expected_output)
    def test_venue_id_cont_and_rel_id_no_types(self):
        """Same entity as test_venue_id_cont_and_rel_id but without 'types':
        lacking a publication type, the processor cannot assert that the
        container may carry an ISSN, so the venue is emitted without an
        identifier and the row 'type' field stays empty."""
        # the absence of publication types specified excludes the possibility
        # to assert whether the container can have an ISSN or not
        items = {'data': [
            {
                "id": "10.1002/2014jd022411",
                "type": "dois",
                "attributes": {
                    "doi": "10.1002/2014jd022411",
                    "identifiers": [],
                    # Title with stray newlines/whitespace, collapsed by the processor
                    "titles": [{
                        "title": "\n Assessing the magnitude of CO\n \n flux uncertainty in atmospheric CO\n \n records using products from NASA's Carbon Monitoring Flux Pilot Project\n "}],
                    "publisher": {"name":"(:unav)"},
                    "container": {"type": "Journal", "issue": "2",
                                  "title": "Journal of Geophysical Research: Atmospheres", "volume": "120",
                                  "lastPage": "765", "firstPage": "734", "identifier": "2169897X",
                                  "identifierType": "ISSN"},
                    "relatedIdentifiers": [{"relationType": "IsPartOf", "relatedIdentifier": "2169897X",
                                            "resourceTypeGeneral": "Collection", "relatedIdentifierType": "ISSN"}]
                }
            }
        ]}
        datacite_processor = DataciteProcessing(orcid_index=None,
                                                publishers_filepath_dc=PUBLISHERS_MAPPING)
        output = list()
        for item in items['data']:
            output.append(datacite_processor.csv_creator(item))
        expected_output = [{'id': 'doi:10.1002/2014jd022411',
                            'title': "Assessing the magnitude of CO flux uncertainty in atmospheric CO records using products from NASA's Carbon Monitoring Flux Pilot Project",
                            'author': '', 'pub_date': '', 'venue': 'journal of geophysical research: atmospheres',
                            'volume': '120', 'issue': '2', 'page': '734-765', 'type': '',
                            'publisher': 'Wiley [datacite:1]', 'editor': ''}]
        self.assertEqual(output, expected_output)
    def test_get_agents_strings_list_overlapping_surnames(self):
        """One author's surname ('Lyle') is contained in another's
        ('Olivarez Lyle'): the ORCID from the DOI-ORCID index must be
        attached only to the exact-match author, not to the overlapping one."""
        # The surname of one author is included in the surname of another.
        entity_attr_dict = {
            "creators": [
                {"name": "Olivarez Lyle, Annette",
                 "givenName": "Annette",
                 "familyName": "Olivarez Lyle",
                 "affiliation": [],
                 "nameIdentifiers": []
                 },
                {"name": "Lyle, Mitchell W",
                 "givenName": "Mitchell W",
                 "familyName": "Lyle",
                 "nameIdentifiers": [
                     {"schemeUri": "https://orcid.org",
                      "nameIdentifier": "https://orcid.org/0000-0002-0861-0511",
                      "nameIdentifierScheme": "ORCID"}
                 ],
                 "affiliation": []
                 }
            ],
            "contributors": []
        }

        datacite_processor = DataciteProcessing(None, None)
        authors_list = datacite_processor.add_authors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.1594/pangaea.777220")
        editors_list = datacite_processor.add_editors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.1594/pangaea.777220")
        agents_list = authors_list + editors_list
        # Hand-build a DOI-ORCID index containing only the 'Lyle, Mitchell W'
        # entry, and install it on the processor before resolving agents
        csv_manager = CSVManager()
        csv_manager.data = {'10.1594/pangaea.777220': {'Lyle, Mitchell W [0000-0002-0861-0511]'}}
        datacite_processor.orcid_index = csv_manager
        authors_strings_list, editors_strings_list = datacite_processor.get_agents_strings_list(
            '10.1594/pangaea.777220', agents_list)

        expected_authors_list = ['Olivarez Lyle, Annette',
                                 'Lyle, Mitchell W [orcid:0000-0002-0861-0511]']
        expected_editors_list = []
        self.assertEqual((authors_strings_list, editors_strings_list), (expected_authors_list, expected_editors_list))
    def test_get_agents_strings_list(self):
        """Creators become author strings in input order ('Family, Given',
        with '[orcid:...]' appended when an ORCID name identifier is
        present); contributors with contributorType 'Editor' become editor
        strings."""
        entity_attr_dict = {
            "doi": "10.1002/2014jd022411",
            "creators": [
                {"name": "Ott, Lesley E.", "nameType": "Personal", "givenName": "Lesley E.", "familyName": "Ott",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Pawson, Steven", "nameType": "Personal", "givenName": "Steven", "familyName": "Pawson",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Collatz, George J.", "nameType": "Personal", "givenName": "George J.",
                 "familyName": "Collatz", "affiliation": [], "nameIdentifiers": []},
                {"name": "Gregg, Watson W.", "nameType": "Personal", "givenName": "Watson W.", "familyName": "Gregg",
                 "affiliation": [], "nameIdentifiers": []},
                # Only creator carrying an ORCID name identifier
                {"name": "Menemenlis, Dimitris", "nameType": "Personal", "givenName": "Dimitris",
                 "familyName": "Menemenlis", "affiliation": [], "nameIdentifiers": [
                    {"schemeUri": "https://orcid.org", "nameIdentifier": "https://orcid.org/0000-0001-9940-8409",
                     "nameIdentifierScheme": "ORCID"}]},
                {"name": "Brix, Holger", "nameType": "Personal", "givenName": "Holger", "familyName": "Brix",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Rousseaux, Cecile S.", "nameType": "Personal", "givenName": "Cecile S.",
                 "familyName": "Rousseaux", "affiliation": [], "nameIdentifiers": []},
                {"name": "Bowman, Kevin W.", "nameType": "Personal", "givenName": "Kevin W.", "familyName": "Bowman",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Liu, Junjie", "nameType": "Personal", "givenName": "Junjie", "familyName": "Liu",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Eldering, Annmarie", "nameType": "Personal", "givenName": "Annmarie",
                 "familyName": "Eldering", "affiliation": [], "nameIdentifiers": []},
                {"name": "Gunson, Michael R.", "nameType": "Personal", "givenName": "Michael R.",
                 "familyName": "Gunson", "affiliation": [], "nameIdentifiers": []},
                {"name": "Kawa, Stephan R.", "nameType": "Personal", "givenName": "Stephan R.", "familyName": "Kawa",
                 "affiliation": [], "nameIdentifiers": []}],
            # A single 'Editor' contributor whose name itself contains a comma
            "contributors": [{
                'name': 'AKMB-News: Informationen Zu Kunst, Museum Und Bibliothek',
                'nameType': 'Personal',
                'givenName': 'Museum Und Bibliothek',
                'familyName': 'AKMB-News: Informationen Zu Kunst',
                'affiliation': [],
                'contributorType': 'Editor',
                'nameIdentifiers': []}]}

        datacite_processor = DataciteProcessing()
        authors_list = datacite_processor.add_authors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.1002/2014jd022411")
        editors_list = datacite_processor.add_editors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.1002/2014jd022411")
        agents_list = authors_list + editors_list
        authors_strings_list, editors_strings_list = datacite_processor.get_agents_strings_list('10.1002/2014jd022411',
                                                                                                agents_list)
        expected_authors_list = ['Ott, Lesley E.', 'Pawson, Steven', 'Collatz, George J.', 'Gregg, Watson W.',
                                 'Menemenlis, Dimitris [orcid:0000-0001-9940-8409]', 'Brix, Holger',
                                 'Rousseaux, Cecile S.', 'Bowman, Kevin W.', 'Liu, Junjie', 'Eldering, Annmarie',
                                 'Gunson, Michael R.', 'Kawa, Stephan R.']
        expected_editors_list = ['AKMB-News: Informationen Zu Kunst, Museum Und Bibliothek']

        self.assertEqual(authors_strings_list, expected_authors_list)
        self.assertEqual(editors_strings_list, expected_editors_list)
1330 def test_get_agents_strings_list_same_family(self):
1331 # Two authors have the same family name and the same given name initials
1332 entity_attr_dict = {
1333 "creators": [
1334 {"name": "Schulz, Heide N",
1335 "nameType": "Personal",
1336 "givenName": "Heide N",
1337 "familyName": "Schulz",
1338 "nameIdentifiers":
1339 [
1340 {"schemeUri": "https://orcid.org", "nameIdentifier": "https://orcid.org/0000-0003-1445-0291",
1341 "nameIdentifierScheme": "ORCID"}
1342 ],
1343 "affiliation": []},
1344 {"name": "Schulz, Horst D",
1345 "nameType": "Personal",
1346 "givenName": "Horst D",
1347 "familyName": "Schulz",
1348 "affiliation": [],
1349 "nameIdentifiers": []}],
1350 "contributors": []
1351 }
1352 datacite_processor = DataciteProcessing()
1353 authors_list = datacite_processor.add_authors_to_agent_list(entity_attr_dict, [],
1354 doi="doi:10.1594/pangaea.231378")
1355 editors_list = datacite_processor.add_editors_to_agent_list(entity_attr_dict, [],
1356 doi="doi:10.1594/pangaea.231378")
1357 agents_list = authors_list + editors_list
1358 authors_strings_list, _ = datacite_processor.get_agents_strings_list('10.1594/pangaea.231378', agents_list)
1359 expected_authors_list = ['Schulz, Heide N [orcid:0000-0003-1445-0291]', 'Schulz, Horst D']
1360 self.assertEqual(authors_strings_list, expected_authors_list)
1362 def test_get_agents_strings_list_homonyms(self):
1363 # Two authors have the same family name and the same given name
1364 entity_attr_dict = {
1365 "creators":
1366 [
1367 {"name": "Viorel, Cojocaru",
1368 "nameType": "Personal",
1369 "givenName": "Cojocaru",
1370 "familyName": "Viorel",
1371 "affiliation": [],
1372 "nameIdentifiers": []},
1373 {"name": "Viorel, Cojocaru",
1374 "nameType": "Personal",
1375 "givenName": "Cojocaru",
1376 "familyName": "Viorel",
1377 "affiliation": [],
1378 "nameIdentifiers": []
1379 },
1380 {"name": "Ciprian, Panait",
1381 "nameType": "Personal",
1382 "givenName": "Panait",
1383 "familyName": "Ciprian",
1384 "affiliation": [],
1385 "nameIdentifiers": []}
1386 ],
1387 "contributors": []
1388 }
1389 datacite_processor = DataciteProcessing(None, None)
1390 authors_list = datacite_processor.add_authors_to_agent_list(entity_attr_dict, [],
1391 doi="doi:10.12753/2066-026x-14-246")
1392 editors_list = datacite_processor.add_editors_to_agent_list(entity_attr_dict, [],
1393 doi="doi:10.12753/2066-026x-14-246")
1394 agents_list = authors_list + editors_list
1395 authors_strings_list, _ = datacite_processor.get_agents_strings_list('10.12753/2066-026x-14-246', agents_list)
1396 expected_authors_list = ['Viorel, Cojocaru', 'Viorel, Cojocaru', 'Ciprian, Panait']
1397 self.assertEqual(authors_strings_list, expected_authors_list)
1399 def test_get_agents_strings_list_inverted_names(self):
1400 # One author with an ORCID has as a name the surname of another
1401 entity_attr_dict = {
1402 "creators":
1403 [
1404 {"name": "Viorel, Cojocaru",
1405 "nameType": "Personal",
1406 "givenName": "Cojocaru",
1407 "familyName": "Viorel",
1408 "affiliation": [],
1409 "nameIdentifiers": []},
1411 {"name": "Cojocaru, John",
1412 "nameType": "Personal",
1413 "givenName": "John",
1414 "familyName": "Cojocaru",
1415 "affiliation": [],
1416 "nameIdentifiers": []
1417 },
1418 {"name": "Ciprian, Panait",
1419 "nameType": "Personal",
1420 "givenName": "Panait",
1421 "familyName": "Ciprian",
1422 "affiliation": [],
1423 "nameIdentifiers": []}
1424 ],
1425 "contributors": []
1426 }
1427 # Note : 'Cojocaru, John' is not one of the authors of the item, the name was made up for testing purposes
1428 datacite_processor = DataciteProcessing(None, None)
1429 authors_list = datacite_processor.add_authors_to_agent_list(entity_attr_dict, [],
1430 doi="doi:10.12753/2066-026x-14-246")
1431 editors_list = datacite_processor.add_editors_to_agent_list(entity_attr_dict, [],
1432 doi="doi:10.12753/2066-026x-14-246")
1433 agents_list = authors_list + editors_list
1434 authors_strings_list, _ = datacite_processor.get_agents_strings_list('10.12753/2066-026x-14-246', agents_list)
1435 expected_authors_list = ['Viorel, Cojocaru', 'Cojocaru, John', 'Ciprian, Panait']
1436 self.assertEqual(authors_strings_list, expected_authors_list)
1438 def test_get_agents_strings_list_api_disabled_no_index(self):
1439 """Con API OFF e indice vuoto, gli ORCID presenti come nameIdentifier NON devono comparire in output."""
1440 entity_attr_dict = {
1441 "creators": [
1442 {
1443 "name": "Doe, Jane",
1444 "nameType": "Personal",
1445 "givenName": "Jane",
1446 "familyName": "Doe",
1447 "nameIdentifiers": [
1448 {
1449 "schemeUri": "https://orcid.org",
1450 "nameIdentifier": "https://orcid.org/0000-0003-4082-1500",
1451 "nameIdentifierScheme": "ORCID",
1452 }
1453 ],
1454 }
1455 ],
1456 "contributors": [],
1457 }
1459 dp = DataciteProcessing(use_orcid_api=False) # indice vuoto, nessuna API
1460 authors_list = dp.add_authors_to_agent_list(entity_attr_dict, [], doi="doi:10.9999/noindex")
1461 editors_list = dp.add_editors_to_agent_list(entity_attr_dict, [], doi="doi:10.9999/noindex")
1462 authors_strings, editors_strings = dp.get_agents_strings_list("10.9999/noindex", authors_list + editors_list)
1464 # L'ORCID NON deve essere aggiunto tra [] perché non c'è indice e l'API è OFF
1465 self.assertEqual(authors_strings, ["Doe, Jane"])
1466 self.assertEqual(editors_strings, [])
1468 dp.storage_manager.delete_storage()
1470 def test_find_datacite_orcid_with_index(self):
1471 """Test ORCID validation using ORCID index before API validation"""
1472 # Setup
1473 test_doi = "10.1234/test123"
1474 test_orcid = "0000-0002-1234-5678"
1475 test_name = "Smith, John"
1477 # Create DataciteProcessing instance with ORCID index
1478 dp = DataciteProcessing()
1479 dp.orcid_index.data = {test_doi: {f"{test_name} [orcid:{test_orcid}]"}}
1481 # Test Case 1: ORCID found in index
1482 inp_1 = [test_orcid]
1483 out_1 = dp.find_datacite_orcid([test_orcid], test_doi)
1484 exp_1 = f"orcid:{test_orcid}"
1485 self.assertEqual(out_1, exp_1)
1486 # Verify it was added to temporary storage
1487 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(f"orcid:{test_orcid}"))
1489 # Test Case 2: ORCID not in index but valid via API
1490 inp_2 = ["0000-0003-4082-1500"]
1491 out_2 = dp.find_datacite_orcid(["0000-0003-4082-1500"], test_doi)
1492 exp_2 = "orcid:0000-0003-4082-1500"
1493 self.assertEqual(out_2, exp_2)
1495 # Test Case 3: ORCID not in index and invalid
1496 inp_3 = ["0000-0000-0000-0000"]
1497 out_3 = dp.find_datacite_orcid(["0000-0000-0000-0000"], test_doi)
1498 exp_3 = ""
1499 self.assertEqual(out_3, exp_3)
1501 # Test Case 4: Valid ORCID but no DOI provided (retrocompatibilità)
1502 inp_4 = [test_orcid]
1503 out_4 = dp.find_datacite_orcid(inp_4) # No DOI
1504 exp_4 = f"orcid:{test_orcid}" # Should still validate via API
1505 self.assertEqual(out_4, exp_4)
1507 # Test Case 5: Multiple ORCIDs, first one valid
1508 inp_5 = [test_orcid, "0000-0000-0000-0000"]
1509 out_5 = dp.find_datacite_orcid([test_orcid, "0000-0000-0000-0000"], test_doi)
1510 exp_5 = f"orcid:{test_orcid}"
1511 self.assertEqual(out_5, exp_5)
1513 # Cleanup
1514 dp.storage_manager.delete_storage()
1516 def test_find_datacite_orcid_api_enabled_invalid_in_storage(self):
1517 """API ON + ORCID marcato come invalid in storage: rifiuta subito (niente indice/API)."""
1518 dp = DataciteProcessing(use_orcid_api=True, testing=True)
1519 oid = "orcid:0000-0002-9286-2630"
1520 dp.orcid_m.storage_manager.set_value(oid, False)
1521 out = dp.find_datacite_orcid([oid.split(":")[1]], "10.9999/anything")
1522 self.assertEqual(out, "")
1523 # nessuna semina in tmp
1524 self.assertIsNone(dp.tmp_orcid_m.storage_manager.get_value(oid))
1525 dp.orcid_m.storage_manager.delete_storage()
1527 def test_find_datacite_orcid_api_enabled_from_redis_snapshot(self):
1528 """API ON + storage/indice vuoti, ma ORCID presente nello snapshot Redis RA: accetta senza rete."""
1529 dp = DataciteProcessing(use_orcid_api=True)
1530 oid = "orcid:0000-0003-4082-1500"
1531 dp.update_redis_values(br=[], ra=[oid]) # simula snapshot
1532 out = dp.find_datacite_orcid([oid.split(":")[1]], "10.9999/noindex")
1533 self.assertEqual(out, oid)
1534 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(oid))
1535 dp.storage_manager.delete_storage()
1537 def test_find_datacite_orcid_api_disabled_from_redis_snapshot(self):
1538 """API OFF + storage/indice vuoti, ORCID nello snapshot Redis RA: accetta offline."""
1539 dp = DataciteProcessing(use_orcid_api=False)
1540 oid = "orcid:0000-0003-4082-1500"
1541 dp.update_redis_values(br=[], ra=[oid])
1542 out = dp.find_datacite_orcid([oid.split(":")[1]], "10.9999/noindex")
1543 self.assertEqual(out, oid)
1544 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(oid))
1545 dp.storage_manager.delete_storage()
1547 def test_find_datacite_orcid_api_disabled_in_storage(self):
1548 """API OFF + ORCID già valido nello storage persistente: deve essere accettato."""
1549 dp = DataciteProcessing(use_orcid_api=False, testing=True)
1550 oid = "orcid:0000-0003-4082-1500"
1551 dp.orcid_m.storage_manager.set_value(oid, True)
1552 out = dp.find_datacite_orcid([oid.split(":")[1]], "10.9999/any")
1553 self.assertEqual(out, oid)
1554 dp.orcid_m.storage_manager.delete_storage()
1556 def test_find_datacite_orcid_index_with_normalized_doi(self):
1557 """La lookup nell'indice deve funzionare anche se DOI è passato senza prefisso o viceversa."""
1558 dp = DataciteProcessing()
1559 doi_no_prefix = "10.1234/test-norm"
1560 doi_with_prefix = f"doi:{doi_no_prefix}"
1561 orcid = "0000-0002-1234-5678"
1562 dp.orcid_index.add_value(doi_with_prefix, f"Rossi, Mario [orcid:{orcid}]")
1563 # DOI senza prefisso: deve matchare comunque
1564 out = dp.find_datacite_orcid([orcid], doi_no_prefix)
1565 self.assertEqual(out, f"orcid:{orcid}")
1566 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(f"orcid:{orcid}"))
1567 dp.storage_manager.delete_storage()
    # PUBLISHER IDENTIFIERS
1570 def test_find_datacite_publisher_id_api_enabled_no_value_in_storage(self):
1571 """API ON + id non salvato nello storage."""
1572 dp = DataciteProcessing(use_ror_api=True, use_wikidata_api=True, use_viaf_api=True)
1573 publisher1 = {
1574 'publisherIdentifierScheme': 'ROR',
1575 'schemeUri': 'https://ror.org',
1576 'name': 'DataCite',
1577 'publisherIdentifier': 'https://ror.org/04wxnsj81'
1578 }
1579 publisher2 = {
1580 'publisherIdentifierScheme': 'VIAF',
1581 'schemeUri': 'https://viaf.org/',
1582 'name': 'Deutsches archäologisches Institut',
1583 'publisherIdentifier': 'http://viaf.org/viaf/148463773'
1584 }
1585 publisher3 = {
1586 'publisherIdentifierScheme': 'Wikidata',
1587 'schemeUri': 'https://www.wikidata.org/wiki/',
1588 'name': 'University of Tokyo',
1589 'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
1590 }
1592 id1 = "ror:04wxnsj81"
1593 id2 = "viaf:148463773"
1594 id3 = "wikidata:Q7842"
1596 out1 = dp.get_publisher_id(publisher1)
1597 out2 = dp.get_publisher_id(publisher2)
1598 out3 = dp.get_publisher_id(publisher3)
1600 self.assertEqual(out1, id1)
1601 self.assertEqual(out2, id2)
1602 self.assertEqual(out3, id3)
1604 dp.storage_manager.delete_storage()
1606 def test_get_pubblisher_api_disabled_no_index(self):
1607 """Con API OFF e indice vuoto, i publisher id presenti NON devono comparire in output."""
1609 publisher1 = {
1610 'publisherIdentifierScheme': 'ROR',
1611 'schemeUri': 'https://ror.org',
1612 'name': 'DataCite',
1613 'publisherIdentifier': 'https://ror.org/04wxnsj81'
1614 }
1615 publisher2 = {
1616 'publisherIdentifierScheme': 'VIAF',
1617 'schemeUri': 'https://viaf.org/',
1618 'name': 'Deutsches archäologisches Institut',
1619 'publisherIdentifier': 'http://viaf.org/viaf/148463773'
1620 }
1621 publisher3 = {
1622 'publisherIdentifierScheme': 'Wikidata',
1623 'schemeUri': 'https://www.wikidata.org/wiki/',
1624 'name': 'University of Tokyo',
1625 'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
1626 }
1629 dp = DataciteProcessing(use_ror_api=False, use_viaf_api=False, use_wikidata_api=False) # indice vuoto, nessuna API
1630 publisher_row1 = dp.get_publisher('10.60804/bpmz-jb79', publisher1)
1631 publisher_row2 = dp.get_publisher('10.60804/bpmz-jb79', publisher2)
1632 publisher_row3 = dp.get_publisher('10.60804/bpmz-jb79', publisher3)
1634 # L'id NON deve essere aggiunto tra [] perché non c'è indice e l'API è OFF
1635 self.assertEqual(publisher_row1, "DataCite")
1636 self.assertEqual(publisher_row2, "Deutsches archäologisches Institut")
1637 self.assertEqual(publisher_row3, "University of Tokyo")
1639 dp.storage_manager.delete_storage()
1641 def test_find_datacite_publisher_id_api_enabled_invalid_in_storage(self):
1642 """API ON + id marcato come invalid in storage: rifiuta subito (niente indice/API)."""
1643 dp = DataciteProcessing(use_ror_api=True, use_wikidata_api=True, use_viaf_api=True)
1644 publisher1 = {
1645 'publisherIdentifierScheme': 'ROR',
1646 'schemeUri': 'https://ror.org',
1647 'name': 'DataCite',
1648 'publisherIdentifier': 'https://ror.org/04wxnsj81'
1649 }
1650 publisher2 = {
1651 'publisherIdentifierScheme': 'VIAF',
1652 'schemeUri': 'https://viaf.org/',
1653 'name': 'Deutsches archäologisches Institut',
1654 'publisherIdentifier': 'http://viaf.org/viaf/148463773'
1655 }
1656 publisher3 = {
1657 'publisherIdentifierScheme': 'Wikidata',
1658 'schemeUri': 'https://www.wikidata.org/wiki/',
1659 'name': 'University of Tokyo',
1660 'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
1661 }
1663 id1 = "ror:04wxnsj81"
1664 id2 = "viaf:148463773"
1665 id3 = "wikidata:Q7842"
1667 dp.storage_manager.set_value(id1, False)
1668 dp.storage_manager.set_value(id2, False)
1669 dp.storage_manager.set_value(id3, False)
1671 out1 = dp.get_publisher_id(publisher1)
1672 out2 = dp.get_publisher_id(publisher2)
1673 out3 = dp.get_publisher_id(publisher3)
1675 self.assertEqual(out1, "")
1676 self.assertEqual(out2, "")
1677 self.assertEqual(out3, "")
1679 # nessuna semina in tmp
1680 self.assertIsNone(dp.tmp_viaf_m.storage_manager.get_value(id2))
1681 self.assertIsNone(dp.tmp_ror_m.storage_manager.get_value(id1))
1682 self.assertIsNone(dp.tmp_wikidata_m.storage_manager.get_value(id3))
1683 dp.storage_manager.delete_storage()
1685 def test_find_datacite_publisher_id_api_enabled_from_redis_snapshot(self):
1686 """API ON + storage/indice vuoti, ma id presente nello snapshot Redis RA: accetta senza rete."""
1687 dp = DataciteProcessing(use_viaf_api=True, use_wikidata_api=True, use_ror_api=True)
1689 id1 = "ror:04wxnsj81"
1690 id2 = "viaf:148463773"
1691 id3 = "wikidata:Q7842"
1693 publisher1 = {
1694 'publisherIdentifierScheme': 'ROR',
1695 'schemeUri': 'https://ror.org',
1696 'name': 'DataCite',
1697 'publisherIdentifier': 'https://ror.org/04wxnsj81'
1698 }
1699 publisher2 = {
1700 'publisherIdentifierScheme': 'VIAF',
1701 'schemeUri': 'https://viaf.org/',
1702 'name': 'Deutsches archäologisches Institut',
1703 'publisherIdentifier': 'http://viaf.org/viaf/148463773'
1704 }
1705 publisher3 = {
1706 'publisherIdentifierScheme': 'Wikidata',
1707 'schemeUri': 'https://www.wikidata.org/wiki/',
1708 'name': 'University of Tokyo',
1709 'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
1710 }
1712 dp.update_redis_values(br=[], ra=[id1, id2, id3]) # simula snapshot
1713 out1 = dp.get_publisher_id(publisher1)
1714 out2 = dp.get_publisher_id(publisher2)
1715 out3 = dp.get_publisher_id(publisher3)
1717 self.assertEqual(out1, id1)
1718 self.assertEqual(out2, id2)
1719 self.assertEqual(out3, id3)
1721 self.assertTrue(dp.tmp_ror_m.storage_manager.get_value(id1))
1722 self.assertTrue(dp.tmp_viaf_m.storage_manager.get_value(id2))
1723 self.assertTrue(dp.tmp_wikidata_m.storage_manager.get_value(id3))
1725 dp.storage_manager.delete_storage()
1727 def test_find_datacite_publisher_id_api_disabled_from_redis_snapshot(self):
1728 """API OFF + storage/indice vuoti, ORCID nello snapshot Redis RA: accetta offline."""
1729 dp = DataciteProcessing(use_ror_api=False, use_viaf_api=False, use_wikidata_api=False)
1731 id1 = "ror:04wxnsj81"
1732 id2 = "viaf:148463773"
1733 id3 = "wikidata:Q7842"
1734 id4 = "crossref:501100000739"
1736 publisher1 = {
1737 'publisherIdentifierScheme': 'ROR',
1738 'schemeUri': 'https://ror.org',
1739 'name': 'DataCite',
1740 'publisherIdentifier': 'https://ror.org/04wxnsj81'
1741 }
1742 publisher2 = {
1743 'publisherIdentifierScheme': 'VIAF',
1744 'schemeUri': 'https://viaf.org/',
1745 'name': 'Deutsches archäologisches Institut',
1746 'publisherIdentifier': 'http://viaf.org/viaf/148463773'
1747 }
1748 publisher3 = {
1749 'publisherIdentifierScheme': 'Wikidata',
1750 'schemeUri': 'https://www.wikidata.org/wiki/',
1751 'name': 'University of Tokyo',
1752 'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
1753 }
1755 dp.update_redis_values(br=[], ra=[id1, id2, id3, id4]) # simula snapshot
1756 out1 = dp.get_publisher_id(publisher1)
1757 out2 = dp.get_publisher_id(publisher2)
1758 out3 = dp.get_publisher_id(publisher3)
1760 self.assertEqual(out1, id1)
1761 self.assertEqual(out2, id2)
1762 self.assertEqual(out3, id3)
1764 self.assertTrue(dp.tmp_ror_m.storage_manager.get_value(id1))
1765 self.assertTrue(dp.tmp_viaf_m.storage_manager.get_value(id2))
1766 self.assertTrue(dp.tmp_wikidata_m.storage_manager.get_value(id3))
1768 dp.storage_manager.delete_storage()
1771 def test_find_datacite_publisher_id_api_disabled_in_storage(self):
1772 """API OFF + publisher id già valido nello storage persistente: deve essere accettato."""
1773 dp = DataciteProcessing(use_viaf_api=False, use_wikidata_api=False, use_ror_api=False)
1774 id1 = "ror:04wxnsj89" #invalid
1775 id2 = "viaf:148463773"
1776 id3 = "wikidata:Q7842"
1778 publisher1 = {
1779 'publisherIdentifierScheme': 'ROR',
1780 'schemeUri': 'https://ror.org',
1781 'name': 'DataCite',
1782 'publisherIdentifier': 'https://ror.org/04wxnsj89'
1783 }
1784 publisher2 = {
1785 'publisherIdentifierScheme': 'VIAF',
1786 'schemeUri': 'https://viaf.org/',
1787 'name': 'Deutsches archäologisches Institut',
1788 'publisherIdentifier': 'http://viaf.org/viaf/148463773'
1789 }
1790 publisher3 = {
1791 'publisherIdentifierScheme': 'Wikidata',
1792 'schemeUri': 'https://www.wikidata.org/wiki/',
1793 'name': 'University of Tokyo',
1794 'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
1795 }
1798 dp.storage_manager.set_value(id1, True)
1799 dp.storage_manager.set_value(id2, True)
1800 dp.storage_manager.set_value(id3, True)
1802 out1 = dp.get_publisher_id(publisher1)
1803 out2 = dp.get_publisher_id(publisher2)
1804 out3 = dp.get_publisher_id(publisher3)
1806 self.assertEqual(out1, id1)
1807 self.assertEqual(out2, id2)
1808 self.assertEqual(out3, id3)
1810 dp.storage_manager.delete_storage()
1812 def test_publisher_id_replaced_by_mapping(self):
1814 publisher3 = {
1815 'publisherIdentifierScheme': 'Wikidata',
1816 'schemeUri': 'https://www.wikidata.org/wiki/',
1817 'name': 'University of Tokyo',
1818 'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
1819 }
1821 dp = DataciteProcessing(publishers_filepath_dc=PUBLISHERS_MAPPING)
1822 doi = "10.12753/2066-026X-17-015"
1823 publisher3 = dp.get_publisher(doi, publisher3)
1824 publisher3_exp = "ADLRO [datacite:3]"
1825 self.assertEqual(publisher3, publisher3_exp)
1828 def test_update_redis_values_normalization(self):
1829 """update_redis_values deve normalizzare gli ID (doi:/orcid:) così i confronti funzionano."""
1830 dp = DataciteProcessing()
1831 dp.update_redis_values(
1832 br=["10.1002/2014jd022411"], # senza prefisso
1833 ra=["https://orcid.org/0000-0001-8513-8700"] # URL
1834 )
1835 # validazione via snapshot deve riuscire
1836 out_ra = dp.find_datacite_orcid(["0000-0001-8513-8700"], "10.9999/noindex")
1837 self.assertEqual(out_ra, "orcid:0000-0001-8513-8700")
1838 # DOI in BR: check via to_validated_id_list
1839 out_br = dp.to_validated_id_list({"id": "doi:10.1002/2014jd022411", "schema": "doi"})
1840 self.assertEqual(out_br, ["doi:10.1002/2014jd022411"])
1841 dp.storage_manager.delete_storage()
1843 def test_memory_to_storage_flushes_and_clears(self):
1844 """Gli aggiornamenti in tmp vengono persistiti in blocco e la memoria temporanea viene svuotata."""
1845 dp = DataciteProcessing(testing=True)
1846 # usa Redis snapshot per marcare True in tmp_orcid_m
1847 oid = "orcid:0000-0001-8513-8700"
1848 dp.update_redis_values(br=[], ra=[oid])
1849 _ = dp.find_datacite_orcid([oid.split(":")[1]], "10.9999/noindex")
1850 # dopo la validazione: il valore è in tmp_orcid_m.storage_manager
1851 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(oid))
1852 # memory_to_storage svuota temporary_manager (che è già vuoto in questo caso)
1853 dp.memory_to_storage()
1854 # la memoria tmp è svuotata (nessun valore residuo)
1855 self.assertEqual(dp.temporary_manager.get_validity_list_of_tuples(), [])
1856 dp.tmp_orcid_m.storage_manager.delete_storage()
1858 def test_csv_creator_offline_uses_index_for_orcid(self):
1859 """API OFF: se l'ORCID è nell'indice DOI→ORCID, l'autore deve uscire con [orcid:...] anche offline."""
1860 dp = DataciteProcessing(use_orcid_api=False)
1861 doi = "10.2000/test-offline-index"
1862 orcid = "0000-0002-1234-5678"
1863 name = "Doe, Jane"
1864 dp.orcid_index.add_value(doi, f"{name} [orcid:{orcid}]")
1865 item = {
1866 "id": doi,
1867 "type": "dois",
1868 "attributes": {
1869 "doi": doi,
1870 "titles": [{"title": "Sample"}],
1871 "types": {"ris": "JOUR"},
1872 "creators": [{
1873 "nameType": "Personal",
1874 "familyName": "Doe",
1875 "givenName": "Jane",
1876 "nameIdentifiers": [{
1877 "nameIdentifierScheme": "ORCID",
1878 "nameIdentifier": f"https://orcid.org/{orcid}",
1879 "schemeUri": "https://orcid.org"
1880 }]
1881 }]
1882 }
1883 }
1884 row = dp.csv_creator(item)
1885 self.assertIn("[orcid:0000-0002-1234-5678]", row["author"])
1886 dp.storage_manager.delete_storage()
1888 def test_get_agents_strings_list_uses_index_with_doi_normalization(self):
1889 """get_agents_strings_list deve arricchire da indice anche se DOI arriva senza prefisso."""
1890 dp = DataciteProcessing()
1891 doi_no_prefix = "10.3000/abc"
1892 orcid = "0000-0003-1445-0291"
1893 dp.orcid_index.add_value(f"doi:{doi_no_prefix}", f"Schulz, Heide N [orcid:{orcid}]")
1894 entity_attr_dict = {
1895 "creators": [
1896 {"name": "Schulz, Heide N", "nameType": "Personal",
1897 "givenName": "Heide N", "familyName": "Schulz", "nameIdentifiers": []}
1898 ],
1899 "contributors": []
1900 }
1901 authors = dp.add_authors_to_agent_list(entity_attr_dict, [], doi="doi:10.3000/abc")
1902 editors = dp.add_editors_to_agent_list(entity_attr_dict, [], doi="doi:10.3000/abc")
1903 authors_strings, editors_strings = dp.get_agents_strings_list(doi_no_prefix, authors + editors)
1904 self.assertEqual(authors_strings, [f"Schulz, Heide N [orcid:{orcid}]"])
1905 self.assertEqual(editors_strings, [])
1906 dp.storage_manager.delete_storage()