Coverage for test / datacite_processing_test.py: 100%

876 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2023-2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

2# SPDX-FileCopyrightText: 2024 Arianna Moretti <arianna.moretti4@unibo.it> 

3# SPDX-FileCopyrightText: 2024-2026 Marta Soricetti <marta.soricetti@unibo.it> 

4# SPDX-FileCopyrightText: 2025 Arianna Moretti <arianna.moretti4@unibo.it> 

5# 

6# SPDX-License-Identifier: ISC 

7 

8import unittest 

9import json 

10from oc_ds_converter.lib.csvmanager import CSVManager 

11from oc_ds_converter.lib.jsonmanager import * 

12from oc_ds_converter.datacite.datacite_processing import DataciteProcessing 

13 

14from oc_ds_converter.oc_idmanager.oc_data_storage.redis_manager import RedisStorageManager 

15from oc_ds_converter.oc_idmanager.oc_data_storage.in_memory_manager import InMemoryStorageManager 

16from oc_ds_converter.oc_idmanager.oc_data_storage.sqlite_manager import SqliteStorageManager 

17 

# Fixture paths used throughout this test module.
# Root directory holding the DataCite-processing test fixtures.
TEST_DIR = os.path.join("test","datacite_processing")
# Scratch directory for databases created and deleted during the tests.
TMP_SUPPORT_MATERIAL = os.path.join(TEST_DIR, "tmp_support")
# Support directories created by setUp; presumably id/orcid support material
# and a wanted-DOIs list — NOTE(review): purposes inferred from names, confirm.
IOD = os.path.join(TEST_DIR, 'iod')
WANTED_DOIS = os.path.join(TEST_DIR, 'wanted_dois')
# CSV mapping of publisher data (unused directly in the visible tests).
PUBLISHERS_MAPPING = os.path.join(TEST_DIR, 'publishers.csv')
# Golden DataCite dump loaded by setUp; its "data" key holds the entities.
DATA = os.path.join(TEST_DIR, 'jSonFile_1_new_dump.json')

24 

25class TestDataciteProcessing(unittest.TestCase): 

26 

27 def setUp(self): 

28 # Create dirs 

29 for d in [TMP_SUPPORT_MATERIAL, IOD, WANTED_DOIS]: 

30 makedirs(d, exist_ok=True) 

31 

32 # Load golden data 

33 with open(DATA, 'r', encoding='utf-8') as f: 

34 self.expected_entities = json.load(f)["data"] 

35 self.expected_count = len(self.expected_entities) 

36 

37 def test_get_all_ids_citing(self): 

38 all_br = set() 

39 all_ra = set() 

40 dcp = DataciteProcessing() 

41 for entity in self.expected_entities: 

42 allids = dcp.extract_all_ids(entity, is_citing=True) 

43 all_br.update(set(allids[0])) 

44 all_ra.update(set(allids[1])) 

45 

46 self.assertEqual(all_br, set()) 

47 self.assertTrue({"orcid:0000-0002-8013-9947", "orcid:0000-0001-7392-1415", 

48 "orcid:0000-0003-2328-5769", "orcid:0000-0002-6715-3533", "orcid:0000-0002-0801-0890", 

49 "orcid:0000-0001-7543-3466", "orcid:0000-0002-6210-8370", "orcid:0000-0002-9747-4928", 

50 "ror:03ztgj037"} == all_ra) 

51 

52 def test_get_all_ids_cited(self): 

53 all_br = set() 

54 all_ra = set() 

55 dcp = DataciteProcessing() 

56 for entity in self.expected_entities: 

57 allids = dcp.extract_all_ids(entity, is_citing=False) 

58 

59 all_br.update(set(allids[0])) 

60 all_ra.update(set(allids[1])) 

61 self.assertTrue({"doi:10.5281/zenodo.8249952", "doi:10.5281/zenodo.8249970", "doi:10.1017/9781009157896", 

62 "doi:10.1017/9781009157896.005"} == all_br) 

63 

64 def test_get_redis_validity_list_br(self): 

65 dcp = DataciteProcessing() 

66 br = {"doi:10.5281/zenodo.8249952", "doi:10.5281/zenodo.8249970", "doi:10.1017/9781009157896", "doi:10.1017/9781009157896.005"} 

67 br_valid_list = dcp.get_reids_validity_list(br, "br") 

68 exp_br_valid_list = [] 

69 self.assertEqual(br_valid_list, exp_br_valid_list) 

70 dcp.storage_manager.delete_storage() 

71 

72 def test_get_redis_validity_list_ra(self): 

73 dcp = DataciteProcessing() 

74 ra = {"orcid:0000-0002-8013-9947", "orcid:0000-0001-7392-1415", 

75 "orcid:0000-0003-2328-5769", "orcid:0000-0002-6715-3533", "orcid:0000-0002-0801-0890", 

76 "orcid:0000-0001-7543-3466", "orcid:0000-0002-6210-8370", "orcid:0000-0002-9747-4928", 

77 "ror:03ztgj037"} 

78 ra_valid_list = dcp.get_reids_validity_list(ra, "ra") 

79 exp_ra_valid_list = [] 

80 self.assertEqual(ra_valid_list, exp_ra_valid_list) 

81 dcp.storage_manager.delete_storage() 

82 

83 def test_get_redis_validity_list_br_redis(self): 

84 dcp = DataciteProcessing(storage_manager=RedisStorageManager(testing=True)) 

85 br = {"doi:10.5281/zenodo.8249952", "doi:10.5281/zenodo.8249970", "doi:10.1017/9781009157896", "doi:10.1017/9781009157896.005"} 

86 br_valid_list = dcp.get_reids_validity_list(br, "br") 

87 exp_br_valid_list = [] 

88 self.assertEqual(br_valid_list, exp_br_valid_list) 

89 dcp.storage_manager.delete_storage() 

90 

91 def test_get_redis_validity_dict_w_fakeredis_db_values_sqlite(self): 

92 dcp = DataciteProcessing() 

93 dcp.BR_redis.sadd("doi:10.5281/zenodo.8249952", "omid:1") 

94 dcp.RA_redis.sadd("orcid:0000-0002-8013-9947", "omid:2") 

95 dcp.RA_redis.sadd("ror:03ztgj039", "omid:3") # invalid ror 

96 

97 br = {"doi:10.5281/zenodo.8249952", "doi:10.5281/zenodo.8249970", "doi:10.1017/9781009157896", "doi:10.1017/9781009157896.005"} 

98 

99 ra = {"orcid:0000-0002-8013-9947", "orcid:0000-0001-7392-1415", 

100 "orcid:0000-0003-2328-5769", "orcid:0000-0002-6715-3533", "orcid:0000-0002-0801-0890", 

101 "orcid:0000-0001-7543-3466", "orcid:0000-0002-6210-8370", "orcid:0000-0002-9747-4928", "ror:03ztgj039"} 

102 

103 br_validity_dict = dcp.get_reids_validity_list(br, "br") 

104 exp_br_valid_list = ["doi:10.5281/zenodo.8249952"] 

105 ra_validity_dict = dcp.get_reids_validity_list(ra, "ra") 

106 exp_ra_valid_list = ["orcid:0000-0002-8013-9947", "ror:03ztgj039"] 

107 self.assertEqual(set(br_validity_dict), set(exp_br_valid_list)) 

108 self.assertEqual(set(ra_validity_dict), set(exp_ra_valid_list)) 

109 

110 dcp.storage_manager.delete_storage() 

111 

112 dcp.BR_redis.delete("doi:10.5281/zenodo.8249952") 

113 dcp.RA_redis.delete("orcid:0000-0002-8013-9947") 

114 dcp.RA_redis.delete("ror:03ztgj039") 

115 

116 def test_get_redis_validity_dict_w_fakeredis_db_values_redis(self): 

117 dcp = DataciteProcessing(storage_manager=RedisStorageManager()) 

118 dcp.BR_redis.sadd("doi:10.5281/zenodo.8249970", "omid:1") 

119 dcp.RA_redis.sadd("orcid:0000-0002-6210-8370", "omid:2") 

120 dcp.RA_redis.sadd("ror:03ztgj039", "omid:3") # invalid ror 

121 

122 br = {"doi:10.5281/zenodo.8249952", "doi:10.5281/zenodo.8249970", "doi:10.1017/9781009157896", "doi:10.1017/9781009157896.005"} 

123 

124 ra = {"orcid:0000-0002-8013-9947", "orcid:0000-0001-7392-1415", 

125 "orcid:0000-0003-2328-5769", "orcid:0000-0002-6715-3533", "orcid:0000-0002-0801-0890", 

126 "orcid:0000-0001-7543-3466", "orcid:0000-0002-6210-8370", "orcid:0000-0002-9747-4928", "ror:03ztgj039"} 

127 

128 br_validity_dict = dcp.get_reids_validity_list(br, "br") 

129 exp_br_valid_list = ["doi:10.5281/zenodo.8249970"] 

130 ra_validity_dict = dcp.get_reids_validity_list(ra, "ra") 

131 exp_ra_valid_list = ["orcid:0000-0002-6210-8370", "ror:03ztgj039"] 

132 self.assertEqual(set(br_validity_dict), set(exp_br_valid_list)) 

133 self.assertEqual(set(ra_validity_dict), set(exp_ra_valid_list)) 

134 

135 dcp.storage_manager.delete_storage() 

136 

137 dcp.BR_redis.delete("doi:10.5281/zenodo.8249970") 

138 dcp.RA_redis.delete("orcid:0000-0002-6210-8370") 

139 dcp.RA_redis.delete("ror:03ztgj039") 

140 

141 def test_validated_as_default(self): 

142 """ 

143 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value: 

144 string of the identifier, the method "validated_as" returns: 

145 - True if the id was already validated as valid 

146 - False if the id was already validated as invalid 

147 - None if the id was not validated before 

148 The procedure is tested 

149 - With default storage manager (sqlite) without a pre-existent db associated 

150 """ 

151 

152 dcp = DataciteProcessing() 

153 validate_as_none_doi = dcp.validated_as({"schema": "doi", "identifier": "doi:10.11578/1480643"}) 

154 validated_as_none_orcid = dcp.validated_as({"schema": "orcid", "identifier": "orcid:0000-0001-8513-8700"}) 

155 validated_as_none_ror = dcp.validated_as({"schema": "ror", "identifier": "ror:03ztgj037"}) 

156 self.assertEqual(validate_as_none_doi, None) 

157 self.assertEqual(validated_as_none_orcid, None) 

158 self.assertEqual(validated_as_none_ror, None) 

159 

160 dcp.storage_manager.delete_storage() 

161 

162 def test_validated_as_default_redis(self): 

163 """ 

164 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value: 

165 string of the identifier, the method "validated_as" returns: 

166 - True if the id was already validated as valid 

167 - False if the id was already validated as invalid 

168 - None if the id was not validated before 

169 The procedure is tested 

170 - With redis storage manager without a pre-existent db associated 

171 """ 

172 

173 dcp = DataciteProcessing(storage_manager=RedisStorageManager(testing=True)) 

174 validate_as_none_doi = dcp.validated_as({"schema": "doi", "identifier": "doi:10.11578/1480643"}) 

175 validated_as_none_orcid = dcp.validated_as({"schema": "orcid", "identifier": "orcid:0000-0001-8513-8700"}) 

176 validated_as_none_ror = dcp.validated_as({"schema": "ror", "identifier": "ror:03ztgj037"}) 

177 self.assertEqual(validate_as_none_doi, None) 

178 self.assertEqual(validated_as_none_orcid, None) 

179 self.assertEqual(validated_as_none_ror, None) 

180 dcp.storage_manager.delete_storage() 

181 

182 def test_validated_as_redis_with_preexistent_data(self): 

183 """ 

184 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value: 

185 string of the identifier, the method "validated_as" returns: 

186 - True if the id was already validated as valid 

187 - False if the id was already validated as invalid 

188 - None if the id was not validated before 

189 The procedure is tested 

190 - With redis storage manager and pre-existent data associated 

191 """ 

192 db_path = os.path.join(TMP_SUPPORT_MATERIAL, "db_path.db") 

193 sqlite_man = SqliteStorageManager(db_path) 

194 

195 valid_doi_not_in_db = {"identifier": "doi:10.11578/1480643", "schema": "doi"} 

196 valid_doi_in_db = {"identifier": "doi:10.15407/scin11.06.057", "schema": "doi"} 

197 invalid_doi_in_db = {"identifier": "doi:10.1066/1741-4326/aa6b", "schema": "doi"} 

198 

199 valid_orcid_not_in_db = {"schema": "orcid", "identifier": "orcid:0000-0001-8513-8700"} 

200 valid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-2630"} 

201 invalid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-26XX"} 

202 

203 valid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj037"} 

204 valid_ror_not_in_db = {"schema": "ror", "identifier": "ror:01111rn36"} 

205 invalid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj039"} 

206 

207 valid_viaf_not_in_db = {"identifier": "viaf:102333412", "schema": "viaf"} 

208 valid_viaf_in_db = {"identifier": "viaf:108389263", "schema": "viaf"} 

209 invalid_viaf_in_db = {"identifier": "viaf:12345ABC", "schema": "viaf"} 

210 

211 valid_wikidata_not_in_db = {"identifier": "wikidata:Q2330656", "schema": "wikidata"} 

212 valid_wikidata_in_db = {"identifier": "wikidata:Q42", "schema": "wikidata"} 

213 invalid_wikidata_in_db = {"identifier": "wikidata:Q_invalid_123", "schema": "wikidata"} 

214 

215 # --- POPOLAMENTO DATABASE SQLITE --- 

216 sqlite_man.set_value(valid_doi_in_db["identifier"], True) 

217 sqlite_man.set_value(invalid_doi_in_db["identifier"], False) 

218 

219 sqlite_man.set_value(valid_orcid_in_db["identifier"], True) 

220 sqlite_man.set_value(invalid_orcid_in_db["identifier"], False) 

221 

222 sqlite_man.set_value(valid_ror_in_db["identifier"], True) 

223 sqlite_man.set_value(invalid_ror_in_db["identifier"], False) 

224 

225 sqlite_man.set_value(valid_viaf_in_db["identifier"], True) 

226 sqlite_man.set_value(invalid_viaf_in_db["identifier"], False) 

227 

228 sqlite_man.set_value(valid_wikidata_in_db["identifier"], True) 

229 sqlite_man.set_value(invalid_wikidata_in_db["identifier"], False) 

230 

231 # --- ESECUZIONE DEI METODI --- 

232 # New class instance to check the correct task management with a sqlite db in input 

233 d_processing_sql = DataciteProcessing(storage_manager=sqlite_man) 

234 

235 doi_validated_as_True = d_processing_sql.validated_as(valid_doi_in_db) 

236 doi_validated_as_False = d_processing_sql.validated_as(invalid_doi_in_db) 

237 doi_not_validated = d_processing_sql.validated_as(valid_doi_not_in_db) 

238 

239 orcid_validated_as_True = d_processing_sql.validated_as(valid_orcid_in_db) 

240 orcid_validated_as_False = d_processing_sql.validated_as(invalid_orcid_in_db) 

241 orcid_not_validated = d_processing_sql.validated_as(valid_orcid_not_in_db) 

242 

243 ror_validated_as_True = d_processing_sql.validated_as(valid_ror_in_db) 

244 ror_validated_as_False = d_processing_sql.validated_as(invalid_ror_in_db) 

245 ror_not_validated = d_processing_sql.validated_as(valid_ror_not_in_db) 

246 

247 viaf_validated_as_True = d_processing_sql.validated_as(valid_viaf_in_db) 

248 viaf_validated_as_False = d_processing_sql.validated_as(invalid_viaf_in_db) 

249 viaf_not_validated = d_processing_sql.validated_as(valid_viaf_not_in_db) 

250 

251 wikidata_validated_as_True = d_processing_sql.validated_as(valid_wikidata_in_db) 

252 wikidata_validated_as_False = d_processing_sql.validated_as(invalid_wikidata_in_db) 

253 wikidata_not_validated = d_processing_sql.validated_as(valid_wikidata_not_in_db) 

254 

255 # --- ASSERZIONI --- 

256 self.assertEqual(doi_validated_as_True, True) 

257 self.assertEqual(doi_validated_as_False, False) 

258 self.assertEqual(doi_not_validated, None) 

259 

260 self.assertEqual(orcid_validated_as_True, True) 

261 self.assertEqual(orcid_validated_as_False, False) 

262 self.assertEqual(orcid_not_validated, None) 

263 

264 self.assertEqual(ror_validated_as_True, True) 

265 self.assertEqual(ror_validated_as_False, False) 

266 self.assertEqual(ror_not_validated, None) 

267 

268 self.assertEqual(viaf_validated_as_True, True) 

269 self.assertEqual(viaf_validated_as_False, False) 

270 self.assertEqual(viaf_not_validated, None) 

271 

272 self.assertEqual(wikidata_validated_as_True, True) 

273 self.assertEqual(wikidata_validated_as_False, False) 

274 self.assertEqual(wikidata_not_validated, None) 

275 

276 d_processing_sql.storage_manager.delete_storage() 

277 

278 def test_validated_as_inmemory(self): 

279 ''' 

280 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value: 

281 string of the identifier, the method "validated_as" returns: 

282 - True if the id was already validated as valid 

283 - False if the id was already validated as invalid 

284 - None if the id was not validated before 

285 The procedure is tested 

286 - With in Memory + Json storage manager and a pre-existent db associated 

287 ''' 

288 

289 db_json_path = os.path.join(TMP_SUPPORT_MATERIAL, "db_path.json") 

290 inmemory_man = InMemoryStorageManager(db_json_path) 

291 

292 valid_doi_not_in_db = {"identifier": "doi:10.11578/1480643", "schema": "doi"} 

293 valid_doi_in_db = {"identifier": "doi:10.15407/scin11.06.057", "schema": "doi"} 

294 invalid_doi_in_db = {"identifier": "doi:10.1066/1741-4326/aa6b", "schema": "doi"} 

295 

296 valid_orcid_not_in_db = {"schema": "orcid", "identifier": "orcid:0000-0001-8513-8700"} 

297 valid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-2630"} 

298 invalid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-26XX"} 

299 

300 valid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj037"} 

301 valid_ror_not_in_db = {"schema": "ror", "identifier": "ror:01111rn36"} 

302 invalid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj039"} 

303 

304 valid_viaf_not_in_db = {"identifier": "viaf:102333412", "schema": "viaf"} 

305 valid_viaf_in_db = {"identifier": "viaf:108389263", "schema": "viaf"} 

306 invalid_viaf_in_db = {"identifier": "viaf:12345ABC", "schema": "viaf"} 

307 

308 valid_wikidata_not_in_db = {"identifier": "wikidata:Q2330656", "schema": "wikidata"} 

309 valid_wikidata_in_db = {"identifier": "wikidata:Q42", "schema": "wikidata"} 

310 invalid_wikidata_in_db = {"identifier": "wikidata:Q_invalid_123", "schema": "wikidata"} 

311 

312 inmemory_man.set_value(valid_doi_in_db["identifier"], True) 

313 inmemory_man.set_value(invalid_doi_in_db["identifier"], False) 

314 

315 inmemory_man.set_value(valid_orcid_in_db["identifier"], True) 

316 inmemory_man.set_value(invalid_orcid_in_db["identifier"], False) 

317 

318 inmemory_man.set_value(valid_ror_in_db["identifier"], True) 

319 inmemory_man.set_value(invalid_ror_in_db["identifier"], False) 

320 

321 inmemory_man.set_value(valid_viaf_in_db["identifier"], True) 

322 inmemory_man.set_value(invalid_viaf_in_db["identifier"], False) 

323 

324 inmemory_man.set_value(valid_wikidata_in_db["identifier"], True) 

325 inmemory_man.set_value(invalid_wikidata_in_db["identifier"], False) 

326 

327 

328 # New class instance to check the correct task management with a sqlite db in input 

329 d_processing = DataciteProcessing(storage_manager=inmemory_man) 

330 

331 doi_validated_as_True = d_processing.validated_as(valid_doi_in_db) 

332 doi_validated_as_False = d_processing.validated_as(invalid_doi_in_db) 

333 doi_not_validated = d_processing.validated_as(valid_doi_not_in_db) 

334 

335 orcid_validated_as_True = d_processing.validated_as(valid_orcid_in_db) 

336 orcid_validated_as_False = d_processing.validated_as(invalid_orcid_in_db) 

337 orcid_not_validated = d_processing.validated_as(valid_orcid_not_in_db) 

338 

339 ror_validated_as_True = d_processing.validated_as(valid_ror_in_db) 

340 ror_validated_as_False = d_processing.validated_as(invalid_ror_in_db) 

341 ror_not_validated = d_processing.validated_as(valid_ror_not_in_db) 

342 

343 viaf_validated_as_True = d_processing.validated_as(valid_viaf_in_db) 

344 viaf_validated_as_False = d_processing.validated_as(invalid_viaf_in_db) 

345 viaf_not_validated = d_processing.validated_as(valid_viaf_not_in_db) 

346 

347 wikidata_validated_as_True = d_processing.validated_as(valid_wikidata_in_db) 

348 wikidata_validated_as_False = d_processing.validated_as(invalid_wikidata_in_db) 

349 wikidata_not_validated = d_processing.validated_as(valid_wikidata_not_in_db) 

350 

351 self.assertEqual(doi_validated_as_True, True) 

352 self.assertEqual(doi_validated_as_False, False) 

353 self.assertEqual(doi_not_validated, None) 

354 

355 self.assertEqual(orcid_validated_as_True, True) 

356 self.assertEqual(orcid_validated_as_False, False) 

357 self.assertEqual(orcid_not_validated, None) 

358 

359 self.assertEqual(ror_validated_as_True, True) 

360 self.assertEqual(ror_validated_as_False, False) 

361 self.assertEqual(ror_not_validated, None) 

362 

363 self.assertEqual(viaf_validated_as_True, True) 

364 self.assertEqual(viaf_validated_as_False, False) 

365 self.assertEqual(viaf_not_validated, None) 

366 

367 self.assertEqual(wikidata_validated_as_True, True) 

368 self.assertEqual(wikidata_validated_as_False, False) 

369 self.assertEqual(wikidata_not_validated, None) 

370 

371 

372 d_processing.storage_manager.delete_storage() 

373 

374 def test_validated_as_redis(self): 

375 """ 

376 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value: 

377 string of the identifier, the method "validated_as" returns: 

378 - True if the id was already validated as valid 

379 - False if the id was already validated as invalid 

380 - None if the id was not validated before 

381 The procedure is tested 

382 - With REDIS storage manager and a pre-existent db associated 

383 """ 

384 redis_man = RedisStorageManager(testing=True) 

385 

386 valid_doi_not_in_db = {"identifier": "doi:10.11578/1480643", "schema": "doi"} 

387 valid_doi_in_db = {"identifier": "doi:10.15407/scin11.06.057", "schema": "doi"} 

388 invalid_doi_in_db = {"identifier": "doi:10.1066/1741-4326/aa6b", "schema": "doi"} 

389 

390 valid_orcid_not_in_db = {"schema": "orcid", "identifier": "orcid:0000-0001-8513-8700"} 

391 valid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-2630"} 

392 invalid_orcid_in_db = {"schema": "orcid", "identifier": "orcid:0000-0002-9286-26XX"} 

393 

394 valid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj037"} 

395 valid_ror_not_in_db = {"schema": "ror", "identifier": "ror:01111rn36"} 

396 invalid_ror_in_db = {"schema": "ror", "identifier": "ror:03ztgj039"} 

397 

398 valid_viaf_not_in_db = {"identifier": "viaf:102333412", "schema": "viaf"} 

399 valid_viaf_in_db = {"identifier": "viaf:108389263", "schema": "viaf"} 

400 invalid_viaf_in_db = {"identifier": "viaf:12345ABC", "schema": "viaf"} 

401 

402 valid_wikidata_not_in_db = {"identifier": "wikidata:Q2330656", "schema": "wikidata"} 

403 valid_wikidata_in_db = {"identifier": "wikidata:Q42", "schema": "wikidata"} 

404 invalid_wikidata_in_db = {"identifier": "wikidata:Q_invalid_123", "schema": "wikidata"} 

405 

406 redis_man.set_value(valid_doi_in_db["identifier"], True) 

407 redis_man.set_value(invalid_doi_in_db["identifier"], False) 

408 

409 redis_man.set_value(valid_orcid_in_db["identifier"], True) 

410 redis_man.set_value(invalid_orcid_in_db["identifier"], False) 

411 

412 redis_man.set_value(valid_ror_in_db["identifier"], True) 

413 redis_man.set_value(invalid_ror_in_db["identifier"], False) 

414 

415 redis_man.set_value(valid_viaf_in_db["identifier"], True) 

416 redis_man.set_value(invalid_viaf_in_db["identifier"], False) 

417 

418 redis_man.set_value(valid_wikidata_in_db["identifier"], True) 

419 redis_man.set_value(invalid_wikidata_in_db["identifier"], False) 

420 

421 d_processing_redis = DataciteProcessing(storage_manager=redis_man) 

422 

423 doi_validated_as_True = d_processing_redis.validated_as(valid_doi_in_db) 

424 doi_validated_as_False = d_processing_redis.validated_as(invalid_doi_in_db) 

425 doi_not_validated = d_processing_redis.validated_as(valid_doi_not_in_db) 

426 

427 orcid_validated_as_True = d_processing_redis.validated_as(valid_orcid_in_db) 

428 orcid_validated_as_False = d_processing_redis.validated_as(invalid_orcid_in_db) 

429 orcid_not_validated = d_processing_redis.validated_as(valid_orcid_not_in_db) 

430 

431 ror_validated_as_True = d_processing_redis.validated_as(valid_ror_in_db) 

432 ror_validated_as_False = d_processing_redis.validated_as(invalid_ror_in_db) 

433 ror_not_validated = d_processing_redis.validated_as(valid_ror_not_in_db) 

434 

435 viaf_validated_as_True = d_processing_redis.validated_as(valid_viaf_in_db) 

436 viaf_validated_as_False = d_processing_redis.validated_as(invalid_viaf_in_db) 

437 viaf_not_validated = d_processing_redis.validated_as(valid_viaf_not_in_db) 

438 

439 wikidata_validated_as_True = d_processing_redis.validated_as(valid_wikidata_in_db) 

440 wikidata_validated_as_False = d_processing_redis.validated_as(invalid_wikidata_in_db) 

441 wikidata_not_validated = d_processing_redis.validated_as(valid_wikidata_not_in_db) 

442 

443 self.assertEqual(doi_validated_as_True, True) 

444 self.assertEqual(doi_validated_as_False, False) 

445 self.assertEqual(doi_not_validated, None) 

446 

447 self.assertEqual(orcid_validated_as_True, True) 

448 self.assertEqual(orcid_validated_as_False, False) 

449 self.assertEqual(orcid_not_validated, None) 

450 

451 self.assertEqual(ror_validated_as_True, True) 

452 self.assertEqual(ror_validated_as_False, False) 

453 self.assertEqual(ror_not_validated, None) 

454 

455 self.assertEqual(viaf_validated_as_True, True) 

456 self.assertEqual(viaf_validated_as_False, False) 

457 self.assertEqual(viaf_not_validated, None) 

458 

459 self.assertEqual(wikidata_validated_as_True, True) 

460 self.assertEqual(wikidata_validated_as_False, False) 

461 self.assertEqual(wikidata_not_validated, None) 

462 

463 d_processing_redis.storage_manager.delete_storage() 

464 

465 def test_get_id_manager(self): 

466 """Check that, given in input the string of a schema (e.g.:'pmid') or an id with a prefix (e.g.: 'pmid:12334') 

467 and a dictionary mapping the strings of the schemas to their id managers, the method returns the correct 

468 id manager. Note that each instance of the Preprocessing class needs its own instances of the id managers, 

469 in order to avoid conflicts while validating data""" 

470 

471 d_processing = DataciteProcessing() 

472 

473 id_man_dict = d_processing.venue_id_man_dict 

474 ra_man_dict = d_processing.ra_man_dict 

475 

476 issn_id = "issn:0003-987X" 

477 issn_string = "issn" 

478 

479 isbn_id = "isbn:978-88-98719-08-2" 

480 isbn_string = "isbn" 

481 

482 orcid_id = "orcid:0000-0001-8513-8700" 

483 orcid_string = "orcid" 

484 

485 ror_id = "ror:03ztgj037" 

486 ror_string = "ror" 

487 

488 viaf_id = "viaf:102333412" 

489 viaf_string = "viaf" 

490 

491 wikidata_id = "wikidata:Q42" 

492 wikidata_string = "wikidata" 

493 

494 issn_man_exp = d_processing.get_id_manager(issn_id, id_man_dict) 

495 issn_man_exp_2 = d_processing.get_id_manager(issn_string, id_man_dict) 

496 

497 isbn_man_exp = d_processing.get_id_manager(isbn_id, id_man_dict) 

498 isbn_man_exp_2 = d_processing.get_id_manager(isbn_string, id_man_dict) 

499 

500 orcid_man_exp = d_processing.get_id_manager(orcid_id, ra_man_dict) 

501 orcid_man_exp_2 = d_processing.get_id_manager(orcid_string, ra_man_dict) 

502 

503 ror_man_exp = d_processing.get_id_manager(ror_id, ra_man_dict) 

504 ror_man_exp_2 = d_processing.get_id_manager(ror_string, ra_man_dict) 

505 

506 viaf_man_exp = d_processing.get_id_manager(viaf_id, ra_man_dict) 

507 viaf_man_exp_2 = d_processing.get_id_manager(viaf_string, ra_man_dict) 

508 

509 wikidata_man_exp = d_processing.get_id_manager(wikidata_id, ra_man_dict) 

510 wikidata_man_exp_2 = d_processing.get_id_manager(wikidata_string, ra_man_dict) 

511 

512 # check that the idmanager for the issn was returned and that it works as expected 

513 self.assertTrue(issn_man_exp.is_valid(issn_id)) 

514 self.assertTrue(issn_man_exp_2.is_valid(issn_id)) 

515 

516 # check that the idmanager for the isbn was returned and that it works as expected 

517 self.assertTrue(isbn_man_exp.is_valid(isbn_id)) 

518 self.assertTrue(isbn_man_exp_2.is_valid(isbn_id)) 

519 

520 # check that the idmanager for the orcid was returned and that it works as expected 

521 self.assertTrue(orcid_man_exp.is_valid(orcid_id)) 

522 self.assertTrue(orcid_man_exp_2.is_valid(orcid_id)) 

523 

524 # check that the idmanager for the ror was returned and that it works as expected 

525 self.assertTrue(ror_man_exp.is_valid(ror_id)) 

526 self.assertTrue(ror_man_exp_2.is_valid(ror_id)) 

527 

528 # check that the idmanager for the viaf was returned and that it works as expected 

529 self.assertTrue(viaf_man_exp.is_valid(viaf_id)) 

530 self.assertTrue(viaf_man_exp_2.is_valid(viaf_id)) 

531 

532 # check that the idmanager for the wikidata was returned and that it works as expected 

533 self.assertTrue(wikidata_man_exp.is_valid(wikidata_id)) 

534 self.assertTrue(wikidata_man_exp_2.is_valid(wikidata_id)) 

535 

536 d_processing.storage_manager.delete_storage() 

537 

538 def test_csv_creator(self): 

539 '''Add a test with all the data''' 

540 datacite_processor = DataciteProcessing() 

541 data = { 

542 'id': '10.34780/7510-t906', 

543 'type': 'dois', 

544 'attributes': { 

545 'container': { 

546 'identifier': '2701-5572', 

547 'firstPage': '2021', 

548 'identifierType': 'ISSN', 

549 'type': 'Series', 

550 'title': 'Journal of Global Archaeology' 

551 }, 

552 'reason': None, 

553 'prefix': '10.34780', 

554 'citationsOverTime': [], 

555 'registered': '2021-06-07T10:39:06Z', 

556 'language': 'en', 

557 'source': 'fabricaForm', 

558 'suffix': '7510-t906', 

559 'relatedItems': [], 

560 'descriptions': [ 

561 {'descriptionType': 'SeriesInformation', 'description': 'Journal of Global Archaeology, 2021'}, 

562 {'descriptionType': 'SeriesInformation', 'description': 'Journal of Global Archaeology, 2021'}, 

563 {'descriptionType': 'Abstract', 

564 'description': 'The kingdom of Eswatini provides a rich archaeological sequence covering all time periods from the Early Stone Age to the Iron Age. For over 27 years though, no or very little archaeological research was conducted in the country. In the scope of a new project funded by the German Research Foundation (DFG) we aim to re-excavate and re-date Lion Cavern, the potentially oldest ochre mine in the world. In addition, we conduct a largescale geological survey for outcrops of ochre and test their geochemical signatures for comparative studies with archaeological ochre pieces from MSA and LSA assemblages in Eswatini. Here we present a review of the research history of the kingdom and some preliminary results from our ongoing project.', 

565 'lang': 'en'}], 

566 'sizes': ['§ 1–12'], 

567 'versionOfCount': 0, 

568 'relatedIdentifiers': [ 

569 {'relationType': 'IsPartOf', 'relatedIdentifier': '2701-5572', 'relatedIdentifierType': 'ISSN'}, 

570 {'relationType': 'IsPartOf', 'relatedIdentifierType': 'DOI'}, 

571 {'relationType': 'HasMetadata', 'relatedIdentifier': 'https://zenon.dainst.org/Record/002035353', 

572 'relatedIdentifierType': 'URL'}, 

573 {'relationType': 'References', 'relatedIdentifier': '10.2307/3888317', 

574 'relatedIdentifierType': 'DOI'}, 

575 {'relationType': 'References', 'relatedIdentifier': '10.1086/204793', 

576 'relatedIdentifierType': 'DOI'}, 

577 {'relationType': 'References', 'relatedIdentifier': '10.1086/338292', 

578 'relatedIdentifierType': 'DOI'}, 

579 {'relationType': 'References', 'relatedIdentifier': '10.1111/arcm.12202', 

580 'relatedIdentifierType': 'DOI'}, 

581 {'relationType': 'References', 'relatedIdentifier': '10.1006/jasc.2000.0638', 

582 'relatedIdentifierType': 'DOI'}, 

583 {'relationType': 'References', 'relatedIdentifier': '10.2307/3888015', 

584 'relatedIdentifierType': 'DOI'}, 

585 {'relationType': 'References', 'relatedIdentifier': '10.3213/2191-5784-10199', 

586 'relatedIdentifierType': 'DOI'}, 

587 {'relationType': 'References', 'relatedIdentifier': '10.1016/j.jhevol.2005.06.007', 

588 'relatedIdentifierType': 'DOI'}, 

589 {'relationType': 'References', 'relatedIdentifier': '10.1017/s0003598x00113298', 

590 'relatedIdentifierType': 'DOI'}], 'created': '2021-05-11T13:11:58Z', 

591 'dates': [{'date': '2021', 'dateType': 'Issued'}], 

592 'published': '2021', 

593 'geoLocations': [], 

594 'partCount': 0, 

595 'publicationYear': 2021, 

596 'partOfCount': 0, 

597 'updated': '2021-07-30T12:39:50Z', 

598 'formats': [], 

599 'fundingReferences': [], 

600 'creators': [ 

601 { 

602 'nameType': 'Personal', 

603 'affiliation': [ 

604 {'affiliationIdentifier': 'https://ror.org/03a1kwz48', 

605 'name': 'University of Tübingen, Senckenberg Centre for Human Evolution and Palaeoenvironment', 

606 'affiliationIdentifierScheme': 'ROR'}], 

607 'givenName': 'Gregor D.', 

608 'familyName': 'Bader', 

609 'name': 'Bader, Gregor D.', 

610 'nameIdentifiers': [ 

611 {'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 

612 'nameIdentifier': 'https://orcid.org/0000-0003-0621-9209'}] 

613 }, 

614 { 

615 'nameType': 'Personal', 

616 'affiliation': [ 

617 { 

618 'affiliationIdentifier': 'https://ror.org/02vrphe47', 

619 'name': 'Swaziland National Trust Commission', 

620 'affiliationIdentifierScheme': 'ROR'} 

621 ], 

622 'givenName': 'Bob', 

623 'familyName': 'Forrester', 

624 'name': 'Forrester, Bob' 

625 }, 

626 { 

627 'nameType': 'Personal', 

628 'affiliation': [ 

629 { 

630 'affiliationIdentifier': 'https://ror.org/041qv0h25', 

631 'name': 'Deutsches Archäologisches Institut, Kommission für Archäologie Außereuropäischer Kulturen', 

632 'affiliationIdentifierScheme': 'ROR'} 

633 ], 

634 'givenName': 'Lisa', 

635 'familyName': 'Ehlers', 

636 'name': 'Ehlers, Lisa' 

637 }, 

638 { 

639 'nameType': 'Personal', 

640 'affiliation': [ 

641 { 

642 'affiliationIdentifier': 'https://ror.org/03zga2b32', 

643 'name': 'University of Bergen, SFF Centre for Early Sapiens Behaviour', 

644 'affiliationIdentifierScheme': 'ROR'} 

645 ], 

646 'givenName': 'Elizabeth', 

647 'familyName': 'Velliky', 

648 'name': 'Velliky, Elizabeth', 

649 'nameIdentifiers': [ 

650 { 

651 'nameIdentifierScheme': 'ORCID', 

652 'schemeUri': 'https://orcid.org', 

653 'nameIdentifier': 'https://orcid.org/0000-0002-3019-5377'} 

654 ] 

655 }], 

656 'schemaVersion': 'http://datacite.org/schema/kernel-4', 'versionCount': 0, 'metadataVersion': 2, 

657 'citationCount': 0, 

658 'types': {'schemaOrg': 'ScholarlyArticle', 'resourceTypeGeneral': 'Text', 'citeproc': 'article-journal', 

659 'bibtex': 'article', 'ris': 'RPRT', 'resourceType': 'Article'}, 'isActive': True, 

660 'viewsOverTime': [], 'identifiers': [], 

661 'subjects': [{'subject': 'Eswatini'}, {'subject': 'Lion Cavern'}, {'subject': 'Ochre'}, 

662 {'subject': 'Provenance tracing'}], 'titles': [ 

663 {'lang': 'en', 'title': 'The Forgotten Kingdom. New investigations in the prehistory of Eswatini'}], 

664 'url': 'https://publications.dainst.org/journals/index.php/joga/article/view/3559', 'downloadCount': 0, 

665 'rightsList': [], 'contentUrl': None, 'contributors': [], 'referenceCount': 9, 'viewCount': 0, 

666 'downloadsOverTime': [], 'doi': '10.34780/7510-t906', 

667 'publisher': { 

668 'publisherIdentifierScheme': 'ROR', 

669 'schemeUri': 'https://ror.org', 

670 'name': 'Deutsches Archäologisches Institut', 

671 'publisherIdentifier': 'https://ror.org/041qv0h25' 

672 }, 

673 'version': None, 

674 'state': 'findable', 

675 'alternateIdentifiers': [] 

676 }, 

677 'relationships': {'client': {'data': {'id': 'dai.avnrkz', 'type': 'clients'}}, 

678 'provider': {'data': {'id': 'dai', 'type': 'providers'}}, 'media': {'data': []}, 

679 'references': {'data': [{'id': '10.2307/3888317', 'type': 'dois'}, 

680 {'id': '10.1086/204793', 'type': 'dois'}, 

681 {'id': '10.1086/338292', 'type': 'dois'}, 

682 {'id': '10.1111/arcm.12202', 'type': 'dois'}, 

683 {'id': '10.1006/jasc.2000.0638', 'type': 'dois'}, 

684 {'id': '10.2307/3888015', 'type': 'dois'}, 

685 {'id': '10.3213/2191-5784-10199', 'type': 'dois'}, 

686 {'id': '10.1016/j.jhevol.2005.06.007', 'type': 'dois'}, 

687 {'id': '10.1017/s0003598x00113298', 'type': 'dois'}]}, 

688 'citations': {'data': []}, 'parts': {'data': []}, 'partOf': {'data': []}, 

689 'versions': {'data': []}, 'versionOf': {'data': []}}} 

690 output = list() 

691 tabular_data = datacite_processor.csv_creator(data) 

692 if tabular_data: 

693 output.append(tabular_data) 

694 

695 expected_output = [ 

696 { 

697 'id': 'doi:10.34780/7510-t906', 

698 'title': 'The Forgotten Kingdom. New investigations in the prehistory of Eswatini', 

699 'author': 'Bader, Gregor D. [orcid:0000-0003-0621-9209]; Forrester, Bob; Ehlers, Lisa; Velliky, Elizabeth [orcid:0000-0002-3019-5377]', 

700 'pub_date': '2021', 

701 'venue': 'journal of global archaeology [issn:2701-5572]', 

702 'volume': '', 

703 'issue': '', 

704 'page': '2021-2021', 

705 'type': 'journal article', 

706 'publisher': 'Deutsches Archäologisches Institut [ror:041qv0h25]', 

707 'editor': '' 

708 } 

709 ] 

710 self.assertEqual(output,expected_output) 

711 

712 def test_csv_creator2(self): 

713 datacite_processor = DataciteProcessing() 

714 data = load_json(DATA, None) 

715 output = list() 

716 for item in data['data']: 

717 tabular_data = datacite_processor.csv_creator(item) 

718 if tabular_data: 

719 output.append(tabular_data) 

720 

721 expected_output = [ 

722 {'id': 'doi:10.5281/zenodo.8244010', 

723 'title': 'FIGURE 1A, B in Meeting the southern brothers: a revision of the Neotropical spider genus Hexapopha Platnick, Berniker & Víquez, 2014 (Araneae, Oonopidae)', 

724 'author': 'Feitosa, Níthomas M. [orcid:0000-0002-8013-9947]; Ott, Ricardo [orcid:0000-0001-7392-1415]; Bonaldo, Alexandre B. [orcid:0000-0002-8013-9947]', 

725 'pub_date': '2023-08-11', 

726 'venue': '', 

727 'volume': '', 

728 'issue': '', 

729 'page': '', 

730 'type': 'other', 

731 'publisher': 'Zenodo', 

732 'editor': ''}, 

733 {'id': 'doi:10.26050/wdcc/ar6.c6gmipicl', 

734 'title': 'IPCC DDC: IPSL IPSL-CM6A-LR model output prepared for CMIP6 GMMIP', 

735 'author': 'Boucher, Olivier [orcid:0000-0003-2328-5769]; Denvil, Sébastien [orcid:0000-0002-6715-3533]; Levavasseur, Guillaume [orcid:0000-0002-0801-0890]; Cozic, Anne [orcid:0000-0001-7543-3466]; Caubel, Arnaud [orcid:0000-0002-6210-8370]; Foujols, Marie-Alice [orcid:0000-0002-9747-4928]; Meurdesoif, Yann; Mellul, Lidia', 

736 'pub_date': '2023', 

737 'venue': '', 

738 'volume': '', 

739 'issue': '', 

740 'page': '', 

741 'type': 'dataset', 

742 'publisher': 'World Data Center for Climate (WDCC) at DKRZ [ror:03ztgj037]', 

743 'editor': 'Boucher, Olivier [orcid:0000-0003-2328-5769]; Denvil, Sébastien [orcid:0000-0002-6715-3533]; Levavasseur, Guillaume [orcid:0000-0002-0801-0890]; Cozic, Anne [orcid:0000-0001-7543-3466]; Caubel, Arnaud [orcid:0000-0002-6210-8370]; Foujols, Marie-Alice [orcid:0000-0002-9747-4928]; Meurdesoif, Yann; Mellul, Lidia'}, 

744 ] 

745 

746 self.assertEqual(output, expected_output) 

747 

748 def test_csv_creator_object(self): 

749 dcp = DataciteProcessing() 

750 doi_obj = "doi:10.1021/acs.jpclett.7b01097" 

751 expected_output = { 

752 'id': 'doi:10.1021/acs.jpclett.7b01097', 

753 'title': '', 

754 'author': '', 

755 'pub_date': '', 

756 'venue': '', 

757 'volume': '', 

758 'issue': '', 

759 'page': '', 

760 'type': '', 

761 'publisher': '', 

762 'editor': ''} 

763 

764 out = dcp.csv_creator({"id": doi_obj, "type": "dois", "attributes": {"doi": doi_obj}}) 

765 self.assertEqual(out, expected_output) 

766 

767 def test_get_publisher_name_invalid_publishers(self): 

768 dcp = DataciteProcessing() 

769 item1 = {"publisher": { 

770 "name":"(:unav)"} 

771 } 

772 item2 = {"publisher": { 

773 "name":":unav"} 

774 } 

775 item3 = {"publisher": { 

776 "name":":unkn"}} 

777 item4 = {"publisher": { 

778 "name":"(:unkn)"}} 

779 item5 = {"publisher": { 

780 "name":"Edo : [publisher not identified]mon han"}} 

781 item6 = {"publisher": { 

782 "name":"[place of publication not identified]: [pubisher not identified]" 

783 }} 

784 item7 = {"publisher": { 

785 "name":"unknown unknown" 

786 }} 

787 item8 = {"publisher": { 

788 "name":"[unknown] : [unknown]" 

789 }} 

790 item9 = {"publisher": { 

791 "name":"[unknown] : College of Pharmacists of British Columbia" 

792 }} 

793 item10 = {"publisher": { 

794 "name":"[Edinburgh]: [Unknown]" 

795 }} 

796 item11 = {"publisher": { 

797 "name":"Unknown, National University of Singapore" 

798 }} 

799 item12 = {"publisher": { 

800 "name":"Not provided." 

801 }} 

802 item13 = {"publisher": { 

803 "name":"Soleure, s.n." 

804 }} 

805 item14 = {"publisher": { 

806 "name":"[s.l. , s.n]" 

807 }} 

808 item15 = {"publisher": { 

809 "name":"[ s.l. : s.n.]" 

810 }} 

811 item16 = {"publisher": { 

812 "name":"s.n.]" 

813 }} 

814 item17 = {"publisher": { 

815 "name":"Information not available, contact SND for more information" 

816 }} 

817 item18 = {"publisher": { 

818 "name":"Publisher Not Specified" 

819 }} 

820 result1 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item1['publisher']) 

821 result2 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item2['publisher']) 

822 result3 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item3['publisher']) 

823 result4 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item4['publisher']) 

824 result5 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item5['publisher']) 

825 result6 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item6['publisher']) 

826 result7 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item7['publisher']) 

827 result8 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item8['publisher']) 

828 result9 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item9['publisher']) 

829 result10 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item10['publisher']) 

830 result11 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item11['publisher']) 

831 result12 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item12['publisher']) 

832 result13 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item13['publisher']) 

833 result14 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item14['publisher']) 

834 result15 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item15['publisher']) 

835 result16 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item16['publisher']) 

836 result17 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item17['publisher']) 

837 result18 = dcp.get_publisher('doi:10.11578/dc.20191106.1', item18['publisher']) 

838 expected_res = "" 

839 expected_res9 = "[unknown] : College of Pharmacists of British Columbia" 

840 expected_res10 = "[Edinburgh]: [Unknown]" 

841 expected_res11 = "Unknown, National University of Singapore" 

842 expected_res13 = "Soleure, s.n." 

843 

844 self.assertEqual(result1, expected_res) 

845 self.assertEqual(result2, expected_res) 

846 self.assertEqual(result3, expected_res) 

847 self.assertEqual(result4, expected_res) 

848 self.assertEqual(result5, expected_res) 

849 self.assertEqual(result6, expected_res) 

850 self.assertEqual(result7, expected_res) 

851 self.assertEqual(result8, expected_res) 

852 self.assertEqual(result9, expected_res9) 

853 self.assertEqual(result10, expected_res10) 

854 self.assertEqual(result11, expected_res11) 

855 self.assertEqual(result12, expected_res) 

856 self.assertEqual(result13, expected_res13) 

857 self.assertEqual(result14, expected_res) 

858 self.assertEqual(result15, expected_res) 

859 self.assertEqual(result16, expected_res) 

860 self.assertEqual(result17, expected_res) 

861 self.assertEqual(result18, expected_res) 

862 

863 def test_get_publisher_name_publisher_mapping(self): 

864 

865 item = { 

866 "doi": "10.1594/pangaea.777220", 

867 "publisher": {"name":"PANGAEA - Data Publisher for Earth & Environmental Science"} 

868 } 

869 doi = '10.1594/pangaea.777220' 

870 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None, 

871 publishers_filepath_dc=PUBLISHERS_MAPPING) 

872 publisher_name = datacite_processor.get_publisher(doi, item) 

873 self.assertEqual(publisher_name, 'PANGAEA - Data Publisher for Earth & Environmental Science [datacite:2]') 

874 

875 def test_get_publisher_name_from_prefix(self): 

876 # The item has no declared publisher, but the DOI prefix is in the publishers' mapping 

877 item = { 

878 'publisher': '', 

879 'doi': '10.12753/sample_test_doi_with_known_prefix', 

880 } 

881 doi = '10.12753/sample_test_doi_with_known_prefix' 

882 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None, 

883 publishers_filepath_dc=PUBLISHERS_MAPPING) 

884 publisher_name = datacite_processor.get_publisher(doi, item) 

885 self.assertEqual(publisher_name, 'ADLRO [datacite:3]') 

886 

887 def test_to_validated_id_list(self): 

888 dcp = DataciteProcessing() 

889 # CASE_1: is valid 

890 inp_1 = {'id': 'doi:10.11578/1367552', 'schema': 'doi'} 

891 out_1 = dcp.to_validated_id_list(inp_1) 

892 exp_1 = ['doi:10.11578/1367552'] 

893 self.assertEqual(out_1, exp_1) 

894 dcp.storage_manager.delete_storage() 

895 

896 dcp = DataciteProcessing() 

897 # CASE_2: is invalid 

898 inp_2 = {'id': 'doi:10.11578/136755', 'schema': 'doi'} 

899 out_2 = dcp.to_validated_id_list(inp_2) 

900 exp_2 = [] 

901 self.assertEqual(out_2, exp_2) 

902 

903 dcp = DataciteProcessing() 

904 # CASE_3: valid orcid 

905 inp_3 = {'id': 'orcid:0000-0002-9286-2630', 'schema': 'orcid'} 

906 out_3 = dcp.to_validated_id_list(inp_3) 

907 exp_3 = ['orcid:0000-0002-9286-2630'] 

908 self.assertEqual(out_3, exp_3) 

909 dcp.storage_manager.delete_storage() 

910 

911 dcp = DataciteProcessing() 

912 # CASE_4: invalid doi in self._redis_values_br 

913 inp_4 = {'id': 'doi:10.1089/bsp.2008.002', 'schema': 'doi'} 

914 dcp._redis_values_br.append(inp_4['id']) 

915 out_4 = dcp.to_validated_id_list(inp_4) 

916 exp_4 = ['doi:10.1089/bsp.2008.002'] 

917 self.assertEqual(out_4, exp_4) 

918 value = dcp.tmp_doi_m.storage_manager.get_value('doi:10.1089/bsp.2008.002') 

919 self.assertEqual(value, True) 

920 dcp.storage_manager.delete_storage() 

921 

922 def test_to_validated_id_list_redis(self): 

923 dcp = DataciteProcessing(testing=True) 

924 # CASE_1: is valid 

925 inp_1 = {'id': 'doi:10.11578/1367552', 'schema': 'doi'} 

926 out_1 = dcp.to_validated_id_list(inp_1) 

927 exp_1 = ['doi:10.11578/1367552'] 

928 self.assertEqual(out_1, exp_1) 

929 dcp.storage_manager.delete_storage() 

930 

931 dcp = DataciteProcessing(testing=True) 

932 # CASE_2: is invalid 

933 inp_2 = {'id': 'doi:10.11578/136755', 'schema': 'doi'} 

934 out_2 = dcp.to_validated_id_list(inp_2) 

935 exp_2 = [] 

936 self.assertEqual(out_2, exp_2) 

937 

938 dcp = DataciteProcessing(testing=True) 

939 # CASE_3: valid orcid 

940 inp_3 = {'id': 'orcid:0000-0002-9286-2630', 'schema': 'orcid'} 

941 out_3 = dcp.to_validated_id_list(inp_3) 

942 exp_3 = ['orcid:0000-0002-9286-2630'] 

943 self.assertEqual(out_3, exp_3) 

944 dcp.storage_manager.delete_storage() 

945 

946 dcp = DataciteProcessing(testing=True) 

947 # CASE_4: invalid doi in self._redis_values_br 

948 inp_4 = {'id': 'doi:10.1089/bsp.2008.002', 'schema': 'doi'} 

949 dcp._redis_values_br.append(inp_4['id']) 

950 out_4 = dcp.to_validated_id_list(inp_4) 

951 exp_4 = ['doi:10.1089/bsp.2008.002'] 

952 self.assertEqual(out_4, exp_4) 

953 value = dcp.tmp_doi_m.storage_manager.get_value('doi:10.1089/bsp.2008.002') 

954 self.assertEqual(value, True) 

955 dcp.storage_manager.delete_storage() 

956 

957 def test_find_datacite_orcid(self): 

958 dcp = DataciteProcessing(testing=True) 

959 inp = ["https://orcid.org/0000-0002-9286-2630"] 

960 out = dcp.find_datacite_orcid(inp) 

961 exp_out = "orcid:0000-0002-9286-2630" 

962 self.assertEqual(out, exp_out) 

963 

964 inp_invalid_id = ["https://orcid.org/0000-0002-9286-2631"] 

965 out_invalid_id = dcp.find_datacite_orcid(inp_invalid_id) 

966 exp_invalid_id = "" 

967 self.assertEqual(out_invalid_id, exp_invalid_id) 

968 

969 dcp.orcid_m.storage_manager.delete_storage() 

970 

971 # set a valid id as invalid in storage, so to check that the api check is 

972 # avoided if the info is already in storage 

973 dcp = DataciteProcessing(testing=True) 

974 dcp.orcid_m.storage_manager.set_value("orcid:0000-0002-9286-2630", False) 

975 inp = ["https://orcid.org/0000-0002-9286-2630"] 

976 out = dcp.find_datacite_orcid(inp) 

977 exp_out = "" 

978 self.assertEqual(out, exp_out) 

979 dcp.orcid_m.storage_manager.delete_storage() 

980 

981 dcp = DataciteProcessing(testing=True) 

982 dcp.orcid_m.storage_manager.set_value("orcid:0000-0002-9286-2631", True) 

983 inp = ["https://orcid.org/0000-0002-9286-2631"] 

984 out = dcp.find_datacite_orcid(inp) 

985 exp_out = "orcid:0000-0002-9286-2631" 

986 self.assertEqual(out, exp_out) 

987 dcp.orcid_m.storage_manager.delete_storage() 

988 

    def test_find_datacite_orcid_api_disabled_not_in_index(self):
        """If the ORCID API is disabled and the ORCID is not in the DOI->ORCID index, it must not be resolved."""
        dp = DataciteProcessing(use_orcid_api=False)
        test_doi = "10.9999/noindex"
        candidate = "0000-0003-4082-1500"  # syntactically valid ORCID

        out = dp.find_datacite_orcid([candidate], test_doi)

        self.assertEqual(out, "")
        # Nothing must have been written to the temporary ORCID storage
        self.assertIsNone(dp.tmp_orcid_m.storage_manager.get_value(f"orcid:{candidate}"))

        dp.storage_manager.delete_storage()

1002 

    def test_find_datacite_orcid_api_disabled_from_index(self):
        """If the ORCID API is disabled but the ORCID is in the DOI->ORCID index, it must be resolved and saved to the temporary storage."""
        dp = DataciteProcessing(use_orcid_api=False)
        test_doi = "10.1234/test"
        test_orcid = "0000-0002-1234-5678"
        test_name = "Smith, John"

        # populate the DOI->ORCID index so the lookup can succeed without the API
        dp.orcid_index.data = {test_doi: {f"{test_name} [orcid:{test_orcid}]"}}

        out = dp.find_datacite_orcid([test_orcid], test_doi)

        self.assertEqual(out, f"orcid:{test_orcid}")
        # the resolved id must also have been cached as valid in the tmp storage
        self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(f"orcid:{test_orcid}"))

        dp.storage_manager.delete_storage()

1019 

    def test_get_venue_container(self):
        """get_venue_name must take the venue from the 'container' metadata and
        lowercase its title; the container here has no ISSN-type identifier, so
        no bracketed id is appended. The fixture is a full real DataCite record
        (DOI 10.13117/gem.dataset.ghea-v1.0) kept verbatim."""
        item={'container': {'type': 'DataRepository', 'title': 'GEM Datasets'}, 'reason': None, 'prefix': '10.13117', 'citationsOverTime': [], 'registered': '2014-03-24T10:51:17Z', 'language': 'en', 'source': None, 'suffix': 'gem.dataset.ghea-v1.0', 'relatedItems': [], 'descriptions': [{'descriptionType': 'SeriesInformation', 'description': 'GEM Datasets'}, {'descriptionType': 'SeriesInformation', 'description': 'GEM Catalogues'}], 'sizes': ['1011 records'], 'versionOfCount': 0, 'relatedIdentifiers': [{'relationType': 'IsIdenticalTo', 'relatedIdentifier': 'http://emidius.eu/GEH/', 'relatedIdentifierType': 'URL'}, {'relationType': 'IsDocumentedBy', 'relatedIdentifier': '10.13117/gem.gegd.tr2013.01', 'relatedIdentifierType': 'DOI'}, {'relationType': 'Compiles', 'relatedIdentifier': '10.13117/gem.dataset.ghec-v1.0', 'relatedIdentifierType': 'DOI'}, {'relationType': 'References', 'relatedIdentifier': '10.6092/ingv.it-ahead', 'relatedIdentifierType': 'DOI'}], 'created': '2014-03-24T10:51:17Z', 'dates': [{'date': '1008-04-27/1903-12-28', 'dateType': 'Collected'}, {'date': '2013-06-01', 'dateType': 'Available'}, {'date': '2010-11-01/2013-03-31', 'dateType': 'Created'}, {'date': '2013', 'dateType': 'Issued'}], 'published': '2013', 'geoLocations': [], 'partCount': 0, 'publicationYear': 2013, 'partOfCount': 0, 'updated': '2020-07-26T16:07:36Z', 'formats': ['text/html', 'image/svg+xml', 'application/pdf'], 'fundingReferences': [], 'creators': [{'nameType': 'Personal', 'affiliation': [], 'givenName': 'Paola', 'familyName': 'Albini', 'name': 'Albini, Paola', 'nameIdentifiers': [{'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 'nameIdentifier': 'https://orcid.org/0000-0003-4149-9760'}]}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Roger M.W.', 'familyName': 'Musson', 'name': 'Musson, Roger M.W.', 'nameIdentifiers': [{'nameIdentifierScheme': 'ISNI', 'nameIdentifier': '0000 0000 5424 2727'}]}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Antonio A.', 'familyName': 'Gomez Capera', 'name': 'Gomez Capera, Antonio A.', 'nameIdentifiers': []}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Mario', 'familyName': 'Locati', 'name': 'Locati, Mario', 'nameIdentifiers': [{'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 'nameIdentifier': 'https://orcid.org/0000-0003-2185-3267'}]}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Andrea', 'familyName': 'Rovida', 'name': 'Rovida, Andrea', 'nameIdentifiers': [{'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 'nameIdentifier': 'https://orcid.org/0000-0001-6147-9981'}]}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Massimiliano', 'familyName': 'Stucchi', 'name': 'Stucchi, Massimiliano', 'nameIdentifiers': [{'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 'nameIdentifier': 'https://orcid.org/0000-0002-5870-1542'}]}, {'nameType': 'Personal', 'affiliation': [], 'givenName': 'Daniele', 'familyName': 'Viganò', 'name': 'Viganò, Daniele', 'nameIdentifiers': [{'nameIdentifierScheme': 'ORCID', 'schemeUri': 'https://orcid.org', 'nameIdentifier': 'https://orcid.org/0000-0003-2713-8387'}]}], 'schemaVersion': 'http://datacite.org/schema/kernel-3', 'versionCount': 0, 'metadataVersion': 3, 'citationCount': 0, 'types': {'schemaOrg': 'Dataset', 'resourceTypeGeneral': 'Dataset', 'citeproc': 'dataset', 'bibtex': 'misc', 'ris': 'DATA', 'resourceType': 'Dataset/Earthquakes'}, 'isActive': True, 'viewsOverTime': [], 'identifiers': [], 'subjects': [{'subject': 'Earthquake history'}, {'subject': 'Historical seismology'}, {'subject': 'Catalogue'}, {'subject': 'Archive'}, {'subject': 'Macroseismic data'}, {'subject': 'GEM'}], 'titles': [{'title': 'GEM Global Historical Earthquake Archive'}], 'url': 'https://www.emidius.eu/GEH/', 'downloadCount': 0, 'rightsList': [{'rights': 'Copyright © 2013 GEM Foundation, Albini, P., R.M.W. Musson, A.A. Gomez Capera, M. Locati, A. Rovida, M. Stucchi, and D. Viganò'}, {'rightsUri': 'http://creativecommons.org/licenses/by-nc-sa/4.0', 'rights': 'Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International'}], 'contentUrl': None, 'contributors': [{'affiliation': [], 'name': 'Istituto Nazionale Di Geofisica E Vulcanologia (INGV)', 'nameIdentifiers': [], 'contributorType': 'DataCollector'}, {'affiliation': [], 'name': 'British Geological Survey (BGS)', 'nameIdentifiers': [], 'contributorType': 'DataCollector'}], 'referenceCount': 1, 'viewCount': 0, 'downloadsOverTime': [], 'doi': '10.13117/gem.dataset.ghea-v1.0', 'publisher': {'name': 'GEM Foundation, Pavia, Italy'}, 'version': '1.0', 'state': 'findable', 'alternateIdentifiers': []}

        # empty row of the shape csv_creator would normally pass in
        row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '',
               'type': 'dataset', 'publisher': '', 'editor': ''}
        datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None,
                                                publishers_filepath_dc=PUBLISHERS_MAPPING)
        venue_name = datacite_processor.get_venue_name(item, row)
        self.assertEqual(venue_name, 'gem datasets')

1028 

1029 def test_get_venue_name_no_container(self): 

1030 item = { 

1031 "container": {}, 

1032 "relatedIdentifiers": [ 

1033 { 

1034 "relationType": "IsSupplementTo", 

1035 "resourceTypeGeneral": "Text", 

1036 "relatedIdentifier": "10.4230/LIPIcs.ECOOP.2023.39", 

1037 "relatedIdentifierType": "DOI" 

1038 }, 

1039 { 

1040 "relationType": "IsPartOf", 

1041 "relatedIdentifier": "2509-8195", 

1042 "relatedIdentifierType": "ISSN" 

1043 }, 

1044 ] 

1045 } 

1046 row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 

1047 'type': 'journal article', 'publisher': '', 'editor': ''} 

1048 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None, 

1049 publishers_filepath_dc=PUBLISHERS_MAPPING) 

1050 venue_name = datacite_processor.get_venue_name(item, row) 

1051 self.assertEqual(venue_name, '[issn:2509-8195]') 

1052 

1053 def test_get_venue_name_with_ISSN(self): 

1054 item = { 

1055 "container": {"type": "Series", "identifier": "2509-8195", "identifierType": "ISSN", "title": "DARTS", 

1056 "volume": "Vol. 9", "firstPage": "pages 25:1", "lastPage": "25:2"} 

1057 } 

1058 row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 

1059 'type': 'journal article', 'publisher': '', 'editor': ''} 

1060 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None, 

1061 publishers_filepath_dc=PUBLISHERS_MAPPING) 

1062 venue_name = datacite_processor.get_venue_name(item, row) 

1063 self.assertEqual(venue_name, 

1064 "darts [issn:2509-8195]") 

1065 # ISSN with wrong number of digits 

1066 item1 = { 

1067 "container": {"type": "Journal", "issue": "18", "title": "Geophysical Research Letters", "volume": "41", 

1068 "lastPage": "6451", "firstPage": "6443", "identifier": "00948276", "identifierType": "ISSN"} 

1069 } 

1070 row1 = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 

1071 'type': 'journal article', 'publisher': '', 'editor': ''} 

1072 venue_name1 = datacite_processor.get_venue_name(item1, row1) 

1073 self.assertEqual(venue_name1, 

1074 "geophysical research letters [issn:0094-8276]") 

1075 

1076 def test_get_pages(self): 

1077 item = { 

1078 "container": {"type": "Journal", "issue": "7", "title": "Global Biogeochemical Cycles", "volume": "29", 

1079 "lastPage": "1013", "firstPage": "994", "identifier": "08866236", 

1080 "identifierType": "ISSN"} 

1081 } 

1082 datacite_processor = DataciteProcessing(orcid_index=None, 

1083 publishers_filepath_dc=PUBLISHERS_MAPPING) 

1084 pages = datacite_processor.get_datacite_pages(item) 

1085 self.assertEqual(pages, '994-1013') 

1086 

1087 def test_get_pages_right_letter(self): 

1088 item = { 

1089 "container": {"type": "Journal", "issue": "4", "title": "Ecosphere", "volume": "10", 

1090 "firstPage": "e02701", "identifier": "2150-8925", "identifierType": "ISSN"} 

1091 } 

1092 datacite_processor = DataciteProcessing(orcid_index=None, 

1093 publishers_filepath_dc=PUBLISHERS_MAPPING) 

1094 pages = datacite_processor.get_datacite_pages(item) 

1095 self.assertEqual(pages, 'e02701-e02701') 

1096 

1097 def test_get_pages_wrong_letter(self): 

1098 item = { 

1099 "relatedIdentifiers": [ 

1100 {"relationType": "IsPartOf", 

1101 "relatedIdentifier": "0094-2405", 

1102 "relatedIdentifierType": "ISSN", 

1103 "firstPage": "583b", 

1104 "lastPage": "584"}, 

1105 {"relationType": "References", 

1106 "relatedIdentifier": "10.1016/j.ecl.2014.08.007", 

1107 "relatedIdentifierType": "DOI"} 

1108 ] 

1109 } 

1110 datacite_processor = DataciteProcessing(orcid_index=None, 

1111 publishers_filepath_dc=PUBLISHERS_MAPPING) 

1112 pages = datacite_processor.get_datacite_pages(item) 

1113 self.assertEqual(pages, '583-584') 

1114 

1115 def test_get_pages_roman_letters(self): 

1116 item = { 

1117 "relatedIdentifiers": [ 

1118 {"relationType": "IsPartOf", 

1119 "relatedIdentifier": "0094-2405", 

1120 "relatedIdentifierType": "ISSN", 

1121 "firstPage": "iv", 

1122 "lastPage": "l"}, 

1123 {"relationType": "References", 

1124 "relatedIdentifier": "10.1016/j.ecl.2014.08.007", 

1125 "relatedIdentifierType": "DOI"} 

1126 ] 

1127 } 

1128 datacite_processor = DataciteProcessing(orcid_index=None, 

1129 publishers_filepath_dc=PUBLISHERS_MAPPING) 

1130 pages = datacite_processor.get_datacite_pages(item) 

1131 self.assertEqual(pages, 'iv-l') 

1132 

1133 def test_get_pages_non_roman_letters(self): 

1134 item = { 

1135 "relatedIdentifiers": [ 

1136 {"relationType": "IsPartOf", 

1137 "relatedIdentifier": "0094-2405", 

1138 "relatedIdentifierType": "ISSN", 

1139 "firstPage": "kj", 

1140 "lastPage": "hh"}, 

1141 {"relationType": "References", 

1142 "relatedIdentifier": "10.1016/j.ecl.2014.08.007", 

1143 "relatedIdentifierType": "DOI"} 

1144 ] 

1145 } 

1146 datacite_processor = DataciteProcessing(orcid_index=None, 

1147 publishers_filepath_dc=PUBLISHERS_MAPPING) 

1148 pages = datacite_processor.get_datacite_pages(item) 

1149 self.assertEqual(pages, '') 

1150 

1151 def test_get_pages_with_strings_no_venue_id(self): 

1152 item = {'container': { 

1153 'firstPage': '13. Studi umanistici. Serie Antichistica', 

1154 'type': 'Series', 

1155 'title': 'Collana Studi e Ricerche' 

1156 }} 

1157 datacite_processor = DataciteProcessing(orcid_index=None, doi_csv=None, 

1158 publishers_filepath_dc=PUBLISHERS_MAPPING) 

1159 pages = datacite_processor.get_datacite_pages(item) 

1160 self.assertEqual(pages, '') 

1161 

1162 def test_venue_id_cont_and_rel_id(self): 

1163 items = {'data': [ 

1164 { 

1165 "id": "10.1002/2014jd022411", 

1166 "type": "dois", 

1167 "attributes": { 

1168 "doi": "10.1002/2014jd022411", 

1169 "identifiers": [], 

1170 "titles": [{ 

1171 "title": "\n Assessing the magnitude of CO\n \n flux uncertainty in atmospheric CO\n \n records using products from NASA's Carbon Monitoring Flux Pilot Project\n "}], 

1172 "publisher": { 

1173 "name":"(:unav)"}, 

1174 "container": {"type": "Journal", "issue": "2", 

1175 "title": "Journal of Geophysical Research: Atmospheres", "volume": "120", 

1176 "lastPage": "765", "firstPage": "734", "identifier": "2169897X", 

1177 "identifierType": "ISSN"}, 

1178 "types": {"ris": "JOUR", "bibtex": "article", "citeproc": "article-journal", 

1179 "schemaOrg": "ScholarlyArticle", "resourceType": "JournalArticle", 

1180 "resourceTypeGeneral": "Text"}, 

1181 "relatedIdentifiers": [{"relationType": "IsPartOf", "relatedIdentifier": "2169897X", 

1182 "resourceTypeGeneral": "Collection", "relatedIdentifierType": "ISSN"}] 

1183 } 

1184 } 

1185 ]} 

1186 datacite_processor = DataciteProcessing(orcid_index=None, 

1187 publishers_filepath_dc=PUBLISHERS_MAPPING) 

1188 output = list() 

1189 for item in items['data']: 

1190 output.append(datacite_processor.csv_creator(item)) 

1191 expected_output = [{'id': 'doi:10.1002/2014jd022411', 

1192 'title': "Assessing the magnitude of CO flux uncertainty in atmospheric CO records using products from NASA's Carbon Monitoring Flux Pilot Project", 

1193 'author': '', 'pub_date': '', 

1194 'venue': 'journal of geophysical research: atmospheres [issn:2169-897X]', 

1195 'volume': '120', 'issue': '2', 'page': '734-765', 'type': 'journal article', 

1196 'publisher': 'Wiley [datacite:1]', 'editor': ''}] 

1197 self.assertEqual(output, expected_output) 

1198 

1199 def test_venue_id_cont_and_rel_id_no_types(self): 

1200 # the absence of publication types specified excludes the possibility 

1201 # to assert whether the container can have an ISSN or not 

1202 items = {'data': [ 

1203 { 

1204 "id": "10.1002/2014jd022411", 

1205 "type": "dois", 

1206 "attributes": { 

1207 "doi": "10.1002/2014jd022411", 

1208 "identifiers": [], 

1209 "titles": [{ 

1210 "title": "\n Assessing the magnitude of CO\n \n flux uncertainty in atmospheric CO\n \n records using products from NASA's Carbon Monitoring Flux Pilot Project\n "}], 

1211 "publisher": {"name":"(:unav)"}, 

1212 "container": {"type": "Journal", "issue": "2", 

1213 "title": "Journal of Geophysical Research: Atmospheres", "volume": "120", 

1214 "lastPage": "765", "firstPage": "734", "identifier": "2169897X", 

1215 "identifierType": "ISSN"}, 

1216 "relatedIdentifiers": [{"relationType": "IsPartOf", "relatedIdentifier": "2169897X", 

1217 "resourceTypeGeneral": "Collection", "relatedIdentifierType": "ISSN"}] 

1218 } 

1219 } 

1220 ]} 

1221 datacite_processor = DataciteProcessing(orcid_index=None, 

1222 publishers_filepath_dc=PUBLISHERS_MAPPING) 

1223 output = list() 

1224 for item in items['data']: 

1225 output.append(datacite_processor.csv_creator(item)) 

1226 expected_output = [{'id': 'doi:10.1002/2014jd022411', 

1227 'title': "Assessing the magnitude of CO flux uncertainty in atmospheric CO records using products from NASA's Carbon Monitoring Flux Pilot Project", 

1228 'author': '', 'pub_date': '', 'venue': 'journal of geophysical research: atmospheres', 

1229 'volume': '120', 'issue': '2', 'page': '734-765', 'type': '', 

1230 'publisher': 'Wiley [datacite:1]', 'editor': ''}] 

1231 self.assertEqual(output, expected_output) 

1232 

    def test_get_agents_strings_list_overlapping_surnames(self):
        """Authors whose surnames overlap must still be matched to the right index
        entry: only the author whose full 'Family, Given' string appears in the
        ORCID index gets the [orcid:...] suffix."""
        # The surname of one author is included in the surname of another.
        entity_attr_dict = {
            "creators": [
                {"name": "Olivarez Lyle, Annette",
                 "givenName": "Annette",
                 "familyName": "Olivarez Lyle",
                 "affiliation": [],
                 "nameIdentifiers": []
                 },
                {"name": "Lyle, Mitchell W",
                 "givenName": "Mitchell W",
                 "familyName": "Lyle",
                 "nameIdentifiers": [
                     {"schemeUri": "https://orcid.org",
                      "nameIdentifier": "https://orcid.org/0000-0002-0861-0511",
                      "nameIdentifierScheme": "ORCID"}
                 ],
                 "affiliation": []
                 }
            ],
            "contributors": []
        }

        datacite_processor = DataciteProcessing(None, None)
        authors_list = datacite_processor.add_authors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.1594/pangaea.777220")
        editors_list = datacite_processor.add_editors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.1594/pangaea.777220")
        agents_list = authors_list + editors_list
        # Inject a hand-built DOI -> ORCID index so the match is deterministic.
        csv_manager = CSVManager()
        csv_manager.data = {'10.1594/pangaea.777220': {'Lyle, Mitchell W [0000-0002-0861-0511]'}}
        datacite_processor.orcid_index = csv_manager
        authors_strings_list, editors_strings_list = datacite_processor.get_agents_strings_list(
            '10.1594/pangaea.777220', agents_list)

        expected_authors_list = ['Olivarez Lyle, Annette',
                                 'Lyle, Mitchell W [orcid:0000-0002-0861-0511]']
        expected_editors_list = []
        self.assertEqual((authors_strings_list, editors_strings_list), (expected_authors_list, expected_editors_list))

1273 

    def test_get_agents_strings_list(self):
        """Creators are rendered as 'Family, Given' author strings (with an
        [orcid:...] suffix when a nameIdentifier carries one) and contributors
        with contributorType 'Editor' are rendered as editor strings."""
        entity_attr_dict = {
            "doi": "10.1002/2014jd022411",
            "creators": [
                {"name": "Ott, Lesley E.", "nameType": "Personal", "givenName": "Lesley E.", "familyName": "Ott",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Pawson, Steven", "nameType": "Personal", "givenName": "Steven", "familyName": "Pawson",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Collatz, George J.", "nameType": "Personal", "givenName": "George J.",
                 "familyName": "Collatz", "affiliation": [], "nameIdentifiers": []},
                {"name": "Gregg, Watson W.", "nameType": "Personal", "givenName": "Watson W.", "familyName": "Gregg",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Menemenlis, Dimitris", "nameType": "Personal", "givenName": "Dimitris",
                 "familyName": "Menemenlis", "affiliation": [], "nameIdentifiers": [
                    {"schemeUri": "https://orcid.org", "nameIdentifier": "https://orcid.org/0000-0001-9940-8409",
                     "nameIdentifierScheme": "ORCID"}]},
                {"name": "Brix, Holger", "nameType": "Personal", "givenName": "Holger", "familyName": "Brix",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Rousseaux, Cecile S.", "nameType": "Personal", "givenName": "Cecile S.",
                 "familyName": "Rousseaux", "affiliation": [], "nameIdentifiers": []},
                {"name": "Bowman, Kevin W.", "nameType": "Personal", "givenName": "Kevin W.", "familyName": "Bowman",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Liu, Junjie", "nameType": "Personal", "givenName": "Junjie", "familyName": "Liu",
                 "affiliation": [], "nameIdentifiers": []},
                {"name": "Eldering, Annmarie", "nameType": "Personal", "givenName": "Annmarie",
                 "familyName": "Eldering", "affiliation": [], "nameIdentifiers": []},
                {"name": "Gunson, Michael R.", "nameType": "Personal", "givenName": "Michael R.",
                 "familyName": "Gunson", "affiliation": [], "nameIdentifiers": []},
                {"name": "Kawa, Stephan R.", "nameType": "Personal", "givenName": "Stephan R.", "familyName": "Kawa",
                 "affiliation": [], "nameIdentifiers": []}],
            "contributors": [{
                'name': 'AKMB-News: Informationen Zu Kunst, Museum Und Bibliothek',
                'nameType': 'Personal',
                'givenName': 'Museum Und Bibliothek',
                'familyName': 'AKMB-News: Informationen Zu Kunst',
                'affiliation': [],
                'contributorType': 'Editor',
                'nameIdentifiers': []}]}

        datacite_processor = DataciteProcessing()
        authors_list = datacite_processor.add_authors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.1002/2014jd022411")
        editors_list = datacite_processor.add_editors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.1002/2014jd022411")
        agents_list = authors_list + editors_list
        authors_strings_list, editors_strings_list = datacite_processor.get_agents_strings_list('10.1002/2014jd022411',
                                                                                                agents_list)
        expected_authors_list = ['Ott, Lesley E.', 'Pawson, Steven', 'Collatz, George J.', 'Gregg, Watson W.',
                                 'Menemenlis, Dimitris [orcid:0000-0001-9940-8409]', 'Brix, Holger',
                                 'Rousseaux, Cecile S.', 'Bowman, Kevin W.', 'Liu, Junjie', 'Eldering, Annmarie',
                                 'Gunson, Michael R.', 'Kawa, Stephan R.']
        expected_editors_list = ['AKMB-News: Informationen Zu Kunst, Museum Und Bibliothek']

        self.assertEqual(authors_strings_list, expected_authors_list)
        self.assertEqual(editors_strings_list, expected_editors_list)

1329 

    def test_get_agents_strings_list_same_family(self):
        """Two authors sharing the same family name and given-name initials must
        not be confused: only the one carrying the ORCID nameIdentifier gets the
        [orcid:...] suffix."""
        # Two authors have the same family name and the same given name initials
        entity_attr_dict = {
            "creators": [
                {"name": "Schulz, Heide N",
                 "nameType": "Personal",
                 "givenName": "Heide N",
                 "familyName": "Schulz",
                 "nameIdentifiers":
                     [
                         {"schemeUri": "https://orcid.org", "nameIdentifier": "https://orcid.org/0000-0003-1445-0291",
                          "nameIdentifierScheme": "ORCID"}
                     ],
                 "affiliation": []},
                {"name": "Schulz, Horst D",
                 "nameType": "Personal",
                 "givenName": "Horst D",
                 "familyName": "Schulz",
                 "affiliation": [],
                 "nameIdentifiers": []}],
            "contributors": []
        }
        datacite_processor = DataciteProcessing()
        authors_list = datacite_processor.add_authors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.1594/pangaea.231378")
        editors_list = datacite_processor.add_editors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.1594/pangaea.231378")
        agents_list = authors_list + editors_list
        authors_strings_list, _ = datacite_processor.get_agents_strings_list('10.1594/pangaea.231378', agents_list)
        expected_authors_list = ['Schulz, Heide N [orcid:0000-0003-1445-0291]', 'Schulz, Horst D']
        self.assertEqual(authors_strings_list, expected_authors_list)

1361 

    def test_get_agents_strings_list_homonyms(self):
        """Exact homonyms (same family name AND same given name) must both be
        kept in the output, in their original order, with no ORCID attached."""
        # Two authors have the same family name and the same given name
        entity_attr_dict = {
            "creators":
                [
                    {"name": "Viorel, Cojocaru",
                     "nameType": "Personal",
                     "givenName": "Cojocaru",
                     "familyName": "Viorel",
                     "affiliation": [],
                     "nameIdentifiers": []},
                    {"name": "Viorel, Cojocaru",
                     "nameType": "Personal",
                     "givenName": "Cojocaru",
                     "familyName": "Viorel",
                     "affiliation": [],
                     "nameIdentifiers": []
                     },
                    {"name": "Ciprian, Panait",
                     "nameType": "Personal",
                     "givenName": "Panait",
                     "familyName": "Ciprian",
                     "affiliation": [],
                     "nameIdentifiers": []}
                ],
            "contributors": []
        }
        datacite_processor = DataciteProcessing(None, None)
        authors_list = datacite_processor.add_authors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.12753/2066-026x-14-246")
        editors_list = datacite_processor.add_editors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.12753/2066-026x-14-246")
        agents_list = authors_list + editors_list
        authors_strings_list, _ = datacite_processor.get_agents_strings_list('10.12753/2066-026x-14-246', agents_list)
        expected_authors_list = ['Viorel, Cojocaru', 'Viorel, Cojocaru', 'Ciprian, Panait']
        self.assertEqual(authors_strings_list, expected_authors_list)

1398 

    def test_get_agents_strings_list_inverted_names(self):
        """An author whose given name equals another author's family name must not
        cause a swapped match: each author string keeps its own name order."""
        # One author with an ORCID has as a name the surname of another
        entity_attr_dict = {
            "creators":
                [
                    {"name": "Viorel, Cojocaru",
                     "nameType": "Personal",
                     "givenName": "Cojocaru",
                     "familyName": "Viorel",
                     "affiliation": [],
                     "nameIdentifiers": []},

                    {"name": "Cojocaru, John",
                     "nameType": "Personal",
                     "givenName": "John",
                     "familyName": "Cojocaru",
                     "affiliation": [],
                     "nameIdentifiers": []
                     },
                    {"name": "Ciprian, Panait",
                     "nameType": "Personal",
                     "givenName": "Panait",
                     "familyName": "Ciprian",
                     "affiliation": [],
                     "nameIdentifiers": []}
                ],
            "contributors": []
        }
        # Note : 'Cojocaru, John' is not one of the authors of the item, the name was made up for testing purposes
        datacite_processor = DataciteProcessing(None, None)
        authors_list = datacite_processor.add_authors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.12753/2066-026x-14-246")
        editors_list = datacite_processor.add_editors_to_agent_list(entity_attr_dict, [],
                                                                    doi="doi:10.12753/2066-026x-14-246")
        agents_list = authors_list + editors_list
        authors_strings_list, _ = datacite_processor.get_agents_strings_list('10.12753/2066-026x-14-246', agents_list)
        expected_authors_list = ['Viorel, Cojocaru', 'Cojocaru, John', 'Ciprian, Panait']
        self.assertEqual(authors_strings_list, expected_authors_list)

1437 

    def test_get_agents_strings_list_api_disabled_no_index(self):
        """With the ORCID API OFF and an empty index, ORCIDs present only as
        nameIdentifiers must NOT appear in the output strings."""
        entity_attr_dict = {
            "creators": [
                {
                    "name": "Doe, Jane",
                    "nameType": "Personal",
                    "givenName": "Jane",
                    "familyName": "Doe",
                    "nameIdentifiers": [
                        {
                            "schemeUri": "https://orcid.org",
                            "nameIdentifier": "https://orcid.org/0000-0003-4082-1500",
                            "nameIdentifierScheme": "ORCID",
                        }
                    ],
                }
            ],
            "contributors": [],
        }

        dp = DataciteProcessing(use_orcid_api=False)  # empty index, no API access
        authors_list = dp.add_authors_to_agent_list(entity_attr_dict, [], doi="doi:10.9999/noindex")
        editors_list = dp.add_editors_to_agent_list(entity_attr_dict, [], doi="doi:10.9999/noindex")
        authors_strings, editors_strings = dp.get_agents_strings_list("10.9999/noindex", authors_list + editors_list)

        # The ORCID must NOT be appended in [] because there is no index and the API is OFF
        self.assertEqual(authors_strings, ["Doe, Jane"])
        self.assertEqual(editors_strings, [])

        dp.storage_manager.delete_storage()

1469 

1470 def test_find_datacite_orcid_with_index(self): 

1471 """Test ORCID validation using ORCID index before API validation""" 

1472 # Setup 

1473 test_doi = "10.1234/test123" 

1474 test_orcid = "0000-0002-1234-5678" 

1475 test_name = "Smith, John" 

1476 

1477 # Create DataciteProcessing instance with ORCID index 

1478 dp = DataciteProcessing() 

1479 dp.orcid_index.data = {test_doi: {f"{test_name} [orcid:{test_orcid}]"}} 

1480 

1481 # Test Case 1: ORCID found in index 

1482 inp_1 = [test_orcid] 

1483 out_1 = dp.find_datacite_orcid([test_orcid], test_doi) 

1484 exp_1 = f"orcid:{test_orcid}" 

1485 self.assertEqual(out_1, exp_1) 

1486 # Verify it was added to temporary storage 

1487 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(f"orcid:{test_orcid}")) 

1488 

1489 # Test Case 2: ORCID not in index but valid via API 

1490 inp_2 = ["0000-0003-4082-1500"] 

1491 out_2 = dp.find_datacite_orcid(["0000-0003-4082-1500"], test_doi) 

1492 exp_2 = "orcid:0000-0003-4082-1500" 

1493 self.assertEqual(out_2, exp_2) 

1494 

1495 # Test Case 3: ORCID not in index and invalid 

1496 inp_3 = ["0000-0000-0000-0000"] 

1497 out_3 = dp.find_datacite_orcid(["0000-0000-0000-0000"], test_doi) 

1498 exp_3 = "" 

1499 self.assertEqual(out_3, exp_3) 

1500 

1501 # Test Case 4: Valid ORCID but no DOI provided (retrocompatibilità) 

1502 inp_4 = [test_orcid] 

1503 out_4 = dp.find_datacite_orcid(inp_4) # No DOI 

1504 exp_4 = f"orcid:{test_orcid}" # Should still validate via API 

1505 self.assertEqual(out_4, exp_4) 

1506 

1507 # Test Case 5: Multiple ORCIDs, first one valid 

1508 inp_5 = [test_orcid, "0000-0000-0000-0000"] 

1509 out_5 = dp.find_datacite_orcid([test_orcid, "0000-0000-0000-0000"], test_doi) 

1510 exp_5 = f"orcid:{test_orcid}" 

1511 self.assertEqual(out_5, exp_5) 

1512 

1513 # Cleanup 

1514 dp.storage_manager.delete_storage() 

1515 

1516 def test_find_datacite_orcid_api_enabled_invalid_in_storage(self): 

1517 """API ON + ORCID marcato come invalid in storage: rifiuta subito (niente indice/API).""" 

1518 dp = DataciteProcessing(use_orcid_api=True, testing=True) 

1519 oid = "orcid:0000-0002-9286-2630" 

1520 dp.orcid_m.storage_manager.set_value(oid, False) 

1521 out = dp.find_datacite_orcid([oid.split(":")[1]], "10.9999/anything") 

1522 self.assertEqual(out, "") 

1523 # nessuna semina in tmp 

1524 self.assertIsNone(dp.tmp_orcid_m.storage_manager.get_value(oid)) 

1525 dp.orcid_m.storage_manager.delete_storage() 

1526 

1527 def test_find_datacite_orcid_api_enabled_from_redis_snapshot(self): 

1528 """API ON + storage/indice vuoti, ma ORCID presente nello snapshot Redis RA: accetta senza rete.""" 

1529 dp = DataciteProcessing(use_orcid_api=True) 

1530 oid = "orcid:0000-0003-4082-1500" 

1531 dp.update_redis_values(br=[], ra=[oid]) # simula snapshot 

1532 out = dp.find_datacite_orcid([oid.split(":")[1]], "10.9999/noindex") 

1533 self.assertEqual(out, oid) 

1534 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(oid)) 

1535 dp.storage_manager.delete_storage() 

1536 

1537 def test_find_datacite_orcid_api_disabled_from_redis_snapshot(self): 

1538 """API OFF + storage/indice vuoti, ORCID nello snapshot Redis RA: accetta offline.""" 

1539 dp = DataciteProcessing(use_orcid_api=False) 

1540 oid = "orcid:0000-0003-4082-1500" 

1541 dp.update_redis_values(br=[], ra=[oid]) 

1542 out = dp.find_datacite_orcid([oid.split(":")[1]], "10.9999/noindex") 

1543 self.assertEqual(out, oid) 

1544 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(oid)) 

1545 dp.storage_manager.delete_storage() 

1546 

1547 def test_find_datacite_orcid_api_disabled_in_storage(self): 

1548 """API OFF + ORCID già valido nello storage persistente: deve essere accettato.""" 

1549 dp = DataciteProcessing(use_orcid_api=False, testing=True) 

1550 oid = "orcid:0000-0003-4082-1500" 

1551 dp.orcid_m.storage_manager.set_value(oid, True) 

1552 out = dp.find_datacite_orcid([oid.split(":")[1]], "10.9999/any") 

1553 self.assertEqual(out, oid) 

1554 dp.orcid_m.storage_manager.delete_storage() 

1555 

1556 def test_find_datacite_orcid_index_with_normalized_doi(self): 

1557 """La lookup nell'indice deve funzionare anche se DOI è passato senza prefisso o viceversa.""" 

1558 dp = DataciteProcessing() 

1559 doi_no_prefix = "10.1234/test-norm" 

1560 doi_with_prefix = f"doi:{doi_no_prefix}" 

1561 orcid = "0000-0002-1234-5678" 

1562 dp.orcid_index.add_value(doi_with_prefix, f"Rossi, Mario [orcid:{orcid}]") 

1563 # DOI senza prefisso: deve matchare comunque 

1564 out = dp.find_datacite_orcid([orcid], doi_no_prefix) 

1565 self.assertEqual(out, f"orcid:{orcid}") 

1566 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(f"orcid:{orcid}")) 

1567 dp.storage_manager.delete_storage() 

1568 

    # PUBLISHER IDENTIFIERS (ROR / VIAF / Wikidata) tests

    def test_find_datacite_publisher_id_api_enabled_no_value_in_storage(self):
        """APIs ON + the publisher identifiers are not present in storage:
        ROR/VIAF/Wikidata identifiers are normalised to 'scheme:value' form."""
        dp = DataciteProcessing(use_ror_api=True, use_wikidata_api=True, use_viaf_api=True)
        publisher1 = {
            'publisherIdentifierScheme': 'ROR',
            'schemeUri': 'https://ror.org',
            'name': 'DataCite',
            'publisherIdentifier': 'https://ror.org/04wxnsj81'
        }
        publisher2 = {
            'publisherIdentifierScheme': 'VIAF',
            'schemeUri': 'https://viaf.org/',
            'name': 'Deutsches archäologisches Institut',
            'publisherIdentifier': 'http://viaf.org/viaf/148463773'
        }
        publisher3 = {
            'publisherIdentifierScheme': 'Wikidata',
            'schemeUri': 'https://www.wikidata.org/wiki/',
            'name': 'University of Tokyo',
            'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
        }

        # Expected normalised 'scheme:value' identifiers
        id1 = "ror:04wxnsj81"
        id2 = "viaf:148463773"
        id3 = "wikidata:Q7842"

        out1 = dp.get_publisher_id(publisher1)
        out2 = dp.get_publisher_id(publisher2)
        out3 = dp.get_publisher_id(publisher3)

        self.assertEqual(out1, id1)
        self.assertEqual(out2, id2)
        self.assertEqual(out3, id3)

        dp.storage_manager.delete_storage()

1605 

    def test_get_pubblisher_api_disabled_no_index(self):
        """With the APIs OFF and an empty index, the publisher ids present in the
        metadata must NOT appear in the output (only the bare publisher name).

        NOTE(review): 'pubblisher' in the method name is a typo kept for
        test-discovery stability.
        """
        publisher1 = {
            'publisherIdentifierScheme': 'ROR',
            'schemeUri': 'https://ror.org',
            'name': 'DataCite',
            'publisherIdentifier': 'https://ror.org/04wxnsj81'
        }
        publisher2 = {
            'publisherIdentifierScheme': 'VIAF',
            'schemeUri': 'https://viaf.org/',
            'name': 'Deutsches archäologisches Institut',
            'publisherIdentifier': 'http://viaf.org/viaf/148463773'
        }
        publisher3 = {
            'publisherIdentifierScheme': 'Wikidata',
            'schemeUri': 'https://www.wikidata.org/wiki/',
            'name': 'University of Tokyo',
            'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
        }

        dp = DataciteProcessing(use_ror_api=False, use_viaf_api=False, use_wikidata_api=False)  # empty index, no API
        publisher_row1 = dp.get_publisher('10.60804/bpmz-jb79', publisher1)
        publisher_row2 = dp.get_publisher('10.60804/bpmz-jb79', publisher2)
        publisher_row3 = dp.get_publisher('10.60804/bpmz-jb79', publisher3)

        # The id must NOT be appended in [] because there is no index and the APIs are OFF
        self.assertEqual(publisher_row1, "DataCite")
        self.assertEqual(publisher_row2, "Deutsches archäologisches Institut")
        self.assertEqual(publisher_row3, "University of Tokyo")

        dp.storage_manager.delete_storage()

1640 

    def test_find_datacite_publisher_id_api_enabled_invalid_in_storage(self):
        """APIs ON + ids flagged as invalid in storage: reject immediately
        (no index lookup, no API call)."""
        dp = DataciteProcessing(use_ror_api=True, use_wikidata_api=True, use_viaf_api=True)
        publisher1 = {
            'publisherIdentifierScheme': 'ROR',
            'schemeUri': 'https://ror.org',
            'name': 'DataCite',
            'publisherIdentifier': 'https://ror.org/04wxnsj81'
        }
        publisher2 = {
            'publisherIdentifierScheme': 'VIAF',
            'schemeUri': 'https://viaf.org/',
            'name': 'Deutsches archäologisches Institut',
            'publisherIdentifier': 'http://viaf.org/viaf/148463773'
        }
        publisher3 = {
            'publisherIdentifierScheme': 'Wikidata',
            'schemeUri': 'https://www.wikidata.org/wiki/',
            'name': 'University of Tokyo',
            'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
        }

        id1 = "ror:04wxnsj81"
        id2 = "viaf:148463773"
        id3 = "wikidata:Q7842"

        # Mark all three ids as invalid in the persistent storage
        dp.storage_manager.set_value(id1, False)
        dp.storage_manager.set_value(id2, False)
        dp.storage_manager.set_value(id3, False)

        out1 = dp.get_publisher_id(publisher1)
        out2 = dp.get_publisher_id(publisher2)
        out3 = dp.get_publisher_id(publisher3)

        self.assertEqual(out1, "")
        self.assertEqual(out2, "")
        self.assertEqual(out3, "")

        # Nothing must be seeded into the temporary storages
        self.assertIsNone(dp.tmp_viaf_m.storage_manager.get_value(id2))
        self.assertIsNone(dp.tmp_ror_m.storage_manager.get_value(id1))
        self.assertIsNone(dp.tmp_wikidata_m.storage_manager.get_value(id3))
        dp.storage_manager.delete_storage()

1684 

    def test_find_datacite_publisher_id_api_enabled_from_redis_snapshot(self):
        """APIs ON + empty storage/index, but ids present in the Redis RA
        snapshot: accept without touching the network."""
        dp = DataciteProcessing(use_viaf_api=True, use_wikidata_api=True, use_ror_api=True)

        id1 = "ror:04wxnsj81"
        id2 = "viaf:148463773"
        id3 = "wikidata:Q7842"

        publisher1 = {
            'publisherIdentifierScheme': 'ROR',
            'schemeUri': 'https://ror.org',
            'name': 'DataCite',
            'publisherIdentifier': 'https://ror.org/04wxnsj81'
        }
        publisher2 = {
            'publisherIdentifierScheme': 'VIAF',
            'schemeUri': 'https://viaf.org/',
            'name': 'Deutsches archäologisches Institut',
            'publisherIdentifier': 'http://viaf.org/viaf/148463773'
        }
        publisher3 = {
            'publisherIdentifierScheme': 'Wikidata',
            'schemeUri': 'https://www.wikidata.org/wiki/',
            'name': 'University of Tokyo',
            'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
        }

        dp.update_redis_values(br=[], ra=[id1, id2, id3])  # simulate a snapshot
        out1 = dp.get_publisher_id(publisher1)
        out2 = dp.get_publisher_id(publisher2)
        out3 = dp.get_publisher_id(publisher3)

        self.assertEqual(out1, id1)
        self.assertEqual(out2, id2)
        self.assertEqual(out3, id3)

        # Snapshot hits must be seeded into the temporary storages
        self.assertTrue(dp.tmp_ror_m.storage_manager.get_value(id1))
        self.assertTrue(dp.tmp_viaf_m.storage_manager.get_value(id2))
        self.assertTrue(dp.tmp_wikidata_m.storage_manager.get_value(id3))

        dp.storage_manager.delete_storage()

1726 

    def test_find_datacite_publisher_id_api_disabled_from_redis_snapshot(self):
        """APIs OFF + empty storage/index, publisher ids in the Redis RA
        snapshot: accept fully offline."""
        dp = DataciteProcessing(use_ror_api=False, use_viaf_api=False, use_wikidata_api=False)

        id1 = "ror:04wxnsj81"
        id2 = "viaf:148463773"
        id3 = "wikidata:Q7842"
        # NOTE(review): id4 is placed in the snapshot but never asserted on;
        # presumably crossref ids are outside get_publisher_id's schemes — confirm.
        id4 = "crossref:501100000739"

        publisher1 = {
            'publisherIdentifierScheme': 'ROR',
            'schemeUri': 'https://ror.org',
            'name': 'DataCite',
            'publisherIdentifier': 'https://ror.org/04wxnsj81'
        }
        publisher2 = {
            'publisherIdentifierScheme': 'VIAF',
            'schemeUri': 'https://viaf.org/',
            'name': 'Deutsches archäologisches Institut',
            'publisherIdentifier': 'http://viaf.org/viaf/148463773'
        }
        publisher3 = {
            'publisherIdentifierScheme': 'Wikidata',
            'schemeUri': 'https://www.wikidata.org/wiki/',
            'name': 'University of Tokyo',
            'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
        }

        dp.update_redis_values(br=[], ra=[id1, id2, id3, id4])  # simulate a snapshot
        out1 = dp.get_publisher_id(publisher1)
        out2 = dp.get_publisher_id(publisher2)
        out3 = dp.get_publisher_id(publisher3)

        self.assertEqual(out1, id1)
        self.assertEqual(out2, id2)
        self.assertEqual(out3, id3)

        self.assertTrue(dp.tmp_ror_m.storage_manager.get_value(id1))
        self.assertTrue(dp.tmp_viaf_m.storage_manager.get_value(id2))
        self.assertTrue(dp.tmp_wikidata_m.storage_manager.get_value(id3))

        dp.storage_manager.delete_storage()

1769 

1770 

    def test_find_datacite_publisher_id_api_disabled_in_storage(self):
        """APIs OFF + publisher ids already marked valid in the persistent
        storage: they must be accepted (the storage verdict is trusted, even for
        id1 which would fail a live ROR check)."""
        dp = DataciteProcessing(use_viaf_api=False, use_wikidata_api=False, use_ror_api=False)
        id1 = "ror:04wxnsj89"  # invalid against the real ROR registry
        id2 = "viaf:148463773"
        id3 = "wikidata:Q7842"

        publisher1 = {
            'publisherIdentifierScheme': 'ROR',
            'schemeUri': 'https://ror.org',
            'name': 'DataCite',
            'publisherIdentifier': 'https://ror.org/04wxnsj89'
        }
        publisher2 = {
            'publisherIdentifierScheme': 'VIAF',
            'schemeUri': 'https://viaf.org/',
            'name': 'Deutsches archäologisches Institut',
            'publisherIdentifier': 'http://viaf.org/viaf/148463773'
        }
        publisher3 = {
            'publisherIdentifierScheme': 'Wikidata',
            'schemeUri': 'https://www.wikidata.org/wiki/',
            'name': 'University of Tokyo',
            'publisherIdentifier': 'https://wikidata.org/wiki/Q7842'
        }

        # Pre-seed all three ids as valid in the persistent storage
        dp.storage_manager.set_value(id1, True)
        dp.storage_manager.set_value(id2, True)
        dp.storage_manager.set_value(id3, True)

        out1 = dp.get_publisher_id(publisher1)
        out2 = dp.get_publisher_id(publisher2)
        out3 = dp.get_publisher_id(publisher3)

        self.assertEqual(out1, id1)
        self.assertEqual(out2, id2)
        self.assertEqual(out3, id3)

        dp.storage_manager.delete_storage()

1811 

1812 def test_publisher_id_replaced_by_mapping(self): 

1813 

1814 publisher3 = { 

1815 'publisherIdentifierScheme': 'Wikidata', 

1816 'schemeUri': 'https://www.wikidata.org/wiki/', 

1817 'name': 'University of Tokyo', 

1818 'publisherIdentifier': 'https://wikidata.org/wiki/Q7842' 

1819 } 

1820 

1821 dp = DataciteProcessing(publishers_filepath_dc=PUBLISHERS_MAPPING) 

1822 doi = "10.12753/2066-026X-17-015" 

1823 publisher3 = dp.get_publisher(doi, publisher3) 

1824 publisher3_exp = "ADLRO [datacite:3]" 

1825 self.assertEqual(publisher3, publisher3_exp) 

1826 

1827 

1828 def test_update_redis_values_normalization(self): 

1829 """update_redis_values deve normalizzare gli ID (doi:/orcid:) così i confronti funzionano.""" 

1830 dp = DataciteProcessing() 

1831 dp.update_redis_values( 

1832 br=["10.1002/2014jd022411"], # senza prefisso 

1833 ra=["https://orcid.org/0000-0001-8513-8700"] # URL 

1834 ) 

1835 # validazione via snapshot deve riuscire 

1836 out_ra = dp.find_datacite_orcid(["0000-0001-8513-8700"], "10.9999/noindex") 

1837 self.assertEqual(out_ra, "orcid:0000-0001-8513-8700") 

1838 # DOI in BR: check via to_validated_id_list 

1839 out_br = dp.to_validated_id_list({"id": "doi:10.1002/2014jd022411", "schema": "doi"}) 

1840 self.assertEqual(out_br, ["doi:10.1002/2014jd022411"]) 

1841 dp.storage_manager.delete_storage() 

1842 

1843 def test_memory_to_storage_flushes_and_clears(self): 

1844 """Gli aggiornamenti in tmp vengono persistiti in blocco e la memoria temporanea viene svuotata.""" 

1845 dp = DataciteProcessing(testing=True) 

1846 # usa Redis snapshot per marcare True in tmp_orcid_m 

1847 oid = "orcid:0000-0001-8513-8700" 

1848 dp.update_redis_values(br=[], ra=[oid]) 

1849 _ = dp.find_datacite_orcid([oid.split(":")[1]], "10.9999/noindex") 

1850 # dopo la validazione: il valore è in tmp_orcid_m.storage_manager 

1851 self.assertTrue(dp.tmp_orcid_m.storage_manager.get_value(oid)) 

1852 # memory_to_storage svuota temporary_manager (che è già vuoto in questo caso) 

1853 dp.memory_to_storage() 

1854 # la memoria tmp è svuotata (nessun valore residuo) 

1855 self.assertEqual(dp.temporary_manager.get_validity_list_of_tuples(), []) 

1856 dp.tmp_orcid_m.storage_manager.delete_storage() 

1857 

    def test_csv_creator_offline_uses_index_for_orcid(self):
        """API OFF: if the ORCID is in the DOI->ORCID index, the author must come
        out with [orcid:...] even offline."""
        dp = DataciteProcessing(use_orcid_api=False)
        doi = "10.2000/test-offline-index"
        orcid = "0000-0002-1234-5678"
        name = "Doe, Jane"
        dp.orcid_index.add_value(doi, f"{name} [orcid:{orcid}]")
        # Minimal DataCite API item: id/type plus the attributes csv_creator reads
        item = {
            "id": doi,
            "type": "dois",
            "attributes": {
                "doi": doi,
                "titles": [{"title": "Sample"}],
                "types": {"ris": "JOUR"},
                "creators": [{
                    "nameType": "Personal",
                    "familyName": "Doe",
                    "givenName": "Jane",
                    "nameIdentifiers": [{
                        "nameIdentifierScheme": "ORCID",
                        "nameIdentifier": f"https://orcid.org/{orcid}",
                        "schemeUri": "https://orcid.org"
                    }]
                }]
            }
        }
        row = dp.csv_creator(item)
        self.assertIn("[orcid:0000-0002-1234-5678]", row["author"])
        dp.storage_manager.delete_storage()

1887 

1888 def test_get_agents_strings_list_uses_index_with_doi_normalization(self): 

1889 """get_agents_strings_list deve arricchire da indice anche se DOI arriva senza prefisso.""" 

1890 dp = DataciteProcessing() 

1891 doi_no_prefix = "10.3000/abc" 

1892 orcid = "0000-0003-1445-0291" 

1893 dp.orcid_index.add_value(f"doi:{doi_no_prefix}", f"Schulz, Heide N [orcid:{orcid}]") 

1894 entity_attr_dict = { 

1895 "creators": [ 

1896 {"name": "Schulz, Heide N", "nameType": "Personal", 

1897 "givenName": "Heide N", "familyName": "Schulz", "nameIdentifiers": []} 

1898 ], 

1899 "contributors": [] 

1900 } 

1901 authors = dp.add_authors_to_agent_list(entity_attr_dict, [], doi="doi:10.3000/abc") 

1902 editors = dp.add_editors_to_agent_list(entity_attr_dict, [], doi="doi:10.3000/abc") 

1903 authors_strings, editors_strings = dp.get_agents_strings_list(doi_no_prefix, authors + editors) 

1904 self.assertEqual(authors_strings, [f"Schulz, Heide N [orcid:{orcid}]"]) 

1905 self.assertEqual(editors_strings, []) 

1906 dp.storage_manager.delete_storage()