Coverage for test / processing_oroci_test.py: 99%
879 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it>
2# SPDX-FileCopyrightText: 2023 Marta Soricetti <marta.soricetti@unibo.it>
3# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelo.massari@unibo.it>
4#
5# SPDX-License-Identifier: ISC
7import os
8import unittest
10from oc_ds_converter.lib.jsonmanager import *
11from oc_ds_converter.openaire.openaire_processing import OpenaireProcessing
12#
# Filesystem layout of the OpenAIRE-processing test fixtures.
BASE = os.path.join('test', 'openaire_processing')  # root folder of the fixtures
DATA = os.path.join(BASE, 'jSonFile_1.json')  # sample input json file
DATA_DIR = BASE
TMP_SUPPORT_MATERIAL = os.path.join(BASE, "tmp_support")  # scratch area for support files
OUTPUT = os.path.join(BASE, 'meta_input')
MULTIPROCESS_OUTPUT = os.path.join(BASE, 'multi_process_test')
# JSON cache file exercised by test_dict_to_cache (created and deleted by the test).
MEMO_JSON_PATH = "test/openaire_processing/tmp_support/memo.json"
# A single OpenAIRE bibliographic entity (one publication with its providers,
# creators and identifiers), used as sample input in the tests below.
SAMPLE_ENTITY = {'collectedFrom': [{'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|openaire____::081b82f96300b6a6e3d282bad31cb6e2', 'schema': 'DNET Identifier'}], 'name': 'Crossref'}, 'provisionMode': 'collected'}, {'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|openaire____::8ac8380272269217cb09a928c8caa993', 'schema': 'DNET Identifier'}], 'name': 'UnpayWall'}, 'provisionMode': 'collected'}, {'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|openaire____::806360c771262b4d6770e7cdf04b5c5a', 'schema': 'DNET Identifier'}], 'name': 'ORCID'}, 'provisionMode': 'collected'}, {'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a', 'schema': 'DNET Identifier'}], 'name': 'Microsoft Academic Graph'}, 'provisionMode': 'collected'}, {'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|openaire____::9e3be59865b2c1c335d32dae2fe7b254', 'schema': 'DNET Identifier'}], 'name': 'Datacite'}, 'provisionMode': 'collected'}, {'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23', 'schema': 'DNET Identifier'}], 'name': 'arXiv.org e-Print Archive'}, 'provisionMode': 'collected'}], 'creator': [{'name': 'Matteo Serra'}, {'name': 'Salvatore Mignemi'}, {'identifiers': [{'identifier': '0000-0001-5595-7537', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-5595-7537'}], 'name': 'Mariano Cadoni'}], 'dnetIdentifier': '50|doi_dedup___::41074cd388749ccbdb6668caaf059f4a', 'identifier': [{'identifier': '10.1103/physrevd.84.084046', 'schema': 'doi', 'url': 'https://doi.org/10.1103/physrevd.84.084046'}, {'identifier': '10.1103/physrevd.84.084046', 'schema': 'doi'}, {'identifier': '10.48550/arxiv.1107.5979', 'schema': 'doi', 'url': 'https://dx.doi.org/10.48550/arxiv.1107.5979'}, {'identifier': '1107.5979', 'schema': 'arXiv',
'url': 'http://arxiv.org/abs/1107.5979'}], 'objectSubType': 'Article', 'objectType': 'publication', 'publicationDate': '2011-10-21', 'publisher': [{'name': 'American Physical Society (APS)'}], 'title': 'Exact solutions with AdS asymptotics of Einstein and Einstein-Maxwell gravity minimally coupled to a scalar field'}
# A full OpenAIRE citation record: "source" entity Cites "target" entity.
# Used by the extract_all_ids / get_redis_validity_list tests.
SAMPLE_ENT2 = {"identifier":"000017d2c913b28e09291b811ce3609a","linkprovider":[{"identifiers":[{"identifier":"10|openaire____::0a836ef43dcb67bb7cbd4dd509b11b73","schema":"DNET Identifier"}],"name":"CORE (RIOXX-UK Aggregator)"},{"identifiers":[{"identifier":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","schema":"DNET Identifier"}],"name":"PubMed Central"},{"identifiers":[{"identifier":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","schema":"DNET Identifier"}],"name":"Europe PubMed Central"},{"identifiers":[{"identifier":"10|opendoar____::229754d7799160502a143a72f6789927","schema":"DNET Identifier"}],"name":"Publications at Bielefeld University"}],"publicationDate":"2014-02-01","publisher":[{"name":"Springer Nature"}],"relationship":{"inverse":"IsCitedBy","name":"Cites","schema":"datacite"},"source":{"collectedFrom":[{"completionStatus":"complete","provider":{"identifiers":[{"identifier":"10|openaire____::0a836ef43dcb67bb7cbd4dd509b11b73","schema":"DNET Identifier"}],"name":"CORE (RIOXX-UK Aggregator)"},"provisionMode":"collected"},{"completionStatus":"complete","provider":{"identifiers":[{"identifier":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","schema":"DNET Identifier"}],"name":"PubMed Central"},"provisionMode":"collected"},{"completionStatus":"complete","provider":{"identifiers":[{"identifier":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","schema":"DNET Identifier"}],"name":"Europe PubMed Central"},"provisionMode":"collected"},{"completionStatus":"complete","provider":{"identifiers":[{"identifier":"10|opendoar____::229754d7799160502a143a72f6789927","schema":"DNET Identifier"}],"name":"Publications at Bielefeld University"},"provisionMode":"collected"}],"creator":[{"identifiers":[{"identifier":"0000-0002-6491-0754","schema":"ORCID","url":"https://orcid.org/0000-0002-6491-0754"}],"name":"Sattler, Sebastian"},{"name":"Mehlkop, Guido"},{"name":"Graeff, Peter"},{"identifiers":[{"identifier":"0000-0002-8090-6886","schema":"ORCID","url":"https://orcid.org/0000-0002-8090-6886"}],"name":"Sauer, Carsten"}],"dnetIdentifier":"50|pmid_dedup__::8936076da7a86820c24ede7ca3ff15b3","identifier":[{"identifier":"PMC3928621","schema":"pmc","url":"http://europepmc.org/articles/PMC3928621"},{"identifier":"24484640","schema":"pmid"},{"identifier":"24484640","schema":"pmid","url":"https://pubmed.ncbi.nlm.nih.gov/24484640"},{"identifier":"PMC3928621","schema":"pmc"}],"objectSubType":"Article","objectType":"publication","publicationDate":"2014-02-01","publisher":[{"name":"Springer Nature"}],"title":"Evaluating the drivers of and obstacles to the willingness to use cognitive enhancement drugs: the influence of drug characteristics, social environment, and personal characteristics"},"target":{"collectedFrom":[{"completionStatus":"complete","provider":{"identifiers":[{"identifier":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","schema":"DNET Identifier"}],"name":"Crossref"},"provisionMode":"collected"},{"completionStatus":"complete","provider":{"identifiers":[{"identifier":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","schema":"DNET Identifier"}],"name":"Microsoft Academic Graph"},"provisionMode":"collected"}],"creator":[{"name":"Harold G. Grasmick"},{"name":"Robert J. Bursik"}],"dnetIdentifier":"50|doi_________::816648c63de74835ec2b0a753a68f037","identifier":[{"identifier":"10.2307/3053861","schema":"doi","url":"https://doi.org/10.2307/3053861"}],"objectSubType":"Article","objectType":"publication","publicationDate":"1990-01-01","publisher":[{"name":"JSTOR"}],"title":"Conscience, significant others, and rational choice: Extending the deterrence model."}}
# An entity whose "identifier" field has already been reshaped into the
# valid / not_valid / to_be_val buckets expected by csv_creator.
SAMPLE_ENTITY_FOR_CSV_CREATOR = {'collectedFrom': [{'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|openaire____::0a836ef43dcb67bb7cbd4dd509b11b73', 'schema': 'DNET Identifier'}], 'name': 'CORE (RIOXX-UK Aggregator)'}, 'provisionMode': 'collected'}, {'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357', 'schema': 'DNET Identifier'}], 'name': 'PubMed Central'}, 'provisionMode': 'collected'}, {'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c', 'schema': 'DNET Identifier'}], 'name': 'Europe PubMed Central'}, 'provisionMode': 'collected'}, {'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|driver______::bee53aa31dc2cbb538c10c2b65fa5824', 'schema': 'DNET Identifier'}], 'name': 'DOAJ-Articles'}, 'provisionMode': 'collected'}, {'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|opendoar____::566a9968b43628588e76be5a85a0f9e8', 'schema': 'DNET Identifier'}], 'name': "King's Research Portal"}, 'provisionMode': 'collected'}, {'completionStatus': 'complete', 'provider': {'identifiers': [{'identifier': '10|openaire____::c2cdfa5866e03cdd07d313cbc8fb8311', 'schema': 'DNET Identifier'}], 'name': 'Multidisciplinary Digital Publishing Institute'}, 'provisionMode': 'collected'}], 'creator': [{'name': 'Smith, Lee'}, {'name': 'Sawyer, Alexia'}, {'name': 'Gardner, Benjamin'}, {'name': 'Seppala, Katri'}, {'name': 'Ucci, Marcella'}, {'name': 'Marmot, Alexi'}, {'name': 'Lally, Pippa'}, {'name': 'Fisher, Abi'}], 'dnetIdentifier': '50|pmid_dedup__::a1a8687c2378a0d68314566dec29dafb', 'objectSubType': 'Article', 'objectType': 'publication', 'publicationDate': '2018-06-09', 'publisher': [{'name': 'MDPI'}], 'title': 'Occupational physical activity habits of UK office workers: cross-sectional data from the Active Buildings Study', 'identifier': {'valid':
[], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:29890726', 'valid': None}]}, "redis_validity_lists":[[],[]]}
26class TestOpenaireProcessing(unittest.TestCase):
28 def delete_storege(self, storage_type=None, specific_path=None):
29 if not specific_path:
30 if storage_type == "sqlite":
31 auto_db_created_path = os.path.join(os.getcwd(), "storage", "id_valid_dict.db")
32 auto_db_created_path = auto_db_created_path if os.path.exists(auto_db_created_path) else auto_db_created_path+"?mode=rw"
33 if os.path.exists(auto_db_created_path):
34 os.remove(auto_db_created_path)
35 else:
36 auto_db_created_path = os.path.join(os.getcwd(), "storage", "id_value.json")
37 if os.path.exists(auto_db_created_path):
38 os.remove(auto_db_created_path)
39 elif specific_path:
40 if os.path.exists(specific_path):
41 os.remove(specific_path)
43 def test_get_all_ids(self):
44 opp = OpenaireProcessing()
45 allids = opp.extract_all_ids(SAMPLE_ENT2)
46 self.assertCountEqual(['pmid:24484640', 'pmcid:PMC3928621', 'doi:10.2307/3053861'], allids[0])
47 self.assertCountEqual(['orcid:0000-0002-8090-6886', 'orcid:0000-0002-6491-0754'], allids[1])
49 opp.storage_manager.delete_storage()
51 def test_get_all_ids_redis(self):
52 opp = OpenaireProcessing(testing=True)
53 allids = opp.extract_all_ids(SAMPLE_ENT2)
54 self.assertCountEqual(['pmid:24484640', 'pmcid:PMC3928621', 'doi:10.2307/3053861'], allids[0])
55 self.assertCountEqual(['orcid:0000-0002-8090-6886', 'orcid:0000-0002-6491-0754'], allids[1])
56 opp.storage_manager.delete_storage()
58 def test_get_redis_validity_list(self):
59 br = {'pmid:24484640', 'pmcid:PMC3928621', 'doi:10.2307/3053861'}
60 ra = {'orcid:0000-0002-8090-6886', 'orcid:0000-0002-6491-0754'}
62 opp = OpenaireProcessing()
63 br_valid_list = opp.get_redis_validity_list(br, "br")
64 exp_exp_br_valid_list = []
65 ra_valid_list = opp.get_redis_validity_list(ra, "ra")
66 exp_exp_ra_valid_list = []
67 self.assertEqual(ra_valid_list, exp_exp_ra_valid_list)
68 self.assertEqual(br_valid_list, exp_exp_br_valid_list)
70 opp.storage_manager.delete_storage()
72 def test_get_redis_validity_list_redis(self):
73 br = {'pmid:24484640', 'pmcid:PMC3928621', 'doi:10.2307/3053861'}
74 ra = {'orcid:0000-0002-8090-6886', 'orcid:0000-0002-6491-0754'}
76 opp = OpenaireProcessing(testing=True)
77 br_valid_list = opp.get_redis_validity_list(br, "br")
78 exp_exp_br_valid_list = []
79 ra_valid_list = opp.get_redis_validity_list(ra, "ra")
80 exp_exp_ra_valid_list = []
81 self.assertEqual(ra_valid_list, exp_exp_ra_valid_list)
82 self.assertEqual(br_valid_list, exp_exp_br_valid_list)
83 opp.storage_manager.delete_storage()
85 def test_get_reids_validity_dict_w_fakeredis_db_values_sqlite(self):
86 opp = OpenaireProcessing()
87 opp.BR_redis.sadd('pmid:24484640', "omid:1")
88 opp.RA_redis.sadd('orcid:0000-0002-8090-6886', "omid:2")
90 br = {'pmid:24484640', 'pmcid:PMC3928621', 'doi:10.2307/3053861'}
91 ra = {'orcid:0000-0002-8090-6886', 'orcid:0000-0002-6491-0754'}
93 br_validity_dict = opp.get_redis_validity_list(br, "br")
94 exp_br_valid_list = ["pmid:24484640"]
95 ra_validity_dict = opp.get_redis_validity_list(ra, "ra")
96 exp_ra_valid_list = ['orcid:0000-0002-8090-6886']
97 self.assertEqual(br_validity_dict, exp_br_valid_list)
98 self.assertEqual(ra_validity_dict, exp_ra_valid_list)
100 opp.storage_manager.delete_storage()
102 opp.BR_redis.delete('pmid:24484640')
103 opp.BR_redis.delete('pmcid:PMC3928621')
104 opp.RA_redis.delete('orcid:0000-0002-8090-6886')
106 def test_get_reids_validity_dict_w_fakeredis_db_values_redis(self):
107 opp = OpenaireProcessing(testing=True)
108 opp.BR_redis.sadd('pmid:24484640', "omid:1")
109 opp.RA_redis.sadd('orcid:0000-0002-8090-6886', "omid:2")
112 br = {'pmid:24484640', 'pmcid:PMC3928621', 'doi:10.2307/3053861'}
113 ra = {'orcid:0000-0002-8090-6886', 'orcid:0000-0002-6491-0754'}
115 br_validity_dict = opp.get_redis_validity_list(br, "br")
116 exp_br_valid_list = ["pmid:24484640"]
117 ra_validity_dict = opp.get_redis_validity_list(ra, "ra")
118 exp_ra_valid_list = ['orcid:0000-0002-8090-6886']
119 self.assertEqual(br_validity_dict, exp_br_valid_list)
120 self.assertEqual(ra_validity_dict, exp_ra_valid_list)
122 opp.storage_manager.delete_storage()
123 opp.BR_redis.delete('pmid:24484640')
124 opp.BR_redis.delete('pmcid:PMC3928621')
125 opp.RA_redis.delete('orcid:0000-0002-8090-6886')
127 def test_validated_as_default(self):
128 """
129 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value:
130 string of the identifier, the method "validated_as" returns:
131 - True if the id was already validated as valid
132 - False if the id was already validated as invalid
133 - None if the id was not validated before
134 The procedure is tested
135 - With default storage manager (sqlite) without a pre-existent db associated
136 """
138 opp = OpenaireProcessing()
139 validate_as_none = opp.validated_as({"schema":"pmid", "identifier": "pmid:23483834"})
140 self.assertEqual(validate_as_none, None)
141 opp.storage_manager.delete_storage()
143 def test_validated_as_default_redis(self):
144 '''
145 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value:
146 string of the identifier, the method "validated_as" returns:
147 - True if the id was already validated as valid
148 - False if the id was already validated as invalid
149 - None if the id was not validated before
150 The procedure is tested
151 - With redis storage manager without a pre-existent db associated
152 '''
154 opp = OpenaireProcessing(testing=True)
155 validate_as_none = opp.validated_as({"schema":"pmid", "identifier": "pmid:23483834"})
156 self.assertEqual(validate_as_none, None)
157 opp.storage_manager.delete_storage()
159 def test_validated_as_redis_with_preexistent_data(self):
160 '''
161 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value:
162 string of the identifier, the method "validated_as" returns:
163 - True if the id was already validated as valid
164 - False if the id was already validated as invalid
165 - None if the id was not validated before
166 The procedure is tested
167 - With redis storage manager and pre-existent data associated
168 '''
169 valid_pmid_not_in_db = {"identifier":"pmid:2938", "schema":"pmid"}
170 valid_pmid_in_db = {"identifier":"pmid:23483834", "schema":"pmid"}
171 invalid_pmid_in_db = {"identifier":"pmid:18328387372097", "schema":"pmid"}
173 # New class instance and set values directly on the id managers' storage_manager
174 opp_redis = OpenaireProcessing(testing=True)
175 opp_redis.pmid_m.storage_manager.set_value(valid_pmid_in_db["identifier"], True)
176 opp_redis.pmid_m.storage_manager.set_value(invalid_pmid_in_db["identifier"], False)
177 validated_as_True = opp_redis.validated_as(valid_pmid_in_db)
178 validated_as_False = opp_redis.validated_as(invalid_pmid_in_db)
179 not_validated = opp_redis.validated_as(valid_pmid_not_in_db)
181 self.assertEqual(validated_as_True, True)
182 self.assertEqual(validated_as_False, False)
183 self.assertEqual(not_validated, None)
185 opp_redis.pmid_m.storage_manager.delete_storage()
188 def test_validated_as_inmemory(self):
189 '''
190 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value:
191 string of the identifier, the method "validated_as" returns:
192 - True if the id was already validated as valid
193 - False if the id was already validated as invalid
194 - None if the id was not validated before
195 The procedure is tested
196 - With in Memory + Json storage manager and a pre-existent db associated
197 - With in Memory + Json storage manager without a pre-existent db associated
198 '''
200 valid_pmid_not_in_db = {"identifier":"pmid:2938", "schema":"pmid"}
201 valid_pmid_in_db = {"identifier":"pmid:23483834", "schema":"pmid"}
202 invalid_pmid_in_db = {"identifier":"pmid:18328387372097", "schema":"pmid"}
204 # New class instance and set values directly on the id managers' storage_manager
205 opp_sql = OpenaireProcessing(testing=True)
206 opp_sql.pmid_m.storage_manager.set_value(valid_pmid_in_db["identifier"], True)
207 opp_sql.pmid_m.storage_manager.set_value(invalid_pmid_in_db["identifier"], False)
208 validated_as_True = opp_sql.validated_as(valid_pmid_in_db)
209 validated_as_False = opp_sql.validated_as(invalid_pmid_in_db)
210 not_validated = opp_sql.validated_as(valid_pmid_not_in_db)
212 self.assertEqual(validated_as_True, True)
213 self.assertEqual(validated_as_False, False)
214 self.assertEqual(not_validated, None)
216 opp_sql.pmid_m.storage_manager.delete_storage()
219 def test_validated_as_redis(self):
220 '''
221 Check that, given an ID dict with keys "schema" (value: string of the schema) and "identifier" (value:
222 string of the identifier, the method "validated_as" returns:
223 - True if the id was already validated as valid
224 - False if the id was already validated as invalid
225 - None if the id was not validated before
226 The procedure is tested
227 - With REDIS storage manager and a pre-existent db associated
228 - With REDIS storage manager without a pre-existent db associated
229 '''
231 valid_pmid_not_in_db = {"identifier":"pmid:2938", "schema":"pmid"}
232 valid_pmid_in_db = {"identifier":"pmid:23483834", "schema":"pmid"}
233 invalid_pmid_in_db = {"identifier":"pmid:18328387372097", "schema":"pmid"}
235 # New class instance and set values directly on the id managers' storage_manager
236 opp_redis = OpenaireProcessing(testing=True)
237 opp_redis.pmid_m.storage_manager.set_value(valid_pmid_in_db["identifier"], True)
238 opp_redis.pmid_m.storage_manager.set_value(invalid_pmid_in_db["identifier"], False)
239 validated_as_True = opp_redis.validated_as(valid_pmid_in_db)
240 validated_as_False = opp_redis.validated_as(invalid_pmid_in_db)
241 not_validated = opp_redis.validated_as(valid_pmid_not_in_db)
243 self.assertEqual(validated_as_True, True)
244 self.assertEqual(validated_as_False, False)
245 self.assertEqual(not_validated, None)
246 opp_redis.pmid_m.storage_manager.delete_storage()
248 def test_get_id_manager(self):
249 """Check that, given in input the string of a schema (e.g.:'pmid') or an id with a prefix (e.g.: 'pmid:12334')
250 and a dictionary mapping the strings of the schemas to their id managers, the method returns the correct
251 id manager. Note that each instance of the Preprocessing class needs its own instances of the id managers,
252 in order to avoid conflicts while validating data"""
254 op = OpenaireProcessing()
255 id_man_dict = op._id_man_dict
257 pmid_id = "pmid:12345"
258 pmid_string = "pmid"
259 pmid_man_exp = op.get_id_manager(pmid_id, id_man_dict)
260 pmid_man_exp_2 = op.get_id_manager(pmid_string, id_man_dict)
262 #check that the idmanager for the pmid was returned and that it works as expected
263 self.assertTrue(pmid_man_exp.is_valid(pmid_id))
264 self.assertTrue(pmid_man_exp_2.is_valid(pmid_id))
266 doi_id = "doi:10.1103/physrevd.84.084046"
267 doi_string = "doi"
268 doi_man_exp = op.get_id_manager(doi_id, id_man_dict)
269 doi_man_exp_2 = op.get_id_manager(doi_string, id_man_dict)
271 #check that the idmanager for the doi was returned and that it works as expected
272 self.assertTrue(doi_man_exp.is_valid(doi_id))
273 self.assertTrue(doi_man_exp_2.is_valid(doi_id))
275 pmc_id = "pmcid:PMC5555555"
276 pmc_string = "pmcid"
277 pmc_man_exp = op.get_id_manager(pmc_id, id_man_dict)
278 pmc_man_exp_2 = op.get_id_manager(pmc_string, id_man_dict)
280 #check that the idmanager for the pmc was returned and that it works as expected
281 self.assertTrue(pmc_man_exp.is_valid(pmc_id))
282 self.assertTrue(pmc_man_exp_2.is_valid(pmc_id))
284 arxiv_id = "arxiv:1509.08217"
285 arxiv_string = "arxiv"
286 arxiv_man_exp = op.get_id_manager(arxiv_id, id_man_dict)
287 arxiv_man_exp_2 = op.get_id_manager(arxiv_string, id_man_dict)
289 #check that the idmanager for the arxiv was returned and that it works as expected
290 self.assertTrue(arxiv_man_exp.is_valid(arxiv_id))
291 self.assertTrue(arxiv_man_exp_2.is_valid(arxiv_id))
293 op.storage_manager.delete_storage()
295 def test_get_id_manager_redis(self):
296 """Check that, given in input the string of a schema (e.g.:'pmid') or an id with a prefix (e.g.: 'pmid:12334')
297 and a dictionary mapping the strings of the schemas to their id managers, the method returns the correct
298 id manager. Note that each instance of the Preprocessing class needs its own instances of the id managers,
299 in order to avoid conflicts while validating data"""
301 op = OpenaireProcessing(testing=True)
302 id_man_dict = op._id_man_dict
304 pmid_id = "pmid:12345"
305 pmid_string = "pmid"
306 pmid_man_exp = op.get_id_manager(pmid_id, id_man_dict)
307 pmid_man_exp_2 = op.get_id_manager(pmid_string, id_man_dict)
309 #check that the idmanager for the pmid was returned and that it works as expected
310 self.assertTrue(pmid_man_exp.is_valid(pmid_id))
311 self.assertTrue(pmid_man_exp_2.is_valid(pmid_id))
313 doi_id = "doi:10.1103/physrevd.84.084046"
314 doi_string = "doi"
315 doi_man_exp = op.get_id_manager(doi_id, id_man_dict)
316 doi_man_exp_2 = op.get_id_manager(doi_string, id_man_dict)
318 #check that the idmanager for the doi was returned and that it works as expected
319 self.assertTrue(doi_man_exp.is_valid(doi_id))
320 self.assertTrue(doi_man_exp_2.is_valid(doi_id))
322 pmc_id = "pmcid:PMC5555555"
323 pmc_string = "pmcid"
324 pmc_man_exp = op.get_id_manager(pmc_id, id_man_dict)
325 pmc_man_exp_2 = op.get_id_manager(pmc_string, id_man_dict)
327 #check that the idmanager for the pmc was returned and that it works as expected
328 self.assertTrue(pmc_man_exp.is_valid(pmc_id))
329 self.assertTrue(pmc_man_exp_2.is_valid(pmc_id))
331 arxiv_id = "arxiv:1509.08217"
332 arxiv_string = "arxiv"
333 arxiv_man_exp = op.get_id_manager(arxiv_id, id_man_dict)
334 arxiv_man_exp_2 = op.get_id_manager(arxiv_string, id_man_dict)
336 #check that the idmanager for the arxiv was returned and that it works as expected
337 self.assertTrue(arxiv_man_exp.is_valid(arxiv_id))
338 self.assertTrue(arxiv_man_exp_2.is_valid(arxiv_id))
340 op.storage_manager.delete_storage()
343 def test_normalise_any_id(self):
344 '''
345 Check that, given an id with a prefix, any doi, pmid, pmcid and arxiv id is correctly normalised
346 '''
347 op = OpenaireProcessing()
349 pmid_id = "pmid:12345"
350 doi_id = "doi:10.1103/physrevd.84.084046"
351 arxiv_id = "arxiv:1509.08217"
352 pmc_id = "pmcid:PMC5555555"
354 self.assertEqual(pmid_id, op.normalise_any_id(pmid_id+"abc"))
355 self.assertEqual(doi_id, op.normalise_any_id("doi:" + doi_id.split(":")[1].upper()))
356 self.assertEqual(arxiv_id + "v1", op.normalise_any_id(arxiv_id.replace(".", "....")))
357 self.assertEqual(pmc_id, op.normalise_any_id(pmc_id+" "))
359 op.storage_manager.delete_storage()
361 def test_normalise_any_id_redis(self):
362 '''
363 Check that, given an id with a prefix, any doi, pmid, pmcid and arxiv id is correctly normalised
364 '''
365 op = OpenaireProcessing(testing=True)
367 pmid_id = "pmid:12345"
368 doi_id = "doi:10.1103/physrevd.84.084046"
369 arxiv_id = "arxiv:1509.08217"
370 pmc_id = "pmcid:PMC5555555"
372 self.assertEqual(pmid_id, op.normalise_any_id(pmid_id+"abc"))
373 self.assertEqual(doi_id, op.normalise_any_id("doi:" + doi_id.split(":")[1].upper()))
374 self.assertEqual(arxiv_id + "v1", op.normalise_any_id(arxiv_id.replace(".", "....")))
375 self.assertEqual(pmc_id, op.normalise_any_id(pmc_id+" "))
377 op.storage_manager.delete_storage()
379 def test_get_norm_ids(self):
380 '''
381 Check that, given a list of dictionaries representing the ids of an entity, the method returns a reduced version
382 of the same list, containing only the normalised version of the ids of the schemas managed by opencitations.
383 Each reduced dictionary only contains two key-value pairs, i.e.: "identifier" and "schema".
384 '''
385 op = OpenaireProcessing()
387 list_of_ids_to_norm_with_duplicates = [
388 {'identifier': '10.1103/PHYSREVD.84.084046', 'schema': 'doi',
389 'url': 'https://doi.org/10.1103/physrevd.84.084046'},
390 {'identifier': '10.1103/physrevd.84.084046', 'schema': 'doi'},
391 {'identifier': '10.48550/arxiv.1107.5979', 'schema': 'doi',
392 'url': 'https://dx.doi.org/10.48550/arxiv.1107.5979'},
393 {'identifier': '1107.5979', 'schema': 'arXiv', 'url': 'http://arxiv.org/abs/1107.5979'}]
394 norm_ids = op.get_norm_ids(list_of_ids_to_norm_with_duplicates)
395 exp_norm_ids = [{'identifier': 'doi:10.1103/physrevd.84.084046', 'schema': 'doi'},
396 {'identifier': 'doi:10.48550/arxiv.1107.5979', 'schema': 'doi'},
397 {'identifier': 'arxiv:1107.5979v1', 'schema': 'arxiv'}]
399 list_of_ids_w_not_managed_schema = [
400 {'identifier': '11245/1.357137', 'schema': 'handle', 'url': 'https://hdl.handle.net/11245/1.357137'},
401 {'identifier': '21887584', 'schema': 'pmid', 'url': 'https://pubmed.ncbi.nlm.nih.gov/21887584'},
402 {'identifier': '10.1007/s12160-011-9282-0', 'schema': 'doi','url': 'https://doi.org/10.1007/s12160-011-9282-0'}]
403 norm_ids_2 = op.get_norm_ids(list_of_ids_w_not_managed_schema)
404 exp_norm_ids_2 = [{'identifier': 'pmid:21887584', 'schema': 'pmid'},
405 {'identifier': 'doi:10.1007/s12160-011-9282-0', 'schema': 'doi'}]
407 list_of_ids_not_managed_and_not_normalisable_only = [
408 {'identifier': '11245/1.357137', 'schema': 'handle', 'url': 'https://hdl.handle.net/11245/1.357137'},
409 {'identifier': '20.ABC/s12160-011-9282-FAKEID', 'schema': 'doi','url': 'https://doi.org/10.1007/s12160-011-9282-0'}]
410 norm_ids_3 = op.get_norm_ids(list_of_ids_not_managed_and_not_normalisable_only)
411 exp_norm_ids_3 = []
413 self.assertEqual(norm_ids, exp_norm_ids)
414 self.assertEqual(norm_ids_2, exp_norm_ids_2)
415 self.assertEqual(norm_ids_3, exp_norm_ids_3)
416 op.storage_manager.delete_storage()
419 def test_get_norm_ids_redis(self):
420 '''
421 Check that, given a list of dictionaries representing the ids of an entity, the method returns a reduced version
422 of the same list, containing only the normalised version of the ids of the schemas managed by opencitations.
423 Each reduced dictionary only contains two key-value pairs, i.e.: "identifier" and "schema".
424 '''
425 op = OpenaireProcessing(testing=True)
427 list_of_ids_to_norm_with_duplicates = [
428 {'identifier': '10.1103/PHYSREVD.84.084046', 'schema': 'doi',
429 'url': 'https://doi.org/10.1103/physrevd.84.084046'},
430 {'identifier': '10.1103/physrevd.84.084046', 'schema': 'doi'},
431 {'identifier': '10.48550/arxiv.1107.5979', 'schema': 'doi',
432 'url': 'https://dx.doi.org/10.48550/arxiv.1107.5979'},
433 {'identifier': '1107.5979', 'schema': 'arXiv', 'url': 'http://arxiv.org/abs/1107.5979'}]
434 norm_ids = op.get_norm_ids(list_of_ids_to_norm_with_duplicates)
435 exp_norm_ids = [{'identifier': 'doi:10.1103/physrevd.84.084046', 'schema': 'doi'},
436 {'identifier': 'doi:10.48550/arxiv.1107.5979', 'schema': 'doi'},
437 {'identifier': 'arxiv:1107.5979v1', 'schema': 'arxiv'}]
439 list_of_ids_w_not_managed_schema = [
440 {'identifier': '11245/1.357137', 'schema': 'handle', 'url': 'https://hdl.handle.net/11245/1.357137'},
441 {'identifier': '21887584', 'schema': 'pmid', 'url': 'https://pubmed.ncbi.nlm.nih.gov/21887584'},
442 {'identifier': '10.1007/s12160-011-9282-0', 'schema': 'doi','url': 'https://doi.org/10.1007/s12160-011-9282-0'}]
443 norm_ids_2 = op.get_norm_ids(list_of_ids_w_not_managed_schema)
444 exp_norm_ids_2 = [{'identifier': 'pmid:21887584', 'schema': 'pmid'},
445 {'identifier': 'doi:10.1007/s12160-011-9282-0', 'schema': 'doi'}]
447 list_of_ids_not_managed_and_not_normalisable_only = [
448 {'identifier': '11245/1.357137', 'schema': 'handle', 'url': 'https://hdl.handle.net/11245/1.357137'},
449 {'identifier': '20.ABC/s12160-011-9282-FAKEID', 'schema': 'doi','url': 'https://doi.org/10.1007/s12160-011-9282-0'}]
450 norm_ids_3 = op.get_norm_ids(list_of_ids_not_managed_and_not_normalisable_only)
451 exp_norm_ids_3 = []
453 self.assertEqual(norm_ids, exp_norm_ids)
454 self.assertEqual(norm_ids_2, exp_norm_ids_2)
455 self.assertEqual(norm_ids_3, exp_norm_ids_3)
456 op.storage_manager.delete_storage()
458 def test_dict_to_cache(self):
459 op = OpenaireProcessing()
460 sample_dict = {"dict_type": "sample"}
461 if os.path.exists(MEMO_JSON_PATH):
462 os.remove(MEMO_JSON_PATH)
463 self.assertFalse(os.path.exists(MEMO_JSON_PATH))
464 op.dict_to_cache(sample_dict, MEMO_JSON_PATH)
465 self.assertTrue(os.path.exists(MEMO_JSON_PATH))
466 self.delete_storege(specific_path=MEMO_JSON_PATH)
467 self.assertFalse(os.path.exists(MEMO_JSON_PATH))
468 op.storage_manager.delete_storage()
471 def test_csv_creator_base(self):
472 '''
473 Check that, given an updated openaire entity (i.e.: where the "identifier" field was modified
474 after having checked the presence of the given identifiers in the storage memory) a meta csv
475 table for the entity is created
476 '''
478 op = OpenaireProcessing()
479 csv_row = op.csv_creator(SAMPLE_ENTITY_FOR_CSV_CREATOR)
480 expected_row = {
481 'id': 'pmid:29890726',
482 'title': 'Occupational physical activity habits of UK office workers: cross-sectional data from the Active Buildings Study',
483 'author': 'Smith Lee; Sawyer Alexia; Gardner Benjamin; Seppala Katri; Ucci Marcella; Marmot Alexi; Lally Pippa; Fisher Abi',
484 'pub_date': '2018-06-09',
485 'venue': '',
486 'volume': '',
487 'issue': '',
488 'page': '',
489 'type': 'journal article',
490 'publisher': 'MDPI',
491 'editor': ''
492 }
493 self.assertEqual(csv_row, expected_row)
495 op.storage_manager.delete_storage()
497 def test_csv_creator_base_redis(self):
498 '''
499 Check that, given an updated openaire entity (i.e.: where the "identifier" field was modified
500 after having checked the presence of the given identifiers in the storage memory) a meta csv
501 table for the entity is created
502 '''
504 op = OpenaireProcessing(testing=True)
505 csv_row = op.csv_creator(SAMPLE_ENTITY_FOR_CSV_CREATOR)
506 expected_row = {
507 'id': 'pmid:29890726',
508 'title': 'Occupational physical activity habits of UK office workers: cross-sectional data from the Active Buildings Study',
509 'author': 'Smith Lee; Sawyer Alexia; Gardner Benjamin; Seppala Katri; Ucci Marcella; Marmot Alexi; Lally Pippa; Fisher Abi',
510 'pub_date': '2018-06-09',
511 'venue': '',
512 'volume': '',
513 'issue': '',
514 'page': '',
515 'type': 'journal article',
516 'publisher': 'MDPI',
517 'editor': ''
518 }
519 self.assertEqual(csv_row, expected_row)
521 op.storage_manager.delete_storage()
523 def test_csv_creator_not_accepted_id(self):
524 '''
525 Check that, given an updated openaire entity with NO ids managed by opencitations (i.e.: an handle id),
526 no meta csv rows are created.
527 '''
529 op = OpenaireProcessing()
531 replaced_entity = {'schema': 'handle', 'identifier': 'handle:11245/1.357137', 'valid': None}
532 MODIFIED_ENTITY = {k:v for k,v in SAMPLE_ENTITY_FOR_CSV_CREATOR.items()}
533 MODIFIED_ENTITY["identifier"]["to_be_val"]= []
534 MODIFIED_ENTITY["identifier"]["to_be_val"].append(replaced_entity)
535 csv_row = op.csv_creator(MODIFIED_ENTITY)
536 expected_row = {} #because there is no ID accepted in opencitations for this entity
537 self.assertEqual(csv_row, expected_row)
539 op.storage_manager.delete_storage()
541 def test_csv_creator_not_accepted_id_redis(self):
542 '''
543 Check that, given an updated openaire entity with NO ids managed by opencitations (i.e.: an handle id),
544 no meta csv rows are created.
545 '''
547 op = OpenaireProcessing(testing=True)
549 replaced_entity = {'schema': 'handle', 'identifier': 'handle:11245/1.357137', 'valid': None}
550 MODIFIED_ENTITY = {k:v for k,v in SAMPLE_ENTITY_FOR_CSV_CREATOR.items()}
551 MODIFIED_ENTITY["identifier"]["to_be_val"]= []
552 MODIFIED_ENTITY["identifier"]["to_be_val"].append(replaced_entity)
553 csv_row = op.csv_creator(MODIFIED_ENTITY)
554 expected_row = {} #because there is no ID accepted in opencitations for this entity
555 self.assertEqual(csv_row, expected_row)
557 op.storage_manager.delete_storage()
559 def test_csv_creator_invalid_id(self):
560 '''
561 Check that, given an updated openaire entity with NO ids managed by opencitations (i.e.: an handle id),
562 no meta csv rows are created.
563 '''
565 op = OpenaireProcessing()
567 replaced_entity = {'schema': 'doi', 'identifier': 'doi:10.1000/FAKE_ID', 'valid': None}
568 MODIFIED_ENTITY = {k: v for k, v in SAMPLE_ENTITY_FOR_CSV_CREATOR.items()}
569 MODIFIED_ENTITY["identifier"]["to_be_val"] = []
570 MODIFIED_ENTITY["identifier"]["to_be_val"].append(replaced_entity)
571 csv_row = op.csv_creator(MODIFIED_ENTITY)
572 expected_row = {} # because there is no ID accepted in opencitations for this entity
573 self.assertEqual(csv_row, expected_row)
575 op.storage_manager.delete_storage()
578 def test_csv_creator_invalid_id_redis(self):
579 '''
580 Check that, given an updated openaire entity with NO ids managed by opencitations (i.e.: an handle id),
581 no meta csv rows are created.
582 '''
584 op = OpenaireProcessing(testing=True)
586 replaced_entity = {'schema': 'doi', 'identifier': 'doi:10.1000/FAKE_ID', 'valid': None}
587 MODIFIED_ENTITY = {k: v for k, v in SAMPLE_ENTITY_FOR_CSV_CREATOR.items()}
588 MODIFIED_ENTITY["identifier"]["to_be_val"] = []
589 MODIFIED_ENTITY["identifier"]["to_be_val"].append(replaced_entity)
590 csv_row = op.csv_creator(MODIFIED_ENTITY)
591 expected_row = {} # because there is no ID accepted in opencitations for this entity
592 self.assertEqual(csv_row, expected_row)
594 op.storage_manager.delete_storage()
596 def test_get_publisher_name_base(self):
597 '''
598 Check that, given a doi and a dictionary representing a publisher's data, the string of the publisher's
599 normalised name (and possibly its crossref ID) is returned.
601 Base functionalities: No publisher mapping in input -> only Publisher name retrieved from the datasource dump
602 '''
603 op = OpenaireProcessing()
604 no_doi_pub_input = {'name': 'Blackwell Publishing Ltd'}
606 doi_pub_1_input = {'name': 'Frontiers Media SA'}
607 doi1 = "10.3389/fnana.2012.00034"
609 doi_pub_2_input = {'name': 'Oxford University Press (OUP)'}
610 doi2 = "10.2527/1995.7392834x"
612 no_doi_pub_output = op.get_publisher_name([""], no_doi_pub_input)
613 doi_pub_output_1 = op.get_publisher_name([doi1], doi_pub_1_input)
614 doi_pub_output_2 = op.get_publisher_name([doi2], doi_pub_2_input)
616 self.assertEqual(doi_pub_output_1, "Frontiers Media SA")
617 self.assertEqual(no_doi_pub_output, "Blackwell Publishing Ltd")
618 self.assertEqual(doi_pub_output_2, "Oxford University Press (OUP)")
620 op.storage_manager.delete_storage()
622 def test_get_publisher_name_base_redis(self):
623 '''
624 Check that, given a doi and a dictionary representing a publisher's data, the string of the publisher's
625 normalised name (and possibly its crossref ID) is returned.
627 Base functionalities: No publisher mapping in input -> only Publisher name retrieved from the datasource dump
628 '''
629 op = OpenaireProcessing(testing=True)
630 no_doi_pub_input = {'name': 'Blackwell Publishing Ltd'}
632 doi_pub_1_input = {'name': 'Frontiers Media SA'}
633 doi1 = "10.3389/fnana.2012.00034"
635 doi_pub_2_input = {'name': 'Oxford University Press (OUP)'}
636 doi2 = "10.2527/1995.7392834x"
638 no_doi_pub_output = op.get_publisher_name([""], no_doi_pub_input)
639 doi_pub_output_1 = op.get_publisher_name([doi1], doi_pub_1_input)
640 doi_pub_output_2 = op.get_publisher_name([doi2], doi_pub_2_input)
642 self.assertEqual(doi_pub_output_1, "Frontiers Media SA")
643 self.assertEqual(no_doi_pub_output, "Blackwell Publishing Ltd")
644 self.assertEqual(doi_pub_output_2, "Oxford University Press (OUP)")
646 op.storage_manager.delete_storage()
648 def test_get_publisher_name_publishers_mapping(self):
649 '''
650 Check that, given a doi and a dictionary representing a publisher's data, the string of the publisher's
651 normalised name (and possibly its crossref ID) is returned.
653 Mapping Provided: Publisher name retrieved + crossref member returned,
654 only if :
655 - the doi prefix is a crossref doi prefix,
656 - it is present in the mapping,
657 -the name of the publisher provided by the datasource corresponds to the from the datasource dump
658 '''
660 op = OpenaireProcessing(publishers_filepath_openaire="test/openaire_processing/support_material/publishers.json")
662 no_doi_pub_input = {'name': 'Blackwell Publishing Ltd'}
664 doi_pub_1_input = {'name': 'Frontiers Media SA'}
665 doi1 = "10.3389/fnana.2012.00034"
667 doi_pub_2_input = {'name': 'Oxford University Press (OUP)'}
668 doi2 = "10.2527/1995.7392834x"
670 no_doi_pub_output = op.get_publisher_name([""], no_doi_pub_input)
671 doi_pub_output_1 = op.get_publisher_name([doi1], doi_pub_1_input)
672 doi_pub_output_2 = op.get_publisher_name([doi2], doi_pub_2_input)
674 self.assertEqual(doi_pub_output_1, "Frontiers Media SA")
675 self.assertEqual(no_doi_pub_output, "Blackwell Publishing Ltd")
676 self.assertEqual(doi_pub_output_2, "Oxford University Press (OUP)")
678 op.storage_manager.delete_storage()
680 def test_get_publisher_name_publishers_mapping_redis(self):
681 '''
682 Check that, given a doi and a dictionary representing a publisher's data, the string of the publisher's
683 normalised name (and possibly its crossref ID) is returned.
685 Mapping Provided: Publisher name retrieved + crossref member returned,
686 only if :
687 - the doi prefix is a crossref doi prefix,
688 - it is present in the mapping,
689 -the name of the publisher provided by the datasource corresponds to the from the datasource dump
690 '''
692 op = OpenaireProcessing(testing=True,publishers_filepath_openaire="test/openaire_processing/support_material/publishers.json")
694 no_doi_pub_input = {'name': 'Blackwell Publishing Ltd'}
696 doi_pub_1_input = {'name': 'Frontiers Media SA'}
697 doi1 = "10.3389/fnana.2012.00034"
699 doi_pub_2_input = {'name': 'Oxford University Press (OUP)'}
700 doi2 = "10.2527/1995.7392834x"
702 no_doi_pub_output = op.get_publisher_name([""], no_doi_pub_input)
703 doi_pub_output_1 = op.get_publisher_name([doi1], doi_pub_1_input)
704 doi_pub_output_2 = op.get_publisher_name([doi2], doi_pub_2_input)
706 self.assertEqual(doi_pub_output_1, "Frontiers Media SA")
707 self.assertEqual(no_doi_pub_output, "Blackwell Publishing Ltd")
708 self.assertEqual(doi_pub_output_2, "Oxford University Press (OUP)")
710 op.storage_manager.delete_storage()
712 def test_get_publisher_name_publishers_mapping_multi_dois(self):
713 '''
714 Check that, given a doi and a dictionary representing a publisher's data, the string of the publisher's
715 normalised name (and possibly its crossref ID) is returned.
717 Mapping Provided: Publisher name retrieved + crossref member returned,
718 only if :
719 - the doi prefix is a crossref doi prefix,
720 - it is present in the mapping,
721 -the name of the publisher provided by the datasource corresponds to the from the datasource dump
722 '''
724 op = OpenaireProcessing(publishers_filepath_openaire="test/openaire_processing/support_material/publishers.json")
726 # CASE 1: The Publisher Name provided by OPENAIRE corresponds to the Publisher Name mapped to one of the
727 # entity's dois prefixes in the prefix-to-publisher-data mapping in input
728 # EXPECTED OUTPUT: The publisher name is retrieved with its crossref member
730 ent_1_doi_1 = "10.1152/sample_doi" #this prefix is in the mapping and corresponds to American Physiological Society
731 ent_1_doi_2 = "10.1153/sample_doi"
732 pub_input_1 = {'name': 'American Physiological Society'}
734 no_doi_pub_output = op.get_publisher_name([ent_1_doi_1, ent_1_doi_2], pub_input_1)
736 self.assertEqual(no_doi_pub_output, "American Physiological Society [crossref:24]")
738 # CASE 2: The Publisher Name provided by OPENAIRE does not correspond to the Publisher Name mapped to one of the
739 # entity's dois prefixes in the prefix-to-publisher-data mapping in input
740 # EXPECTED OUTPUT: The publisher name provided by Openaire is retrieved without any crossref member
742 ent_2_doi_1 = "10.1152/sample_doi" #this prefix is in the mapping and corresponds to American Physiological Society
743 ent_2_doi_2 = "10.1153/sample_doi"
744 pub_input_2 = {'name': 'Sample Publisher Name'}
746 no_doi_pub_output2 = op.get_publisher_name([ent_2_doi_1, ent_2_doi_2], pub_input_2)
747 self.assertEqual(no_doi_pub_output2, "Sample Publisher Name")
749 # CASE 3: The Publisher Name provided by OPENAIRE corresponds to the Publisher Name mapped to one of the
750 # entity's dois prefixes in the prefix-to-publisher-data mapping in input BUT it is not the first doi of the list
751 # EXPECTED OUTPUT: The publisher name is retrieved with its crossref member
753 ent_3_doi_1 = "10.1152/sample_doi" #this prefix is in the mapping and corresponds to American Physiological Society
754 ent_3_doi_2 = "10.1153/sample_doi"
755 pub_input_3 = {'name': 'American Physiological Society'}
757 doi_pub_output3 = op.get_publisher_name([ent_3_doi_2, ent_3_doi_1], pub_input_3)
759 self.assertEqual(doi_pub_output3, "American Physiological Society [crossref:24]")
761 op.storage_manager.delete_storage()
763 # CASE 4: OPENAIRE does not provide a publisher name but one of the entity's DOI prefixes is in the
764 # prefix-to-publisher-data mapping in input
765 # EXPECTED OUTPUT: empty string
767 ent_4_doi_1 = "10.1152/sample_doi" #this prefix is in the mapping and corresponds to American Physiological Society
768 ent_4_doi_2 = "10.1153/sample_doi"
769 pub_input_4 = {'name': ''}
770 pub_input_4_1 = {}
771 pub_input_4_2 = ''
773 doi_pub_output4 = op.get_publisher_name([ent_4_doi_1, ent_4_doi_2], pub_input_4)
774 doi_pub_output4_1 = op.get_publisher_name([ent_4_doi_1, ent_4_doi_2], pub_input_4_1)
775 doi_pub_output4_2= op.get_publisher_name([ent_4_doi_1, ent_4_doi_2], pub_input_4_2)
777 self.assertEqual(doi_pub_output4, "")
778 self.assertEqual(doi_pub_output4_1, "")
779 self.assertEqual(doi_pub_output4_2, "")
781 op.storage_manager.delete_storage()
783 def test_get_publisher_name_publishers_mapping_multi_dois_redis(self):
784 '''
785 Check that, given a doi and a dictionary representing a publisher's data, the string of the publisher's
786 normalised name (and possibly its crossref ID) is returned.
788 Mapping Provided: Publisher name retrieved + crossref member returned,
789 only if :
790 - the doi prefix is a crossref doi prefix,
791 - it is present in the mapping,
792 -the name of the publisher provided by the datasource corresponds to the from the datasource dump
793 '''
795 op = OpenaireProcessing(testing=True, publishers_filepath_openaire="test/openaire_processing/support_material/publishers.json")
797 # CASE 1: The Publisher Name provided by OPENAIRE corresponds to the Publisher Name mapped to one of the
798 # entity's dois prefixes in the prefix-to-publisher-data mapping in input
799 # EXPECTED OUTPUT: The publisher name is retrieved with its crossref member
801 ent_1_doi_1 = "10.1152/sample_doi" #this prefix is in the mapping and corresponds to American Physiological Society
802 ent_1_doi_2 = "10.1153/sample_doi"
803 pub_input_1 = {'name': 'American Physiological Society'}
805 no_doi_pub_output = op.get_publisher_name([ent_1_doi_1, ent_1_doi_2], pub_input_1)
807 self.assertEqual(no_doi_pub_output, "American Physiological Society [crossref:24]")
809 # CASE 2: The Publisher Name provided by OPENAIRE does not correspond to the Publisher Name mapped to one of the
810 # entity's dois prefixes in the prefix-to-publisher-data mapping in input
811 # EXPECTED OUTPUT: The publisher name provided by Openaire is retrieved without any crossref member
813 ent_2_doi_1 = "10.1152/sample_doi" #this prefix is in the mapping and corresponds to American Physiological Society
814 ent_2_doi_2 = "10.1153/sample_doi"
815 pub_input_2 = {'name': 'Sample Publisher Name'}
817 no_doi_pub_output2 = op.get_publisher_name([ent_2_doi_1, ent_2_doi_2], pub_input_2)
818 self.assertEqual(no_doi_pub_output2, "Sample Publisher Name")
820 # CASE 3: The Publisher Name provided by OPENAIRE corresponds to the Publisher Name mapped to one of the
821 # entity's dois prefixes in the prefix-to-publisher-data mapping in input BUT it is not the first doi of the list
822 # EXPECTED OUTPUT: The publisher name is retrieved with its crossref member
824 ent_3_doi_1 = "10.1152/sample_doi" #this prefix is in the mapping and corresponds to American Physiological Society
825 ent_3_doi_2 = "10.1153/sample_doi"
826 pub_input_3 = {'name': 'American Physiological Society'}
828 doi_pub_output3 = op.get_publisher_name([ent_3_doi_2, ent_3_doi_1], pub_input_3)
830 self.assertEqual(doi_pub_output3, "American Physiological Society [crossref:24]")
832 op.storage_manager.delete_storage()
834 # CASE 4: OPENAIRE does not provide a publisher name but one of the entity's DOI prefixes is in the
835 # prefix-to-publisher-data mapping in input
836 # EXPECTED OUTPUT: empty string
838 ent_4_doi_1 = "10.1152/sample_doi" #this prefix is in the mapping and corresponds to American Physiological Society
839 ent_4_doi_2 = "10.1153/sample_doi"
840 pub_input_4 = {'name': ''}
841 pub_input_4_1 = {}
842 pub_input_4_2 = ''
844 doi_pub_output4 = op.get_publisher_name([ent_4_doi_1, ent_4_doi_2], pub_input_4)
845 doi_pub_output4_1 = op.get_publisher_name([ent_4_doi_1, ent_4_doi_2], pub_input_4_1)
846 doi_pub_output4_2= op.get_publisher_name([ent_4_doi_1, ent_4_doi_2], pub_input_4_2)
848 self.assertEqual(doi_pub_output4, "")
849 self.assertEqual(doi_pub_output4_1, "")
850 self.assertEqual(doi_pub_output4_2, "")
852 op.storage_manager.delete_storage()
854 def test_manage_arxiv_single_id(self):
855 '''Check the correct management of entities with only one ID, in particular in
856 case it is an arxiv. In this case, if it is an arxiv DOI, we return the normalised
857 version of the correspondent arxiv. Both in case of an arxiv id and of an arxiv doi,
858 we return the versioned arxiv id where the version is available (never in ARXIV doi).
859 If no version is provided, we normalise the arxiv id as arxiv id version 1.
860 In all the other id cases (pmid, pmc, handle (which is discarded in a later step) '''
861 sample_doi_any = [{'schema': 'doi', 'identifier': 'doi:10.1000/FAKE_ID', 'valid': None}]
862 sample_doi_arxiv = [{'schema': 'doi', 'identifier': 'doi:10.48550/arXiv.1509.08217', 'valid': None}]
863 sample_arxiv_no_ver = [{'schema': 'arxiv', 'identifier': 'arxiv:1509.08217', 'valid': None}]
864 sample_arxiv_ver = [{'schema': 'arxiv', 'identifier': 'arxiv:1509...08217v3', 'valid': None}]
866 op = OpenaireProcessing()
868 # CASE 1: the unique input id dict in list is a not-arxiv doi : the input list is returned
869 out_sample_doi_any = op.manage_arxiv_single_id(sample_doi_any)
870 self.assertEqual(out_sample_doi_any, [{'schema': 'doi', 'identifier': 'doi:10.1000/FAKE_ID', 'valid': None}])
872 # CASE 2: the unique input id dict in list is an arxiv doi: the doi is replaced with its correspondent arxiv v1
873 out_sample_doi_arxiv = op.manage_arxiv_single_id(sample_doi_arxiv)
874 self.assertEqual(out_sample_doi_arxiv, [{'schema': 'arxiv', 'identifier': 'arxiv:1509.08217v1'}])
876 # CASE 3: the unique input id dict in list is an arxiv id without version:
877 # the arxiv id is replaced with its v1
878 out_sample_arxiv_no_ver = op.manage_arxiv_single_id(sample_arxiv_no_ver)
879 self.assertEqual(out_sample_arxiv_no_ver, [{'schema': 'arxiv', 'identifier': 'arxiv:1509.08217v1'}])
881 # CASE 4: the unique input id dict in list is an arxiv id with version: the id is just normalised
882 out_sample_arxiv_ver = op.manage_arxiv_single_id(sample_arxiv_ver)
883 self.assertEqual(out_sample_arxiv_ver, [{'schema': 'arxiv', 'identifier': 'arxiv:1509.08217v3'}])
885 op.storage_manager.delete_storage()
887 def test_manage_arxiv_single_id_redis(self):
888 '''Check the correct management of entities with only one ID, in particular in
889 case it is an arxiv. In this case, if it is an arxiv DOI, we return the normalised
890 version of the correspondent arxiv. Both in case of an arxiv id and of an arxiv doi,
891 we return the versioned arxiv id where the version is available (never in ARXIV doi).
892 If no version is provided, we normalise the arxiv id as arxiv id version 1.
893 In all the other id cases (pmid, pmc, handle (which is discarded in a later step) '''
894 sample_doi_any = [{'schema': 'doi', 'identifier': 'doi:10.1000/FAKE_ID', 'valid': None}]
895 sample_doi_arxiv = [{'schema': 'doi', 'identifier': 'doi:10.48550/arXiv.1509.08217', 'valid': None}]
896 sample_arxiv_no_ver = [{'schema': 'arxiv', 'identifier': 'arxiv:1509.08217', 'valid': None}]
897 sample_arxiv_ver = [{'schema': 'arxiv', 'identifier': 'arxiv:1509...08217v3', 'valid': None}]
899 op = OpenaireProcessing(testing=True)
901 # CASE 1: the unique input id dict in list is a not-arxiv doi : the input list is returned
902 out_sample_doi_any = op.manage_arxiv_single_id(sample_doi_any)
903 self.assertEqual(out_sample_doi_any, [{'schema': 'doi', 'identifier': 'doi:10.1000/FAKE_ID', 'valid': None}])
905 # CASE 2: the unique input id dict in list is an arxiv doi: the doi is replaced with its correspondent arxiv v1
906 out_sample_doi_arxiv = op.manage_arxiv_single_id(sample_doi_arxiv)
907 self.assertEqual(out_sample_doi_arxiv, [{'schema': 'arxiv', 'identifier': 'arxiv:1509.08217v1'}])
909 # CASE 3: the unique input id dict in list is an arxiv id without version:
910 # the arxiv id is replaced with its v1
911 out_sample_arxiv_no_ver = op.manage_arxiv_single_id(sample_arxiv_no_ver)
912 self.assertEqual(out_sample_arxiv_no_ver, [{'schema': 'arxiv', 'identifier': 'arxiv:1509.08217v1'}])
914 # CASE 4: the unique input id dict in list is an arxiv id with version: the id is just normalised
915 out_sample_arxiv_ver = op.manage_arxiv_single_id(sample_arxiv_ver)
916 self.assertEqual(out_sample_arxiv_ver, [{'schema': 'arxiv', 'identifier': 'arxiv:1509.08217v3'}])
918 op.storage_manager.delete_storage()
920 def test_manage_doi_prefixes_priorities(self):
921 op = OpenaireProcessing()
923 # CASE1: 1 figshare doi (priority 1) with version --> returned as it is
924 es_1 = [{'schema': 'doi', 'identifier': 'doi:10.6084/1234.1234v3', 'valid': None}]
925 out_1 = op.manage_doi_prefixes_priorities(es_1)
926 self.assertEqual(out_1, es_1)
928 # CASE2: 1 figshare doi (priority 1) without version --> returned with version v1
929 es_2 = [{'schema': 'doi', 'identifier': 'doi:10.6084/1234.1234', 'valid': None}]
930 exp_2 = [{'schema': 'doi', 'identifier': 'doi:10.6084/1234.1234v1', 'valid': None}]
931 out_2 = op.manage_doi_prefixes_priorities(es_2)
932 self.assertEqual(exp_2, out_2)
934 # CASE3: 1 arxiv doi (always without and version) --> returned as correspondent arxiv id version v1
935 es_3 = [{'schema': 'doi', 'identifier': 'doi:10.48550/1234.1234', 'valid': None}]
936 out_3 = op.manage_doi_prefixes_priorities(es_3)
937 exp_3 = [{'identifier': 'arxiv:1234.1234v1', 'schema': 'arxiv'}]
938 self.assertEqual(exp_3, out_3)
940 # CASE4: >1 arxiv doi or figshare and at least one has version --> return the one(s) with version
941 es_4 = [{'schema': 'doi', 'identifier': 'doi:10.48550/1234.1234', 'valid': None}, {'schema': 'doi', 'identifier': 'doi:10.6084/5678v3', 'valid': None}]
942 out_4 = op.manage_doi_prefixes_priorities(es_4)
943 exp_4 = [{'schema': 'doi', 'identifier': 'doi:10.6084/5678v3', 'valid': None}]
944 self.assertEqual(exp_4, out_4)
946 # CASE5: >1 arxiv doi or figshare and none has version --> return, as first choice, the arxiv version v1 of the first arxiv doi encountered
947 es_5 = [{'schema': 'doi', 'identifier': 'doi:10.6084/5678', 'valid': None}, {'schema': 'doi', 'identifier': 'doi:10.48550/1234.1234', 'valid': None}]
948 out_5 = op.manage_doi_prefixes_priorities(es_5)
949 exp_5 = [{'identifier': 'arxiv:1234.1234v1', 'schema': 'arxiv'}]
950 self.assertEqual(exp_5, out_5)
952 # CASE6: >1 figshare dois and none has version --> return, return version v1 doi of the first figshare doi encountered
953 es_6 = [{'schema': 'doi', 'identifier': 'doi:10.6084/5678', 'valid': None}, {'schema': 'doi', 'identifier': 'doi:10.6084/1234', 'valid': None}]
954 out_6 = op.manage_doi_prefixes_priorities(es_6)
955 exp_6 = [{'identifier': 'doi:10.6084/5678v1', 'schema': 'doi', 'valid': None}]
956 self.assertEqual(exp_6, out_6)
958 # CASE7: >1 more than one zenodo doi --> return the one with the highest number: it is the last one assigned and thus it
959 # is a version doi and not the collector doi (which is the first one to be assigned when a publication is uploaded on zenodo).
960 es_7 = [{'schema': 'doi', 'identifier': '10.5281/zenodo.111', 'valid': None}, {'schema': 'doi', 'identifier': '10.5281/zenodo.112', 'valid': None}]
961 es_7_1 = [{'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.111', 'valid': None}, {'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.112', 'valid': None}]
962 out_7 = op.manage_doi_prefixes_priorities(es_7)
963 out_7_1 = op.manage_doi_prefixes_priorities(es_7_1)
964 exp_7 = [{'identifier': '10.5281/zenodo.112', 'schema': 'doi', 'valid': None}]
965 exp_7_1 = [{'identifier': 'doi:10.5281/zenodo.112', 'schema': 'doi', 'valid': None}]
966 self.assertEqual(exp_7, out_7)
967 self.assertEqual(exp_7_1, out_7_1)
969 # CASE8: None of the previous cases: return the first VALID DOI with highest priority prefix
970 #No one of the ids is valid, return an empty list
971 es_8 = [
972 {'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.111', 'valid': None},
973 {'schema': 'doi', 'identifier': 'doi:10.1184/abc', 'valid': None},
974 {'schema': 'doi', 'identifier': 'doi:10.25384/efg', 'valid': None},
975 ]
977 out_8 = op.manage_doi_prefixes_priorities(es_8)
978 exp_8 = []
979 self.assertEqual(exp_8, out_8)
981 # CASE8_1:
982 # No valid id among the ones with a max priority prefix --> return the first valid ID in order of prefix priority
983 es_8_1 = [
984 {'schema': 'doi', 'identifier': '10.5281/zenodo.4725899', 'valid': None},
985 {'schema': 'doi', 'identifier': 'doi:10.1184/abc', 'valid': None},
986 {'schema': 'doi', 'identifier': 'doi:10.25384/efg', 'valid': None},
987 ]
989 out_8_1 = op.manage_doi_prefixes_priorities(es_8_1)
990 exp_8_1 = [{'schema': 'doi', 'identifier': '10.5281/zenodo.4725899', 'valid': None}]
991 self.assertEqual(exp_8_1, out_8_1)
993 # CASE8_2:
994 # more valid ids among the ones with a max priority prefix --> return the first one encountered
995 es_8_2 = [
996 {'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.4725899', 'valid': None},
997 {'schema': 'doi', 'identifier': 'doi:10.1184/R1/12841247.v1', 'valid': None},
998 {'schema': 'doi', 'identifier': 'doi:10.25384/sage.c.4112909', 'valid': None},
999 ]
1001 out_8_2 = op.manage_doi_prefixes_priorities(es_8_2)
1002 exp_8_2 = [{'schema': 'doi', 'identifier': 'doi:10.1184/R1/12841247.v1', 'valid': None}]
1003 self.assertEqual(exp_8_2, out_8_2)
1005 op.storage_manager.delete_storage()
1007 def test_manage_doi_prefixes_priorities_redis(self):
1008 op = OpenaireProcessing(testing=True)
1010 # CASE1: 1 figshare doi (priority 1) with version --> returned as it is
1011 es_1 = [{'schema': 'doi', 'identifier': 'doi:10.6084/1234.1234v3', 'valid': None}]
1012 out_1 = op.manage_doi_prefixes_priorities(es_1)
1013 self.assertEqual(out_1, es_1)
1015 # CASE2: 1 figshare doi (priority 1) without version --> returned with version v1
1016 es_2 = [{'schema': 'doi', 'identifier': 'doi:10.6084/1234.1234', 'valid': None}]
1017 exp_2 = [{'schema': 'doi', 'identifier': 'doi:10.6084/1234.1234v1', 'valid': None}]
1018 out_2 = op.manage_doi_prefixes_priorities(es_2)
1019 self.assertEqual(exp_2, out_2)
1021 # CASE3: 1 arxiv doi (always without and version) --> returned as correspondent arxiv id version v1
1022 es_3 = [{'schema': 'doi', 'identifier': 'doi:10.48550/1234.1234', 'valid': None}]
1023 out_3 = op.manage_doi_prefixes_priorities(es_3)
1024 exp_3 = [{'identifier': 'arxiv:1234.1234v1', 'schema': 'arxiv'}]
1025 self.assertEqual(exp_3, out_3)
1027 # CASE4: >1 arxiv doi or figshare and at least one has version --> return the one(s) with version
1028 es_4 = [{'schema': 'doi', 'identifier': 'doi:10.48550/1234.1234', 'valid': None}, {'schema': 'doi', 'identifier': 'doi:10.6084/5678v3', 'valid': None}]
1029 out_4 = op.manage_doi_prefixes_priorities(es_4)
1030 exp_4 = [{'schema': 'doi', 'identifier': 'doi:10.6084/5678v3', 'valid': None}]
1031 self.assertEqual(exp_4, out_4)
1033 # CASE5: >1 arxiv doi or figshare and none has version --> return, as first choice, the arxiv version v1 of the first arxiv doi encountered
1034 es_5 = [{'schema': 'doi', 'identifier': 'doi:10.6084/5678', 'valid': None}, {'schema': 'doi', 'identifier': 'doi:10.48550/1234.1234', 'valid': None}]
1035 out_5 = op.manage_doi_prefixes_priorities(es_5)
1036 exp_5 = [{'identifier': 'arxiv:1234.1234v1', 'schema': 'arxiv'}]
1037 self.assertEqual(exp_5, out_5)
1039 # CASE6: >1 figshare dois and none has version --> return, return version v1 doi of the first figshare doi encountered
1040 es_6 = [{'schema': 'doi', 'identifier': 'doi:10.6084/5678', 'valid': None}, {'schema': 'doi', 'identifier': 'doi:10.6084/1234', 'valid': None}]
1041 out_6 = op.manage_doi_prefixes_priorities(es_6)
1042 exp_6 = [{'identifier': 'doi:10.6084/5678v1', 'schema': 'doi', 'valid': None}]
1043 self.assertEqual(exp_6, out_6)
1045 # CASE7: >1 more than one zenodo doi --> return the one with the highest number: it is the last one assigned and thus it
1046 # is a version doi and not the collector doi (which is the first one to be assigned when a publication is uploaded on zenodo).
1047 es_7 = [{'schema': 'doi', 'identifier': '10.5281/zenodo.111', 'valid': None}, {'schema': 'doi', 'identifier': '10.5281/zenodo.112', 'valid': None}]
1048 es_7_1 = [{'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.111', 'valid': None}, {'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.112', 'valid': None}]
1049 out_7 = op.manage_doi_prefixes_priorities(es_7)
1050 out_7_1 = op.manage_doi_prefixes_priorities(es_7_1)
1051 exp_7 = [{'identifier': '10.5281/zenodo.112', 'schema': 'doi', 'valid': None}]
1052 exp_7_1 = [{'identifier': 'doi:10.5281/zenodo.112', 'schema': 'doi', 'valid': None}]
1053 self.assertEqual(exp_7, out_7)
1054 self.assertEqual(exp_7_1, out_7_1)
1056 # CASE8: None of the previous cases: return the first VALID DOI with highest priority prefix
1057 #No one of the ids is valid, return an empty list
1058 es_8 = [
1059 {'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.111', 'valid': None},
1060 {'schema': 'doi', 'identifier': 'doi:10.1184/abc', 'valid': None},
1061 {'schema': 'doi', 'identifier': 'doi:10.25384/efg', 'valid': None},
1062 ]
1064 out_8 = op.manage_doi_prefixes_priorities(es_8)
1065 exp_8 = []
1066 self.assertEqual(exp_8, out_8)
1068 # CASE8_1:
1069 # No valid id among the ones with a max priority prefix --> return the first valid ID in order of prefix priority
1070 es_8_1 = [
1071 {'schema': 'doi', 'identifier': '10.5281/zenodo.4725899', 'valid': None},
1072 {'schema': 'doi', 'identifier': 'doi:10.1184/abc', 'valid': None},
1073 {'schema': 'doi', 'identifier': 'doi:10.25384/efg', 'valid': None},
1074 ]
1076 out_8_1 = op.manage_doi_prefixes_priorities(es_8_1)
1077 exp_8_1 = [{'schema': 'doi', 'identifier': '10.5281/zenodo.4725899', 'valid': None}]
1078 self.assertEqual(exp_8_1, out_8_1)
1080 # CASE8_2:
1081 # more valid ids among the ones with a max priority prefix --> return the first one encountered
1082 es_8_2 = [
1083 {'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.4725899', 'valid': None},
1084 {'schema': 'doi', 'identifier': 'doi:10.1184/R1/12841247.v1', 'valid': None},
1085 {'schema': 'doi', 'identifier': 'doi:10.25384/sage.c.4112909', 'valid': None},
1086 ]
1088 out_8_2 = op.manage_doi_prefixes_priorities(es_8_2)
1089 exp_8_2 = [{'schema': 'doi', 'identifier': 'doi:10.1184/R1/12841247.v1', 'valid': None}]
1090 self.assertEqual(exp_8_2, out_8_2)
1092 op.storage_manager.delete_storage()
1094 def test_to_validated_id_list(self):
1095 # NOTE: in tests using the sqlite storage method it must be avoided to delete the storage
1096 # while using the same OpenaireProcessing() instance, otherwise the process would try to
1097 # store data in a filepath that has just been deleted, with no new connection created after it.
1099 # 2 OPTIONS: 1) instantiate OpenaireProcessing only once at the beginning and delete the
1100 # storage only at the end; 2) create a new OpenaireProcessing instance at every check and
1101 # delete the storage each time after the check is done.
1103 op = OpenaireProcessing()
1104 # CASE1_1: No already validated ids + 1 id to be validated, which is valid
1105 inp_1 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None}]}
1106 out_1 = op.to_validated_id_list(inp_1)
1107 exp_1 = ['pmid:20662931']
1108 self.assertEqual(out_1, exp_1)
1109 op.storage_manager.delete_storage()
1111 op = OpenaireProcessing()
1112 # CASE1_2: No already validated ids + 1 id to be validated, which is invalid
1113 inp_2 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:999920662931', 'valid': None}]}
1114 out_2 = op.to_validated_id_list(inp_2)
1115 exp_2 = []
1116 self.assertEqual(out_2, exp_2)
1118 op = OpenaireProcessing()
1119 # CASE1_3: No already validated ids + 1 id to be validated, which is a valid arxiv doi
1120 inp_3 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'doi', 'identifier': 'doi:10.48550/arXiv.1509.08217', 'valid': None}]}
1121 out_3 = op.to_validated_id_list(inp_3)
1122 exp_3 = ['arxiv:1509.08217v1']
1123 self.assertEqual(out_3, exp_3)
1124 op.storage_manager.delete_storage()
1127 op = OpenaireProcessing()
1128 # CASE1_4: No already validated ids + 1 id to be validated, which hasn't a valid schema
1129 inp_4 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': "0", 'identifier': 'doi:10.48550/arXiv.1509.08217', 'valid': None}]}
1130 out_4 = op.to_validated_id_list(inp_4)
1131 exp_4 = []
1132 self.assertEqual(out_4, exp_4)
1133 op.storage_manager.delete_storage()
1135 op = OpenaireProcessing()
1136 # CASE1_5: No already validated ids + 1 id to be validated, which is not valid
1137 inp_5 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': "doi", 'identifier': 'doi:10.0000/fake_id', 'valid': None}]}
1138 out_5 = op.to_validated_id_list(inp_5)
1139 exp_5 = []
1140 self.assertEqual(out_5, exp_5)
1141 op.storage_manager.delete_storage()
1143 op = OpenaireProcessing()
1144 # CASE1_9: No already validated ids + 1 id to be validated, which is a valid PMC
1145 inp_9 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': "pmcid", 'identifier': 'pmcid:PMC2873764', 'valid': None}]}
1146 out_9 = op.to_validated_id_list(inp_9)
1147 exp_9 = ['pmcid:PMC2873764']
1148 self.assertEqual(out_9, exp_9)
1149 op.storage_manager.delete_storage()
1151 op = OpenaireProcessing()
1152 # CASE2_1: No already validated ids + >1 id to be validated, both valid and with accepted schemas
1153 inp_6 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None},
1154 {'schema': 'doi', 'identifier': 'doi:10.1007/s12160-011-9282-0', 'valid': None}]}
1155 out_6 = op.to_validated_id_list(inp_6)
1156 exp_6 = ['pmid:20662931', 'doi:10.1007/s12160-011-9282-0']
1157 self.assertCountEqual(out_6, exp_6) #Test that sequence first contains the same elements as second, regardless of their order
1158 op.storage_manager.delete_storage()
1160 op = OpenaireProcessing()
1161 # CASE2_2: No already validated ids + >1 id to be validated, both valid, one of the two is an arxiv id
1162 inp_8 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None},
1163 {'schema': 'arxiv', 'identifier': 'arxiv:1107.5979', 'valid': None}]}
1164 out_8 = op.to_validated_id_list(inp_8)
1165 exp_8 = ['pmid:20662931']
1166 self.assertEqual(out_8, exp_8)
1167 op.storage_manager.delete_storage()
1169 op = OpenaireProcessing()
1170 # CASE2_3: No already validated ids + >1 id to be validated, both valid, one of the two is an arxiv doi
1171 inp_7 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None}, {'schema': "doi", 'identifier': 'doi:10.48550/arXiv.1509.08217', 'valid': None}]}
1172 out_7 = op.to_validated_id_list(inp_7)
1173 exp_7 = ['pmid:20662931']
1174 self.assertEqual(out_7, exp_7)
1175 op.storage_manager.delete_storage()
1177 op = OpenaireProcessing()
1178 # CASE2_4: No already validated ids + >1 id to be validated, both valid, one of the two is a PMC
1179 inp_10 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None},
1180 {'schema': "pmcid", 'identifier': 'pmcid:PMC2873764', 'valid': None}]}
1181 out_10 = op.to_validated_id_list(inp_10)
1182 exp_10 = ['pmid:20662931']
1183 self.assertEqual(out_10, exp_10)
1184 op.storage_manager.delete_storage()
1186 op = OpenaireProcessing()
1187 # CASE2_5: No already validated ids + >1 id to be validated, 1 valid pmid, 1 valid doi, 1 valid doi with a "critic" prefix
1188 # for opencitations entities management
1190 inp_11 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None},
1191 {'schema': 'doi', 'identifier': 'doi:10.1007/s12160-011-9282-0', 'valid': None},
1192 {'schema': 'doi',
1193 'identifier': 'doi:10.48550/arXiv.1509.08217',
1194 'valid': None}
1195 ]}
1196 out_11 = op.to_validated_id_list(inp_11)
1197 exp_11 = ['pmid:20662931', 'doi:10.1007/s12160-011-9282-0']
1198 self.assertCountEqual(out_11, exp_11) #Test that sequence first contains the same elements as second, regardless of their order
1199 op.storage_manager.delete_storage()
1201 op = OpenaireProcessing()
1202 # CASE2_6: No already validated ids + >1 id to be validated, one doi with a "critic" prefix and a PMCID
1203 # for opencitations entities management
1205 inp_12 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmcid', 'identifier': 'pmcid:PMC5555555', 'valid': None},
1206 {'schema': 'doi',
1207 'identifier': 'doi:10.48550/arXiv.1509.08217',
1208 'valid': None}
1209 ]}
1210 out_12 = op.to_validated_id_list(inp_12)
1211 exp_12 = ['pmcid:PMC5555555']
1212 self.assertEqual(out_12, exp_12)
1213 op.storage_manager.delete_storage()
1215 op = OpenaireProcessing()
1216 # CASE2_7: no already validated ids + >1 id to be validated, one doi with a "critic" prefix for opencitations
1217 # ingestion workflow and an ARXIV
1219 inp_13 = {'valid': [], 'not_valid': [], 'to_be_val': [
1220 {'schema': 'arxiv', 'identifier': 'arxiv:1107.5979v1', 'valid': None},
1221 {'schema': 'doi', 'identifier': 'doi:10.1184/R1/12841247.v1', 'valid': None}
1222 ]}
1223 out_13 = op.to_validated_id_list(inp_13)
1224 exp_13 = ['arxiv:1107.5979v1']
1225 self.assertEqual(out_13, exp_13)
1226 op.storage_manager.delete_storage()
1228 op = OpenaireProcessing()
1229 # CASE2_8: no already validated ids and more dois with "critic" prefixes for opencitations
1230 # ingestion workflow
1232 inp_14 = {'valid': [], 'not_valid': [], 'to_be_val': [
1233 {'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.4725899', 'valid': None},
1234 {'schema': 'doi', 'identifier': 'doi:10.1184/r1/12841247.v1', 'valid': None}
1235 ]}
1236 out_14 = op.to_validated_id_list(inp_14)
1237 exp_14 = ['doi:10.1184/r1/12841247.v1']
1238 self.assertEqual(out_14, exp_14)
1239 op.storage_manager.delete_storage()
1241 op = OpenaireProcessing()
1242 # CASE3: an already validated id and more dois with "critic" prefixes for opencitations
1243 # ingestion workflow
1245 inp_15 = {'valid': [], 'not_valid': [], 'to_be_val': [
1246 {'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.4725899', 'valid': None},
1247 {'schema': 'doi', 'identifier': 'doi:10.1184/r1/12841247.v1', 'valid': None},
1248 {'schema': 'doi', 'identifier': 'doi:10.7557/5.5607', 'valid': None},
1249 {}
1250 ]}
1251 out_15 = op.to_validated_id_list(inp_15)
1252 exp_15 = ['doi:10.7557/5.5607']
1253 self.assertEqual(out_15, exp_15)
1254 op.storage_manager.delete_storage()
1256 def test_to_validated_id_list_redis(self):
1257 # NOTE: in tests using the sqlite storage method it must be avoided to delete the storage
1258 # while using the same OpenaireProcessing() instance, otherwise the process would try to
1259 # store data in a filepath that has just been deleted, with no new connection created after it.
1261 # 2 OPTIONS: 1) instantiate OpenaireProcessing only once at the beginning and delete the
1262 # storage only at the end; 2) create a new OpenaireProcessing instance at every check and
1263 # delete the storage each time after the check is done.
1265 op = OpenaireProcessing(testing=True)
1266 # CASE1_1: No already validated ids + 1 id to be validated, which is valid
1267 inp_1 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None}]}
1268 out_1 = op.to_validated_id_list(inp_1)
1269 exp_1 = ['pmid:20662931']
1270 self.assertEqual(out_1, exp_1)
1271 op.storage_manager.delete_storage()
1273 op = OpenaireProcessing(testing=True)
1274 # CASE1_2: No already validated ids + 1 id to be validated, which is invalid
1275 inp_2 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:999920662931', 'valid': None}]}
1276 out_2 = op.to_validated_id_list(inp_2)
1277 exp_2 = []
1278 self.assertEqual(out_2, exp_2)
1279 op.storage_manager.delete_storage()
1281 op = OpenaireProcessing(testing=True)
1282 # CASE1_3: No already validated ids + 1 id to be validated, which is a valid arxiv doi
1283 inp_3 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'doi', 'identifier': 'doi:10.48550/arXiv.1509.08217', 'valid': None}]}
1284 out_3 = op.to_validated_id_list(inp_3)
1285 exp_3 = ['arxiv:1509.08217v1']
1286 self.assertEqual(out_3, exp_3)
1287 op.storage_manager.delete_storage()
1289 op = OpenaireProcessing(testing=True)
1290 # CASE1_4: No already validated ids + 1 id to be validated, which hasn't a valid schema
1291 inp_4 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': "0", 'identifier': 'doi:10.48550/arXiv.1509.08217', 'valid': None}]}
1292 out_4 = op.to_validated_id_list(inp_4)
1293 exp_4 = []
1294 self.assertEqual(out_4, exp_4)
1295 op.storage_manager.delete_storage()
1297 op = OpenaireProcessing(testing=True)
1298 # CASE1_5: No already validated ids + 1 id to be validated, which is not valid
1299 inp_5 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': "doi", 'identifier': 'doi:10.0000/fake_id', 'valid': None}]}
1300 out_5 = op.to_validated_id_list(inp_5)
1301 exp_5 = []
1302 self.assertEqual(out_5, exp_5)
1303 op.storage_manager.delete_storage()
1305 op = OpenaireProcessing(testing=True)
1306 # CASE1_9: No already validated ids + 1 id to be validated, which is a valid PMC
1307 inp_9 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': "pmcid", 'identifier': 'pmcid:PMC2873764', 'valid': None}]}
1308 out_9 = op.to_validated_id_list(inp_9)
1309 exp_9 = ['pmcid:PMC2873764']
1310 self.assertEqual(out_9, exp_9)
1311 op.storage_manager.delete_storage()
1313 op = OpenaireProcessing(testing=True)
1314 # CASE2_1: No already validated ids + >1 id to be validated, both valid and with accepted schemas
1315 inp_6 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None},
1316 {'schema': 'doi', 'identifier': 'doi:10.1007/s12160-011-9282-0', 'valid': None}]}
1317 out_6 = op.to_validated_id_list(inp_6)
1318 exp_6 = ['pmid:20662931', 'doi:10.1007/s12160-011-9282-0']
1319 self.assertCountEqual(out_6, exp_6) #Test that sequence first contains the same elements as second, regardless of their order
1320 op.storage_manager.delete_storage()
1322 op = OpenaireProcessing(testing=True)
1323 # CASE2_2: No already validated ids + >1 id to be validated, both valid, one of the two is an arxiv id
1324 inp_8 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None},
1325 {'schema': 'arxiv', 'identifier': 'arxiv:1107.5979', 'valid': None}]}
1326 out_8 = op.to_validated_id_list(inp_8)
1327 exp_8 = ['pmid:20662931']
1328 self.assertEqual(out_8, exp_8)
1329 op.storage_manager.delete_storage()
1331 op = OpenaireProcessing(testing=True)
1332 # CASE2_3: No already validated ids + >1 id to be validated, both valid, one of the two is an arxiv doi
1333 inp_7 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None}, {'schema': "doi", 'identifier': 'doi:10.48550/arXiv.1509.08217', 'valid': None}]}
1334 out_7 = op.to_validated_id_list(inp_7)
1335 exp_7 = ['pmid:20662931']
1336 self.assertEqual(out_7, exp_7)
1337 op.storage_manager.delete_storage()
1339 op = OpenaireProcessing(testing=True)
1340 # CASE2_4: No already validated ids + >1 id to be validated, both valid, one of the two is a PMC
1341 inp_10 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None},
1342 {'schema': "pmcid", 'identifier': 'pmcid:PMC2873764', 'valid': None}]}
1343 out_10 = op.to_validated_id_list(inp_10)
1344 exp_10 = ['pmid:20662931']
1345 self.assertEqual(out_10, exp_10)
1346 op.storage_manager.delete_storage()
1348 op = OpenaireProcessing(testing=True)
1349 # CASE2_5: No already validated ids + >1 id to be validated, 1 valid pmid, 1 valid doi, 1 valid doi with a "critic" prefix
1350 # for opencitations entities management
1352 inp_11 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmid', 'identifier': 'pmid:20662931', 'valid': None},
1353 {'schema': 'doi', 'identifier': 'doi:10.1007/s12160-011-9282-0', 'valid': None},
1354 {'schema': 'doi',
1355 'identifier': 'doi:10.48550/arXiv.1509.08217',
1356 'valid': None}
1357 ]}
1358 out_11 = op.to_validated_id_list(inp_11)
1359 exp_11 = ['pmid:20662931', 'doi:10.1007/s12160-011-9282-0']
1360 self.assertCountEqual(out_11, exp_11) #Test that sequence first contains the same elements as second, regardless of their order
1361 op.storage_manager.delete_storage()
1363 op = OpenaireProcessing(testing=True)
1364 # CASE2_6: No already validated ids + >1 id to be validated, one doi with a "critic" prefix and a PMCID
1365 # for opencitations entities management
1367 inp_12 = {'valid': [], 'not_valid': [], 'to_be_val': [{'schema': 'pmcid', 'identifier': 'pmcid:PMC5555555', 'valid': None},
1368 {'schema': 'doi',
1369 'identifier': 'doi:10.48550/arXiv.1509.08217',
1370 'valid': None}
1371 ]}
1372 out_12 = op.to_validated_id_list(inp_12)
1373 exp_12 = ['pmcid:PMC5555555']
1374 self.assertEqual(out_12, exp_12)
1375 op.storage_manager.delete_storage()
1377 op = OpenaireProcessing(testing=True)
1378 # CASE2_7: no already validated ids + >1 id to be validated, one doi with a "critic" prefix for opencitations
1379 # ingestion workflow and an ARXIV
1381 inp_13 = {'valid': [], 'not_valid': [], 'to_be_val': [
1382 {'schema': 'arxiv', 'identifier': 'arxiv:1107.5979v1', 'valid': None},
1383 {'schema': 'doi', 'identifier': 'doi:10.1184/R1/12841247.v1', 'valid': None}
1384 ]}
1385 out_13 = op.to_validated_id_list(inp_13)
1386 exp_13 = ['arxiv:1107.5979v1']
1387 self.assertEqual(out_13, exp_13)
1388 op.storage_manager.delete_storage()
1390 op = OpenaireProcessing(testing=True)
1391 # CASE2_8: no already validated ids and more dois with "critic" prefixes for opencitations
1392 # ingestion workflow
1394 inp_14 = {'valid': [], 'not_valid': [], 'to_be_val': [
1395 {'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.4725899', 'valid': None},
1396 {'schema': 'doi', 'identifier': 'doi:10.1184/r1/12841247.v1', 'valid': None}
1397 ]}
1398 out_14 = op.to_validated_id_list(inp_14)
1399 exp_14 = ['doi:10.1184/r1/12841247.v1']
1400 self.assertEqual(out_14, exp_14)
1401 op.storage_manager.delete_storage()
1403 op = OpenaireProcessing(testing=True)
1404 # CASE3: an already validated id and more dois with "critic" prefixes for opencitations
1405 # ingestion workflow
1407 inp_15 = {'valid': [], 'not_valid': [], 'to_be_val': [
1408 {'schema': 'doi', 'identifier': 'doi:10.5281/zenodo.4725899', 'valid': None},
1409 {'schema': 'doi', 'identifier': 'doi:10.1184/r1/12841247.v1', 'valid': None},
1410 {'schema': 'doi', 'identifier': 'doi:10.7557/5.5607', 'valid': None},
1411 {}
1412 ]}
1413 out_15 = op.to_validated_id_list(inp_15)
1414 exp_15 = ['doi:10.7557/5.5607']
1415 self.assertEqual(out_15, exp_15)
1416 op.storage_manager.delete_storage()
1419 def test_add_authors_to_agent_list(self):
1420 op = OpenaireProcessing()
1421 sample_inp = {'creator': [{'name': 'Carlos Hoyos'}, {'name': 'Yaron Oz'}, {'identifiers': [{'identifier': '0000-0001-6946-5074', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-6946-5074'}], 'name': 'Bom Soo Kim'}]}
1422 sample_exp = op.add_authors_to_agent_list(sample_inp, [])
1423 sample_out = [{'role': 'author', 'name': 'Carlos Hoyos', 'family': '', 'given': ''}, {'role': 'author', 'name': 'Yaron Oz', 'family': '', 'given': ''}, {'role': 'author', 'name': 'Bom Soo Kim', 'family': '', 'given': '', 'orcid': 'orcid:0000-0001-6946-5074'}]
1424 self.assertEqual(sample_out, sample_exp)
1425 op.storage_manager.delete_storage()
1428 def test_add_authors_to_agent_list_redis(self):
1429 op = OpenaireProcessing(testing=True)
1430 sample_inp = {'creator': [{'name': 'Carlos Hoyos'}, {'name': 'Yaron Oz'}, {'identifiers': [{'identifier': '0000-0001-6946-5074', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-6946-5074'}], 'name': 'Bom Soo Kim'}]}
1431 sample_exp = op.add_authors_to_agent_list(sample_inp, [])
1432 sample_out = [{'role': 'author', 'name': 'Carlos Hoyos', 'family': '', 'given': ''}, {'role': 'author', 'name': 'Yaron Oz', 'family': '', 'given': ''}, {'role': 'author', 'name': 'Bom Soo Kim', 'family': '', 'given': '', 'orcid': 'orcid:0000-0001-6946-5074'}]
1433 self.assertEqual(sample_out, sample_exp)
1434 op.storage_manager.delete_storage()
1436 def test_add_authors_to_agent_list_no_creator(self):
1437 op = OpenaireProcessing()
1438 sample_inp = {'creator': []}
1439 sample_exp = op.add_authors_to_agent_list(sample_inp, [])
1440 sample_out = []
1441 self.assertEqual(sample_out, sample_exp)
1442 op.storage_manager.delete_storage()
1445 def test_add_authors_to_agent_list_no_creator_redis(self):
1446 op = OpenaireProcessing(testing=True)
1447 sample_inp = {'creator': []}
1448 sample_exp = op.add_authors_to_agent_list(sample_inp, [])
1449 sample_out = []
1450 self.assertEqual(sample_out, sample_exp)
1451 op.storage_manager.delete_storage()
1453 def test_get_agents_strings_list(self):
1454 best_doi = "doi:10.1007/jhep03(2014)050"
1455 agents_list_2 = [{'role': 'author', 'name': 'Hoyos, Carlos', 'family': '', 'given': ''}, {'role': 'author', 'name': 'Oz, Yaron', 'family': '', 'given': ''}, {'role': 'author', 'name': 'Kim, Bom Soo', 'family': '', 'given': '', 'orcid': 'orcid:0000-0001-6946-5074'}]
1456 op = OpenaireProcessing()
1457 sample_exp = op.get_agents_strings_list(best_doi, agents_list_2)
1458 self.assertEqual(sample_exp, (['Hoyos Carlos', 'Oz Yaron', 'Kim Bom Soo [orcid:0000-0001-6946-5074]'], []))
1459 op.storage_manager.delete_storage()
1461 def test_get_agents_strings_list_redis(self):
1462 best_doi = "doi:10.1007/jhep03(2014)050"
1463 agents_list_2 = [{'role': 'author', 'name': 'Hoyos, Carlos', 'family': '', 'given': ''}, {'role': 'author', 'name': 'Oz, Yaron', 'family': '', 'given': ''}, {'role': 'author', 'name': 'Kim, Bom Soo', 'family': '', 'given': '', 'orcid': 'orcid:0000-0001-6946-5074'}]
1464 op = OpenaireProcessing(testing=True)
1465 sample_exp = op.get_agents_strings_list(best_doi, agents_list_2)
1466 self.assertEqual(sample_exp, (['Hoyos Carlos', 'Oz Yaron', 'Kim Bom Soo [orcid:0000-0001-6946-5074]'], []))
1467 op.storage_manager.delete_storage()
1469 def test_find_openaire_orcid(self):
1470 op = OpenaireProcessing(testing=True)
1471 inp = [{'identifier': '0000-0001-9759-3938', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-9759-3938'}]
1472 out = op.find_openaire_orcid(inp)
1473 exp = "orcid:0000-0001-9759-3938"
1474 self.assertEqual(out, exp)
1476 inp_wrong_schema = [{'identifier': '0000-0001-9759-3938', 'schema': 'fake_schema', 'url': 'https://orcid.org/0000-0001-9759-3938'}]
1477 out_wrong_schema = op.find_openaire_orcid(inp_wrong_schema)
1478 exp_wrong_schema = ""
1479 self.assertEqual(out_wrong_schema, exp_wrong_schema)
1481 inp_invalid_id = [{'identifier': '5500-0001-9759-3938', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-9759-3938'}]
1482 out_invalid_id = op.find_openaire_orcid(inp_invalid_id)
1483 exp_invalid_id = ""
1484 self.assertEqual(out_invalid_id, exp_invalid_id)
1486 op.orcid_m.storage_manager.delete_storage()
1488 # set a valid id as invalid in storage, so to check that the api check is
1489 # avoided if the info is already in storage
1490 op = OpenaireProcessing(testing=True)
1491 op.orcid_m.storage_manager.set_value("orcid:0000-0001-9759-3938", False)
1493 inp = [{'identifier': '0000-0001-9759-3938', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-9759-3938'}]
1494 out = op.find_openaire_orcid(inp)
1495 exp = ""
1496 self.assertEqual(out, exp)
1498 op.orcid_m.storage_manager.delete_storage()
1499 op = OpenaireProcessing(testing=True)
1500 op.orcid_m.storage_manager.set_value("orcid:0000-0001-9759-3938", True)
1501 inp = [{'identifier': '0000-0001-9759-3938', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-9759-3938'}]
1502 out = op.find_openaire_orcid(inp)
1503 exp = "orcid:0000-0001-9759-3938"
1504 self.assertEqual(out, exp)
1505 op.orcid_m.storage_manager.delete_storage()
1508 def test_find_openaire_orcid_redis(self):
1509 op = OpenaireProcessing(testing=True)
1510 inp = [{'identifier': '0000-0001-9759-3938', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-9759-3938'}]
1511 out = op.find_openaire_orcid(inp)
1512 exp = "orcid:0000-0001-9759-3938"
1513 self.assertEqual(out, exp)
1515 inp_wrong_schema = [{'identifier': '0000-0001-9759-3938', 'schema': 'fake_schema', 'url': 'https://orcid.org/0000-0001-9759-3938'}]
1516 out_wrong_schema = op.find_openaire_orcid(inp_wrong_schema)
1517 exp_wrong_schema = ""
1518 self.assertEqual(out_wrong_schema, exp_wrong_schema)
1520 inp_invalid_id = [{'identifier': '5500-0001-9759-3938', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-9759-3938'}]
1521 out_invalid_id = op.find_openaire_orcid(inp_invalid_id)
1522 exp_invalid_id = ""
1523 self.assertEqual(out_invalid_id, exp_invalid_id)
1525 op.orcid_m.storage_manager.delete_storage()
1527 # set a valid id as invalid in storage, so to check that the api check is
1528 # avoided if the info is already in storage
1529 op = OpenaireProcessing(testing=True)
1530 op.orcid_m.storage_manager.set_value("orcid:0000-0001-9759-3938", False)
1532 inp = [{'identifier': '0000-0001-9759-3938', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-9759-3938'}]
1533 out = op.find_openaire_orcid(inp)
1534 exp = ""
1535 self.assertEqual(out, exp)
1537 op.orcid_m.storage_manager.delete_storage()
1538 op = OpenaireProcessing(testing=True)
1539 op.orcid_m.storage_manager.set_value("orcid:0000-0001-9759-3938", True)
1540 inp = [{'identifier': '0000-0001-9759-3938', 'schema': 'ORCID', 'url': 'https://orcid.org/0000-0001-9759-3938'}]
1541 out = op.find_openaire_orcid(inp)
1542 exp = "orcid:0000-0001-9759-3938"
1543 self.assertEqual(out, exp)
1544 op.orcid_m.storage_manager.delete_storage()
1546 def test_update_redis_values(self):
1547 br = ["pmid:2", "pmid:3"]
1548 ra = ["orcid:0000-0003-0530-4305"]
1549 op = OpenaireProcessing(testing=True)
1550 op.update_redis_values(br,ra)
1551 self.assertEqual(op._redis_values_br, br)
1552 self.assertEqual(op._redis_values_ra, ra)
1555 def test_find_openaire_orcid_with_index(self):
1556 """Test ORCID validation using ORCID index before API validation"""
1557 # Setup
1558 test_doi = "10.1234/test123"
1559 test_orcid = "0000-0002-1234-5678"
1560 test_name = "Smith, John"
1562 # Create OpenaireProcessing instance with ORCID index
1563 op = OpenaireProcessing()
1564 # Correct format for add_value: id_string -> value
1565 op.orcid_index.add_value(test_doi, f"{test_name} [orcid:{test_orcid}]")
1567 # Test Case 1: ORCID found in index
1568 inp_1 = [{'identifier': test_orcid, 'schema': 'ORCID'}]
1569 out_1 = op.find_openaire_orcid(inp_1, test_doi)
1570 exp_1 = f"orcid:{test_orcid}"
1571 self.assertEqual(out_1, exp_1)
1572 # Verify it was added to temporary storage
1573 self.assertTrue(op.tmp_orcid_m.storage_manager.get_value(f"orcid:{test_orcid}"))
1575 # Test Case 2: ORCID not in index but valid via API
1576 inp_2 = [{'identifier': '0000-0003-4082-1500', 'schema': 'ORCID'}]
1577 out_2 = op.find_openaire_orcid(inp_2, test_doi)
1578 exp_2 = "orcid:0000-0003-4082-1500"
1579 self.assertEqual(out_2, exp_2)
1581 # Test Case 3: ORCID not in index and invalid
1582 inp_3 = [{'identifier': '0000-0000-0000-0000', 'schema': 'ORCID'}]
1583 out_3 = op.find_openaire_orcid(inp_3, test_doi)
1584 exp_3 = ""
1585 self.assertEqual(out_3, exp_3)
1587 # Test Case 4: Valid ORCID but no DOI provided
1588 inp_4 = [{'identifier': test_orcid, 'schema': 'ORCID'}]
1589 out_4 = op.find_openaire_orcid(inp_4) # No DOI
1590 exp_4 = f"orcid:{test_orcid}" # Should still validate via API
1591 self.assertEqual(out_4, exp_4)
1593 # Cleanup
1594 op.storage_manager.delete_storage()
def test_validated_as_with_storage_manager(storage_manager):
    """validated_as returns the stored validity flag for a DOI, or None when
    the id has never been stored.

    NOTE(review): module-level pytest-style test — `storage_manager` is
    presumably injected as a fixture defined elsewhere in the suite; confirm.
    """
    known_valid = {"identifier": "doi:10.1001/2012.jama.10368", "schema": "doi"}
    known_invalid = {"identifier": "doi:10.1001/2012.jama.1036", "schema": "doi"}
    never_stored = {"identifier": "doi:10.1001/2012.jama.10158", "schema": "doi"}

    proc = OpenaireProcessing(storage_manager=storage_manager, testing=True)
    # Pre-seed the doi storage with one valid and one invalid id.
    proc.doi_m.storage_manager.set_value(known_valid["identifier"], True)
    proc.doi_m.storage_manager.set_value(known_invalid["identifier"], False)

    assert proc.validated_as(known_valid) is True
    assert proc.validated_as(known_invalid) is False
    assert proc.validated_as(never_stored) is None
if __name__ == '__main__':
    # Allow running this test module directly with `python <file>`.
    unittest.main()