Coverage for test / idm_pmid_test.py: 99%
131 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it>
2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
3#
4# SPDX-License-Identifier: ISC
6import json
7import unittest
8from os import makedirs
9from os.path import exists, join
11from oc_ds_converter.oc_idmanager.pmid import PMIDManager
class pmidIdentifierManagerTest(unittest.TestCase):
    """Tests for the PMID identifier manager (``PMIDManager``).

    The suite exercises ``normalise``, ``is_valid`` and ``exists`` with
    different combinations of the ``testing`` and ``use_api_service``
    constructor flags.

    Several test names still mention "memory", "file" or "sqlite"; according
    to the comments carried by the tests themselves, every scenario relies on
    the Redis storage manager, so those names are historical.
    """

    def setUp(self):
        """Create the scratch directory, load the fixture, define sample ids."""
        # exist_ok=True replaces the exists()/makedirs() pair and removes the
        # check-then-create race between the two calls.
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_pmid_1 = "2942070"
        self.valid_pmid_2 = "1509982"
        self.invalid_pmid_1 = "0067308798798"
        self.invalid_pmid_2 = "pmid:174777777777"
        self.invalid_pmid_3 = "pmid:174777777779"

    def _seed_storage(self, manager):
        """Copy every ``pmid:`` entry of the glob.json fixture into *manager*.

        Each key is stored with the entry's ``"valid"`` flag, defaulting to
        ``False`` when the flag is absent.
        """
        for key, value in self.data.items():
            if key.startswith("pmid:"):
                manager.storage_manager.set_value(key, value.get("valid", False))

    @staticmethod
    def _prefixed_ids(manager, raw_ids):
        """Return the prefixed normalised form of *raw_ids*, dropping falsy results.

        ``normalise`` is called once per identifier.
        """
        normalised = (manager.normalise(raw, include_prefix=True) for raw in raw_ids)
        return [pmid for pmid in normalised if pmid]

    def test_pmid_normalise(self):
        """``normalise`` reduces decorated inputs to the bare PMID digits."""
        pm = PMIDManager()
        # str.replace("", s) inserts s before every character and at the end,
        # so these two inputs have "pmid:" / " " interleaved between all digits.
        self.assertEqual(
            self.valid_pmid_1, pm.normalise(self.valid_pmid_1.replace("", "pmid:"))
        )
        self.assertEqual(
            self.valid_pmid_1, pm.normalise(self.valid_pmid_1.replace("", " "))
        )
        self.assertEqual(
            self.valid_pmid_1,
            pm.normalise("https://pubmed.ncbi.nlm.nih.gov/" + self.valid_pmid_1),
        )
        # Leading zeros are expected to be stripped.
        self.assertEqual(self.valid_pmid_2, pm.normalise("000" + self.valid_pmid_2))

    def test_pmid_is_valid(self):
        """``is_valid`` with default settings, seeded storage, and no API."""
        pm_nofile = PMIDManager()
        self.assertTrue(pm_nofile.is_valid(self.valid_pmid_1))
        self.assertTrue(pm_nofile.is_valid(self.valid_pmid_2))
        self.assertFalse(pm_nofile.is_valid(self.invalid_pmid_1))
        self.assertFalse(pm_nofile.is_valid(self.invalid_pmid_2))

        pm_file = PMIDManager(use_api_service=False, testing=True)
        # Pre-seed storage with data from glob.json
        self._seed_storage(pm_file)
        self.assertIn(pm_file.normalise(self.valid_pmid_1, include_prefix=True), self.data)
        self.assertIn(pm_file.normalise(self.invalid_pmid_1, include_prefix=True), self.data)
        self.assertTrue(pm_file.is_valid(self.valid_pmid_1))
        self.assertFalse(pm_file.is_valid(self.invalid_pmid_1))

        # Without the API an id that is not stored as invalid is accepted.
        pm_nofile_noapi = PMIDManager(testing=True, use_api_service=False)
        self.assertTrue(pm_nofile_noapi.is_valid(self.valid_pmid_1))
        self.assertTrue(pm_nofile_noapi.is_valid(self.invalid_pmid_3))

    def test_exists(self):
        """``exists`` with and without the extra-metadata payload."""
        with self.subTest(msg="get_extra_info=True, allow_extra_api=None"):
            pmid_manager = PMIDManager()
            output = pmid_manager.exists(
                "pmid:8384044", get_extra_info=True, allow_extra_api=None
            )
            expected_output = (
                True,
                {
                    "valid": True,
                    "title": "Brevetoxin depresses synaptic transmission in guinea pig hippocampal slices.",
                    "author": ["Adler, M", "Sheridan, R E", "Apland, J P"],
                    "pub_date": "1993",
                    "venue": "Brain research bulletin [issn:0361-9230]",
                    "volume": "31",
                    "issue": "1-2",
                    "page": "201-7",
                    "type": ["journal article"],
                    "publisher": [],
                    "editor": [],
                    "doi": "10.1016/0361-9230(93)90026-8",
                    "id": "pmid:8384044",
                },
            )
            self.assertEqual(expected_output[0], output[0])
            # NOTE(review): the original passed two dicts to assertCountEqual,
            # which iterates them and therefore compares keys only. That
            # behaviour is kept here but made explicit; the values of the
            # non-author fields are NOT checked. Confirm whether that is wanted.
            expected_keys = [key for key in expected_output[1] if key != "author"]
            actual_keys = [key for key in output[1] if key != "author"]
            self.assertCountEqual(expected_keys, actual_keys)
            # Authors are compared regardless of order.
            self.assertCountEqual(expected_output[1]["author"], output[1]["author"])

        with self.subTest(msg="get_extra_info=False, allow_extra_api=None"):
            pmid_manager = PMIDManager()
            output = pmid_manager.exists(
                "pmid6716460", get_extra_info=False, allow_extra_api=None
            )
            expected_output = True
            self.assertEqual(output, expected_output)

    def test_pmidid_normalise(self):
        """Equivalent spellings normalise alike; seeded validity is honoured."""
        pcm = PMIDManager()
        self.assertEqual(
            pcm.normalise(self.valid_pmid_1),
            pcm.normalise(" " + self.valid_pmid_1),
        )
        self.assertEqual(
            pcm.normalise(self.valid_pmid_2),
            pcm.normalise("https://www.ncbi.nlm.nih.gov/pmid/articles/" + self.valid_pmid_2),
        )

        seeded = PMIDManager(testing=True, use_api_service=False)
        # Pre-seed storage with data from glob.json
        self._seed_storage(seeded)
        self.assertIn(seeded.normalise(self.valid_pmid_1, include_prefix=True), self.data)
        self.assertIn(seeded.normalise(self.invalid_pmid_2, include_prefix=True), self.data)
        self.assertTrue(seeded.is_valid(self.valid_pmid_1))
        self.assertFalse(seeded.is_valid(self.invalid_pmid_2))

    def test_pmid_default(self):
        """Validated ids end up in storage and ``delete_storage`` empties it."""
        # Uses RedisStorageManager with testing=True (fakeredis)
        # uses API
        manager = PMIDManager(testing=True)
        self.assertTrue(manager.is_valid(self.valid_pmid_1))
        self.assertTrue(manager.is_valid(self.valid_pmid_2))
        self.assertFalse(manager.is_valid(self.invalid_pmid_2))
        self.assertFalse(manager.is_valid(self.invalid_pmid_1))
        validated_ids = [
            self.valid_pmid_1,
            self.valid_pmid_2,
            self.invalid_pmid_1,
            self.invalid_pmid_2,
        ]
        validated = self._prefixed_ids(manager, validated_ids)
        # check that all the validated ids are stored in redis
        all_ids_stored = manager.storage_manager.get_all_keys()
        for stored_id in validated:
            self.assertIn(stored_id, all_ids_stored)
        manager.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(manager.storage_manager.get_all_keys(), set())

    def test_pmid_memory_file_noapi(self):
        """Seeded storage without API: stored flags win, unknown ids pass."""
        # Uses pre-seeded data (without updating it)
        # Uses RedisStorageManager storage manager
        # does not use API (so a syntactically correct id is considered to be valid)
        manager = PMIDManager(testing=True, use_api_service=False)
        # Pre-seed storage with data from glob.json
        self._seed_storage(manager)
        self.assertIn(manager.normalise(self.valid_pmid_1, include_prefix=True), self.data)
        self.assertIn(manager.normalise(self.invalid_pmid_2, include_prefix=True), self.data)
        # is stored as invalid
        self.assertFalse(manager.is_valid(self.invalid_pmid_2))
        # not stored as invalid, has correct syntax
        self.assertTrue(
            manager.is_valid(manager.normalise(self.invalid_pmid_3, include_prefix=True))
        )

    def test_pmid_memory_file_api(self):
        """With the API enabled an unknown id is reported as invalid."""
        # No data is seeded here
        # Uses RedisStorageManager storage manager
        # uses API (so a syntactically correct id which is not valid is considered to be invalid)
        manager = PMIDManager(testing=True, use_api_service=True)
        self.assertFalse(manager.is_valid(self.invalid_pmid_1))

    def test_pmid_memory_nofile_noapi(self):
        """Empty storage without API: any well-formed id is accepted."""
        # No data is seeded
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        manager = PMIDManager(testing=True, use_api_service=False)
        self.assertTrue(manager.is_valid(self.valid_pmid_1))
        self.assertTrue(manager.is_valid(self.invalid_pmid_1))
        manager.storage_manager.delete_storage()

    def test_pmid_sqlite_nofile_api(self):
        """Same scenario as ``test_pmid_default`` with a different check order."""
        # No pre-existing data
        # storage manager : RedisStorageManager
        # uses API
        manager = PMIDManager(testing=True)
        self.assertTrue(manager.is_valid(self.valid_pmid_1))
        self.assertTrue(manager.is_valid(self.valid_pmid_2))
        self.assertFalse(manager.is_valid(self.invalid_pmid_1))
        self.assertFalse(manager.is_valid(self.invalid_pmid_2))
        # check that the redis storage contains all the validated ids
        validated_ids = [
            self.valid_pmid_1,
            self.valid_pmid_2,
            self.invalid_pmid_1,
            self.invalid_pmid_2,
        ]
        all_ids_stored = manager.storage_manager.get_all_keys()
        validated = self._prefixed_ids(manager, validated_ids)
        for stored_id in validated:
            self.assertIn(stored_id, all_ids_stored)
        manager.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(manager.storage_manager.get_all_keys(), set())

    def test_pmid_sqlite_file_api(self):
        """Validity recorded by an API-backed manager is reused without API."""
        # Uses pre-existing data in Redis
        # Uses RedisStorageManager storage manager
        # tests validation behavior with pre-seeded data
        to_insert = [self.invalid_pmid_1, self.valid_pmid_1]
        api_manager = PMIDManager(testing=True, use_api_service=True)
        for pmid in to_insert:
            norm_id = api_manager.normalise(pmid, include_prefix=True)
            is_valid = api_manager.is_valid(norm_id)
            api_manager.storage_manager.set_value(norm_id, is_valid)

        no_api_manager = PMIDManager(testing=True, use_api_service=False)
        # Copy values from the first manager to the second for testing
        for pmid in to_insert:
            norm_id = no_api_manager.normalise(pmid, include_prefix=True)
            value = api_manager.storage_manager.get_value(norm_id)
            if value is not None:
                no_api_manager.storage_manager.set_value(norm_id, value)
        all_db_keys = no_api_manager.storage_manager.get_all_keys()
        # check that all the normalised ids in the list were correctly inserted
        for pmid in to_insert:
            self.assertIn(
                no_api_manager.normalise(pmid, include_prefix=True), all_db_keys
            )
        # is stored as valid
        self.assertTrue(no_api_manager.is_valid(self.valid_pmid_1))
        # is stored as invalid
        self.assertFalse(no_api_manager.is_valid(self.invalid_pmid_1))
        # not stored, has correct syntax
        self.assertTrue(
            no_api_manager.is_valid(
                no_api_manager.normalise(self.invalid_pmid_2, include_prefix=True)
            )
        )
        no_api_manager.storage_manager.delete_storage()

    def test_pmid_sqlite_nofile_noapi(self):
        """Empty storage without API: a prefixed well-formed id is accepted."""
        # No data is seeded
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        manager = PMIDManager(testing=True, use_api_service=False)
        self.assertTrue(manager.is_valid(self.valid_pmid_1))
        self.assertTrue(manager.is_valid(self.invalid_pmid_2))
        manager.storage_manager.delete_storage()