Coverage for test/idm_pmc_test.py: 99%
119 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-25 18:06 +0000
1# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it>
2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
3#
4# SPDX-License-Identifier: ISC
6import json
7import unittest
8from os import makedirs
9from os.path import exists, join
11from oc_ds_converter.oc_idmanager.pmcid import PMCIDManager
class pmcIdentifierManagerTest(unittest.TestCase):
    """Test normalisation, validation and storage handling of PMCIDManager."""

    def setUp(self):
        """Load the shared JSON fixture and define the sample PMCIDs."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_pmc_1 = "PMC8384044"
        self.valid_pmc_2 = "PMC6716460"
        # Rejected by is_valid() in every configuration exercised below.
        self.invalid_pmc_1 = "0128564"
        self.invalid_pmc_2 = "PMC6716"
        # Accepted when the API is disabled and nothing is stored for them
        # (invalid_pmc_3), or recorded as invalid in storage (invalid_pmc_4).
        self.invalid_pmc_3 = "PMC10000716468"
        self.invalid_pmc_4 = "PMC100007468"

    def _testing_manager(self, **kwargs):
        """Build a ``testing=True`` manager whose storage is always wiped.

        The wipe is registered with ``addCleanup`` so that it runs even when
        an assertion fails halfway through the test, which a trailing
        ``delete_storage()`` call cannot guarantee.
        """
        manager = PMCIDManager(testing=True, **kwargs)
        self.addCleanup(manager.storage_manager.delete_storage)
        return manager

    @staticmethod
    def _normalised_ids(manager, identifiers):
        """Return the prefixed normal form of each id, skipping falsy results."""
        candidates = (
            manager.normalise(identifier, include_prefix=True)
            for identifier in identifiers
        )
        return [candidate for candidate in candidates if candidate]

    def test_exists(self):
        """exists() returns a (bool, info) pair only when extra info is requested."""
        with self.subTest(msg="get_extra_info=True, allow_extra_api=None"):
            pmc_manager = PMCIDManager()
            output = pmc_manager.exists(self.valid_pmc_1, get_extra_info=True, allow_extra_api=None)
            expected_output = (True, {'id': self.valid_pmc_1, 'valid': True})
            self.assertEqual(output, expected_output)
        with self.subTest(msg="get_extra_info=False, allow_extra_api=None"):
            pmc_manager = PMCIDManager()
            output = pmc_manager.exists(self.valid_pmc_2, get_extra_info=False, allow_extra_api=None)
            expected_output = True
            self.assertEqual(output, expected_output)

    def test_pmcid_normalise(self):
        """Leading whitespace and the PMC article URL prefix do not change the normal form."""
        pcm = PMCIDManager()
        self.assertEqual(
            pcm.normalise(self.valid_pmc_1),
            pcm.normalise(' ' + self.valid_pmc_1),
        )
        self.assertEqual(
            pcm.normalise(self.valid_pmc_2),
            pcm.normalise("https://www.ncbi.nlm.nih.gov/pmc/articles/" + self.valid_pmc_2),
        )

    def test_pmcid_is_valid(self):
        """A default manager accepts the valid samples and rejects the malformed ones."""
        pcm = PMCIDManager()
        self.assertTrue(pcm.is_valid(self.valid_pmc_1))
        self.assertTrue(pcm.is_valid(self.valid_pmc_2))
        self.assertFalse(pcm.is_valid(self.invalid_pmc_1))
        self.assertFalse(pcm.is_valid(self.invalid_pmc_2))

    def test_pmc_is_valid(self):
        """Validation with a default manager and with pre-seeded storage and no API."""
        dm_nofile = PMCIDManager()
        self.assertTrue(dm_nofile.is_valid(self.valid_pmc_1))
        self.assertTrue(dm_nofile.is_valid(self.valid_pmc_2))
        self.assertFalse(dm_nofile.is_valid(self.invalid_pmc_1))
        self.assertFalse(dm_nofile.is_valid(self.invalid_pmc_2))

        dm_file = self._testing_manager(use_api_service=False)
        # Pre-seed storage with the pmcid entries from glob.json.
        for key, value in self.data.items():
            if key.startswith("pmcid:"):
                dm_file.storage_manager.set_value(key, value.get("valid", False))
        self.assertIn(dm_file.normalise(self.valid_pmc_1, include_prefix=True), self.data)
        self.assertIn(dm_file.normalise(self.invalid_pmc_4, include_prefix=True), self.data)
        self.assertTrue(dm_file.is_valid(self.valid_pmc_1))
        self.assertFalse(dm_file.is_valid(self.invalid_pmc_4))

    def test_pmc_default(self):
        """Validated ids end up in storage and delete_storage() empties it."""
        # Uses RedisStorageManager with testing=True (fakeredis) and the API.
        # Built directly rather than through _testing_manager because the
        # deletion itself is asserted on below.
        am_nofile = PMCIDManager(testing=True)
        self.assertTrue(am_nofile.is_valid(self.valid_pmc_1))
        self.assertTrue(am_nofile.is_valid(self.valid_pmc_2))
        self.assertFalse(am_nofile.is_valid(self.invalid_pmc_2))
        self.assertFalse(am_nofile.is_valid(self.invalid_pmc_1))
        validated_ids = [self.valid_pmc_1, self.valid_pmc_2, self.invalid_pmc_1, self.invalid_pmc_2]
        validated = self._normalised_ids(am_nofile, validated_ids)
        # Check that all the validated ids are stored in redis.
        all_ids_stored = am_nofile.storage_manager.get_all_keys()
        for stored_key in validated:
            self.assertIn(stored_key, all_ids_stored)
        am_nofile.storage_manager.delete_storage()
        # Check that the storage was correctly deleted.
        self.assertEqual(am_nofile.storage_manager.get_all_keys(), set())

    def test_pmc_memory_file_noapi(self):
        """Without the API, an unstored id with correct syntax is accepted."""
        # Nothing is pre-seeded into storage here; the membership checks run
        # against the glob.json content loaded in setUp.
        # Uses RedisStorageManager; does not use the API.
        am_file = self._testing_manager(use_api_service=False)
        self.assertIn(am_file.normalise(self.valid_pmc_1, include_prefix=True), self.data)
        self.assertIn(am_file.normalise(self.invalid_pmc_4, include_prefix=True), self.data)
        # invalid_pmc_1 is never stored; it has no "PMC" prefix, so it is
        # presumably rejected on syntax alone.
        self.assertFalse(am_file.is_valid(self.invalid_pmc_1))
        # Not stored as invalid, does not exist, but has correct syntax.
        self.assertTrue(am_file.is_valid(am_file.normalise(self.invalid_pmc_3, include_prefix=True)))

    def test_pmc_memory_file_api(self):
        """With the API enabled, a malformed id is rejected."""
        # Uses RedisStorageManager; uses the API.
        am_file = self._testing_manager(use_api_service=True)
        self.assertFalse(am_file.is_valid(self.invalid_pmc_1))

    def test_pmc_memory_nofile_noapi(self):
        """Without the API, syntactically correct ids are considered valid."""
        # No pre-seeded data; uses RedisStorageManager; does not use the API,
        # so a non-existent id with correct syntax is accepted.
        am_nofile_noapi = self._testing_manager(use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_pmc_1))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_pmc_3))

    def test_pmc_sqlite_nofile_api(self):
        """With the API and empty storage, validated ids are stored and can be deleted."""
        # Despite the historical "sqlite" name, the storage manager is
        # RedisStorageManager. No pre-existing data; uses the API.
        # Built directly rather than through _testing_manager because the
        # deletion itself is asserted on below.
        sql_am_nofile = PMCIDManager(testing=True)
        self.assertTrue(sql_am_nofile.is_valid(self.valid_pmc_1))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_pmc_2))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_pmc_1))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_pmc_2))
        # Check that the redis storage contains all the validated ids.
        validated_ids = [self.valid_pmc_1, self.valid_pmc_2, self.invalid_pmc_1, self.invalid_pmc_2]
        all_ids_stored = sql_am_nofile.storage_manager.get_all_keys()
        validated = self._normalised_ids(sql_am_nofile, validated_ids)
        for stored_key in validated:
            self.assertIn(stored_key, all_ids_stored)
        sql_am_nofile.storage_manager.delete_storage()
        # Check that the storage was correctly deleted.
        self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set())

    def test_pmc_sqlite_file_api(self):
        """Stored verdicts are honoured by a manager that does not use the API."""
        # Despite the historical "sqlite" name, the storage manager is
        # RedisStorageManager.
        # Note: invalid_pmc_4 has valid PMC format but doesn't exist.
        to_insert = [self.invalid_pmc_4, self.valid_pmc_1]
        sql_file = self._testing_manager(use_api_service=True)
        for norm_id in self._normalised_ids(sql_file, to_insert):
            is_valid = sql_file.is_valid(norm_id)
            sql_file.storage_manager.set_value(norm_id, is_valid)

        sql_no_api = self._testing_manager(use_api_service=False)
        normalized_ids = self._normalised_ids(sql_no_api, to_insert)
        # Copy values from the first manager to the second for testing.
        for norm_id in normalized_ids:
            value = sql_file.storage_manager.get_value(norm_id)
            if value is not None:
                sql_no_api.storage_manager.set_value(norm_id, value)
        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        # Check that all the normalised ids in the list were correctly inserted.
        for norm_id in normalized_ids:
            self.assertIn(norm_id, all_db_keys)
        self.assertTrue(sql_no_api.is_valid(self.valid_pmc_1))  # is stored as valid
        self.assertFalse(sql_no_api.is_valid(self.invalid_pmc_4))  # is stored as invalid
        # Not stored, has correct syntax.
        self.assertTrue(sql_no_api.is_valid(sql_no_api.normalise(self.invalid_pmc_3, include_prefix=True)))

    def test_pmc_sqlite_nofile_noapi(self):
        """Without the API, syntactically correct ids are considered valid."""
        # Despite the historical "sqlite" name, the storage manager is
        # RedisStorageManager. No pre-seeded data; does not use the API,
        # so a non-existent id with correct syntax is accepted.
        am_nofile_noapi = self._testing_manager(use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_pmc_1))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_pmc_3))