Coverage for test / idm_pmid_test.py: 99%

131 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it> 

2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

3# 

4# SPDX-License-Identifier: ISC 

5 

6import json 

7import unittest 

8from os import makedirs 

9from os.path import exists, join 

10 

11from oc_ds_converter.oc_idmanager.pmid import PMIDManager 

12 

13 

class pmidIdentifierManagerTest(unittest.TestCase):
    """Tests for ``PMIDManager``: normalisation, validation and id storage.

    Managers created without ``use_api_service=False`` are expected to query
    the remote API (per the comments carried over from the original tests),
    so those tests presumably need network access.
    """

    def setUp(self):
        """Create the scratch directory, load ``glob.json`` and define sample ids."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_pmid_1 = "2942070"
        self.valid_pmid_2 = "1509982"
        self.invalid_pmid_1 = "0067308798798"
        self.invalid_pmid_2 = "pmid:174777777777"
        self.invalid_pmid_3 = "pmid:174777777779"

    def _seed_storage(self, manager):
        """Copy every ``pmid:`` entry of the ``glob.json`` fixture into *manager*'s storage.

        Entries without a ``valid`` flag are stored as ``False``.
        """
        for key, value in self.data.items():
            if key.startswith("pmid:"):
                manager.storage_manager.set_value(key, value.get("valid", False))

    @staticmethod
    def _normalised_ids(manager, ids):
        """Return the prefixed normal form of each id, dropping falsy results."""
        normalised = (manager.normalise(x, include_prefix=True) for x in ids)
        return [norm_id for norm_id in normalised if norm_id]

    def _assert_all_stored(self, manager, ids):
        """Assert that the normal form of every id in *ids* is a key in *manager*'s storage."""
        stored_keys = manager.storage_manager.get_all_keys()
        for norm_id in self._normalised_ids(manager, ids):
            # assertIn reports the offending id, unlike assertTrue(all(...)).
            self.assertIn(norm_id, stored_keys)

    def test_pmid_normalise(self):
        """Noisy inputs must normalise to the bare PMID digits."""
        pm = PMIDManager()
        # str.replace("", s) inserts s before every character and at the end,
        # so the noise is interleaved with all the digits of the id.
        self.assertEqual(
            self.valid_pmid_1, pm.normalise(self.valid_pmid_1.replace("", "pmid:"))
        )
        self.assertEqual(
            self.valid_pmid_1, pm.normalise(self.valid_pmid_1.replace("", " "))
        )
        self.assertEqual(
            self.valid_pmid_1,
            pm.normalise("https://pubmed.ncbi.nlm.nih.gov/" + self.valid_pmid_1),
        )
        self.assertEqual(self.valid_pmid_2, pm.normalise("000" + self.valid_pmid_2))

    def test_pmid_is_valid(self):
        """``is_valid`` with the default manager, with seeded storage, and without API."""
        pm_nofile = PMIDManager()
        self.assertTrue(pm_nofile.is_valid(self.valid_pmid_1))
        self.assertTrue(pm_nofile.is_valid(self.valid_pmid_2))
        self.assertFalse(pm_nofile.is_valid(self.invalid_pmid_1))
        self.assertFalse(pm_nofile.is_valid(self.invalid_pmid_2))

        pm_file = PMIDManager(use_api_service=False, testing=True)
        self._seed_storage(pm_file)
        self.assertIn(pm_file.normalise(self.valid_pmid_1, include_prefix=True), self.data)
        self.assertIn(pm_file.normalise(self.invalid_pmid_1, include_prefix=True), self.data)
        self.assertTrue(pm_file.is_valid(self.valid_pmid_1))
        self.assertFalse(pm_file.is_valid(self.invalid_pmid_1))

        pm_nofile_noapi = PMIDManager(testing=True, use_api_service=False)
        self.assertTrue(pm_nofile_noapi.is_valid(self.valid_pmid_1))
        self.assertTrue(pm_nofile_noapi.is_valid(self.invalid_pmid_3))

    def test_exists(self):
        """``exists`` with and without the extra metadata."""
        with self.subTest(msg="get_extra_info=True, allow_extra_api=None"):
            pmid_manager = PMIDManager()
            output = pmid_manager.exists('pmid:8384044', get_extra_info=True, allow_extra_api=None)
            expected_output = (
                True,
                {
                    'valid': True,
                    'title': 'Brevetoxin depresses synaptic transmission in guinea pig hippocampal slices.',
                    'author': ['Adler, M', 'Sheridan, R E', 'Apland, J P'],
                    'pub_date': '1993',
                    'venue': 'Brain research bulletin [issn:0361-9230]',
                    'volume': '31',
                    'issue': '1-2',
                    'page': '201-7',
                    'type': ['journal article'],
                    'publisher': [],
                    'editor': [],
                    'doi': '10.1016/0361-9230(93)90026-8',
                    'id': 'pmid:8384044',
                },
            )
            expected_valid, expected_info = expected_output
            self.assertEqual(expected_valid, output[0])
            # assertCountEqual on two dicts only compares their keys; compare
            # the dicts themselves so that the values are checked as well.
            self.assertEqual(
                {k: v for k, v in expected_info.items() if k != "author"},
                {k: v for k, v in output[1].items() if k != "author"},
            )
            # Authors are compared regardless of their order.
            self.assertCountEqual(expected_info["author"], output[1]["author"])

        with self.subTest(msg="get_extra_info=False, allow_extra_api=None"):
            pmid_manager = PMIDManager()
            output = pmid_manager.exists('pmid6716460', get_extra_info=False, allow_extra_api=None)
            expected_output = True
            self.assertEqual(output, expected_output)

    def test_pmidid_normalise(self):
        """Equivalent spellings normalise alike; seeded validity flags are honoured."""
        pcm = PMIDManager()
        self.assertEqual(
            pcm.normalise(self.valid_pmid_1),
            pcm.normalise(' ' + self.valid_pmid_1),
        )
        self.assertEqual(
            pcm.normalise(self.valid_pmid_2),
            pcm.normalise("https://www.ncbi.nlm.nih.gov/pmid/articles/" + self.valid_pmid_2),
        )

        dm_file = PMIDManager(testing=True, use_api_service=False)
        self._seed_storage(dm_file)
        self.assertIn(dm_file.normalise(self.valid_pmid_1, include_prefix=True), self.data)
        self.assertIn(dm_file.normalise(self.invalid_pmid_2, include_prefix=True), self.data)
        self.assertTrue(dm_file.is_valid(self.valid_pmid_1))
        self.assertFalse(dm_file.is_valid(self.invalid_pmid_2))

    def test_pmid_default(self):
        """Validated ids are recorded in storage, and the storage can be wiped."""
        # Uses RedisStorageManager with testing=True (fakeredis)
        # uses API
        am_nofile = PMIDManager(testing=True)
        self.assertTrue(am_nofile.is_valid(self.valid_pmid_1))
        self.assertTrue(am_nofile.is_valid(self.valid_pmid_2))
        self.assertFalse(am_nofile.is_valid(self.invalid_pmid_2))
        self.assertFalse(am_nofile.is_valid(self.invalid_pmid_1))

        # check that all the validated ids are stored in redis
        validated_ids = [self.valid_pmid_1, self.valid_pmid_2, self.invalid_pmid_1, self.invalid_pmid_2]
        self._assert_all_stored(am_nofile, validated_ids)

        am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(am_nofile.storage_manager.get_all_keys(), set())

    def test_pmid_memory_file_noapi(self):
        """Without API, seeded flags win and unseeded well-formed ids are valid."""
        # Uses pre-seeded data (without updating it)
        # Uses RedisStorageManager storage manager
        # does not use API (so a syntactically correct id is considered to be valid)
        am_file = PMIDManager(testing=True, use_api_service=False)
        self._seed_storage(am_file)
        self.assertIn(am_file.normalise(self.valid_pmid_1, include_prefix=True), self.data)
        self.assertIn(am_file.normalise(self.invalid_pmid_2, include_prefix=True), self.data)
        self.assertFalse(am_file.is_valid(self.invalid_pmid_2))  # is stored as invalid
        # not stored as invalid, has correct syntax
        self.assertTrue(am_file.is_valid(am_file.normalise(self.invalid_pmid_3, include_prefix=True)))

    def test_pmid_memory_file_api(self):
        """With API, a well-formed but non-existent id is invalid."""
        # Uses RedisStorageManager storage manager
        # uses API (so a syntactically correct id which is not valid is considered to be invalid)
        am_file = PMIDManager(testing=True, use_api_service=True)
        self.assertFalse(am_file.is_valid(self.invalid_pmid_1))

    def test_pmid_memory_nofile_noapi(self):
        """Without API and without seeded data, any well-formed id is valid."""
        # Uses RedisStorageManager storage manager
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = PMIDManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_pmid_1))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_pmid_1))
        am_nofile_noapi.storage_manager.delete_storage()

    def test_pmid_sqlite_nofile_api(self):
        """API validation with empty storage; validated ids end up stored.

        Despite the ``sqlite`` in the name, the original comments state that
        the storage manager is RedisStorageManager.
        """
        # No pre-existing data
        # uses API
        sql_am_nofile = PMIDManager(testing=True)
        self.assertTrue(sql_am_nofile.is_valid(self.valid_pmid_1))
        self.assertTrue(sql_am_nofile.is_valid(self.valid_pmid_2))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_pmid_1))
        self.assertFalse(sql_am_nofile.is_valid(self.invalid_pmid_2))

        # check that the redis storage contains all the validated ids
        validated_ids = [self.valid_pmid_1, self.valid_pmid_2, self.invalid_pmid_1, self.invalid_pmid_2]
        self._assert_all_stored(sql_am_nofile, validated_ids)

        sql_am_nofile.storage_manager.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(sql_am_nofile.storage_manager.get_all_keys(), set())

    def test_pmid_sqlite_file_api(self):
        """Validity flags computed with the API are reused by a manager without API.

        Despite the ``sqlite`` in the name, the original comments state that
        the storage manager is RedisStorageManager.
        """
        to_insert = [self.invalid_pmid_1, self.valid_pmid_1]

        # First manager: validate through the API and record the outcome.
        sql_file = PMIDManager(testing=True, use_api_service=True)
        for pmid in to_insert:
            norm_id = sql_file.normalise(pmid, include_prefix=True)
            is_valid = sql_file.is_valid(norm_id)
            sql_file.storage_manager.set_value(norm_id, is_valid)

        # Second manager: no API; copy the recorded values over for testing.
        sql_no_api = PMIDManager(testing=True, use_api_service=False)
        for pmid in to_insert:
            norm_id = sql_no_api.normalise(pmid, include_prefix=True)
            value = sql_file.storage_manager.get_value(norm_id)
            if value is not None:
                sql_no_api.storage_manager.set_value(norm_id, value)

        # check that all the normalised ids in the list were correctly inserted
        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        for pmid in to_insert:
            self.assertIn(sql_no_api.normalise(pmid, include_prefix=True), all_db_keys)

        self.assertTrue(sql_no_api.is_valid(self.valid_pmid_1))  # is stored as valid
        self.assertFalse(sql_no_api.is_valid(self.invalid_pmid_1))  # is stored as invalid
        # not stored, has correct syntax
        self.assertTrue(sql_no_api.is_valid(sql_no_api.normalise(self.invalid_pmid_2, include_prefix=True)))
        sql_no_api.storage_manager.delete_storage()

    def test_pmid_sqlite_nofile_noapi(self):
        """Without API and without seeded data, any well-formed id is valid.

        Despite the ``sqlite`` in the name, the original comments state that
        the storage manager is RedisStorageManager.
        """
        # Does not use API (so a syntactically correct id which is not valid is considered to be valid)
        am_nofile_noapi = PMIDManager(testing=True, use_api_service=False)
        self.assertTrue(am_nofile_noapi.is_valid(self.valid_pmid_1))
        self.assertTrue(am_nofile_noapi.is_valid(self.invalid_pmid_2))
        am_nofile_noapi.storage_manager.delete_storage()