Coverage for test / idm_pmc_test.py: 99%

119 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2023 Arianna Moretti <arianna.moretti4@unibo.it> 

2# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

3# 

4# SPDX-License-Identifier: ISC 

5 

6import json 

7import unittest 

8from os import makedirs 

9from os.path import exists, join 

10 

11from oc_ds_converter.oc_idmanager.pmcid import PMCIDManager 

12 

13 

class pmcIdentifierManagerTest(unittest.TestCase):
    """Tests for ``PMCIDManager``: normalisation, validation and storage.

    The class name is kept as-is (not PascalCase) so existing test selectors
    keep working. Tests whose name contains ``sqlite`` are historical names:
    according to the notes carried over from the original tests they run
    against the Redis-backed storage manager created with ``testing=True``.
    """

    def setUp(self):
        """Create the scratch directory, load ``glob.json`` and define sample ids."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        makedirs("tmp", exist_ok=True)

        self.test_dir = join("test", "data")
        self.test_json_path = join(self.test_dir, "glob.json")
        with open(self.test_json_path, encoding="utf-8") as fp:
            self.data = json.load(fp)

        self.valid_pmc_1 = "PMC8384044"
        self.valid_pmc_2 = "PMC6716460"
        # Ids 1 and 2 are expected to be rejected by is_valid() in every setup.
        self.invalid_pmc_1 = "0128564"
        self.invalid_pmc_2 = "PMC6716"
        # Ids 3 and 4 are described by the tests below as syntactically
        # correct but non-existent; id 4 must also be listed in glob.json.
        self.invalid_pmc_3 = "PMC10000716468"
        self.invalid_pmc_4 = "PMC100007468"

    # ------------------------------------------------------------------
    # Shared helpers (leading underscore: not collected as tests)
    # ------------------------------------------------------------------

    def _assert_reference_validity(self, manager):
        """Assert *manager* accepts both valid ids and rejects invalid ids 1 and 2."""
        for pmcid in (self.valid_pmc_1, self.valid_pmc_2):
            self.assertTrue(manager.is_valid(pmcid), pmcid)
        for pmcid in (self.invalid_pmc_1, self.invalid_pmc_2):
            self.assertFalse(manager.is_valid(pmcid), pmcid)

    def _check_validated_ids_are_stored_then_cleared(self):
        """Validate the sample ids, check they were stored, then empty the storage.

        Shared body of ``test_pmc_default`` and ``test_pmc_sqlite_nofile_api``.
        """
        manager = PMCIDManager(testing=True)
        storage = manager.storage_manager
        try:
            self._assert_reference_validity(manager)
            candidates = (
                self.valid_pmc_1,
                self.valid_pmc_2,
                self.invalid_pmc_1,
                self.invalid_pmc_2,
            )
            # Normalise each id once; ids that cannot be normalised are skipped.
            normalised = (manager.normalise(c, include_prefix=True) for c in candidates)
            expected_keys = [key for key in normalised if key]
            stored_keys = storage.get_all_keys()
            for key in expected_keys:
                self.assertIn(key, stored_keys)
        finally:
            # Always clear the storage, even when an assertion above fails,
            # so no state leaks into other tests.
            storage.delete_storage()
        # check that the storage was correctly deleted
        self.assertEqual(storage.get_all_keys(), set())

    def _check_syntax_only_validation(self):
        """Without the API, a syntactically correct id is considered valid.

        Shared body of ``test_pmc_memory_nofile_noapi`` and
        ``test_pmc_sqlite_nofile_noapi``.
        """
        manager = PMCIDManager(testing=True, use_api_service=False)
        # Registered up front so the storage is cleared even on failure.
        self.addCleanup(manager.storage_manager.delete_storage)
        self.assertTrue(manager.is_valid(self.valid_pmc_1))
        self.assertTrue(manager.is_valid(self.invalid_pmc_3))

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_exists(self):
        """``exists`` returns a (bool, info) tuple or a bare bool depending on ``get_extra_info``."""
        with self.subTest(msg="get_extra_info=True, allow_extra_api=None"):
            pmc_manager = PMCIDManager()
            output = pmc_manager.exists(
                self.valid_pmc_1, get_extra_info=True, allow_extra_api=None
            )
            expected_output = (True, {'id': self.valid_pmc_1, 'valid': True})
            self.assertEqual(output, expected_output)
        with self.subTest(msg="get_extra_info=False, allow_extra_api=None"):
            pmc_manager = PMCIDManager()
            output = pmc_manager.exists(
                self.valid_pmc_2, get_extra_info=False, allow_extra_api=None
            )
            expected_output = True
            self.assertEqual(output, expected_output)

    def test_pmcid_normalise(self):
        """Leading whitespace and the PMC article URL prefix are normalised away."""
        pcm = PMCIDManager()
        self.assertEqual(
            pcm.normalise(self.valid_pmc_1),
            pcm.normalise(' ' + self.valid_pmc_1),
        )
        self.assertEqual(
            pcm.normalise(self.valid_pmc_2),
            pcm.normalise("https://www.ncbi.nlm.nih.gov/pmc/articles/" + self.valid_pmc_2),
        )

    def test_pmcid_is_valid(self):
        """A default manager accepts the valid ids and rejects invalid ids 1 and 2."""
        self._assert_reference_validity(PMCIDManager())

    def test_pmc_is_valid(self):
        """Validation with a default manager and with storage pre-seeded from glob.json."""
        self._assert_reference_validity(PMCIDManager())

        dm_file = PMCIDManager(testing=True, use_api_service=False)
        # The seeded entries must not outlive this test.
        self.addCleanup(dm_file.storage_manager.delete_storage)
        # Pre-seed storage with data from glob.json
        for key, value in self.data.items():
            if key.startswith("pmcid:"):
                dm_file.storage_manager.set_value(key, value.get("valid", False))
        self.assertIn(dm_file.normalise(self.valid_pmc_1, include_prefix=True), self.data)
        self.assertIn(dm_file.normalise(self.invalid_pmc_4, include_prefix=True), self.data)
        self.assertTrue(dm_file.is_valid(self.valid_pmc_1))
        self.assertFalse(dm_file.is_valid(self.invalid_pmc_4))

    def test_pmc_default(self):
        """Default testing manager (uses the API): validated ids end up in storage."""
        self._check_validated_ids_are_stored_then_cleared()

    def test_pmc_memory_file_noapi(self):
        """No API: ids listed in glob.json are recognised, syntax alone decides validity."""
        am_file = PMCIDManager(testing=True, use_api_service=False)
        self.assertIn(am_file.normalise(self.valid_pmc_1, include_prefix=True), self.data)
        self.assertIn(am_file.normalise(self.invalid_pmc_4, include_prefix=True), self.data)
        self.assertFalse(am_file.is_valid(self.invalid_pmc_1))
        # Not stored as invalid; does not exist but has correct syntax, so
        # without the API it is accepted.
        self.assertTrue(
            am_file.is_valid(am_file.normalise(self.invalid_pmc_3, include_prefix=True))
        )

    def test_pmc_memory_file_api(self):
        """With the API enabled, invalid id 1 is rejected."""
        am_file = PMCIDManager(testing=True, use_api_service=True)
        self.assertFalse(am_file.is_valid(self.invalid_pmc_1))

    def test_pmc_memory_nofile_noapi(self):
        """No pre-existing data, no API: correct syntax is enough to be valid."""
        self._check_syntax_only_validation()

    def test_pmc_sqlite_nofile_api(self):
        """No pre-existing data, API enabled: validated ids end up in storage."""
        self._check_validated_ids_are_stored_then_cleared()

    def test_pmc_sqlite_file_api(self):
        """Values validated through the API are honoured by a manager without API.

        Note: ``invalid_pmc_4`` has a valid PMC format but does not exist.
        """
        to_insert = [self.invalid_pmc_4, self.valid_pmc_1]
        sql_file = PMCIDManager(testing=True, use_api_service=True)
        for pmcid in to_insert:
            norm_id = sql_file.normalise(pmcid, include_prefix=True)
            if norm_id:
                sql_file.storage_manager.set_value(norm_id, sql_file.is_valid(norm_id))

        sql_no_api = PMCIDManager(testing=True, use_api_service=False)
        # Registered up front so the storage is cleared even on failure.
        self.addCleanup(sql_no_api.storage_manager.delete_storage)
        # Copy values from the first manager to the second for testing
        for pmcid in to_insert:
            norm_id = sql_no_api.normalise(pmcid, include_prefix=True)
            if not norm_id:
                continue
            value = sql_file.storage_manager.get_value(norm_id)
            if value is not None:
                sql_no_api.storage_manager.set_value(norm_id, value)

        all_db_keys = sql_no_api.storage_manager.get_all_keys()
        # check that all the normalised ids in the list were correctly inserted
        for pmcid in to_insert:
            norm_id = sql_no_api.normalise(pmcid, include_prefix=True)
            if norm_id:
                self.assertIn(norm_id, all_db_keys)
        self.assertTrue(sql_no_api.is_valid(self.valid_pmc_1))  # is stored as valid
        self.assertFalse(sql_no_api.is_valid(self.invalid_pmc_4))  # is stored as invalid
        # not stored, has correct syntax
        self.assertTrue(
            sql_no_api.is_valid(sql_no_api.normalise(self.invalid_pmc_3, include_prefix=True))
        )

    def test_pmc_sqlite_nofile_noapi(self):
        """No pre-existing data, no API: correct syntax is enough to be valid."""
        self._check_syntax_only_validation()

180 self.assertTrue(am_nofile_noapi.is_valid(self.valid_pmc_1)) 

181 self.assertTrue(am_nofile_noapi.is_valid(self.invalid_pmc_3)) 

182 am_nofile_noapi.storage_manager.delete_storage()