Coverage for test/index_orcid_doi_test.py: 100%

58 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-07-14 14:06 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright 2019 Silvio Peroni <essepuntato@gmail.com> 

4# Copyright 2019-2020 Fabio Mariani <fabio.mariani555@gmail.com> 

5# Copyright 2021 Simone Persiani <iosonopersia@gmail.com> 

6# Copyright 2021-2022 Arcangelo Massari <arcangelo.massari@unibo.it> 

7# 

8# Permission to use, copy, modify, and/or distribute this software for any purpose 

9# with or without fee is hereby granted, provided that the above copyright notice 

10# and this permission notice appear in all copies. 

11# 

12# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

13# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

14# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

15# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

16# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

17# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

18# SOFTWARE. 

19 

20 

21import os 

22import shutil 

23import unittest 

24from csv import DictReader, DictWriter 

25 

26from oc_meta.plugins.orcid.index_orcid_doi import Index_orcid_doi 

27 

# Fixture root shared by both paths below (relative to the working directory).
_FIXTURES_DIR = os.path.join('test', 'index_orcid_doi')
# Handed to Index_orcid_doi as output_path; the tests read the CSV files back from here.
CSV_PATH = os.path.join(_FIXTURES_DIR, 'output')
# Handed to Index_orcid_doi.explorer() as summaries_path.
SUMMARIES_PATH = os.path.join(_FIXTURES_DIR, 'orcid')

30 

def load_files_from_dir(dir: str) -> list:
    """Read every CSV file in ``dir`` and return all of their rows as one list.

    Files are visited in ascending order of the integer obtained from their
    name: the text before the first ``-``, with any ``.csv`` removed (so
    ``2.csv`` is read before ``10.csv``). Each file is parsed with
    ``csv.DictReader``, so every row is a mapping keyed by that file's header.

    Args:
        dir: Path of the directory that holds the CSV files. The name shadows
            the ``dir`` builtin; it is kept so keyword callers keep working.

    Returns:
        The rows of all files, concatenated in file order.

    Raises:
        ValueError: If the leading part of a file name is not an integer.
    """
    def numeric_prefix(filename: str) -> int:
        # '12-34.csv' -> 12, '7.csv' -> 7
        return int(filename.split('-')[0].replace('.csv', ''))

    output = []
    for file in sorted(os.listdir(dir), key=numeric_prefix):
        # newline='' is what the csv module asks for: without it, universal
        # newline translation rewrites '\r\n' inside quoted fields before the
        # reader sees them.
        with open(os.path.join(dir, file), 'r', encoding='utf-8', newline='') as f:
            output.extend(DictReader(f))
    return output

37 

38 

# Author strings exactly as the tests expect them in the 'value' column.
_NASRALLAH = 'NasrAllah, Mohamed M [0000-0001-5650-3000]'
_GARGOURI = 'Gargouri, Ali [0000-0001-5009-9000]'

# DOIs expected for ORCID 0000-0001-5650-3000. These are also the rows the
# cache tests pre-write to 0.csv, in this order.
_NASRALLAH_DOIS = (
    '10.1002/rmv.2149',
    '10.1016/j.transproceed.2019.01.147',
    '10.1016/j.transproceed.2019.02.044',
    '10.1016/j.ijcard.2016.06.064',
    '10.4103/1319-2442.190782',
    '10.1053/j.jrn.2015.04.009',
    '10.26719/2015.21.5.354',
    '10.1093/ckj/sfu046',
    '10.1007/s00393-012-1058-9',
    '10.1159/000356118',
    '10.1111/1756-185x.12007',
    '10.1007/s11255-011-0007-x',
    '10.1016/j.jbspin.2011.06.009',
    '10.1093/ndt/gfq089',
)

# DOIs expected for ORCID 0000-0001-5009-9000.
_GARGOURI_DOIS = (
    '10.1016/j.indcrop.2020.112103',
    '10.1155/2019/3213521',
    '10.1016/j.bioorg.2018.11.028',
    '10.1016/j.bioorg.2018.03.004',
    '10.1186/s13568-016-0300-2',
    '10.1016/j.toxicon.2014.04.010',
    '10.1155/2014/691742',
)

# Complete id -> value mapping every test expects to find in the output CSVs.
_EXPECTED_INDEX = dict.fromkeys(_NASRALLAH_DOIS, _NASRALLAH)
# NOTE(review): presumably a summary with neither a name nor any DOI —
# confirm against the files under SUMMARIES_PATH.
_EXPECTED_INDEX['None'] = '[0000-0001-5002-1000]'
_EXPECTED_INDEX.update(dict.fromkeys(_GARGOURI_DOIS, _GARGOURI))


class test_Index_orcid_doi(unittest.TestCase):
    """Tests for ``Index_orcid_doi.explorer`` run against the summaries in SUMMARIES_PATH.

    Every test lets the index write into CSV_PATH and compares the CSV rows
    found there with the same expected id/value pairs. The variants differ
    only in the constructor options and in whether a ``0.csv`` file is
    written to CSV_PATH before the index is created.
    """

    def setUp(self):
        """Start from a missing CSV_PATH and guarantee it is removed afterwards."""
        # Leftovers from an aborted run would make os.mkdir() in the cache
        # tests fail, so clear them first.
        shutil.rmtree(CSV_PATH, ignore_errors=True)
        # Registered as a cleanup so the directory is removed even when the
        # test body raises before reaching its assertion.
        self.addCleanup(shutil.rmtree, CSV_PATH, ignore_errors=True)

    def _seed_cache_file(self):
        """Create CSV_PATH and write the NasrAllah rows to ``0.csv`` inside it."""
        os.mkdir(CSV_PATH)
        data_to_write = [{'id': doi, 'value': _NASRALLAH} for doi in _NASRALLAH_DOIS]
        # newline='' keeps the csv writer's own line terminator untouched on
        # every platform, as the csv module documentation requires.
        with open(os.path.join(CSV_PATH, '0.csv'), 'w', encoding='utf-8', newline='') as output_file:
            dict_writer = DictWriter(output_file, ['id', 'value'])
            dict_writer.writeheader()
            dict_writer.writerows(data_to_write)

    def _check_fresh_run(self, **index_options):
        """Run the index on an absent CSV_PATH and compare the id-sorted rows."""
        iOd = Index_orcid_doi(output_path=CSV_PATH, verbose=False, **index_options)
        iOd.explorer(summaries_path=SUMMARIES_PATH)
        output = sorted(load_files_from_dir(CSV_PATH), key=lambda d: d['id'])
        expected_output = sorted(
            ({'id': doi, 'value': author} for doi, author in _EXPECTED_INDEX.items()),
            key=lambda d: d['id'])
        self.assertEqual(output, expected_output)

    def _check_seeded_run(self, **index_options):
        """Run the index on a pre-seeded CSV_PATH and check both rows and cache."""
        self._seed_cache_file()
        iOd = Index_orcid_doi(output_path=CSV_PATH, verbose=False, **index_options)
        # Read the attribute before explorer() runs, as the assertion below is
        # about the cache the constructor produced from the seeded file.
        cache = iOd.cache
        iOd.explorer(summaries_path=SUMMARIES_PATH)
        output = load_files_from_dir(CSV_PATH)
        # Row order across files is irrelevant here, so compare as a mapping.
        unordered_output = {key_value['id']: key_value['value'] for key_value in output}
        self.assertEqual((unordered_output, cache), (_EXPECTED_INDEX, {'0000-0001-5650-3000'}))

    def test_explorer(self):
        """Default options, no pre-existing output."""
        self._check_fresh_run()

    def test_cache(self):
        """Default options, with ``0.csv`` already present in the output directory."""
        self._check_seeded_run()

    def test_low_memory(self):
        """``low_memory=True`` with ``threshold=10``, no pre-existing output."""
        self._check_fresh_run(low_memory=True, threshold=10)

    def test_cache_low_memory(self):
        """``low_memory=True``, with ``0.csv`` already present in the output directory."""
        self._check_seeded_run(low_memory=True)

207 

# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__': # pragma: no cover
    unittest.main()