Coverage for test/index_orcid_doi_test.py: 100%
58 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3# Copyright 2019 Silvio Peroni <essepuntato@gmail.com>
4# Copyright 2019-2020 Fabio Mariani <fabio.mariani555@gmail.com>
5# Copyright 2021 Simone Persiani <iosonopersia@gmail.com>
6# Copyright 2021-2022 Arcangelo Massari <arcangelo.massari@unibo.it>
7#
8# Permission to use, copy, modify, and/or distribute this software for any purpose
9# with or without fee is hereby granted, provided that the above copyright notice
10# and this permission notice appear in all copies.
11#
12# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
13# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
14# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
15# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
16# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
17# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
18# SOFTWARE.
21import os
22import shutil
23import unittest
24from csv import DictReader, DictWriter
26from oc_meta.plugins.orcid.index_orcid_doi import Index_orcid_doi
# Directory the tests pass to Index_orcid_doi as ``output_path``; the tests
# read the produced CSV files from it and delete it afterwards.
CSV_PATH = os.path.join('test', 'index_orcid_doi', 'output')
# Directory passed to ``Index_orcid_doi.explorer`` as ``summaries_path``.
SUMMARIES_PATH = os.path.join('test', 'index_orcid_doi', 'orcid')
def load_files_from_dir(dir:str) -> list:
    """Read every CSV file in *dir* and return all of their rows as one list.

    Files are visited in ascending order of the integer at the start of
    their name: the part before the first ``-``, with any ``.csv`` suffix
    removed (so ``2-9.csv`` is read before ``10.csv``).

    :param dir: path of the directory holding the CSV files.
    :returns: the rows of all files, concatenated in visiting order, as
        produced by ``csv.DictReader``.
    :raises ValueError: if a file name does not start with an integer.
    """
    def numeric_prefix(filename: str) -> int:
        # '2-9.csv' -> 2, '10.csv' -> 10
        return int(filename.split('-')[0].replace('.csv', ''))

    output = []
    for file in sorted(os.listdir(dir), key=numeric_prefix):
        # newline='' leaves newline handling to the csv module, so that line
        # breaks embedded in quoted fields are read back unchanged.
        with open(os.path.join(dir, file), 'r', encoding='utf-8', newline='') as f:
            output.extend(DictReader(f))
    return output
class test_Index_orcid_doi(unittest.TestCase):
    """Run ``Index_orcid_doi.explorer`` on the fixture in ``SUMMARIES_PATH``.

    Every test makes the index write into ``CSV_PATH`` and then compares the
    CSV rows found there with the expected ones. The directory is wiped
    before each test and again afterwards (also when the test raises), so a
    failing test cannot leave files behind for the next one.
    """

    # Rows the cache tests write to ``0.csv`` before building the index.
    CACHED_ROWS = (
        {'id': '10.1002/rmv.2149', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1016/j.transproceed.2019.01.147', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1016/j.transproceed.2019.02.044', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1016/j.ijcard.2016.06.064', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.4103/1319-2442.190782', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1053/j.jrn.2015.04.009', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.26719/2015.21.5.354', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1093/ckj/sfu046', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1007/s00393-012-1058-9', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1159/000356118', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1111/1756-185x.12007', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1007/s11255-011-0007-x', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1016/j.jbspin.2011.06.009', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
        {'id': '10.1093/ndt/gfq089', 'value': 'NasrAllah, Mohamed M [0000-0001-5650-3000]'},
    )

    # Rows expected in the output in addition to CACHED_ROWS.
    UNCACHED_ROWS = (
        {'id': 'None', 'value': '[0000-0001-5002-1000]'},
        {'id': '10.1016/j.indcrop.2020.112103', 'value': 'Gargouri, Ali [0000-0001-5009-9000]'},
        {'id': '10.1155/2019/3213521', 'value': 'Gargouri, Ali [0000-0001-5009-9000]'},
        {'id': '10.1016/j.bioorg.2018.11.028', 'value': 'Gargouri, Ali [0000-0001-5009-9000]'},
        {'id': '10.1016/j.bioorg.2018.03.004', 'value': 'Gargouri, Ali [0000-0001-5009-9000]'},
        {'id': '10.1186/s13568-016-0300-2', 'value': 'Gargouri, Ali [0000-0001-5009-9000]'},
        {'id': '10.1016/j.toxicon.2014.04.010', 'value': 'Gargouri, Ali [0000-0001-5009-9000]'},
        {'id': '10.1155/2014/691742', 'value': 'Gargouri, Ali [0000-0001-5009-9000]'},
    )

    # ORCID expected in ``Index_orcid_doi.cache`` when CACHED_ROWS are
    # already present in the output directory.
    CACHED_ORCID = '0000-0001-5650-3000'

    def setUp(self):
        """Start from an empty output location and schedule its removal."""
        # Drop whatever an interrupted earlier run may have left behind.
        shutil.rmtree(CSV_PATH, ignore_errors=True)
        # addCleanup runs even when the test body raises before asserting.
        self.addCleanup(shutil.rmtree, CSV_PATH, ignore_errors=True)

    @staticmethod
    def _row_id(row):
        """Sort key: the ``id`` column of an output row."""
        return row['id']

    def _new_index(self, **options):
        """Build a quiet index writing to ``CSV_PATH``, forwarding *options*."""
        return Index_orcid_doi(output_path=CSV_PATH, verbose=False, **options)

    def _seed_output_dir(self):
        """Create ``CSV_PATH`` and write CACHED_ROWS into ``0.csv``."""
        os.makedirs(CSV_PATH, exist_ok=True)
        # newline='' is what the csv module expects for files it writes.
        with open(os.path.join(CSV_PATH, '0.csv'), 'w', encoding='utf-8', newline='') as output_file:
            dict_writer = DictWriter(output_file, ['id', 'value'])
            dict_writer.writeheader()
            dict_writer.writerows(self.CACHED_ROWS)

    def _assert_full_output(self, index):
        """Run the explorer and compare the id-sorted rows with all fixture rows."""
        index.explorer(summaries_path=SUMMARIES_PATH)
        produced = sorted(load_files_from_dir(CSV_PATH), key=self._row_id)
        expected = sorted(self.UNCACHED_ROWS + self.CACHED_ROWS, key=self._row_id)
        self.assertEqual(produced, expected)

    def _assert_cached_run(self, index):
        """Run the explorer on a pre-seeded directory; check rows and cache."""
        # The cache attribute is read before the explorer runs.
        cache = index.cache
        index.explorer(summaries_path=SUMMARIES_PATH)
        # Row order across files is not asserted here, only id -> value.
        produced = {row['id']: row['value'] for row in load_files_from_dir(CSV_PATH)}
        expected = {row['id']: row['value'] for row in self.CACHED_ROWS + self.UNCACHED_ROWS}
        self.assertEqual(produced, expected)
        self.assertEqual(cache, {self.CACHED_ORCID})

    def test_explorer(self):
        """Default options, empty output directory."""
        self._assert_full_output(self._new_index())

    def test_cache(self):
        """Default options, output directory pre-seeded with CACHED_ROWS."""
        self._seed_output_dir()
        self._assert_cached_run(self._new_index())

    def test_low_memory(self):
        """``low_memory=True`` with ``threshold=10``, empty output directory."""
        self._assert_full_output(self._new_index(low_memory=True, threshold=10))

    def test_cache_low_memory(self):
        """``low_memory=True``, output directory pre-seeded with CACHED_ROWS."""
        self._seed_output_dir()
        self._assert_cached_run(self._new_index(low_memory=True))
# Run the test suite when the file is executed directly as a script.
if __name__ == '__main__': # pragma: no cover
    unittest.main()