Coverage for test / pubmed_processing_test.py: 99%

155 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-25 18:06 +0000

1# SPDX-FileCopyrightText: 2023-2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

2# SPDX-FileCopyrightText: 2024 Arianna Moretti <arianna.moretti4@unibo.it> 

3# 

4# SPDX-License-Identifier: ISC 

5 

6import csv 

7import os 

8import unittest 

9 

10from oc_ds_converter.lib.jsonmanager import * 

11 

12from oc_ds_converter.pubmed.pubmed_processing import PubmedProcessing 

13 

# Directory holding every fixture used by this test module.
BASE = os.path.join("test", "pubmed_processing")
DATA_DIR = BASE

# ORCID index fixtures passed to PubmedProcessing as ``orcid_index``.
IOD = os.path.join(BASE, "iod")
ALL_CASES_IOD = os.path.join(BASE, "iod_all_cases")

# Lookup tables: journals map, publishers map and PMID filters.
JOURNALS_DICT = os.path.join(BASE, "journals.json")
PUBLISHERS_MAPPING = os.path.join(BASE, "publishers.csv")
WANTED_PMIDS = os.path.join(BASE, "wanted_pmids.csv")
WANTED_PMIDS_FOLDER = os.path.join(BASE, "wanted_pmids")

# Input tables.
DATA = os.path.join(BASE, "40228.csv")
DATA_TEST_NAMES = os.path.join(BASE, "CSVFile_iod_test.csv")
ZIP_INPUT = os.path.join(BASE, "zip_test")

# Output locations.
OUTPUT = os.path.join(BASE, "meta_input")
MULTIPROCESS_OUTPUT = os.path.join(BASE, "multi_process_test")

27 

28 

class TestPubmedProcessing(unittest.TestCase):
    """Tests for ``PubmedProcessing`` driven by the fixtures under ``BASE``."""

    maxDiff = None

    # Title of the journal whose venue string carries two ISSNs.
    _BBRC_TITLE = 'Biochemical and biophysical research communications'

    # The two serialisations of that journal's ISSN pair. The tests accept
    # either one; presumably the processor does not guarantee an order
    # (NOTE(review): confirm against PubmedProcessing.get_venue_name).
    _BBRC_ISSN_ORDERINGS = (
        '[issn:0006-291X issn:1090-2104]',
        '[issn:1090-2104 issn:0006-291X]',
    )

    # ------------------------------------------------------------------
    # Fixture builders (not collected by unittest: no ``test`` prefix)
    # ------------------------------------------------------------------

    @staticmethod
    def _author(name):
        """Return an author agent dict with only the ``name`` field filled in."""
        return {'role': 'author', 'name': name, 'family': '', 'given': ''}

    @staticmethod
    def _meta_row(identifiers, title, author, venue):
        """Return one expected ``csv_creator`` row.

        Only the four varying fields are parameters; every expected row in
        this module shares the remaining values.
        """
        return {
            'id': identifiers,
            'title': title,
            'author': author,
            'pub_date': '1975',
            'venue': venue,
            'volume': '',
            'issue': '',
            'page': '',
            'type': 'journal article',
            'publisher': 'Elsevier BV',
            'editor': '',
        }

    @classmethod
    def _expected_rows(cls, bbrc_issns):
        """Return the five rows expected from the ``DATA`` fixture.

        :param bbrc_issns: bracketed ISSN list appended to the venue of rows
            2-5 (one of ``_BBRC_ISSN_ORDERINGS``).
        """
        bbrc_venue = f'{cls._BBRC_TITLE} {bbrc_issns}'
        return [
            cls._meta_row(
                'pmid:1 doi:10.1016/0006-2944(75)90147-7',
                'Formate assay in body fluids: application in methanol poisoning.',
                'A B Makar; K E McMartin; M Palese; T R Tephly',
                'Biochemical medicine [issn:0006-2944]',
            ),
            cls._meta_row(
                'pmid:2 doi:10.1016/0006-291x(75)90482-9',
                'Delineation of the intimate details of the backbone conformation of pyridine nucleotide coenzymes in aqueous solution.',
                'K S Bose; Sarma, R H [orcid:0000-0000-0000-0000]',
                bbrc_venue,
            ),
            cls._meta_row(
                'pmid:3 doi:10.1016/0006-291x(75)90498-2',
                'Metal substitutions incarbonic anhydrase: a halide ion probe study.',
                'R J Smith; R G Bryant',
                bbrc_venue,
            ),
            cls._meta_row(
                'pmid:4 doi:10.1016/0006-291x(75)90506-9',
                'Effect of chloroquine on cultured fibroblasts: release of lysosomal hydrolases and inhibition of their uptake.',
                'U N Wiesmann; S DiDonato; N N Herschkowitz',
                bbrc_venue,
            ),
            cls._meta_row(
                'pmid:5 doi:10.1016/0006-291x(75)90508-2',
                'Atomic models for the polypeptide backbones of myohemerythrin and hemerythrin.',
                'W A Hendrickson; K B Ward',
                bbrc_venue,
            ),
        ]

    @classmethod
    def _acceptable_outputs(cls):
        """Return the expected row lists, one per accepted ISSN ordering."""
        return [cls._expected_rows(issns) for issns in cls._BBRC_ISSN_ORDERINGS]

    @staticmethod
    def _pmid_5_source_row(doi):
        """Return a fresh raw input row for PMID 5 carrying the given DOI.

        A new dict is built on every call so that a test never sees changes
        another call might have made to its input.
        """
        return {
            'pmid': '5',
            'doi': doi,
            'title': 'Atomic models for the polypeptide backbones of myohemerythrin and hemerythrin.',
            'authors': 'W A Hendrickson, K B Ward',
            'year': '1975',
            'journal': 'Biochem Biophys Res Commun',
            'cited_by': '7118409 6768892 2619971 2190210 3380793 20577584 8372226 7012375 856811 678527 33255345 33973855 402092 7012894 1257769 861288 1061139 3681996',
            'references': '4882249 5059118 14834145 1056020 5509841',
        }

    @staticmethod
    def _convert_fixture(processor):
        """Run ``processor.csv_creator`` on every row of ``DATA``.

        Falsy results are dropped. The file is opened in a ``with`` block so
        the handle is released even when ``csv_creator`` raises.
        """
        with open(DATA, newline="", encoding="utf-8") as handle:
            converted = [processor.csv_creator(row) for row in csv.DictReader(handle)]
        return [row for row in converted if row]

    def _assert_same_homonyms(self, actual, expected):
        """Assert two homonym mappings have equal keys and equal value multisets.

        ``assertCountEqual`` on two dicts iterates only their keys, so the
        value lists have to be compared key by key (order-insensitively).
        """
        self.assertCountEqual(actual, expected)
        for name, expected_homonyms in expected.items():
            with self.subTest(name=name):
                self.assertCountEqual(actual[name], expected_homonyms)

    # ------------------------------------------------------------------
    # csv_creator
    # ------------------------------------------------------------------

    def test_csv_creator(self):
        """``csv_creator`` on the ``DATA`` fixture without a journals file."""
        pubmed_processor = PubmedProcessing(orcid_index=IOD, publishers_filepath_pubmed=None, journals_filepath=None)
        output = self._convert_fixture(pubmed_processor)
        # assertIn reports the actual value on failure, unlike assertTrue(a or b).
        self.assertIn(output, self._acceptable_outputs())

    def test_csv_creator_w_journal_dict(self):
        """``csv_creator`` on the ``DATA`` fixture with ``JOURNALS_DICT``."""
        pubmed_processor = PubmedProcessing(orcid_index=IOD, journals_filepath=JOURNALS_DICT)
        output = self._convert_fixture(pubmed_processor)
        self.assertIn(output, self._acceptable_outputs())

    # ------------------------------------------------------------------
    # Responsible agents
    # ------------------------------------------------------------------

    def test_orcid_finder(self):
        """``orcid_finder`` returns the ORCID/name pair indexed for a DOI."""
        pubmed_processor = PubmedProcessing(orcid_index=IOD)
        orcid_found = pubmed_processor.orcid_finder('10.1016/0006-291x(75)90482-9')
        self.assertEqual(orcid_found, {'0000-0000-0000-0000': 'sarma, r h'})

    def test_get_agents_strings_list(self):
        """The first element returned by ``get_agents_strings_list`` lists the authors with ORCIDs."""
        pubmed_processor = PubmedProcessing(orcid_index=ALL_CASES_IOD)
        agents_list = [
            self._author('Arianna Moretti'),
            self._author('S Peroni'),
            self._author('Chiara D Giambattista'),
        ]
        ag = pubmed_processor.get_agents_strings_list("10.3000/1000000001", agents_list)
        self.assertEqual(
            ag[0],
            [
                'Moretti, Arianna [orcid:0000-0001-5486-7070]',
                'Peroni, Silvio [orcid:0000-0003-0530-4305]',
                'Di Giambattista, Chiara [orcid:0000-0001-8665-095X]',
            ],
        )

    # ------------------------------------------------------------------
    # Venue names
    # ------------------------------------------------------------------

    def test_get_venue_name_with_extended_map(self):
        """``get_venue_name`` with a journals file, single-ISSN journal."""
        item = {"journal": "Biochim Biophys Acta"}
        pubmed_processor = PubmedProcessing(orcid_index=None, journals_filepath=JOURNALS_DICT)
        venue_name = pubmed_processor.get_venue_name(item, "59")
        self.assertEqual(venue_name, "Biochimica et biophysica acta [issn:0006-3002]")

    def test_get_venue_name_with_extended_map_ISSN(self):
        """``get_venue_name`` with a journals file, two-ISSN journal (either order)."""
        item = {"journal": "Biochem Biophys Res Commun"}
        pubmed_processor = PubmedProcessing(orcid_index=None, journals_filepath=JOURNALS_DICT)
        venue_name = pubmed_processor.get_venue_name(item, "2")
        acceptable = [f'{self._BBRC_TITLE} {issns}' for issns in self._BBRC_ISSN_ORDERINGS]
        self.assertIn(venue_name, acceptable)

    def test_get_venue_name_no_extended_map(self):
        """``get_venue_name`` without a journals file."""
        item = {"journal": "Biochim Biophys Acta"}
        pubmed_processor = PubmedProcessing(orcid_index=None, journals_filepath=None)
        venue_name = pubmed_processor.get_venue_name(item, "59")
        self.assertEqual(venue_name, "Biochimica et biophysica acta [issn:0006-3002]")

    # ------------------------------------------------------------------
    # Identifier workers
    # ------------------------------------------------------------------

    def test_issn_worker(self):
        """``issn_worker`` appends the normalised ISSN to the given list."""
        raw_issn = 'ISSN 0006-291X'  # renamed: ``input`` shadowed the builtin
        output = []
        PubmedProcessing.issn_worker(raw_issn, output)
        self.assertEqual(output, ['issn:0006-291X'])

    def test_id_worker(self):
        """``id_worker`` feeds a string or a list through the given worker."""
        issn_list = []
        isbn_list = []
        PubmedProcessing.id_worker('00062952', issn_list, PubmedProcessing.issn_worker)
        PubmedProcessing.id_worker(['978-3-905673-82-1'], isbn_list, PubmedProcessing.isbn_worker)
        self.assertEqual((issn_list, isbn_list), (['issn:0006-2952'], ['isbn:9783905673821']))

    # ------------------------------------------------------------------
    # Homonyms and name affinity
    # ------------------------------------------------------------------

    def test_find_homonyms(self):
        """``find_homonyms`` maps each name to the other names it collides with."""
        ra_list_1 = [self._author('K S Bose'), self._author('R H Sarma')]
        ra_list_2 = [self._author('K S Bose'), self._author('R H Bose')]
        ra_list_3 = [
            self._author('Anna Maria Rossi'),
            self._author('Katia Rossi Bianchi'),
            self._author('K Rossi B'),
        ]
        ra_list_4 = [
            self._author('R J Smith'),
            self._author('R J Smith Bryant'),
            self._author('Ronald Bryant'),
        ]
        pubmed_processor_ra = PubmedProcessing(orcid_index=IOD, publishers_filepath_pubmed=None, journals_filepath=None)

        self.assertEqual(pubmed_processor_ra.find_homonyms(ra_list_1), {})
        self._assert_same_homonyms(
            pubmed_processor_ra.find_homonyms(ra_list_2),
            {'K S Bose': ['R H Bose'], 'R H Bose': ['K S Bose']},
        )
        self._assert_same_homonyms(
            pubmed_processor_ra.find_homonyms(ra_list_3),
            {
                'Anna Maria Rossi': ['Katia Rossi Bianchi', 'K Rossi B'],
                'Katia Rossi Bianchi': ['Anna Maria Rossi', 'K Rossi B'],
                'K Rossi B': ['Anna Maria Rossi', 'Katia Rossi Bianchi'],
            },
        )
        self._assert_same_homonyms(
            pubmed_processor_ra.find_homonyms(ra_list_4),
            {
                'R J Smith': ['R J Smith Bryant'],
                'R J Smith Bryant': ['Ronald Bryant', 'R J Smith'],
                'Ronald Bryant': ['R J Smith Bryant'],
            },
        )

    def test_compute_affinity(self):
        """``compute_affinity`` picks the candidate closest to the target name, or ``""``."""
        pubmed_processor_ra_ca = PubmedProcessing(orcid_index=IOD, publishers_filepath_pubmed=None, journals_filepath=None)
        target_full_names = "Anna Cristiana Cardinali Santelli"
        # (candidate names, expected best match)
        cases = [
            (["Anna Cardinali", "Anna C. Santelli", "A.Cristiana Santelli Cardinali", "Anna Cristiana Santelli Cardinali", "Anna C S C", "Anna cristiana Santelli CARDINALI"],
             "Anna Cristiana Santelli Cardinali"),
            (["Anna Cardinali", "Anna C. Santelli", "A.Cristiana Santelli Cardinali", "Anna C S C", "Anna cristiana Santelli CARDINALI"],
             "Anna cristiana Santelli CARDINALI"),
            (["Anna Cardinali", "Anna C. Santelli", "A.Cristiana Santelli Cardinali", "Anna C S C"],
             "A.Cristiana Santelli Cardinali"),
            (["Anna Cardinali", "Anna C. Santelli", "Anna C S C"],
             "Anna C. Santelli"),
            (["Anna Cardinali", "Anna C S C"],
             "Anna Cardinali"),
            (["Filippo C. Moroni", "Silvia C."],
             ""),
            (["Olga Santelli", " Vincenzo Cardinali"],
             ""),
            (["Olga Santelli", " Vincenzo Cardinali", "Carla Anna Cardinali Santelli"],
             "Carla Anna Cardinali Santelli"),
        ]
        for candidates, expected in cases:
            # subTest lets every failing case be reported, not just the first.
            with self.subTest(candidates=candidates):
                self.assertEqual(
                    pubmed_processor_ra_ca.compute_affinity(target_full_names, candidates),
                    expected,
                )

    # ------------------------------------------------------------------
    # Redis-backed DOI handling and citations
    # ------------------------------------------------------------------

    def test_redis_db(self):
        """A DOI absent from the ``id`` is kept once its key is added to ``BR_redis``."""
        pubmed_processor = PubmedProcessing(orcid_index=IOD, journals_filepath=JOURNALS_DICT)
        try:
            tabular_data = pubmed_processor.csv_creator(
                self._pmid_5_source_row('10.1016/0006-291x(75)90508-2'))
            # Accept both ISSN orderings, as the csv_creator tests using the
            # same configuration do.
            acceptable = [rows[-1] for rows in self._acceptable_outputs()]
            self.assertIn(tabular_data, acceptable)

            tabular_data_no_redis_data = pubmed_processor.csv_creator(
                self._pmid_5_source_row('10.1016/a_fake_doi'))
            self.assertEqual(tabular_data_no_redis_data['id'], 'pmid:5')

            pubmed_processor.BR_redis.sadd('doi:10.1016/a_fake_doi', 'omid:000101')

            tabular_data_w_redis_data = pubmed_processor.csv_creator(
                self._pmid_5_source_row('10.1016/a_fake_doi'))
            self.assertEqual(tabular_data_w_redis_data['id'], 'pmid:5 doi:10.1016/a_fake_doi')
        finally:
            # Always clean up: a failed assertion must not leave the fake DOI
            # key behind for later runs.
            pubmed_processor.doi_m.storage_manager.delete_storage()
            pubmed_processor.BR_redis.flushdb()
            pubmed_processor.RA_redis.flushdb()

    def test_get_citations(self):
        """``get_citations`` pairs the citing PMID with every entry of ``references``."""
        inp_ent = self._pmid_5_source_row('10.1016/0006-291x(75)90508-2')
        pubmed_processor = PubmedProcessing(orcid_index=IOD, journals_filepath=JOURNALS_DICT)

        citation_list = pubmed_processor.get_citations("pmid:5", inp_ent)
        exp_citations = {
            ('pmid:5', 'pmid:1056020'),
            ('pmid:5', 'pmid:4882249'),
            ('pmid:5', 'pmid:14834145'),
            ('pmid:5', 'pmid:5509841'),
            ('pmid:5', 'pmid:5059118'),
        }
        # Compared as sets: the order of the returned pairs is not asserted.
        self.assertEqual(set(citation_list), exp_citations)

    def test_get_citing_pmid(self):
        """``get_citing_pmid`` extracts the PMID from a row's ``id`` field."""
        inp_ent_meta = self._expected_rows(self._BBRC_ISSN_ORDERINGS[0])[-1]
        pubmed_processor = PubmedProcessing(orcid_index=IOD, journals_filepath=JOURNALS_DICT)

        out_citing_pmid = pubmed_processor.get_citing_pmid(inp_ent_meta)
        self.assertEqual(out_citing_pmid, 'pmid:5')

446 

447 

# Run the suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()