Coverage for test/prepare_multiprocess_test.py: 100%

66 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-07-14 14:06 +0000

1import os 

2import shutil 

3import unittest 

4from csv import DictReader 

5 

6from oc_meta.plugins.multiprocess.prepare_multiprocess import ( 

7 _do_collective_merge, _find_all_names, _get_duplicated_ids, 

8 _get_relevant_venues, _get_resp_agents, prepare_relevant_items) 

9 

# Fixture locations, expressed as relative paths.
# CSV_DIR is handed to prepare_relevant_items as `csv_dir` and TMP_DIR as
# `output_dir` by test_prepare_relevant_items.
BASE = os.path.join('test', 'prepare_multiprocess')
CSV_DIR = os.path.join(BASE, 'input')
TMP_DIR = os.path.join(BASE, 'tmp')

13 

14 

15class TestPrepareMultiprocess(unittest.TestCase): 

def test_prepare_relevant_items(self):
    """Check the rows that prepare_relevant_items writes under TMP_DIR.

    Method of TestPrepareMultiprocess. Runs prepare_relevant_items on
    CSV_DIR with items_per_file=3, reads back every ``.csv`` file found
    anywhere below TMP_DIR and compares the rows with the expected ones,
    ignoring the order in which they were written.
    """
    # Registered before the call so the generated files are removed even when
    # the call or the assertion fails; otherwise CSVs left by one run would be
    # picked up by os.walk in the next one.
    self.addCleanup(shutil.rmtree, TMP_DIR, ignore_errors=True)
    prepare_relevant_items(csv_dir=CSV_DIR, output_dir=TMP_DIR, items_per_file=3, verbose=False)
    output = []
    for root, _, files in os.walk(TMP_DIR):
        for file in files:
            if file.endswith('.csv'):
                # newline='' is what the csv module asks for when reading files.
                with open(os.path.join(root, file), 'r', encoding='utf-8', newline='') as f:
                    output.extend(DictReader(f))
    expected_output = [
        {'id': 'doi:10.17117/aaaaaaaaaaaa', 'title': 'Ecdotics [issn:1225-3333]', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': 'journal', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': ''},
        {'id': 'doi:10.17117/aaaaaaaaaaaa', 'title': 'Ecdotics [issn:1225-3333]', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': 'journal', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': ''},
        {'id': 'doi:10.9799/ksfan.2012.25.1.069', 'title': 'Nonthermal Sterilization and Shelf-life Extension of Seafood Products by Intense Pulsed Light Treatment', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]; Mun, Ji-Hye; Chung, Myong-Soo', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '69-76', 'type': 'journal article', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},
        {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},
        {'id': '', 'title': '', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''},
        {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': ''},
        {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': ''},
        {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': 'American Society for Microbiology [crossref:235]', 'editor': ''},
        {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '', 'type': 'journal issue', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': ''},
        {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': 'Journal of Bacteriology [issn:1098-5530 issn:0021-9193]', 'volume': '197', 'issue': '6', 'page': '', 'type': 'journal issue', 'publisher': 'American Society for Microbiology [crossref:235]', 'editor': ''}]

    # Both lists are ordered by the same concatenation of fields so that the
    # comparison does not depend on file or row order.
    sort_fields = ('id', 'title', 'author', 'editor', 'venue', 'issue', 'volume', 'type', 'publisher')

    def sort_key(row):
        return ''.join(row[field] for field in sort_fields)

    self.assertEqual(sorted(output, key=sort_key), sorted(expected_output, key=sort_key))

36 

def test__get_duplicated_ids(self):
    """Check what _get_duplicated_ids stores in the dict passed as its last argument.

    Method of TestPrepareMultiprocess. Nine rows are supplied together with
    a set holding one DOI and a set holding one ISSN; afterwards the
    collecting dict must contain exactly the two entries listed in
    ``expected``.
    """
    rows = [
        {'id': 'doi:10.9799/ksfan.2012.25.1.069', 'title': 'Nonthermal Sterilization and Shelf-life Extension of Seafood Products by Intense Pulsed Light Treatment', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]; Mun, Ji-Hye; Chung, Myong-Soo', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '69-76', 'type': 'book chapter', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},
        {'id': 'doi:10.9799/uirca.2012.25.1.069', 'title': '', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]; Mun, Ji-Hye; Chung, Myong-Soo', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '69-76', 'type': 'journal article', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': 'Massari, Arcangelo'},
        {'id': 'issn:0098-7484 issn:0003-987X', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '50-55', 'type': '', 'publisher': '', 'editor': ''},
        {'id': 'issn:0090-4295', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''},
        {'id': 'issn:2341-4022 issn:2341-4023', 'title': 'Acta urológica portuguesa', 'author': '', 'pub_date': '', 'venue': 'Transit Migration in Europe [issn:0003-987X]', 'volume': '', 'issue': '', 'page': '25', 'type': 'journal', 'publisher': '', 'editor': ''},
        {'id': 'issn:0098-7484', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '50-55', 'type': '', 'publisher': '', 'editor': ''},
        {'id': 'doi:10.9799/ksfan.2012.25.1.077', 'title': '', 'author': 'Peroni, Silvio', 'pub_date': '', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '', 'issue': '2', 'page': '', 'type': 'journal article', 'publisher': '', 'editor': 'Massari, Arcangelo'},
        {'id': 'doi:10.9799/ksfan.2012.25.1.078', 'title': '', 'author': 'Peroni, Silvio', 'pub_date': '', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '', 'issue': '2', 'page': '', 'type': 'journal article', 'publisher': '', 'editor': 'Peroni, Silvio'},
        {'id': 'doi:10.1128/jb.01991-14', 'title': 'Pseudomonas aeruginosa LysR PA4203 Regulator NmoR Acts as a Repressor of the PA4202 nmoA Gene, Encoding a Nitronate Monooxygenase', 'author': 'Vercammen, Ken; Wei, Qing; Charlier, Daniel [orcid:0000-0002-6844-376X]; Dötsch, Andreas [orcid:0000-0001-9086-2584]; Haüssler, Susanne; Schulz, Sebastian; Salvi, Francesca [orcid:0000-0001-5294-1310]; Gadda, Giovanni; Spain, Jim; Rybtke, Morten Levin; Tolker-Nielsen, Tim [orcid:0000-0002-9751-474X]; Dingemans, Jozef [orcid:0000-0001-8079-3087]; Ye, Lumeng; Cornelis, Pierre', 'pub_date': '', 'venue': 'Journal of Bacteriology [issn:0021-9193 issn:1098-5530]', 'volume': '197', 'issue': '6', 'page': '1026-1039', 'type': 'journal article', 'publisher': 'American Society for Microbiology [crossref:235]', 'editor': "O'Toole, G. A."},
    ]
    expected = {
        'doi:10.9799/ksfan.2012.25.1.069': {'others': set(), 'title': 'Nonthermal Sterilization and Shelf-life Extension of Seafood Products by Intense Pulsed Light Treatment', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]; Mun, Ji-Hye; Chung, Myong-Soo', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '69-76', 'type': 'book chapter', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},
        'doi:10.9799/ksfan.2012.25.1.077': {'others': set(), 'title': '', 'author': 'Peroni, Silvio', 'pub_date': '', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '', 'issue': '2', 'page': '', 'type': 'journal article', 'publisher': '', 'editor': 'Massari, Arcangelo'},
    }
    known_ids = {'doi:10.9799/ksfan.2012.25.1.077'}
    collected = {}
    # The function reports through the dict it is given, so the assertion
    # inspects `collected` rather than a return value.
    _get_duplicated_ids(rows, known_ids, {'issn:1225-4339'}, collected)
    self.assertEqual(collected, expected)

56 

def test__get_relevant_venues(self):
    """Check the per-identifier records _get_relevant_venues writes into its dict.

    Method of TestPrepareMultiprocess. Seven rows are supplied together
    with an ``ids_found`` mapping for four identifiers; the dict handed to
    the function must end up equal to ``expected``.
    """
    # Print the complete diff when the two large dicts differ.
    self.maxDiff = None
    publisher = 'Georg Thieme Verlag KG [crossref:194]'
    rows = [
        {'venue': 'Venue [issn:0098-7484 issn:0003-987X issn:0041-1345]', 'volume': '1', 'issue': 'a', 'type': 'journal article', 'publisher': publisher},
        {'venue': 'Venue [issn:0098-7484 issn:0040-6090]', 'volume': '2', 'issue': 'b', 'type': 'journal article', 'publisher': publisher},
        {'venue': 'Venue [issn:0090-4295 issn:0040-6090]', 'volume': '3', 'issue': 'c', 'type': 'journal article', 'publisher': publisher},
        {'venue': 'Venue [issn:0090-4295 issn:2341-4022 issn:0000-0000]', 'volume': '', 'issue': 'd', 'type': 'journal article', 'publisher': publisher},
        {'venue': 'Venue [issn:2341-4022]', 'volume': '', 'issue': 'e', 'type': 'journal article', 'publisher': publisher},
        {'id': 'isbn:9789089646491', 'title': 'Transit Migration in Europe', 'venue': '', 'volume': '', 'issue': '', 'type': 'book', 'publisher': publisher},
        {'id': 'isbn:9789089646491', 'title': 'Transit Migration in Europe', 'venue': 'An Introduction to Immigrant Incorporation Studies [issn:1750-743X]', 'volume': '', 'issue': '', 'type': 'book', 'publisher': publisher},
    ]
    ids_found = {
        'issn:0098-7484': {'volumes': {'1': {'a'}}, 'issues': set()},
        'issn:2341-4022': {'volumes': {}, 'issues': {'d', 'e'}},
        'isbn:9789089646491': {'volumes': {}, 'issues': set()},
        'issn:1750-743X': {'volumes': {}, 'issues': set()},
    }
    expected = {
        'issn:0098-7484': {'others': {'issn:0041-1345', 'issn:0040-6090', 'issn:0003-987X'}, 'name': 'Venue', 'type': 'journal', 'volume': {'1': {'a'}}, 'issue': set(), 'publisher': publisher},
        'issn:0003-987X': {'others': {'issn:0041-1345', 'issn:0098-7484'}, 'name': 'Venue', 'type': 'journal', 'volume': {'1': {'a'}}, 'issue': set(), 'publisher': publisher},
        'issn:0041-1345': {'others': {'issn:0098-7484', 'issn:0003-987X'}, 'name': 'Venue', 'type': 'journal', 'volume': {'1': {'a'}}, 'issue': set(), 'publisher': publisher},
        'issn:0040-6090': {'others': {'issn:0090-4295', 'issn:0098-7484'}, 'name': 'Venue', 'type': 'journal', 'volume': {}, 'issue': set(), 'publisher': publisher},
        'issn:0090-4295': {'others': {'issn:2341-4022', 'issn:0040-6090'}, 'name': 'Venue', 'type': 'journal', 'volume': {}, 'issue': {'d'}, 'publisher': publisher},
        'issn:2341-4022': {'others': {'issn:0090-4295'}, 'name': 'Venue', 'type': 'journal', 'volume': {}, 'issue': {'d', 'e'}, 'publisher': publisher},
        'isbn:9789089646491': {'others': set(), 'name': 'Transit Migration in Europe', 'type': 'book', 'volume': {}, 'issue': set(), 'publisher': publisher},
        'issn:1750-743X': {'others': set(), 'name': 'An Introduction to Immigrant Incorporation Studies', 'type': 'book series', 'volume': {}, 'issue': set(), 'publisher': publisher},
    }
    venues = {}
    # NOTE(review): the very same dict object is passed as both `items_by_id`
    # and `duplicated_items` (test__get_resp_agents uses two separate dicts).
    # Kept as-is; confirm this aliasing is intended.
    _get_relevant_venues(data=rows, ids_found=ids_found, items_by_id=venues, duplicated_items=venues)
    self.assertEqual(venues, expected)

79 

def test__get_resp_agents(self):
    """Check the agent records _get_resp_agents writes into ``duplicated_items``.

    Method of TestPrepareMultiprocess. Four rows carrying author, editor
    and publisher strings are supplied with three identifiers in
    ``ids_found``; only the ``duplicated_items`` dict is asserted on.
    """
    rows = [
        {'author': 'Massari, [orcid:0000-0002-8420-0696]', 'editor': '', 'publisher': 'American Medical Association (AMA) [crossref:10 crossref:9999]'},
        {'author': '', 'editor': 'Massari, A. [orcid:0000-0002-8420-0696 viaf:1]', 'publisher': 'Elsevier BV [crossref:78]'},
        {'author': 'Massari, Arcangelo [viaf:1]', 'editor': '', 'publisher': 'Wiley [crossref:311]'},
        {'author': 'Peroni, Silvio [orcid:0000-0003-0530-4305]', 'editor': '', 'publisher': 'Wiley [crossref:311]'},
    ]
    ids_found = {'orcid:0000-0002-8420-0696', 'orcid:0000-0003-0530-4305', 'crossref:10'}
    expected = {
        'orcid:0000-0002-8420-0696': {'others': {'viaf:1'}, 'name': 'Massari, A.', 'type': 'author'},
        'viaf:1': {'others': {'orcid:0000-0002-8420-0696'}, 'name': 'Massari, Arcangelo', 'type': 'editor'},
        'orcid:0000-0003-0530-4305': {'others': set(), 'name': 'Peroni, Silvio', 'type': 'author'},
        'crossref:10': {'others': {'crossref:9999'}, 'type': 'publisher', 'name': 'American Medical Association (AMA)'},
        'crossref:9999': {'others': {'crossref:10'}, 'type': 'publisher', 'name': 'American Medical Association (AMA)'},
    }
    # Two distinct dicts are supplied; `by_id` is passed but never checked.
    by_id = {}
    duplicates = {}
    _get_resp_agents(data=rows, ids_found=ids_found, items_by_id=by_id, duplicated_items=duplicates)
    self.assertEqual(duplicates, expected)

97 

def test__merge_publishers(self):
    """Check _do_collective_merge on publisher records.

    Method of TestPrepareMultiprocess. Two records that list each other
    in ``others`` plus two records with empty ``others`` go in; the
    returned dict must equal ``expected``.
    """
    ama = 'American Medical Association (AMA)'
    publishers = {
        'crossref:10': {'others': {'crossref:9999'}, 'name': ama, 'type': 'publisher'},
        'crossref:9999': {'others': {'crossref:10'}, 'name': ama, 'type': 'publisher'},
        'crossref:78': {'others': set(), 'name': 'Elsevier BV', 'type': 'publisher'},
        'crossref:311': {'others': set(), 'name': 'Wiley', 'type': 'publisher'},
    }
    expected = {
        'crossref:10': {'name': ama, 'type': 'publisher', 'others': {'crossref:9999'}},
        'crossref:78': {'name': 'Elsevier BV', 'type': 'publisher', 'others': set()},
        'crossref:311': {'name': 'Wiley', 'type': 'publisher', 'others': set()},
    }
    # The same dict is deliberately supplied for both positional arguments,
    # exactly as in test__do_collective_merge.
    merged = _do_collective_merge(publishers, publishers)
    self.assertEqual(merged, expected)

110 

def test__do_collective_merge(self):
    """Check _do_collective_merge on venue and author records together.

    Method of TestPrepareMultiprocess. Six venue records linked to one
    another through ``others`` and three author records go in; the
    returned dict must equal ``expected``.
    """
    records = {
        'id:a': {'others': {'id:c', 'id:d', 'id:b'}, 'name': 'Venue', 'type': 'journal', 'volume': {'1': {'a'}, '2': {'b'}}, 'issue': set(), 'publisher': 'Georg Thieme Verlag KG'},
        'id:b': {'others': {'id:c', 'id:a'}, 'name': 'Venue', 'type': 'journal', 'volume': {'1': {'a'}}, 'issue': set(), 'publisher': 'Georg Thieme Verlag KG [crossref:194]'},
        'id:c': {'others': {'id:a', 'id:b'}, 'name': 'Venue', 'type': 'journal', 'volume': {'1': {'a'}}, 'issue': set(), 'publisher': 'Georg Thieme Verlag KG [crossref:194]'},
        'id:d': {'others': {'id:a', 'id:e'}, 'name': 'Venue', 'type': 'journal', 'volume': {'2': {'c'}, '3': {'c'}}, 'issue': set(), 'publisher': 'Georg Thieme Verlag KG [crossref:194]'},
        # This record has no 'publisher' key at all.
        'id:e': {'others': {'id:f', 'id:d'}, 'name': 'Venue', 'type': 'journal', 'volume': {'3': {'c'}}, 'issue': {'d'}},
        'id:f': {'others': {'id:e'}, 'name': 'Venue', 'type': 'journal', 'volume': {}, 'issue': {'vol. 17, n° 2', 'e'}, 'publisher': 'Georg Thieme Verlag KG [crossref:194]'},
        'orcid:0000-0002-8420-0696': {'others': {'viaf:1'}, 'name': 'Massari, A.', 'type': 'author'},
        'viaf:1': {'others': {'orcid:0000-0002-8420-0696'}, 'name': 'Massari, Arcangelo', 'type': 'author'},
        'orcid:0000-0003-0530-4305': {'others': set(), 'name': 'Peroni, Silvio', 'type': 'author'},
    }
    expected = {
        'id:a': {'others': {'id:c', 'id:b', 'id:f', 'id:d', 'id:e'}, 'name': 'Venue', 'type': 'journal', 'volume': {'1': {'a'}, '2': {'b', 'c'}, '3': {'c'}}, 'issue': {'vol. 17, n° 2', 'd', 'e'}, 'publisher': 'Georg Thieme Verlag KG [crossref:194]'},
        'orcid:0000-0002-8420-0696': {'others': {'viaf:1'}, 'name': 'Massari, Arcangelo', 'type': 'author'},
        'orcid:0000-0003-0530-4305': {'others': set(), 'name': 'Peroni, Silvio', 'type': 'author'},
    }
    # The same dict is supplied for both positional arguments.
    merged = _do_collective_merge(records, records)
    self.assertEqual(merged, expected)

128 

def test___find_all_names(self):
    """Check the name returned by _find_all_names for several starting names.

    Method of TestPrepareMultiprocess. Five different ``cur_name`` values
    are tried against the same three identifiers and must all yield
    'Massari, Arcangelo'; a sixth call must yield 'Silvio Peroni'.
    """
    items_by_id = {
        'orcid:0000-0002-8420-0696': {'others': {'viaf:1', 'viaf:2'}, 'name': 'Arcangelo Massari', 'type': 'author'},
        'viaf:1': {'others': {'orcid:0000-0002-8420-0696', 'viaf:2'}, 'name': 'Massari, Arcangelo', 'type': 'author'},
        'viaf:2': {'others': {'viaf:1', 'orcid:0000-0002-8420-0696'}, 'name': 'Massari, A.', 'type': 'author'},
        'orcid:0000-0002-8420-0695': {'others': set(), 'name': 'Silvio Peroni', 'type': 'author'},
    }
    massari_ids = ('orcid:0000-0002-8420-0696', 'viaf:1', 'viaf:2')
    starting_names = ('Arcangelo Massari', 'Massari, A.', 'Massari, Arcangelo', 'Massari, A', 'Massari,')
    # A fresh list is built for every call, as the original did with a new
    # literal each time.
    found = [
        _find_all_names(items_by_id, ids_list=list(massari_ids), cur_name=name)
        for name in starting_names
    ]
    # NOTE(review): 'orcorcid:...' matches no key of items_by_id (the Peroni
    # record is keyed 'orcid:0000-0002-8420-0695'). Kept byte-for-byte;
    # confirm whether an unknown id is intended here or this is a typo.
    found.append(_find_all_names(items_by_id, ids_list=['orcorcid:0000-0002-8420-0695'], cur_name='Silvio Peroni'))
    expected = ('Massari, Arcangelo', 'Massari, Arcangelo', 'Massari, Arcangelo', 'Massari, Arcangelo', 'Massari, Arcangelo', 'Silvio Peroni')
    self.assertEqual(tuple(found), expected)

142 

143 # def test_split_csvs_in_chunk(self): 

144 # CHUNK_SIZE = 4 

145 # split_csvs_in_chunks(csv_dir=CSV_DIR, output_dir=TMP_DIR, chunk_size=CHUNK_SIZE, verbose=False) 

146 # output = dict() 

147 # for file in os.listdir(TMP_DIR): 

148 # output[file] = get_csv_data(os.path.join(TMP_DIR, file)) 

149 # expected_outputs = [{ 

150 # '0.csv': [ 

151 # {'id': 'issn:1524-4539 issn:0009-7322', 'title': 'Circulation', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': 'journal', 'publisher': '', 'editor': ''},  

152 # {'id': 'doi:10.17117/na.2015.08.1067', 'title': '', 'author': '', 'pub_date': '', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '26', 'issue': '', 'page': '', 'type': 'journal article', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': 'NAIMI, ELMEHDI [orcid:0000-0002-4126-8519]'},  

153 # {'id': 'doi:10.9799/ksfan.2012.25.1.069', 'title': 'Nonthermal Sterilization and Shelf-life Extension of Seafood Products by Intense Pulsed Light Treatment', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]; Mun, Ji-Hye; Chung, Myong-Soo', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '69-76', 'type': 'journal article', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},  

154 # {'id': 'doi:10.9799/ksfan.2012.25.1.069', 'title': 'Nonthermal Sterilization and Shelf-life Extension of Seafood Products by Intense Pulsed Light Treatment', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]; Mun, Ji-Hye; Chung, Myong-Soo', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '69-76', 'type': 'journal article', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},  

155 # {'id': 'doi:10.9799/ksfan.2012.25.1.077', 'title': 'Properties of Immature Green Cherry Tomato Pickles', 'author': 'Koh, Jong-Ho; Shin, Hae-Hun; Kim, Young-Shik [orcid:0000-0001-5673-6314]; Kook, Moo-Chang', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '', 'issue': '2', 'page': '77-82', 'type': 'journal article', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': 'Massari,Arcangelo'}],  

156 # '1.csv': [ 

157 # {'id': 'doi:10.1128/jb.00727-18', 'title': 'Effect of the MotA(M206I) Mutation on Torque Generation and Stator Assembly in the Salmonella H + -Driven Flagellar Motor', 'author': 'Suzuki, Yuya', 'pub_date': '2019-3-15', 'venue': 'Journal of Bacteriology [issn:0021-9193 issn:1098-5530]', 'volume': '197', 'issue': '6', 'page': '', 'type': 'journal article', 'publisher': 'American Society for Microbiology [crossref:235]', 'editor': 'Mullineaux, Conrad W.'},  

158 # {'id': 'doi:10.1128/jb.01991-14', 'title': 'Pseudomonas aeruginosa LysR PA4203 Regulator NmoR Acts as a Repressor of the PA4202 nmoA Gene, Encoding a Nitronate Monooxygenase', 'author': 'Vercammen, Ken; Wei, Qing; Charlier, Daniel [orcid:0000-0002-6844-376X]', 'pub_date': '2015-3-15', 'venue': 'Journal of Bacteriology [issn:0021-9193 issn:1098-5530]', 'volume': '197', 'issue': '6', 'page': '1026-1039', 'type': 'journal article', 'publisher': 'American Society for Microbiology [crossref:235]', 'editor': "O'Toole, G. A."},  

159 # {'id': 'doi:10.1128/jb.01991-15', 'title': '', 'author': 'Vercammen, Ken', 'pub_date': '', 'venue': 'Journal of Bacteriology [issn:0021-9193 issn:1098-5530]', 'volume': '197', 'issue': '6', 'page': '', 'type': 'journal article', 'publisher': '', 'editor': 'Peroni, Silvio'},  

160 # {'id': 'doi:10.1093/ije/31.3.555', 'title': 'Teen pregnancy is not a public health crisis in the United States. It is time we made it one', 'author': 'Rich-Edwards, Janet', 'pub_date': '2002-6', 'venue': 'International Journal of Epidemiology [issn:1464-3685 issn:0300-5771]', 'volume': '31', 'issue': '3', 'page': '555-556', 'type': 'journal article', 'publisher': 'Oxford University Press (OUP) [crossref:286]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},  

161 # {'id': 'doi:10.1073/pnas.152186199', 'title': 'Y586F mutation in murine leukemia virus reverse transcriptase decreases fidelity of DNA synthesis in regions associated with adenine-thymine tracts', 'author': 'Zhang, W.-H.; Svarovskaia, E. S.; Barr, R.; Pathak, V. K. [orcid:0000-0003-2441-8412]', 'pub_date': '2002-7-15', 'venue': 'International Journal of Epidemiology [issn:1464-3685 issn:0300-5771]', 'volume': '31', 'issue': '3', 'page': '10090-10095', 'type': 'journal article', 'publisher': 'Proceedings of the National Academy of Sciences [crossref:341]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'}]}, 

162 # {'0.csv': [ 

163 # {'id': 'doi:10.1093/ije/31.3.555', 'title': 'Teen pregnancy is not a public health crisis in the United States. It is time we made it one', 'author': 'Rich-Edwards, Janet', 'pub_date': '2002-6', 'venue': 'International Journal of Epidemiology [issn:1464-3685 issn:0300-5771]', 'volume': '31', 'issue': '3', 'page': '555-556', 'type': 'journal article', 'publisher': 'Oxford University Press (OUP) [crossref:286]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},  

164 # {'id': 'doi:10.1073/pnas.152186199', 'title': 'Y586F mutation in murine leukemia virus reverse transcriptase decreases fidelity of DNA synthesis in regions associated with adenine-thymine tracts', 'author': 'Zhang, W.-H.; Svarovskaia, E. S.; Barr, R.; Pathak, V. K. [orcid:0000-0003-2441-8412]', 'pub_date': '2002-7-15', 'venue': 'International Journal of Epidemiology [issn:1464-3685 issn:0300-5771]', 'volume': '31', 'issue': '3', 'page': '10090-10095', 'type': 'journal article', 'publisher': 'Proceedings of the National Academy of Sciences [crossref:341]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},  

165 # {'id': 'issn:1524-4539 issn:0009-7322', 'title': 'Circulation', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': 'journal', 'publisher': '', 'editor': ''},  

166 # {'id': 'doi:10.1128/jb.00727-18', 'title': 'Effect of the MotA(M206I) Mutation on Torque Generation and Stator Assembly in the Salmonella H + -Driven Flagellar Motor', 'author': 'Suzuki, Yuya', 'pub_date': '2019-3-15', 'venue': 'Journal of Bacteriology [issn:0021-9193 issn:1098-5530]', 'volume': '197', 'issue': '6', 'page': '', 'type': 'journal article', 'publisher': 'American Society for Microbiology [crossref:235]', 'editor': 'Mullineaux, Conrad W.'},  

167 # {'id': 'doi:10.1128/jb.01991-14', 'title': 'Pseudomonas aeruginosa LysR PA4203 Regulator NmoR Acts as a Repressor of the PA4202 nmoA Gene, Encoding a Nitronate Monooxygenase', 'author': 'Vercammen, Ken; Wei, Qing; Charlier, Daniel [orcid:0000-0002-6844-376X]', 'pub_date': '2015-3-15', 'venue': 'Journal of Bacteriology [issn:0021-9193 issn:1098-5530]', 'volume': '197', 'issue': '6', 'page': '1026-1039', 'type': 'journal article', 'publisher': 'American Society for Microbiology [crossref:235]', 'editor': "O'Toole, G. A."},  

168 # {'id': 'doi:10.1128/jb.01991-15', 'title': '', 'author': 'Vercammen, Ken', 'pub_date': '', 'venue': 'Journal of Bacteriology [issn:0021-9193 issn:1098-5530]', 'volume': '197', 'issue': '6', 'page': '', 'type': 'journal article', 'publisher': '', 'editor': 'Peroni, Silvio'}],  

169 # '1.csv': [ 

170 # {'id': 'doi:10.17117/na.2015.08.1067', 'title': '', 'author': '', 'pub_date': '', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '26', 'issue': '', 'page': '', 'type': 'journal article', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': 'NAIMI, ELMEHDI [orcid:0000-0002-4126-8519]'},  

171 # {'id': 'doi:10.9799/ksfan.2012.25.1.069', 'title': 'Nonthermal Sterilization and Shelf-life Extension of Seafood Products by Intense Pulsed Light Treatment', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]; Mun, Ji-Hye; Chung, Myong-Soo', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '69-76', 'type': 'journal article', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},  

172 # {'id': 'doi:10.9799/ksfan.2012.25.1.069', 'title': 'Nonthermal Sterilization and Shelf-life Extension of Seafood Products by Intense Pulsed Light Treatment', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]; Mun, Ji-Hye; Chung, Myong-Soo', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '25', 'issue': '1', 'page': '69-76', 'type': 'journal article', 'publisher': 'The Korean Society of Food and Nutrition [crossref:4768]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},  

173 # {'id': 'doi:10.9799/ksfan.2012.25.1.077', 'title': 'Properties of Immature Green Cherry Tomato Pickles', 'author': 'Koh, Jong-Ho; Shin, Hae-Hun; Kim, Young-Shik [orcid:0000-0001-5673-6314]; Kook, Moo-Chang', 'pub_date': '2012-3-31', 'venue': 'The Korean Journal of Food And Nutrition [issn:1225-4339]', 'volume': '', 'issue': '2', 'page': '77-82', 'type': 'journal article', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': 'Massari,Arcangelo'}]}]  

174 # output = {k:v.sort(key=lambda x: x['id']+x['title']+x['author']+x['venue']+x['issue']+x['volume']+x['type']) for k,v in output.items()} 

175 # expected_outputs = [{k:v.sort(key=lambda x: x['id']+x['title']+x['author']+x['venue']+x['issue']+x['volume']+x['type']) for k,v in expected_output.items()} for expected_output in expected_outputs] 

176 # shutil.rmtree(TMP_DIR) 

177 # self.assertIn(output, expected_outputs) 

178 

179 # def test__enrich_duplicated_ids_found(self): 

180 # data = [{'id': 'issn:2341-4022', 'title': 'Acta urológica portuguesa 1', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]', 'pub_date': '2012-3-31', 'venue': 'Transit Migration in Europe [issn:0003-987X]', 'volume': '25', 'issue': '1', 'page': '25', 'type': 'journal article', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'}] 

181 # items_by_id = { 

182 # 'issn:2341-4022': {'others': {'issn:2341-4023'}, 'title': 'Acta urológica portuguesa', 'author': 'Cheigh, Chan-Ick', 'pub_date': '2012-3-31', 'venue': 'Transit Migration in Europe [issn:0003-987X]', 'volume': '', 'issue': '', 'page': '25', 'type': 'journal article', 'publisher': '', 'editor': 'Chung, Myong-Soo'},  

183 # 'issn:2341-4023': {'others': {'issn:2341-4022'}, 'title': 'Acta urológica portuguesa', 'author': 'Cheigh, Chan-Ick', 'pub_date': '2012-3-31', 'venue': 'Transit Migration in Europe [issn:0003-987X]', 'volume': '', 'issue': '', 'page': '25', 'type': 'journal article', 'publisher': '', 'editor': 'Chung, Myong-Soo'}} 

184 # _enrich_duplicated_ids_found(data, items_by_id) 

185 # expected_output = { 

186 # 'issn:2341-4022': {'others': {'issn:2341-4023'}, 'title': 'Acta urológica portuguesa 1', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]', 'pub_date': '2012-3-31', 'venue': 'Transit Migration in Europe [issn:0003-987X]', 'volume': '25', 'issue': '1', 'page': '25', 'type': 'journal article', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'},  

187 # 'issn:2341-4023': {'others': {'issn:2341-4022'}, 'title': 'Acta urológica portuguesa 1', 'author': 'Cheigh, Chan-Ick [orcid:0000-0003-2542-5788]', 'pub_date': '2012-3-31', 'venue': 'Transit Migration in Europe [issn:0003-987X]', 'volume': '25', 'issue': '1', 'page': '25', 'type': 'journal article', 'publisher': 'Consulting Company Ucom [crossref:6623]', 'editor': 'Chung, Myong-Soo [orcid:0000-0002-9666-2513]'}} 

188 # self.assertEqual(items_by_id, expected_output) 

189 

190 

# Run the tests when this file is executed directly as a script.
if __name__ == '__main__':  # pragma: no cover
    unittest.main()