Coverage for test/curator_test.py: 99%

784 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-07-14 14:06 +0000

1import csv 

2import shutil 

3import unittest 

4 

5from oc_ocdm import Storer 

6from SPARQLWrapper import POST, SPARQLWrapper 

7from rdflib import Graph, ConjunctiveGraph 

8import redis 

9from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler 

10 

11from oc_meta.core.creator import Creator 

12from oc_meta.core.curator import * 

13from oc_meta.lib.file_manager import get_csv_data 

14from oc_meta.lib.finder import ResourceFinder 

15from oc_meta.plugins.multiprocess.resp_agents_curator import RespAgentsCurator 

16 

17SERVER = 'http://127.0.0.1:8805/sparql' 

18BASE_DIR = os.path.join('test') 

19MANUAL_DATA_CSV = f'{BASE_DIR}/manual_data.csv' 

20MANUAL_DATA_RDF = f'{BASE_DIR}/testcases/ts/testcase_ts-13.ttl' 

21REAL_DATA_CSV = os.path.join(BASE_DIR, 'real_data.csv') 

22REAL_DATA_RDF = f'{BASE_DIR}/testcases/ts/real_data.nt' 

23REAL_DATA_RDF_WITH_PROV = f'{BASE_DIR}/testcases/ts/real_data_with_prov.nq' 

24BASE_IRI = 'https://w3id.org/oc/meta/' 

25CURATOR_COUNTER_DIR = f'{BASE_DIR}/curator_counter' 

26OUTPUT_DIR = f'{BASE_DIR}/output' 

27PROV_CONFIG = f'{BASE_DIR}/prov_config.json' 

28 

29# Redis configuration 

30REDIS_HOST = 'localhost' 

31REDIS_PORT = 6379 

32REDIS_DB = 5 

33 

def get_path(path: str) -> str:
    """Return *path* with every backslash replaced by a forward slash.

    Only the separator character is rewritten; the path is not resolved or
    made absolute.
    """
    return '/'.join(path.split('\\'))

38 

def reset_redis_counters():
    """Flush the Redis database identified by REDIS_HOST, REDIS_PORT and REDIS_DB."""
    redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB).flushdb()

42 

def get_counter_handler():
    """Return a RedisCounterHandler connected to the Redis database used by the tests."""
    connection = {'host': REDIS_HOST, 'port': REDIS_PORT, 'db': REDIS_DB}
    return RedisCounterHandler(**connection)

45 

def reset():
    """Reset shared test state; this only flushes the Redis counters."""
    reset_redis_counters()

48 

def reset_server(server: str = SERVER) -> None:
    """Empty the graphs used by the tests on the triplestore at *server*.

    One ``CLEAR GRAPH`` update is sent through HTTP POST for each graph IRI
    listed below.

    :param server: URL of the SPARQL endpoint to clean.
    """
    # A tuple (rather than a set) keeps the order of the updates deterministic.
    graphs = (
        'https://w3id.org/oc/meta/br/',
        'https://w3id.org/oc/meta/ra/',
        'https://w3id.org/oc/meta/re/',
        'https://w3id.org/oc/meta/id/',
        'https://w3id.org/oc/meta/ar/',
        'http://default.graph/',
    )
    ts = SPARQLWrapper(server)
    # The HTTP method does not change between updates, so set it once.
    ts.setMethod(POST)
    for graph in graphs:
        ts.setQuery(f'CLEAR GRAPH <{graph}>')
        ts.query()

55 

def add_data_ts(server:str=SERVER, data_path:str=os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data.nt')).replace('\\', '/'), batch_size:int=100, default_graph_uri=URIRef("http://default.graph/")):
    """Reset the triplestore and load the RDF file at *data_path* into it.

    The serialisation is chosen from the file extension: ``.nt`` (N-Triples),
    ``.ttl`` (Turtle) or ``.nq`` (N-Quads). Triples from ``.nt``/``.ttl`` files
    are inserted into *default_graph_uri*; quads keep their own context.
    Statements are sent in ``INSERT DATA`` updates of at most *batch_size*
    statements each.

    :param server: URL of the SPARQL endpoint.
    :param data_path: path of the RDF file to load.
    :param batch_size: maximum number of statements per update; must be >= 1.
    :param default_graph_uri: graph receiving triples that carry no context.
        A falsy value makes them be inserted without a ``GRAPH`` clause.
    :raises ValueError: if *batch_size* is not positive or the file extension
        is not supported.
    """
    # Fail before touching the server: a zero step would make range() raise
    # later, and a negative one would silently load nothing.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    reset_server(server)
    f_path = get_path(data_path)

    # Determine the file format
    file_extension = os.path.splitext(f_path)[1].lower()
    triple_formats = {'.nt': 'nt', '.ttl': 'turtle'}
    if file_extension in triple_formats:
        g = Graph()
        g.parse(location=f_path, format=triple_formats[file_extension])
        quads = [(subj, pred, obj, default_graph_uri) for subj, pred, obj in g]
    elif file_extension == '.nq':
        g = ConjunctiveGraph()
        g.parse(location=f_path, format='nquads')
        quads = list(g.quads((None, None, None, None)))
    else:
        raise ValueError(f"Unsupported file extension: {file_extension}")

    # One wrapper is enough; only the query text changes between batches.
    ts = SPARQLWrapper(server)
    ts.setMethod(POST)
    for start in range(0, len(quads), batch_size):
        statements = []
        for subj, pred, obj, ctx in quads[start:start + batch_size]:
            triple = f"{subj.n3()} {pred.n3()} {obj.n3()}"
            if ctx:
                # The n3() form of a context may be wrapped in square brackets
                # (presumably when it is a graph object, not a plain IRI -
                # TODO confirm); strip them to get a bare <iri>.
                graph_term = ctx.n3().replace('[', '').replace(']', '')
                statements.append(f"GRAPH {graph_term} {{ {triple} }} ")
            else:
                statements.append(f"{triple} . ")
        payload = ''.join(statements)
        ts.setQuery(f"INSERT DATA {{ {payload} }}")
        ts.query()

98 

def store_curated_data(curator_obj: Curator, server: str) -> None:
    """Turn the curated rows of *curator_obj* into RDF and upload them to *server*.

    A Creator is built from the curator's data and indices, its ``creator``
    method produces the graph set, and a Storer uploads it in batches of 100.

    :param curator_obj: a Curator whose curation step has already been run.
    :param server: URL of the SPARQL endpoint, used both by the Creator and
        for the upload.
    """
    # Use the ``server`` argument here too (the global SERVER was passed
    # before), so Creator and upload always target the same endpoint.
    creator_obj = Creator(
        curator_obj.data, server, BASE_IRI, None, None, 'https://orcid.org/0000-0002-8420-0696',
        curator_obj.index_id_ra, curator_obj.index_id_br, curator_obj.re_index,
        curator_obj.ar_index, curator_obj.VolIss, preexisting_entities=set(), everything_everywhere_allatonce=Graph())
    creator = creator_obj.creator(source=None)
    res_storer = Storer(creator)
    res_storer.upload_all(server, base_dir=None, batch_size=100)

106 

def _sort_index_rows(rows):
    """Sort *rows* in place by 'id', falling back to 'meta' and then 'br'."""
    for column in ('id', 'meta'):
        try:
            rows.sort(key=lambda row: row[column])
        except KeyError:
            continue
        return
    # Last resort: a KeyError raised here propagates to the caller.
    rows.sort(key=lambda row: row['br'])


def prepare_to_test(data, name):
    """Curate *data* and load the expected results of test case *name*.

    Redis counters and the triplestore are reset first; for test cases whose
    number is greater than 12 the ``testcase_ts-13.ttl`` dataset is loaded.
    The 'id' field of every data row (expected and curated) is turned into a
    sorted list of tokens, and the index tables are sorted, so that both sides
    can be compared regardless of ordering.

    :param data: rows to pass to the Curator.
    :param name: test case number as a string (e.g. ``'13'``); it is used to
        build the fixture file names.
    :return: ``(data_curated, testcase)``, two lists laid out as
        ``[data, index_id_br, index_id_ra, ar_index, re_index, vol_iss]``.
    """
    reset_redis_counters()

    reset_server(SERVER)
    if float(name) > 12:
        add_data_ts(SERVER, os.path.abspath(os.path.join('test', 'testcases', 'ts', 'testcase_ts-13.ttl')).replace('\\', '/'))

    indices_dir = f'test/testcases/testcase_data/indices/{name}'
    testcase_csv = get_path(f'test/testcases/testcase_data/testcase_{name}_data.csv')
    testcase_id_br = get_path(f'{indices_dir}/index_id_br_{name}.csv')
    testcase_id_ra = get_path(f'{indices_dir}/index_id_ra_{name}.csv')
    testcase_ar = get_path(f'{indices_dir}/index_ar_{name}.csv')
    testcase_re = get_path(f'{indices_dir}/index_re_{name}.csv')
    testcase_vi = get_path(f'{indices_dir}/index_vi_{name}.json')

    counter_handler = get_counter_handler()
    settings = {'normalize_titles': True}
    curator_obj = Curator(data, SERVER, prov_config=PROV_CONFIG, counter_handler=counter_handler, settings=settings)
    curator_obj.curator()

    testcase_csv = get_csv_data(testcase_csv)
    # ``rows`` (not ``csv``) so the imported csv module is not shadowed.
    for rows in (testcase_csv, curator_obj.data):
        for row in rows:
            row['id'] = sorted(row['id'].split())

    testcase_id_br = get_csv_data(testcase_id_br)
    testcase_id_ra = get_csv_data(testcase_id_ra)
    testcase_ar = get_csv_data(testcase_ar)
    testcase_re = get_csv_data(testcase_re)
    for rows in (testcase_id_br, testcase_id_ra, testcase_ar, testcase_re,
                 curator_obj.index_id_br, curator_obj.index_id_ra, curator_obj.ar_index, curator_obj.re_index):
        _sort_index_rows(rows)

    # JSON is read as UTF-8 explicitly, independent of the platform default.
    with open(testcase_vi, encoding='utf-8') as json_file:
        testcase_vi = json.load(json_file)

    testcase = [testcase_csv, testcase_id_br, testcase_id_ra, testcase_ar, testcase_re, testcase_vi]
    data_curated = [curator_obj.data, curator_obj.index_id_br, curator_obj.index_id_ra, curator_obj.ar_index,
                    curator_obj.re_index, curator_obj.VolIss]
    return data_curated, testcase

147 

def prepareCurator(data:list, server:str=SERVER, resp_agents_only:bool=False) -> Curator:
    """Flush the Redis counters and return a curator for *data*.

    A RespAgentsCurator is returned when *resp_agents_only* is true; otherwise
    a Curator configured with ``normalize_titles`` enabled.
    """
    reset_redis_counters()
    handler = get_counter_handler()
    if not resp_agents_only:
        return Curator(data, server, prov_config=PROV_CONFIG, counter_handler=handler,
                       settings={'normalize_titles': True})
    return RespAgentsCurator(data, server, prov_config=PROV_CONFIG, counter_handler=handler)

157 

158 

159class test_Curator(unittest.TestCase): 

@classmethod
def setUpClass(cls):
    """Reset the triplestore and load the default dataset once for the whole class."""
    add_data_ts()

163 

def setUp(self):
    """Start every test with an empty Redis counter database."""
    reset_redis_counters()

166 

def tearDown(self):
    """Flush the Redis counter database after every test."""
    reset_redis_counters()

169 

def test_merge_entities_in_csv(self):
    """A DOI merged into entity '0601' is added to its ids and mapped to '0605'."""
    title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    doi = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'
    curator = prepareCurator(list())
    # The 'id' counter is preset to 4 for supplier prefix '060'.
    curator.counter_handler.set_counter(4, 'id', supplier_prefix='060')
    entity_dict = {'0601': {'ids': [], 'title': title, 'others': []}}
    id_dict = dict()
    curator.merge_entities_in_csv([doi], '0601', title, entity_dict, id_dict)
    expected_output = (
        {'0601': {'ids': [doi], 'title': title, 'others': []}},
        {doi: '0605'},
    )
    self.assertEqual((entity_dict, id_dict), expected_output)

181 

def test_clean_id_list(self):
    """With br=True, clean_id_list returns the cleaned ids and the OMID value separately."""
    # ``id_list`` (not ``input``) so the builtin is not shadowed.
    id_list = ['doi:10.001/B-1', 'wikidata:B1111111', 'OMID:br/060101']
    output = Curator.clean_id_list(id_list, br=True)
    expected_output = (['doi:10.001/b-1', 'wikidata:B1111111'], '060101')
    self.assertEqual(output, expected_output)

187 

def test_equalizer(self):
    """equalizer fills an empty row with the data of the existing entity found for its DOI."""
    blank_row = {'id': '', 'title': '', 'author': '', 'pub_date': '1972-12-01', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}

    # Step 1: a row whose DOI can be resolved to an existing entity
    row = dict(blank_row, id='doi:10.1001/archderm.104.1.106')
    curator = prepareCurator(list())
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)

    metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
    curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)

    curator.log[0] = {'id': {}}
    curator.clean_id(row)
    extracted_metaval = row['id']
    self.assertEqual(extracted_metaval, '3757')

    # Step 2: run equalizer on a fresh, empty row using the resolved metaval
    row = dict(blank_row)
    curator.rowcnt = 0
    log_fields = ('id', 'author', 'venue', 'editor', 'publisher', 'page', 'volume', 'issue', 'pub_date', 'type', 'title')
    curator.log[0] = {field: {} for field in log_fields}
    curator.equalizer(row, extracted_metaval)
    output = (curator.log, row)

    expected_log = {0: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {}, 'editor': {}, 'publisher': {}, 'page': {}, 'volume': {}, 'issue': {}, 'pub_date': {'status': 'New value proposed'}, 'type': {}, 'title': {}}}
    expected_row = {'id': '', 'title': '', 'author': 'Curth, W. [omid:ra/6033]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416 issn:0003-987X]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': 'American Medical Association (ama) [omid:ra/3309 crossref:10]', 'editor': ''}
    expected_output = (expected_log, expected_row)
    self.assertEqual(output, expected_output)

227 

def test_clean_id_metaid_not_in_ts(self):
    """An OMID that is not on the triplestore is invalid: the row gets a wannabe id."""
    curator = prepareCurator(list())
    row = {'id': 'omid:br/131313', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    # Built before clean_id mutates ``row``: only the id is expected to change.
    expected_output = dict(row, id='wannabe_0')
    curator.log[0] = {'id': {}}
    curator.clean_id(row)
    self.assertEqual(row, expected_output)

236 

def test_clean_id(self):
    """clean_id resolves the DOI to '3757' and replaces the title with the stored one."""
    curator = prepareCurator(list())
    row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Blasto', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}
    # Built before clean_id mutates ``row``: only id and title are expected to change.
    expected_output = dict(row, id='3757', title='Multiple Keloids')
    curator.log[0] = {'id': {}}
    curator.finder.get_everything_about_res(metavals=set(), identifiers={'doi:10.1001/archderm.104.1.106'}, vvis=set())
    curator.clean_id(row)
    self.assertEqual(row, expected_output)

245 

def test_merge_duplicate_entities(self):
    """Rows flagged as duplicates of an existing entity all end up with that entity's data."""
    data = [
        {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''},
        {'id': '', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-02', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''},
        {'id': '', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-03', 'venue': 'Archives Of Blast [omid:br/4416]', 'volume': '105', 'issue': '2', 'page': '106-108', 'type': 'journal volume', 'publisher': '', 'editor': ''},
    ]
    curator = prepareCurator(list())
    curator.data = data
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)

    # Gather metavals, identifiers and venue/volume/issue keys over all rows
    all_metavals, all_identifiers, all_vvis = set(), set(), set()
    for row in data:
        metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
        all_metavals |= metavals
        all_identifiers |= identifiers
        all_vvis |= vvis

    curator.finder.get_everything_about_res(metavals=all_metavals, identifiers=all_identifiers, vvis=all_vvis)

    # Run clean_id on each row so that row['id'] holds the actual metaval
    for position, row in enumerate(data):
        curator.log[position] = {'id': {}}
        curator.rowcnt = position
        curator.clean_id(row)

    # Fresh log entries for merge_duplicate_entities
    log_fields = ('id', 'author', 'venue', 'editor', 'publisher', 'page', 'volume', 'issue', 'pub_date', 'type')
    for position in range(3):
        curator.log[position] = {field: {} for field in log_fields}

    # The first row should have resolved to '3757'; the other rows should be wannabes
    first_row_metaval = curator.data[0]['id']
    self.assertEqual(first_row_metaval, '3757')

    # Declare the wannabes as duplicates ("others") of the existing entity
    if first_row_metaval in curator.brdict:
        curator.brdict[first_row_metaval]['others'].extend(['wannabe_0', 'wannabe_1'])

    curator.merge_duplicate_entities()
    output = (curator.data, curator.log)

    merged_row = {'id': '3757', 'title': 'Multiple Keloids', 'author': 'Curth, W. [omid:ra/6033]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [issn:0003-987X omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': 'American Medical Association (ama) [omid:ra/3309 crossref:10]', 'editor': ''}
    expected_data = [dict(merged_row) for _ in range(3)]
    expected_log = {
        0: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {}, 'editor': {}, 'publisher': {}, 'page': {}, 'volume': {}, 'issue': {}, 'pub_date': {}, 'type': {}},
        1: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {'status': 'New value proposed'}, 'editor': {}, 'publisher': {}, 'page': {}, 'volume': {}, 'issue': {}, 'pub_date': {'status': 'New value proposed'}, 'type': {}},
        2: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {'status': 'New value proposed'}, 'editor': {}, 'publisher': {}, 'page': {'status': 'New value proposed'}, 'volume': {'status': 'New value proposed'}, 'issue': {'status': 'New value proposed'}, 'pub_date': {'status': 'New value proposed'}, 'type': {'status': 'New value proposed'}},
    }
    expected_output = (expected_data, expected_log)
    self.assertEqual(output, expected_output)

316 

def test_clean_vvi_all_data_on_ts(self):
    """Venue, volume and issue already on the triplestore are retrieved with their ids."""
    row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator(list())
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)

    metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
    curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)

    curator.log[0] = {'id': {}}
    curator.clean_id(row)
    curator.clean_vvi(row)

    expected_issue = {'1': {'id': '4713'}}
    expected_volume = {'104': {'id': '4712', 'issue': expected_issue}}
    expected_output = {'4416': {'issue': {}, 'volume': expected_volume}}
    self.assertEqual(curator.vvi, expected_output)

346 

def test_clean_vvi_new_venue(self):
    """A venue that is new gets wannabe ids for itself, its volume and its issue."""
    curator = prepareCurator(list())
    row = {'id': 'wannabe_1', 'title': 'Money growth, interest rates, inflation and raw materials prices: China', 'author': '', 'pub_date': '2011-11-28', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '2', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator.clean_vvi(row)
    expected_volume = {'2011': {'id': 'wannabe_1', 'issue': {'2': {'id': 'wannabe_2'}}}}
    expected_output = {'wannabe_0': {'volume': expected_volume, 'issue': {}}}
    self.assertEqual(curator.vvi, expected_output)

354 

def test_clean_vvi_volume_with_title(self):
    """A journal volume that has its own title keeps the title and loses volume/issue."""
    rows = [{'id': '', 'title': 'The volume title', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '2', 'page': '', 'type': 'journal volume', 'publisher': '', 'editor': ''}]
    curator = prepareCurator(rows)
    curator.curator()
    expected_row = {'id': 'omid:br/0601', 'title': 'The Volume Title', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook [omid:br/0602]', 'volume': '', 'issue': '', 'page': '', 'type': 'journal volume', 'publisher': '', 'editor': ''}
    expected_output = [expected_row]
    self.assertEqual(curator.data, expected_output)

362 

def test_clean_vvi_invalid_volume(self):
    """A 'journal volume' row that also specifies an issue is invalid: nothing is recorded."""
    curator = prepareCurator(list())
    row = {'id': 'wannabe_1', 'title': '', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '2', 'page': '', 'type': 'journal volume', 'publisher': '', 'editor': ''}
    curator.clean_vvi(row)
    empty_venue = {'volume': {}, 'issue': {}}
    expected_output = {'wannabe_0': empty_venue}
    self.assertEqual(curator.vvi, expected_output)

370 

def test_clean_vvi_invalid_venue(self):
    """A 'journal' row that also specifies a volume is invalid: nothing is recorded."""
    curator = prepareCurator(list())
    row = {'id': 'wannabe_1', 'title': '', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '', 'page': '', 'type': 'journal', 'publisher': '', 'editor': ''}
    curator.clean_vvi(row)
    empty_venue = {'volume': {}, 'issue': {}}
    expected_output = {'wannabe_0': empty_venue}
    self.assertEqual(curator.vvi, expected_output)

378 

def test_clean_vvi_new_volume_and_issue(self):
    """A row carrying venue, volume and issue but no id of its own."""
    row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': 'Archives Of Surgery [omid:br/4480]', 'volume': '147', 'issue': '11', 'page': '', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator(list())
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)

    metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
    curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)
    curator.clean_id(row)
    curator.clean_vvi(row)

    expected_issue = {'11': {'id': '4482'}}
    expected_volume = {'147': {'id': '4481', 'issue': expected_issue}}
    expected_output = {'4480': {'issue': {}, 'volume': expected_volume}}
    self.assertEqual(curator.vvi, expected_output)

405 

def test_clean_ra_overlapping_surnames(self):
    """Authors are kept distinct even when one surname is contained in another."""
    title = 'Giant Oyster Mushroom Pleurotus giganteus (Agaricomycetes) Enhances Adipocyte Differentiation and Glucose Uptake via Activation of PPARγ and Glucose Transporters 1 and 4 in 3T3-L1 Cells'
    authors = 'Paravamsivam, Puvaneswari; Heng, Chua Kek; Malek, Sri Nurestri Abdul [orcid:0000-0001-6278-8559]; Sabaratnam, Vikineswary; M, Ravishankar Ram; Kuppusamy, Umah Rani'
    row = {'id': 'wannabe_0', 'title': title, 'author': authors, 'pub_date': '2016', 'venue': 'International Journal of Medicinal Mushrooms [issn:1521-9437]', 'volume': '18', 'issue': '9', 'page': '821-831', 'type': 'journal article', 'publisher': 'Begell House [crossref:613]', 'editor': ''}
    curator = prepareCurator(list())
    curator.brdict = {'wannabe_0': {'ids': ['doi:10.1615/intjmedmushrooms.v18.i9.60'], 'title': title, 'others': []}}
    curator.clean_ra(row, 'author')
    output = (curator.ardict, curator.radict, curator.idra)

    expected_ardict = {'wannabe_0': {
        'author': [('0601', 'wannabe_0'), ('0602', 'wannabe_1'), ('0603', 'wannabe_2'), ('0604', 'wannabe_3'), ('0605', 'wannabe_4'), ('0606', 'wannabe_5')],
        'editor': [],
        'publisher': [],
    }}
    expected_radict = {
        'wannabe_0': {'ids': [], 'others': [], 'title': 'Paravamsivam, Puvaneswari'},
        'wannabe_1': {'ids': [], 'others': [], 'title': 'Heng, Chua Kek'},
        'wannabe_2': {'ids': ['orcid:0000-0001-6278-8559'], 'others': [], 'title': 'Malek, Sri Nurestri Abdul'},
        'wannabe_3': {'ids': [], 'others': [], 'title': 'Sabaratnam, Vikineswary'},
        'wannabe_4': {'ids': [], 'others': [], 'title': 'M, Ravishankar Ram'},
        'wannabe_5': {'ids': [], 'others': [], 'title': 'Kuppusamy, Umah Rani'},
    }
    expected_idra = {'orcid:0000-0001-6278-8559': '0601'}
    expected_output = (expected_ardict, expected_radict, expected_idra)
    self.assertEqual(output, expected_output)

419 

def test_clean_ra_with_br_metaid(self):
    """One author is on the triplestore, the other is new and has two ids.

    The bibliographic resource is identified by a MetaID ('3757').
    """
    doi = 'doi:10.1001/archderm.104.1.106'
    row = {'id': doi, 'title': 'Multiple Keloids', 'author': 'Curth, W.; McSorley, J. [orcid:0000-0003-0530-4305 schema:12345]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator(list())
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
    metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
    curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)

    curator.log[0] = {'id': {}}
    curator.clean_id(row)

    resolved_metaval = row['id']
    self.assertEqual(resolved_metaval, '3757')
    curator.brdict = {resolved_metaval: {'ids': [doi], 'title': 'Multiple Keloids', 'others': []}}

    curator.clean_ra(row, 'author')
    output = (curator.ardict, curator.radict, curator.idra)

    expected_ardict = {'3757': {'author': [('9445', '6033'), ('0601', 'wannabe_0')], 'editor': [], 'publisher': []}}
    expected_radict = {
        '6033': {'ids': [], 'others': [], 'title': 'Curth, W.'},
        'wannabe_0': {'ids': ['orcid:0000-0003-0530-4305', 'schema:12345'], 'others': [], 'title': 'McSorley, J.'},
    }
    expected_idra = {'orcid:0000-0003-0530-4305': '0601', 'schema:12345': '0602'}
    expected_output = (expected_ardict, expected_radict, expected_idra)
    self.assertEqual(output, expected_output)

445 

def test_clean_ra_with_br_wannabe(self):
    """Authors are not on the triplestore and the bibliographic resource is a wannabe."""
    row = {'id': 'wannabe_0', 'title': 'Multiple Keloids', 'author': 'Curth, W. [orcid:0000-0002-8420-0696] ; McSorley, J. [orcid:0000-0003-0530-4305]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator(list())
    curator.brdict = {'wannabe_0': {'ids': ['doi:10.1001/archderm.104.1.106'], 'title': 'Multiple Keloids', 'others': []}}
    # 'wannabe_0' is already taken by the row itself, so new wannabes start at 1.
    curator.wnb_cnt = 1
    curator.clean_ra(row, 'author')
    output = (curator.ardict, curator.radict, curator.idra)

    expected_ardict = {'wannabe_0': {'author': [('0601', 'wannabe_1'), ('0602', 'wannabe_2')], 'editor': [], 'publisher': []}}
    expected_radict = {
        'wannabe_1': {'ids': ['orcid:0000-0002-8420-0696'], 'others': [], 'title': 'Curth, W.'},
        'wannabe_2': {'ids': ['orcid:0000-0003-0530-4305'], 'others': [], 'title': 'McSorley, J.'},
    }
    expected_idra = {'orcid:0000-0002-8420-0696': '0601', 'orcid:0000-0003-0530-4305': '0602'}
    expected_output = (expected_ardict, expected_radict, expected_idra)
    self.assertEqual(output, expected_output)

461 

def test_clean_ra_with_empty_square_brackets(self):
    """An author followed by empty square brackets ('[ ]') is stored without ids."""
    doi = 'doi:10.1001/archderm.104.1.106'
    row = {'id': doi, 'title': 'Multiple Keloids', 'author': 'Bernacki, Edward J. [ ]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator(list())
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)

    metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
    curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)

    curator.log[0] = {'id': {}}
    curator.clean_id(row)

    resolved_metaval = row['id']
    self.assertEqual(resolved_metaval, '3757')
    curator.brdict = {resolved_metaval: {'ids': [doi], 'title': 'Multiple Keloids', 'others': []}}

    curator.clean_ra(row, 'author')
    output = (curator.ardict, curator.radict, curator.idra)

    expected_ardict = {'3757': {'author': [('9445', '6033'), ('0601', 'wannabe_0')], 'editor': [], 'publisher': []}}
    expected_radict = {
        '6033': {'ids': [], 'others': [], 'title': 'Curth, W.'},
        'wannabe_0': {'ids': [], 'others': [], 'title': 'Bernacki, Edward J.'},
    }
    expected_output = (expected_ardict, expected_radict, {})
    self.assertEqual(output, expected_output)

486 

def test_meta_maker(self):
    """meta_maker adds OMIDs to every entity and replaces wannabe agents with new MetaIDs."""
    curator = prepareCurator(list())
    curator.brdict = {
        '3757': {'ids': ['doi:10.1001/archderm.104.1.106', 'pmid:29098884'], 'title': 'Multiple Keloids', 'others': []},
        '4416': {'ids': ['issn:0003-987X'], 'title': 'Archives Of Dermatology', 'others': []},
    }
    curator.radict = {
        '6033': {'ids': [], 'others': [], 'title': 'Curth, W.'},
        'wannabe_0': {'ids': ['orcid:0000-0003-0530-4305', 'schema:12345'], 'others': [], 'title': 'Mcsorley, J.'},
    }
    curator.ardict = {'3757': {'author': [('9445', '6033'), ('0601', 'wannabe_0')], 'editor': [], 'publisher': []}}
    volumes = {
        '107': {'id': '4733', 'issue': {'1': {'id': '4734'}, '2': {'id': '4735'}, '3': {'id': '4736'}, '4': {'id': '4737'}, '5': {'id': '4738'}, '6': {'id': '4739'}}},
        '108': {'id': '4740', 'issue': {'1': {'id': '4741'}, '2': {'id': '4742'}, '3': {'id': '4743'}, '4': {'id': '4744'}}},
        '104': {'id': '4712', 'issue': {'1': {'id': '4713'}, '2': {'id': '4714'}, '3': {'id': '4715'}, '4': {'id': '4716'}, '5': {'id': '4717'}, '6': {'id': '4718'}}},
        '148': {'id': '4417', 'issue': {'12': {'id': '4418'}, '11': {'id': '4419'}}},
        '100': {'id': '4684', 'issue': {'1': {'id': '4685'}, '2': {'id': '4686'}, '3': {'id': '4687'}, '4': {'id': '4688'}, '5': {'id': '4689'}, '6': {'id': '4690'}}},
        '101': {'id': '4691', 'issue': {'1': {'id': '4692'}, '2': {'id': '4693'}, '3': {'id': '4694'}, '4': {'id': '4695'}, '5': {'id': '4696'}, '6': {'id': '4697'}}},
        '102': {'id': '4698', 'issue': {'1': {'id': '4699'}, '2': {'id': '4700'}, '3': {'id': '4701'}, '4': {'id': '4702'}, '5': {'id': '4703'}, '6': {'id': '4704'}}},
        '103': {'id': '4705', 'issue': {'1': {'id': '4706'}, '2': {'id': '4707'}, '3': {'id': '4708'}, '4': {'id': '4709'}, '5': {'id': '4710'}, '6': {'id': '4711'}}},
        '105': {'id': '4719', 'issue': {'1': {'id': '4720'}, '2': {'id': '4721'}, '3': {'id': '4722'}, '4': {'id': '4723'}, '5': {'id': '4724'}, '6': {'id': '4725'}}},
        '106': {'id': '4726', 'issue': {'6': {'id': '4732'}, '1': {'id': '4727'}, '2': {'id': '4728'}, '3': {'id': '4729'}, '4': {'id': '4730'}, '5': {'id': '4731'}}},
    }
    curator.vvi = {'4416': {'issue': {}, 'volume': volumes}}
    curator.meta_maker()
    output = (curator.brmeta, curator.rameta, curator.armeta)

    expected_brmeta = {
        '3757': {'ids': ['doi:10.1001/archderm.104.1.106', 'pmid:29098884', 'omid:br/3757'], 'title': 'Multiple Keloids', 'others': []},
        '4416': {'ids': ['issn:0003-987X', 'omid:br/4416'], 'title': 'Archives Of Dermatology', 'others': []},
    }
    expected_rameta = {
        '6033': {'ids': ['omid:ra/6033'], 'others': [], 'title': 'Curth, W.'},
        '0601': {'ids': ['orcid:0000-0003-0530-4305', 'schema:12345', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Mcsorley, J.'},
    }
    expected_armeta = {'3757': {'author': [('9445', '6033'), ('0601', '0601')], 'editor': [], 'publisher': []}}
    expected_output = (expected_brmeta, expected_rameta, expected_armeta)
    self.assertEqual(output, expected_output)

501 

def test_enricher(self):
    """enrich rewrites wannabe references in the data rows using the *meta dictionaries."""
    title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    curator = prepareCurator(list())
    curator.data = [{'id': 'wannabe_0', 'title': title, 'author': '', 'pub_date': '2011-11-28', 'venue': 'wannabe_1', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': 'OECD [crossref:1963]', 'editor': ''}]
    curator.brmeta = {
        '0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'omid:br/0601'], 'others': ['wannabe_0'], 'title': title},
        '0602': {'ids': ['omid:br/0604'], 'others': ['wannabe_1'], 'title': 'OECD Economic Outlook'},
    }
    curator.armeta = {'0601': {'author': [], 'editor': [], 'publisher': [('0601', '0601')]}}
    curator.rameta = {'0601': {'ids': ['crossref:1963', 'omid:ra/0601'], 'others': ['wannabe_2'], 'title': 'Oecd'}}
    curator.remeta = dict()
    curator.meta_maker()
    curator.enrich()
    output = curator.data

    expected_row = {'id': 'doi:10.1787/eco_outlook-v2011-2-graph138-en omid:br/0601', 'title': title, 'author': '', 'pub_date': '2011-11-28', 'venue': 'OECD Economic Outlook [omid:br/0604]', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': 'Oecd [crossref:1963 omid:ra/0601]', 'editor': ''}
    expected_output = [expected_row]
    self.assertEqual(output, expected_output)

517 

def test_indexer(self):
    # Run Curator.indexer() on hand-built curator state and compare the five
    # index files written under OUTPUT_DIR/index with their expected content.
    path_index = f'{OUTPUT_DIR}/index'
    path_csv = OUTPUT_DIR

    def venue_tree():
        # Build a brand-new nested dict on every call, so the curator input
        # and the expected value never share (possibly mutated) objects.
        return {
            'issue': {},
            'volume': {
                '107': {'id': '4733', 'issue': {'1': {'id': '4734'}, '2': {'id': '4735'}, '3': {'id': '4736'}, '4': {'id': '4737'}, '5': {'id': '4738'}, '6': {'id': '4739'}}},
                '108': {'id': '4740', 'issue': {'1': {'id': '4741'}, '2': {'id': '4742'}, '3': {'id': '4743'}, '4': {'id': '4744'}}},
                '104': {'id': '4712', 'issue': {'1': {'id': '4713'}, '2': {'id': '4714'}, '3': {'id': '4715'}, '4': {'id': '4716'}, '5': {'id': '4717'}, '6': {'id': '4718'}}},
                '148': {'id': '4417', 'issue': {'12': {'id': '4418'}, '11': {'id': '4419'}}},
                '100': {'id': '4684', 'issue': {'1': {'id': '4685'}, '2': {'id': '4686'}, '3': {'id': '4687'}, '4': {'id': '4688'}, '5': {'id': '4689'}, '6': {'id': '4690'}}},
                '101': {'id': '4691', 'issue': {'1': {'id': '4692'}, '2': {'id': '4693'}, '3': {'id': '4694'}, '4': {'id': '4695'}, '5': {'id': '4696'}, '6': {'id': '4697'}}},
                '102': {'id': '4698', 'issue': {'1': {'id': '4699'}, '2': {'id': '4700'}, '3': {'id': '4701'}, '4': {'id': '4702'}, '5': {'id': '4703'}, '6': {'id': '4704'}}},
                '103': {'id': '4705', 'issue': {'1': {'id': '4706'}, '2': {'id': '4707'}, '3': {'id': '4708'}, '4': {'id': '4709'}, '5': {'id': '4710'}, '6': {'id': '4711'}}},
                '105': {'id': '4719', 'issue': {'1': {'id': '4720'}, '2': {'id': '4721'}, '3': {'id': '4722'}, '4': {'id': '4723'}, '5': {'id': '4724'}, '6': {'id': '4725'}}},
                '106': {'id': '4726', 'issue': {'6': {'id': '4732'}, '1': {'id': '4727'}, '2': {'id': '4728'}, '3': {'id': '4729'}, '4': {'id': '4730'}, '5': {'id': '4731'}}}
            }
        }

    def read_csv_index(filename):
        # Load one CSV index file as a list of row dictionaries.
        with open(os.path.join(path_index, filename), 'r', encoding='utf-8') as f:
            return list(csv.DictReader(f))

    curator = prepareCurator(list())
    curator.filename = '0.csv'
    curator.idra = {'orcid:0000-0003-0530-4305': '0601', 'schema:12345': '0602'}
    curator.idbr = {'doi:10.1001/2013.jamasurg.270': '2585'}
    curator.armeta = {'2585': {'author': [('9445', '0602'), ('0601', '0601')], 'editor': [], 'publisher': []}}
    curator.remeta = dict()
    curator.brmeta = {
        '0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'omid:br/0601'], 'others': ['wannabe_0'], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
        '0602': {'ids': ['omid:br/0602'], 'others': ['wannabe_1'], 'title': 'OECD Economic Outlook'}
    }
    # The venue tree is keyed by the temporary id 'wannabe_1', which brmeta
    # lists under '0602'; the JSON index is expected to be keyed by '0602'.
    curator.vvi = {'wannabe_1': venue_tree()}
    curator.meta_maker()

    # Register the cleanup before anything is written: the output directory is
    # then removed even when reading an index file or the assertion fails.
    self.addCleanup(shutil.rmtree, OUTPUT_DIR, ignore_errors=True)
    curator.indexer(path_index, path_csv)

    index_ar = read_csv_index('index_ar.csv')
    index_id_br = read_csv_index('index_id_br.csv')
    index_id_ra = read_csv_index('index_id_ra.csv')
    index_re = read_csv_index('index_re.csv')
    with open(os.path.join(path_index, 'index_vi.json'), 'r', encoding='utf-8') as f:
        index_vi = json.load(f)

    expected_index_ar = [{'meta': '2585', 'author': '9445, 0602; 0601, 0601', 'editor': '', 'publisher': ''}]
    expected_index_id_br = [{'id': 'doi:10.1001/2013.jamasurg.270', 'meta': '2585'}]
    expected_index_id_ra = [{'id': 'orcid:0000-0003-0530-4305', 'meta': '0601'}, {'id': 'schema:12345', 'meta': '0602'}]
    # With an empty remeta the expected file content is a single blank row.
    expected_index_re = [{'br': '', 're': ''}]
    expected_index_vi = {'0602': venue_tree()}

    output = (index_ar, index_id_br, index_id_ra, index_re, index_vi)
    expected_output = (expected_index_ar, expected_index_id_br, expected_index_id_ra, expected_index_re, expected_index_vi)
    self.assertEqual(output, expected_output)

569 

def test_is_a_valid_row(self):
    # Check is_a_valid_row() against seven rows; every row is the all-blank
    # template below with only the listed columns filled in.
    blank = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}
    doi = 'doi:10.1001/2013.jamasurg.270'
    title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    author = 'Deckert, Ron J. [orcid:0000-0003-2100-6412]'
    rows = [
        dict(blank),
        {**blank, 'volume': '1', 'type': 'journal volume'},
        {**blank, 'issue': '1', 'type': 'journal issue'},
        {**blank, 'id': doi},
        {**blank, 'title': title, 'author': author},
        {**blank, 'title': title, 'author': author, 'pub_date': '03-01-2020', 'editor': 'book'},
        {**blank, 'id': doi, 'volume': '5'}
    ]
    verdicts = [is_a_valid_row(candidate) for candidate in rows]
    self.assertEqual(verdicts, [False, False, False, True, False, True, False])

585 

def test_get_preexisting_entities(self):
    # Curate a single row that refers to entities by OMID and check both the
    # set collected in curator.preexisting_entities and the rewritten row.
    input_row = {
        'id': 'omid:br/2715',
        'title': 'Image Of The Year For 2012',
        'author': '',
        'pub_date': '',
        'venue': 'Archives Of Surgery [omid:br/4480]',
        'volume': '99',
        'issue': '1',
        'page': '',
        'type': 'journal article',
        'publisher': '',
        'editor': ''
    }
    curator = prepareCurator(data=[input_row])
    curator.curator()

    expected_entities = {'id/4270', 'ra/3309', 'ar/7240', 'br/4481', 'br/2715', 'br/4480', 'id/4274', 'id/2581', 'br/4487', 're/2350'}
    # The expected row carries a different volume/issue/date than the input
    # ('147'/'12' instead of '99'/'1').
    expected_row = {
        'id': 'doi:10.1001/2013.jamasurg.202 omid:br/2715',
        'title': 'Image Of The Year For 2012',
        'author': '',
        'pub_date': '2012-12-01',
        'venue': 'Archives Of Surgery [issn:0004-0010 omid:br/4480]',
        'volume': '147',
        'issue': '12',
        'page': '1140-1140',
        'type': 'journal article',
        'publisher': 'American Medical Association (ama) [crossref:10 omid:ra/3309]',
        'editor': ''
    }
    self.assertEqual((curator.preexisting_entities, curator.data), (expected_entities, [expected_row]))

595 

596 

class test_RespAgentsCurator(unittest.TestCase):
    # Tests for the curator returned by prepareCurator(..., resp_agents_only=True).
    # Each test curates rows that only carry responsible agents and compares
    # the curated rows plus the radict / idra / rameta dictionaries.

    @staticmethod
    def _row(**fields):
        # Return an all-blank CSV row with the given columns filled in.
        # Unknown column names are rejected so a misspelled keyword cannot
        # silently add a new column to the fixture.
        row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}
        unknown = set(fields) - set(row)
        if unknown:
            raise KeyError(f'unknown CSV column(s): {sorted(unknown)}')
        row.update(fields)
        return row

    def test_curator_publishers(self):
        # Three publishers; the expected output resolves the first to the
        # existing ra/3309 and assigns ra/0601 and ra/0602 to the other two.
        reset()
        data = [
            self._row(publisher='American Medical Association (AMA) [crossref:10 crossref:9999]'),
            self._row(publisher='Elsevier BV [crossref:78]'),
            self._row(publisher='Wiley [crossref:311]')
        ]
        resp_agents_curator = prepareCurator(data=data, server=SERVER, resp_agents_only=True)
        resp_agents_curator.curator(filename=None, path_csv=None, path_index=None)
        output = (resp_agents_curator.data, resp_agents_curator.radict, resp_agents_curator.idra, resp_agents_curator.rameta)
        expected_output = (
            [
                self._row(publisher='American Medical Association (ama) [crossref:10 crossref:9999 omid:ra/3309]'),
                self._row(publisher='Elsevier Bv [crossref:78 omid:ra/0601]'),
                self._row(publisher='Wiley [crossref:311 omid:ra/0602]')
            ],
            {
                '3309': {'ids': ['crossref:10', 'crossref:9999', 'omid:ra/3309'], 'others': [], 'title': 'American Medical Association (ama)'},
                'wannabe_0': {'ids': ['crossref:78', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Elsevier Bv'},
                'wannabe_1': {'ids': ['crossref:311', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Wiley'}
            },
            {'crossref:10': '4274', 'crossref:9999': '0601', 'crossref:78': '0602', 'crossref:311': '0603'},
            {
                '3309': {'ids': ['crossref:10', 'crossref:9999', 'omid:ra/3309'], 'others': [], 'title': 'American Medical Association (ama)'},
                '0601': {'ids': ['crossref:78', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Elsevier Bv'},
                '0602': {'ids': ['crossref:311', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Wiley'}
            }
        )
        self.assertEqual(output, expected_output)

    def test_curator(self):
        # Three authors identified by ORCID; the expected output gives each of
        # them a new OMID (ra/0601, ra/0602, ra/0603).
        reset()
        data = [
            self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412]'),
            self._row(author='Ruso, Juan M. [orcid:0000-0001-5909-6754]'),
            self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894]')
        ]
        resp_agents_curator = prepareCurator(data=data, server=SERVER, resp_agents_only=True)
        resp_agents_curator.curator(filename='resp_agents_curator_output', path_csv='test/testcases/testcase_data', path_index='test/testcases/testcase_data/indices')
        output = (resp_agents_curator.data, resp_agents_curator.radict, resp_agents_curator.idra, resp_agents_curator.rameta)
        expected_output = (
            [
                self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412 omid:ra/0601]'),
                self._row(author='Ruso, Juan M. [orcid:0000-0001-5909-6754 omid:ra/0602]'),
                self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894 omid:ra/0603]')
            ],
            {
                'wannabe_0': {'ids': ['orcid:0000-0003-2100-6412', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Deckert, Ron J.'},
                'wannabe_1': {'ids': ['orcid:0000-0001-5909-6754', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Ruso, Juan M.'},
                'wannabe_2': {'ids': ['orcid:0000-0002-4487-6894', 'omid:ra/0603'], 'others': ['wannabe_2'], 'title': 'Sarmiento, Félix'}
            },
            {'orcid:0000-0003-2100-6412': '0601', 'orcid:0000-0001-5909-6754': '0602', 'orcid:0000-0002-4487-6894': '0603'},
            {
                '0601': {'ids': ['orcid:0000-0003-2100-6412', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Deckert, Ron J.'},
                '0602': {'ids': ['orcid:0000-0001-5909-6754', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Ruso, Juan M.'},
                '0603': {'ids': ['orcid:0000-0002-4487-6894', 'omid:ra/0603'], 'others': ['wannabe_2'], 'title': 'Sarmiento, Félix'}
            }
        )
        self.assertEqual(output, expected_output)

    def test_curator_ra_on_ts(self):
        # A responsible agent is already on the triplestore: the expected
        # output keeps ra/3976 for 'Mehrotra, Ateev' and gives the other two
        # authors the new OMIDs ra/0601 and ra/0602.
        #
        # The expected OMIDs start at 0601, so the counters must be empty.
        # NOTE(review): prepareCurator may already reset them; the explicit
        # call mirrors the sibling tests in this class.
        reset()
        add_data_ts(server=SERVER, data_path=os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data.nt')).replace('\\', '/'))
        self.maxDiff = None
        data = [
            self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412]'),
            self._row(author='Mehrotra, Ateev [orcid:0000-0003-2223-1582]'),
            self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894]')
        ]
        resp_agents_curator = prepareCurator(data=data, server=SERVER, resp_agents_only=True)
        resp_agents_curator.curator()
        output = (resp_agents_curator.data, resp_agents_curator.radict, resp_agents_curator.idra, resp_agents_curator.rameta)
        expected_output = (
            [
                self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412 omid:ra/0601]'),
                self._row(author='Mehrotra, Ateev [orcid:0000-0003-2223-1582 omid:ra/3976]'),
                self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894 omid:ra/0602]')
            ],
            {
                'wannabe_0': {'ids': ['orcid:0000-0003-2100-6412', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Deckert, Ron J.'},
                '3976': {'ids': ['orcid:0000-0003-2223-1582', 'omid:ra/3976'], 'others': [], 'title': 'Mehrotra, Ateev'},
                'wannabe_1': {'ids': ['orcid:0000-0002-4487-6894', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Sarmiento, Félix'}
            },
            {'orcid:0000-0003-2100-6412': '0601', 'orcid:0000-0003-2223-1582': '4351', 'orcid:0000-0002-4487-6894': '0602'},
            {
                '0601': {'ids': ['orcid:0000-0003-2100-6412', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Deckert, Ron J.'},
                '3976': {'ids': ['orcid:0000-0003-2223-1582', 'omid:ra/3976'], 'others': [], 'title': 'Mehrotra, Ateev'},
                '0602': {'ids': ['orcid:0000-0002-4487-6894', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Sarmiento, Félix'}
            }
        )
        self.assertEqual(output, expected_output)

679 

680 

class test_id_worker(unittest.TestCase):
    # Tests for Curator.id_worker() and Curator.conflict().
    # setUpClass loads the real-data fixture into the triplestore once and
    # builds a shared ResourceFinder that most tests plug into their curator.

    @classmethod
    def setUpClass(cls):
        add_data_ts(SERVER, os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data.nt')).replace('\\', '/'))
        cls.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
        # Pre-fetch everything the tests below look up.
        cls.finder.get_everything_about_res(
            metavals={'omid:br/3309', 'omid:br/2438', 'omid:br/0601'},
            identifiers={'doi:10.1001/2013.jamasurg.270', 'doi:10.1787/eco_outlook-v2011-2-graph138-en', 'orcid:0000-0001-6994-8412', 'doi:10.1001/archderm.104.1.106', 'pmid:29098884'},
            vvis=set())

    def _assert_resolved_to_ama(self, meta_id, curator):
        # Shared assertion for the tests that must end up on ra/3309 with the
        # lower-cased '(ama)' title and a single 'crossref:10' identifier.
        output = (meta_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = ('3309', {}, {'3309': {'ids': ['crossref:10'], 'others': [], 'title': 'American Medical Association (ama)'}}, {}, {'crossref:10': '4274'}, {})
        self.assertEqual(output, expected_output)

    def test_id_worker_1(self):
        # Case 1: EntityA is a new one
        curator = prepareCurator(list())
        name = 'βέβαιος, α, ον'
        idslist = ['doi:10.1163/2214-8655_lgo_lgo_02_0074_ger']
        wannabe_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = (
            'wannabe_0',
            {'wannabe_0': {'ids': ['doi:10.1163/2214-8655_lgo_lgo_02_0074_ger'], 'others': [], 'title': 'βέβαιος, α, ον'}},
            {},
            {'doi:10.1163/2214-8655_lgo_lgo_02_0074_ger': '0601'},
            {},
            {}
        )
        self.assertEqual(output, expected_output)

    def test_id_worker_1_no_id(self):
        # Case 1: EntityA is a new one and has no ids
        curator = prepareCurator(list())
        name = 'βέβαιος, α, ον'
        idslist = []
        wannabe_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = (
            'wannabe_0',
            {'wannabe_0': {'ids': [], 'others': [], 'title': 'βέβαιος, α, ον'}},
            {},
            {},
            {},
            {}
        )
        self.assertEqual(output, expected_output)

    def test_id_worker_2_id_ts(self):
        # Case 2: retrieve EntityA data in triplestore to update EntityA inside CSV.
        # Identifier only.
        curator = prepareCurator(list())
        curator.finder = self.finder
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        idslist = ['crossref:10']
        wannabe_id = curator.id_worker('editor', name, idslist, '', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        self._assert_resolved_to_ama(wannabe_id, curator)

    def test_id_worker_2_metaid_ts(self):
        # Case 2: retrieve EntityA data in triplestore to update EntityA inside CSV.
        # MetaID only.
        curator = prepareCurator(list())
        curator.finder = self.finder
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        wannabe_id = curator.id_worker('editor', name, [], '3309', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        self._assert_resolved_to_ama(wannabe_id, curator)

    def test_id_worker_2_id_metaid_ts(self):
        # Case 2: retrieve EntityA data in triplestore to update EntityA inside CSV.
        # ID and MetaID.
        curator = prepareCurator(list())
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        curator.finder = self.finder
        wannabe_id = curator.id_worker('publisher', name, ['crossref:10'], '3309', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        self._assert_resolved_to_ama(wannabe_id, curator)

    def test_id_worker_3(self):
        # Case 3: retrieve EntityA data in triplestore to update EntityA inside CSV.
        # The row supplies ID and MetaID '33090', but the entity is
        # omid:ra/3309 on ts: the MetaID on ts has precedence.
        curator = prepareCurator(list())
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        curator.finder = self.finder
        wannabe_id = curator.id_worker('publisher', name, ['crossref:10'], '33090', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        self._assert_resolved_to_ama(wannabe_id, curator)

    def test_id_worker_conflict(self):
        # There's no meta or there was one but it didn't exist.
        # There are other ids that already exist, but refer to multiple entities on ts.
        # Conflict!
        idslist = ['doi:10.1001/2013.jamasurg.270']
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
        curator = prepareCurator(list())
        curator.finder = self.finder
        curator.log[0] = {'id': {}}
        id_dict = dict()
        metaval = curator.conflict(idslist, name, id_dict, 'id')  # Only the conflict function is tested here, not id_worker
        output = (metaval, curator.brdict, curator.log, id_dict)
        expected_output = (
            'wannabe_0',
            {'wannabe_0': {'ids': ['doi:10.1001/2013.jamasurg.270'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'}},
            {0: {'id': {'Conflict entity': 'wannabe_0'}}},
            {'doi:10.1001/2013.jamasurg.270': '2585'}
        )
        self.assertEqual(output, expected_output)

    def test_conflict_br(self):
        # No MetaId, an identifier to which two separate br point.
        # Either of the two entities ('2719' or '2720') is an accepted result.
        curator = prepareCurator(list())
        curator.log[0] = {'id': {}}
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
        idslist = ['doi:10.1001/2013.jamasurg.270']
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.log)
        expected_output_1 = (
            '2719',
            {'doi:10.1001/2013.jamasurg.270': '2585'},
            {},
            {'2719': {'ids': ['doi:10.1001/2013.jamasurg.270'], 'others': [], 'title': 'Patient Satisfaction As A Possible Indicator Of Quality Surgical Care'}},
            {0: {'id': {}}}
        )
        expected_output_2 = (
            '2720',
            {'doi:10.1001/2013.jamasurg.270': '2585'},
            {},
            {'2720': {'ids': ['doi:10.1001/2013.jamasurg.270'], 'others': [], 'title': 'Pediatric Injury Outcomes In Racial/Ethnic Minorities In California'}},
            {0: {'id': {}}}
        )
        # assertIn reports the actual value on failure, unlike assertTrue(a or b).
        self.assertIn(output, (expected_output_1, expected_output_2))

    def test_conflict_ra(self):
        # No MetaId, an identifier to which two separate ra point.
        # Either of the two entities ('4940' or '1000000') is an accepted result.
        idslist = ['orcid:0000-0001-6994-8412']
        name = 'Alarcon, Louis H.'
        curator = prepareCurator(list())
        curator.finder = self.finder
        curator.log[0] = {'author': {}}
        meta_id = curator.id_worker('author', name, idslist, '', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)
        expected_output_1 = (
            '4940',
            {},
            {'orcid:0000-0001-6994-8412': '4475'},
            {},
            {'4940': {'ids': ['orcid:0000-0001-6994-8412'], 'others': [], 'title': 'Alarcon, Louis H.'}},
            {0: {'author': {}}}
        )
        expected_output_2 = (
            '1000000',
            {},
            {'orcid:0000-0001-6994-8412': '4475'},
            {},
            {'1000000': {'ids': ['orcid:0000-0001-6994-8412'], 'others': [], 'title': 'Alarcon, Louis H.'}},
            {0: {'author': {}}}
        )
        # assertIn reports the actual value on failure, unlike assertTrue(a or b).
        self.assertIn(output, (expected_output_1, expected_output_2))

    def test_conflict_suspect_id_among_existing(self):
        # ID already exist in entity_dict and refer to one entity having a MetaID, but there is another ID not in entity_dict that highlights a conflict on ts.
        # Expected: the three existing entries stay untouched and a new
        # 'wannabe_0' conflict entity is created with both ids and the title
        # given in the row (Japan).
        br_dict = {
            'omid:br/0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
            'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
            'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
        }
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'
        idslist = ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'doi:10.1001/2013.jamasurg.270']
        curator = prepareCurator(get_csv_data(REAL_DATA_CSV))
        curator.log[0] = {'id': {}}
        curator.brdict = br_dict
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)
        expected_output = (
            'wannabe_0',
            {
                'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601',
                'doi:10.1001/2013.jamasurg.270': '2585'
            },
            {},
            {
                'omid:br/0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
                'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
                'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
                'wannabe_0': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'doi:10.1001/2013.jamasurg.270'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'}
            },
            {},
            {0: {'id': {'Conflict entity': 'wannabe_0'}}}
        )
        self.assertEqual(output, expected_output)

    def test_conflict_suspect_id_among_wannabe(self):
        # ID already exist in entity_dict and refer to one temporary, but there is another ID not in entity_dict that highlights a conflict on ts.
        # Expected: 'wannabe_0' is replaced by entity '2720' or '2719', which
        # keeps 'wannabe_0' in 'others' and carries neither CSV title (China /
        # Japan) - presumably the title stored on the triplestore.
        br_dict = {
            'wannabe_0': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
            'wannabe_2': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
            'wannabe_3': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
        }
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'
        idslist = ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'doi:10.1001/2013.jamasurg.270']
        curator = prepareCurator(get_csv_data(REAL_DATA_CSV))
        curator.log[0] = {'id': {}}
        curator.brdict = br_dict
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)
        expected_output_1 = (
            '2720',
            {
                'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601',
                'doi:10.1001/2013.jamasurg.270': '2585'
            },
            {},
            {
                '2720': {'ids': ['doi:10.1001/2013.jamasurg.270', 'doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': ['wannabe_0'], 'title': 'Pediatric Injury Outcomes In Racial/Ethnic Minorities In California'},
                'wannabe_2': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
                'wannabe_3': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'}
            },
            {},
            {0: {'id': {}}}
        )
        expected_output_2 = (
            '2719',
            {
                'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601',
                'doi:10.1001/2013.jamasurg.270': '2585'
            },
            {},
            {
                '2719': {'ids': ['doi:10.1001/2013.jamasurg.270', 'doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': ['wannabe_0'], 'title': 'Patient Satisfaction As A Possible Indicator Of Quality Surgical Care'},
                'wannabe_2': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
                'wannabe_3': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'}
            },
            {},
            {0: {'id': {}}}
        )
        # assertIn reports the actual value on failure, unlike assertTrue(a or b).
        self.assertIn(output, (expected_output_1, expected_output_2))

    def test_id_worker_4(self):
        # Case 4: merge data from EntityA (CSV) with data from EntityX (CSV), update both with data from EntityA (RDF)
        br_dict = {
            'wannabe_0': {'ids': ['doi:10.1001/archderm.104.1.106'], 'others': [], 'title': 'Multiple eloids'},
            'wannabe_1': {'ids': ['doi:10.1001/archderm.104.1.106'], 'others': [], 'title': 'Multiple Blastoids'},
        }
        name = 'Multiple Palloids'
        idslist = ['doi:10.1001/archderm.104.1.106', 'pmid:29098884']
        curator = prepareCurator(list())
        curator.brdict = br_dict
        # Two temporary entities already exist in brdict, so the wannabe
        # counter is advanced past them.
        curator.wnb_cnt = 2
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.log)
        expected_output = (
            '3757',
            {'doi:10.1001/archderm.104.1.106': '3624', 'pmid:29098884': '2000000'},
            {},
            {}
        )
        self.assertEqual(output, expected_output)

966 

967class test_id_worker_with_reset(unittest.TestCase): 

def test_id_worker_2_meta_in_entity_dict(self):
    # MetaID exists among data and is already in entity_dict (no care about
    # conflicts, a MetaID is specified): calling id_worker a second time with
    # the same MetaID must give the same entity.
    reset_server()

    # Curate the real-data CSV and store the result on the triplestore.
    seeded_curator = prepareCurator(get_csv_data(REAL_DATA_CSV))
    seeded_curator.curator()
    store_curated_data(seeded_curator, SERVER)

    title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    doi = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'
    curator_empty = prepareCurator([])
    # The finder fills the empty curator's local graph with what the
    # triplestore knows about the DOI.
    finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI, local_g=curator_empty.everything_everywhere_allatonce)
    finder.get_everything_about_res(metavals=set(), identifiers={doi}, vvis=set())

    # First call: puts the metaval in entity_dict (return value not needed).
    curator_empty.id_worker('id', title, [], '0601', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
    # Second call: the metaval is now in entity_dict.
    meta_id = curator_empty.id_worker('id', title, [], '0601', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)

    observed = (meta_id, curator_empty.brdict, curator_empty.radict, curator_empty.idbr, curator_empty.idra, curator_empty.log)
    expected = (
        '0601',
        {'0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China', 'others': []}},
        {},
        {'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601'},
        {},
        {}
    )
    self.assertEqual(observed, expected)

988 

989 def test_conflict_existing(self): 

990 # ID already exist in entity_dict but refer to multiple entities having a MetaID 

991 reset_server() 

992 br_dict = { 

993 'omid:br/0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'}, 

994 'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'}, 

995 'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'}, 

996 } 

997 name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China' 

998 idslist = ['doi:10.1787/eco_outlook-v2011-2-graph138-en'] 

999 curator = prepareCurator(list()) 

1000 curator.log[0] = {'id': {}} 

1001 curator.brdict = br_dict 

1002 meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False) 

1003 output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log) 

1004 expected_output = ( 

1005 'wannabe_0', 

1006 {'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601'}, 

1007 {}, 

1008 {'omid:br/0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 

1009 'others': [], 

1010 'title': 'Money Growth, Interest Rates, Inflation And Raw ' 

1011 'Materials Prices: China'}, 

1012 'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 

1013 'others': [], 

1014 'title': 'Contributions To GDP Growth And Inflation: South ' 

1015 'Africa'}, 

1016 'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 

1017 'others': [], 

1018 'title': 'Official Loans To The Governments Of Greece, ' 

1019 'Ireland And Portugal'}, 

1020 'wannabe_0': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 

1021 'others': [], 

1022 'title': 'Money Growth, Interest Rates, Inflation And Raw ' 

1023 'Materials Prices: China'}}, 

1024 {}, 

1025 {0: {'id': {'Conflict entity': 'wannabe_0'}}} 

1026 ) 

1027 self.assertEqual(output, expected_output) 

1028 

1029 def test_id_worker_5(self): 

1030 # ID already exist in entity_dict and refer to one or more temporary entities -> collective merge 

1031 reset_server() 

1032 br_dict = { 

1033 'wannabe_0': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'}, 

1034 'wannabe_1': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'}, 

1035 'wannabe_2': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'}, 

1036 } 

1037 name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China' 

1038 idslist = ['doi:10.1787/eco_outlook-v2011-2-graph138-en'] 

1039 curator = prepareCurator(list()) 

1040 curator.brdict = br_dict 

1041 curator.wnb_cnt = 2 

1042 meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False) 

1043 output = (meta_id, curator.idbr, curator.idra, curator.log) 

1044 expected_output = ( 

1045 'wannabe_0', 

1046 {'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601'}, 

1047 {}, 

1048 {} 

1049 ) 

1050 self.assertEqual(output, expected_output) 

1051 

1052 def test_no_conflict_existing(self): 

1053 # ID already exist in entity_dict and refer to one entity 

1054 reset_server() 

1055 br_dict = { 

1056 'omid:br/0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'}, 

1057 'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'}, 

1058 'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'}, 

1059 } 

1060 name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan' # The first title must have precedence (China, not Japan) 

1061 idslist = ['doi:10.1787/eco_outlook-v2011-2-graph138-en'] 

1062 curator = prepareCurator(list()) 

1063 curator.log[0] = {'id': {}} 

1064 curator.brdict = br_dict 

1065 meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False) 

1066 output = (meta_id, curator.idbr, curator.idra, curator.log) 

1067 expected_output = ( 

1068 'omid:br/0601', 

1069 {'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601'}, 

1070 {}, 

1071 {0: {'id': {}}} 

1072 ) 

1073 self.assertEqual(output, expected_output) 

1074 

1075 def test_metaid_in_prov(self): 

1076 # MetaID not found in data, but found in the provenance metadata. 

1077 reset_server() 

1078 add_data_ts(server=SERVER, data_path=os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data_with_prov.nq')).replace('\\', '/')) 

1079 name = '' 

1080 curator = prepareCurator(list()) 

1081 meta_id = curator.id_worker('id', name, [], '4321', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=False) 

1082 self.assertEqual(meta_id, '38013') 

1083 

1084 

class testcase_01(unittest.TestCase):
    def test(self):
        # testcase1: 2 different issues of the same venue (no volume)
        name = '01'
        data = get_csv_data(MANUAL_DATA_CSV)
        partial_data = [data[0], data[5]]
        data_curated, testcase = prepare_to_test(partial_data, name)
        # Compare sizes first: iterating over data_curated alone would pass
        # vacuously if the curator returned fewer rows than expected (or none).
        self.assertEqual(len(data_curated), len(testcase))
        # Row-by-row comparison keeps failure output focused on the first bad row.
        for pos, element in enumerate(data_curated):
            self.assertEqual(element, testcase[pos])

1096 

1097 

class testcase_02(unittest.TestCase):
    def test(self):
        # testcase2: 2 different volumes of the same venue (no issue)
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Rows 1 and 3 of the manual CSV are curated together.
        curated, expected = prepare_to_test([rows[1], rows[3]], '02')
        self.assertEqual(curated, expected)

1108 

1109 

class testcase_03(unittest.TestCase):
    def test(self):
        # testcase3: 2 different issues of the same volume
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Rows 2 and 4 of the manual CSV are curated together.
        curated, expected = prepare_to_test([rows[2], rows[4]], '03')
        self.assertEqual(curated, expected)

1120 

1121 

class testcase_04(unittest.TestCase):
    def test(self):
        # testcase4: 2 new IDS and different date format (yyyy-mm and yyyy-mm-dd)
        name = '04'
        data = get_csv_data(MANUAL_DATA_CSV)
        partial_data = [data[6], data[7]]
        data_curated, testcase = prepare_to_test(partial_data, name)
        # Compare sizes first: iterating over data_curated alone would pass
        # vacuously if the curator returned fewer rows than expected (or none).
        self.assertEqual(len(data_curated), len(testcase))
        # Row-by-row comparison keeps failure output focused on the first bad row.
        for pos, element in enumerate(data_curated):
            self.assertEqual(element, testcase[pos])

1133 

1134 

class testcase_05(unittest.TestCase):
    def test(self):
        # testcase5: NO ID scenario
        rows = get_csv_data(MANUAL_DATA_CSV)
        # A single row (index 8) is curated on its own.
        curated, expected = prepare_to_test([rows[8]], '05')
        self.assertEqual(curated, expected)

1144 

1145 

class testcase_06(unittest.TestCase):
    def test(self):
        # testcase6: ALL types test
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[9:33], '06')
        self.assertEqual(curated, expected)

1154 

1155 

class testcase_07(unittest.TestCase):
    def test(self):
        # testcase7: all journal related types with an editor
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[34:40], '07')
        self.assertEqual(curated, expected)

1164 

1165 

class testcase_08(unittest.TestCase):
    def test(self):
        # testcase8: all book related types with an editor
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[40:43], '08')
        self.assertEqual(curated, expected)

1174 

1175 

class testcase_09(unittest.TestCase):
    def test(self):
        # testcase09: all proceeding related types with an editor
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[43:45], '09')
        self.assertEqual(curated, expected)

1184 

1185 

class testcase_10(unittest.TestCase):
    def test(self):
        # testcase10: a book inside a book series and a book inside a book set
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[45:49], '10')
        self.assertEqual(curated, expected)

1194 

1195 

class testcase_11(unittest.TestCase):
    def test(self):
        # testcase11: real time entity update
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[49:52], '11')
        self.assertEqual(curated, expected)

1204 

1205 

class testcase_12(unittest.TestCase):
    def test(self):
        # testcase12: clean name, title, ids
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[52:53], '12')
        self.assertEqual(curated, expected)

1214 

1215 

class testcase_13(unittest.TestCase):
    # testcase13: ID_clean massive test

    def test1(self):
        # 1--- meta specified br in a row, wannabe with a new id in a row, meta specified with an id related to wannabe
        # in a row
        name = '13.1'
        data = get_csv_data(MANUAL_DATA_CSV)
        partial_data = data[53:56]
        data_curated, testcase = prepare_to_test(partial_data, name)
        self.assertEqual(data_curated, testcase)

    def test2(self):
        # 2---Conflict with META precedence: a br has a meta_id and an id related to another meta_id, the first
        # specified meta has precedence
        data = get_csv_data(MANUAL_DATA_CSV)
        name = '13.2'
        partial_data = data[56:57]
        data_curated, testcase = prepare_to_test(partial_data, name)
        self.assertEqual(data_curated, testcase)

    def test3(self):
        # 3--- conflict: br with id shared with 2 meta
        data = get_csv_data(MANUAL_DATA_CSV)
        name_1 = '13.3'
        name_2 = '13.31'
        partial_data = data[57:58]
        data_curated, testcase_1 = prepare_to_test(partial_data, name_1)
        # The second call is only used to obtain the alternative expected result.
        _, testcase_2 = prepare_to_test(partial_data, name_2)
        # Either expected result is accepted. assertIn performs the same
        # equality checks as the former `a == b or a == c`, but on failure it
        # reports the curated data and both candidates instead of
        # "False is not true".
        self.assertIn(data_curated, (testcase_1, testcase_2))

1246 

1247 

class testcase_14(unittest.TestCase):

    def _assert_curated(self, start, stop, name):
        # Curate rows [start:stop] of the manual CSV and compare the curated
        # data with the expected data returned by prepare_to_test for `name`.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[start:stop], name)
        self.assertEqual(curated, expected)

    def test1(self):
        # update existing sequence, in particular, a new author and an existing author without an existing id (matched
        # thanks to surname,name(BAD WRITTEN!)
        self._assert_curated(58, 59, '14.1')

    def test2(self):
        # same sequence different order, with new ids
        self._assert_curated(59, 60, '14.2')

    def test3(self):
        # RA
        # Author with two different ids
        self._assert_curated(60, 61, '14.3')

    def test4(self):
        # meta specified ra in a row, wannabe ra with a new id in a row, meta specified with an id related to wannabe
        # in a ra
        self._assert_curated(61, 64, '14.4')

1284 

1285 

class testcase_15(unittest.TestCase):

    def _assert_curated(self, start, stop, name):
        # Curate rows [start:stop] of the manual CSV and compare the curated
        # data with the expected data returned by prepare_to_test for `name`.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[start:stop], name)
        self.assertEqual(curated, expected)

    def test1(self):
        # venue volume issue already exists in ts
        self._assert_curated(64, 65, '15.1')

    def test2(self):
        # venue conflict
        self._assert_curated(65, 66, '15.2')

    def test3(self):
        # venue in ts is now the br
        self._assert_curated(66, 67, '15.3')

    def test4(self):
        # br in ts is now the venue
        self._assert_curated(67, 68, '15.4')

    def test5(self):
        # volume in ts is now the br
        self._assert_curated(71, 72, '15.5')

    def test6(self):
        # br is a volume
        self._assert_curated(72, 73, '15.6')

    def test7(self):
        # issue in ts is now the br
        self._assert_curated(73, 74, '15.7')

    def test8(self):
        # br is a issue
        self._assert_curated(74, 75, '15.8')

1352 

1353 

class testcase_16(unittest.TestCase):

    def _assert_curated(self, start, stop, name):
        # Curate rows [start:stop] of the manual CSV and compare the curated
        # data with the expected data returned by prepare_to_test for `name`.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[start:stop], name)
        self.assertEqual(curated, expected)

    def test1(self):
        # Date cleaning 2019-02-29
        # wrong date (2019/02/29)
        self._assert_curated(75, 76, '16.1')

    def test2(self):
        # existing re
        self._assert_curated(76, 77, '16.2')

    def test3(self):
        # given name for an RA with only a family name in TS
        self._assert_curated(77, 78, '16.3')

1381 

1382 

# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__': # pragma: no cover
    unittest.main()