Coverage for test/curator_test.py: 99%
784 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
1import csv
2import shutil
3import unittest
5from oc_ocdm import Storer
6from SPARQLWrapper import POST, SPARQLWrapper
7from rdflib import Graph, ConjunctiveGraph
8import redis
9from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler
11from oc_meta.core.creator import Creator
12from oc_meta.core.curator import *
13from oc_meta.lib.file_manager import get_csv_data
14from oc_meta.lib.finder import ResourceFinder
15from oc_meta.plugins.multiprocess.resp_agents_curator import RespAgentsCurator
# SPARQL endpoint and on-disk fixtures shared by every test in this module.
SERVER = 'http://127.0.0.1:8805/sparql'
BASE_DIR = 'test'
MANUAL_DATA_CSV = BASE_DIR + '/manual_data.csv'
MANUAL_DATA_RDF = BASE_DIR + '/testcases/ts/testcase_ts-13.ttl'
# Built with os.path.join, so the separator follows the host platform.
REAL_DATA_CSV = os.path.join(BASE_DIR, 'real_data.csv')
REAL_DATA_RDF = BASE_DIR + '/testcases/ts/real_data.nt'
REAL_DATA_RDF_WITH_PROV = BASE_DIR + '/testcases/ts/real_data_with_prov.nq'
BASE_IRI = 'https://w3id.org/oc/meta/'
CURATOR_COUNTER_DIR = BASE_DIR + '/curator_counter'
OUTPUT_DIR = BASE_DIR + '/output'
PROV_CONFIG = BASE_DIR + '/prov_config.json'

# Redis instance used by the counter handler during the tests.
REDIS_HOST = 'localhost'
REDIS_PORT = 6379
REDIS_DB = 5
def get_path(path:str) -> str:
    """Return ``path`` with every backslash replaced by a forward slash.

    The path is not made absolute and is not checked for existence.
    """
    return '/'.join(path.split('\\'))
def reset_redis_counters():
    """Flush the Redis database identified by REDIS_HOST, REDIS_PORT and REDIS_DB."""
    redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB).flushdb()
def get_counter_handler():
    """Return a RedisCounterHandler connected to the test Redis database."""
    connection = {'host': REDIS_HOST, 'port': REDIS_PORT, 'db': REDIS_DB}
    return RedisCounterHandler(**connection)
def reset():
    """Reset the shared test state; at present this only clears the Redis counters."""
    reset_redis_counters()
def reset_server(server:str=SERVER) -> None:
    """Issue a ``CLEAR GRAPH`` update for each graph used by the tests.

    :param server: URL of the SPARQL endpoint to clean.
    """
    graphs_to_clear = {
        'https://w3id.org/oc/meta/br/',
        'https://w3id.org/oc/meta/ra/',
        'https://w3id.org/oc/meta/re/',
        'https://w3id.org/oc/meta/id/',
        'https://w3id.org/oc/meta/ar/',
        'http://default.graph/',
    }
    endpoint = SPARQLWrapper(server)
    for graph_iri in graphs_to_clear:
        endpoint.setQuery('CLEAR GRAPH <' + graph_iri + '>')
        endpoint.setMethod(POST)
        endpoint.query()
def add_data_ts(server:str=SERVER, data_path:str=os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data.nt')).replace('\\', '/'), batch_size:int=100, default_graph_uri=URIRef("http://default.graph/")):
    """Reset the triplestore and load an RDF file into it with batched INSERT DATA updates.

    :param server: URL of the SPARQL endpoint to reset and populate.
    :param data_path: path of the RDF file; the extension selects the parser
        (``.nt``, ``.nq`` or ``.ttl``).
    :param batch_size: maximum number of statements sent in one update.
    :param default_graph_uri: graph that receives the statements of ``.nt`` and
        ``.ttl`` files; a falsy value inserts them without a GRAPH clause.
    :raises ValueError: if ``batch_size`` is smaller than 1 or the file
        extension is not supported. Both checks run before the triplestore is
        reset, so invalid arguments never wipe existing data.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    f_path = get_path(data_path)

    # Determine the file format from its extension.
    rdf_formats = {'.nt': 'nt', '.nq': 'nquads', '.ttl': 'turtle'}
    file_extension = os.path.splitext(f_path)[1].lower()
    if file_extension not in rdf_formats:
        raise ValueError(f"Unsupported file extension: {file_extension}")

    reset_server(server)

    if file_extension == '.nq':
        # N-Quads carry their own context, so each statement keeps its graph.
        g = ConjunctiveGraph()
        g.parse(location=f_path, format=rdf_formats[file_extension])
        quads = list(g.quads((None, None, None, None)))
    else:
        g = Graph()
        g.parse(location=f_path, format=rdf_formats[file_extension])
        quads = [(subj, pred, obj, default_graph_uri) for subj, pred, obj in g]

    # One wrapper is enough: only the query text changes between batches.
    ts = SPARQLWrapper(server)
    for start in range(0, len(quads), batch_size):
        fragments = [_quad_to_insert_fragment(*quad) for quad in quads[start:start + batch_size]]
        ts.setQuery(f"INSERT DATA {{ {''.join(fragments)} }}")
        ts.setMethod(POST)
        ts.query()


def _quad_to_insert_fragment(subj, pred, obj, ctx) -> str:
    """Serialize one statement as the fragment placed inside ``INSERT DATA { ... }``."""
    if ctx:
        # The square brackets are stripped from the n3() form of the context
        # so that only the graph IRI is left in the GRAPH clause.
        graph_iri = ctx.n3().replace('[', '').replace(']', '')
        return f"GRAPH {graph_iri} {{ {subj.n3()} {pred.n3()} {obj.n3()} }} "
    return f"{subj.n3()} {pred.n3()} {obj.n3()} . "
def store_curated_data(curator_obj:Curator, server:str) -> None:
    """Turn the output of a curator into RDF and upload it to ``server``.

    The same ``server`` is now handed to both the Creator and the Storer upload;
    previously the Creator always received the module-level SERVER, so the
    parameter was partly ignored.
    NOTE(review): assumes no caller relied on the Creator using SERVER while
    uploading elsewhere -- confirm against callers.

    :param curator_obj: curator whose ``data`` and index attributes are read.
    :param server: URL of the SPARQL endpoint to use.
    """
    creator_obj = Creator(
        curator_obj.data, server, BASE_IRI, None, None, 'https://orcid.org/0000-0002-8420-0696',
        curator_obj.index_id_ra, curator_obj.index_id_br, curator_obj.re_index,
        curator_obj.ar_index, curator_obj.VolIss,
        preexisting_entities=set(), everything_everywhere_allatonce=Graph())
    creator = creator_obj.creator(source=None)
    res_storer = Storer(creator)
    res_storer.upload_all(server, base_dir=None, batch_size=100)
def prepare_to_test(data, name):
    """Curate ``data`` and load the expected fixtures of testcase ``name``.

    The Redis counters and the triplestore are reset first; for testcases
    numbered above 12 the ``testcase_ts-13.ttl`` fixture is loaded as well.

    :param data: rows passed to the Curator.
    :param name: testcase identifier used to build the fixture file names; it
        must be convertible with ``float``.
    :return: ``(data_curated, testcase)``, two lists ordered as
        data, index_id_br, index_id_ra, index_ar, index_re, volume/issue index.
    """
    reset_redis_counters()

    reset_server(SERVER)
    if float(name) > 12:
        add_data_ts(SERVER, os.path.abspath(os.path.join('test', 'testcases', 'ts', 'testcase_ts-13.ttl')).replace('\\', '/'))

    testcase_dir = 'test/testcases/testcase_data'
    index_dir = f'{testcase_dir}/indices/{name}'
    testcase_csv = get_csv_data(get_path(f'{testcase_dir}/testcase_{name}_data.csv'))
    vi_path = get_path(f'{index_dir}/index_vi_{name}.json')

    counter_handler = get_counter_handler()
    settings = {'normalize_titles': True}
    curator_obj = Curator(data, SERVER, prov_config=PROV_CONFIG, counter_handler=counter_handler, settings=settings)
    curator_obj.curator()

    # Identifier order inside the 'id' cell is irrelevant, so compare sorted lists.
    for rows in (testcase_csv, curator_obj.data):
        for row in rows:
            row['id'] = sorted(row['id'].split())

    testcase_id_br = get_csv_data(get_path(f'{index_dir}/index_id_br_{name}.csv'))
    testcase_id_ra = get_csv_data(get_path(f'{index_dir}/index_id_ra_{name}.csv'))
    testcase_ar = get_csv_data(get_path(f'{index_dir}/index_ar_{name}.csv'))
    testcase_re = get_csv_data(get_path(f'{index_dir}/index_re_{name}.csv'))
    for rows in (testcase_id_br, testcase_id_ra, testcase_ar, testcase_re,
                 curator_obj.index_id_br, curator_obj.index_id_ra, curator_obj.ar_index, curator_obj.re_index):
        _sort_index_rows(rows)

    with open(vi_path, encoding='utf-8') as json_file:
        testcase_vi = json.load(json_file)

    testcase = [testcase_csv, testcase_id_br, testcase_id_ra, testcase_ar, testcase_re, testcase_vi]
    data_curated = [curator_obj.data, curator_obj.index_id_br, curator_obj.index_id_ra, curator_obj.ar_index,
                    curator_obj.re_index, curator_obj.VolIss]
    return data_curated, testcase


def _sort_index_rows(rows):
    """Sort ``rows`` in place by 'id', falling back to 'meta' and then 'br' on KeyError."""
    for column in ('id', 'meta'):
        try:
            rows.sort(key=lambda row: row[column])
        except KeyError:
            continue
        return
    # Last resort: a KeyError here propagates to the caller.
    rows.sort(key=lambda row: row['br'])
def prepareCurator(data:list, server:str=SERVER, resp_agents_only:bool=False) -> Curator:
    """Build a curator for ``data`` after clearing the Redis counters.

    :param data: rows handed to the curator.
    :param server: URL of the SPARQL endpoint the curator works against.
    :param resp_agents_only: when true a RespAgentsCurator is returned,
        otherwise a Curator with title normalization enabled.
    """
    reset_redis_counters()
    handler = get_counter_handler()
    if not resp_agents_only:
        return Curator(data, server, prov_config=PROV_CONFIG, counter_handler=handler, settings={'normalize_titles': True})
    return RespAgentsCurator(data, server, prov_config=PROV_CONFIG, counter_handler=handler)
159class test_Curator(unittest.TestCase):
@classmethod
def setUpClass(cls):
    # Runs once for the whole class: add_data_ts() with its default arguments
    # resets the triplestore and loads the default real_data.nt fixture.
    add_data_ts()
def setUp(self):
    # Every test starts from an empty Redis counter database.
    reset_redis_counters()
def tearDown(self):
    # Flush the Redis counter database again once the test has finished.
    reset_redis_counters()
def test_merge_entities_in_csv(self):
    # A DOI not yet in id_dict is attached to entity '0601'; the 'id' counter
    # for prefix '060' is preset to 4 and the DOI is expected to map to '0605'.
    doi = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'
    title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    curator = prepareCurator([])
    curator.counter_handler.set_counter(4, 'id', supplier_prefix='060')
    entity_dict = {'0601': {'ids': [], 'title': title, 'others': []}}
    id_dict = {}
    curator.merge_entities_in_csv([doi], '0601', title, entity_dict, id_dict)
    expected_output = (
        {'0601': {'ids': [doi], 'title': title, 'others': []}},
        {doi: '0605'},
    )
    self.assertEqual((entity_dict, id_dict), expected_output)
def test_clean_id_list(self):
    # The expected result lowercases the DOI, keeps the Wikidata id unchanged
    # and returns the OMID separately as the bare metaid '060101'.
    # The local is named id_list so the builtin input() is not shadowed.
    id_list = ['doi:10.001/B-1', 'wikidata:B1111111', 'OMID:br/060101']
    output = Curator.clean_id_list(id_list, br=True)
    expected_output = (['doi:10.001/b-1', 'wikidata:B1111111'], '060101')
    self.assertEqual(output, expected_output)
def test_equalizer(self):
    # Step 1: resolve a DOI that already exists on the triplestore to its metaid.
    empty_fields = {'title': '', 'author': '', 'pub_date': '1972-12-01', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}
    row = {'id': 'doi:10.1001/archderm.104.1.106', **empty_fields}
    curator = prepareCurator([])
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)

    metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
    curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)

    curator.log[0] = {'id': {}}
    curator.clean_id(row)
    extracted_metaval = row['id']
    self.assertEqual(extracted_metaval, '3757')

    # Step 2: run equalizer on a fresh row with no id, using the resolved metaid.
    row = {'id': '', **empty_fields}

    curator.rowcnt = 0
    log_fields = ('id', 'author', 'venue', 'editor', 'publisher', 'page', 'volume', 'issue', 'pub_date', 'type', 'title')
    curator.log[0] = {field: {} for field in log_fields}
    curator.equalizer(row, extracted_metaval)
    output = (curator.log, row)

    expected_output = (
        {0: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {}, 'editor': {}, 'publisher': {}, 'page': {}, 'volume': {}, 'issue': {}, 'pub_date': {'status': 'New value proposed'}, 'type': {}, 'title': {}}},
        {'id': '', 'title': '', 'author': 'Curth, W. [omid:ra/6033]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416 issn:0003-987X]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': 'American Medical Association (ama) [omid:ra/3309 crossref:10]', 'editor': ''}
    )
    self.assertEqual(output, expected_output)
def test_clean_id_metaid_not_in_ts(self):
    # The row names a MetaId that is not on the triplestore, so it is invalid:
    # the id is expected to become a new wannabe and nothing else to change.
    row = {'id': 'omid:br/131313', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    # Snapshot taken before clean_id mutates the row in place.
    expected_output = dict(row, id='wannabe_0')
    curator = prepareCurator([])
    curator.log[0] = {'id': {}}
    curator.clean_id(row)
    self.assertEqual(row, expected_output)
def test_clean_id(self):
    # A DOI known to the triplestore: the id is expected to resolve to '3757'
    # and the proposed title to be replaced by 'Multiple Keloids'.
    row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Blasto', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}
    # Snapshot taken before clean_id mutates the row in place.
    expected_output = dict(row, id='3757', title='Multiple Keloids')
    curator = prepareCurator([])
    curator.log[0] = {'id': {}}
    curator.finder.get_everything_about_res(metavals=set(), identifiers={'doi:10.1001/archderm.104.1.106'}, vvis=set())
    curator.clean_id(row)
    self.assertEqual(row, expected_output)
def test_merge_duplicate_entities(self):
    # Three rows: the first has a DOI that resolves to an existing entity, the
    # other two have no id and are linked to it as duplicates before merging.
    data = [
        {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''},
        {'id': '', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-02', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''},
        {'id': '', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-03', 'venue': 'Archives Of Blast [omid:br/4416]', 'volume': '105', 'issue': '2', 'page': '106-108', 'type': 'journal volume', 'publisher': '', 'editor': ''},
    ]
    curator = prepareCurator([])
    curator.data = data
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)

    # Gather everything the finder has to look up, across all rows.
    all_metavals, all_identifiers, all_vvis = set(), set(), set()
    for row in data:
        row_metavals, row_identifiers, row_vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
        all_metavals |= row_metavals
        all_identifiers |= row_identifiers
        all_vvis |= row_vvis

    curator.finder.get_everything_about_res(metavals=all_metavals, identifiers=all_identifiers, vvis=all_vvis)

    # clean_id replaces each row's id with its actual metaval.
    for row_index, row in enumerate(data):
        curator.log[row_index] = {'id': {}}
        curator.rowcnt = row_index
        curator.clean_id(row)

    # Fresh log entries for merge_duplicate_entities.
    log_fields = ('id', 'author', 'venue', 'editor', 'publisher', 'page', 'volume', 'issue', 'pub_date', 'type')
    for row_index in range(3):
        curator.log[row_index] = {field: {} for field in log_fields}

    # The first row should have resolved to '3757'; the other rows are wannabes.
    first_row_metaval = curator.data[0]['id']
    self.assertEqual(first_row_metaval, '3757')

    # Declare the two wannabes as duplicates of the existing entity.
    if first_row_metaval in curator.brdict:
        curator.brdict[first_row_metaval]['others'].extend(['wannabe_0', 'wannabe_1'])

    curator.merge_duplicate_entities()
    output = (curator.data, curator.log)

    # After the merge all three rows are expected to be identical.
    merged_row = {'id': '3757', 'title': 'Multiple Keloids', 'author': 'Curth, W. [omid:ra/6033]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [issn:0003-987X omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': 'American Medical Association (ama) [omid:ra/3309 crossref:10]', 'editor': ''}
    expected_output = (
        [dict(merged_row) for _ in range(3)],
        {
            0: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {}, 'editor': {}, 'publisher': {}, 'page': {}, 'volume': {}, 'issue': {}, 'pub_date': {}, 'type': {}},
            1: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {'status': 'New value proposed'}, 'editor': {}, 'publisher': {}, 'page': {}, 'volume': {}, 'issue': {}, 'pub_date': {'status': 'New value proposed'}, 'type': {}},
            2: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {'status': 'New value proposed'}, 'editor': {}, 'publisher': {}, 'page': {'status': 'New value proposed'}, 'volume': {'status': 'New value proposed'}, 'issue': {'status': 'New value proposed'}, 'pub_date': {'status': 'New value proposed'}, 'type': {'status': 'New value proposed'}}
        }
    )
    self.assertEqual(output, expected_output)
def test_clean_vvi_all_data_on_ts(self):
    # Venue, volume and issue are all already on the triplestore: they only
    # need to be retrieved and arranged under the venue's metaid.
    row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator([])
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)

    metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
    curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)

    curator.log[0] = {'id': {}}
    curator.clean_id(row)

    curator.clean_vvi(row)
    expected_issue = {'1': {'id': '4713'}}
    expected_volume = {'104': {'id': '4712', 'issue': expected_issue}}
    expected_output = {'4416': {'issue': {}, 'volume': expected_volume}}
    self.assertEqual(curator.vvi, expected_output)
def test_clean_vvi_new_venue(self):
    # The venue is new: venue, volume and issue are all expected to get
    # freshly numbered wannabe ids.
    row = {'id': 'wannabe_1', 'title': 'Money growth, interest rates, inflation and raw materials prices: China', 'author': '', 'pub_date': '2011-11-28', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '2', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator([])
    curator.clean_vvi(row)
    expected_volume = {'2011': {'id': 'wannabe_1', 'issue': {'2': {'id': 'wannabe_2'}}}}
    expected_output = {'wannabe_0': {'volume': expected_volume, 'issue': {}}}
    self.assertEqual(curator.vvi, expected_output)
def test_clean_vvi_volume_with_title(self):
    # A journal volume that has a title of its own, run through the full
    # curator() pipeline rather than clean_vvi alone.
    rows = [{'id': '', 'title': 'The volume title', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '2', 'page': '', 'type': 'journal volume', 'publisher': '', 'editor': ''}]
    curator = prepareCurator(rows)
    curator.curator()
    expected_output = [{'id': 'omid:br/0601', 'title': 'The Volume Title', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook [omid:br/0602]', 'volume': '', 'issue': '', 'page': '', 'type': 'journal volume', 'publisher': '', 'editor': ''}]
    self.assertEqual(curator.data, expected_output)
def test_clean_vvi_invalid_volume(self):
    # The resource is a journal volume but an issue is also specified, so the
    # volume/issue data must be invalidated: only the venue entry is expected.
    row = {'id': 'wannabe_1', 'title': '', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '2', 'page': '', 'type': 'journal volume', 'publisher': '', 'editor': ''}
    curator = prepareCurator([])
    curator.clean_vvi(row)
    self.assertEqual(curator.vvi, {'wannabe_0': {'volume': {}, 'issue': {}}})
def test_clean_vvi_invalid_venue(self):
    # The resource is a journal but a volume is also specified, so the volume
    # must be invalidated: only the venue entry is expected.
    row = {'id': 'wannabe_1', 'title': '', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '', 'page': '', 'type': 'journal', 'publisher': '', 'editor': ''}
    curator = prepareCurator([])
    curator.clean_vvi(row)
    self.assertEqual(curator.vvi, {'wannabe_0': {'volume': {}, 'issue': {}}})
def test_clean_vvi_new_volume_and_issue(self):
    # The row has venue, volume and issue but no identifiers of its own.
    row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': 'Archives Of Surgery [omid:br/4480]', 'volume': '147', 'issue': '11', 'page': '', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator([])
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)

    metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
    curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)
    curator.clean_id(row)
    curator.clean_vvi(row)
    expected_issue = {'11': {'id': '4482'}}
    expected_volume = {'147': {'id': '4481', 'issue': expected_issue}}
    expected_output = {'4480': {'issue': {}, 'volume': expected_volume}}
    self.assertEqual(curator.vvi, expected_output)
def test_clean_ra_overlapping_surnames(self):
    # The surname of one author ('M') is contained in the surname of another
    # ('Malek'); each of the six authors must still become a distinct agent.
    title = 'Giant Oyster Mushroom Pleurotus giganteus (Agaricomycetes) Enhances Adipocyte Differentiation and Glucose Uptake via Activation of PPARγ and Glucose Transporters 1 and 4 in 3T3-L1 Cells'
    row = {'id': 'wannabe_0', 'title': title, 'author': 'Paravamsivam, Puvaneswari; Heng, Chua Kek; Malek, Sri Nurestri Abdul [orcid:0000-0001-6278-8559]; Sabaratnam, Vikineswary; M, Ravishankar Ram; Kuppusamy, Umah Rani', 'pub_date': '2016', 'venue': 'International Journal of Medicinal Mushrooms [issn:1521-9437]', 'volume': '18', 'issue': '9', 'page': '821-831', 'type': 'journal article', 'publisher': 'Begell House [crossref:613]', 'editor': ''}
    curator = prepareCurator([])
    curator.brdict = {'wannabe_0': {'ids': ['doi:10.1615/intjmedmushrooms.v18.i9.60'], 'title': title, 'others': []}}
    curator.clean_ra(row, 'author')
    output = (curator.ardict, curator.radict, curator.idra)
    expected_ardict = {'wannabe_0': {'author': [('0601', 'wannabe_0'), ('0602', 'wannabe_1'), ('0603', 'wannabe_2'), ('0604', 'wannabe_3'), ('0605', 'wannabe_4'), ('0606', 'wannabe_5')], 'editor': [], 'publisher': []}}
    expected_radict = {
        'wannabe_0': {'ids': [], 'others': [], 'title': 'Paravamsivam, Puvaneswari'},
        'wannabe_1': {'ids': [], 'others': [], 'title': 'Heng, Chua Kek'},
        'wannabe_2': {'ids': ['orcid:0000-0001-6278-8559'], 'others': [], 'title': 'Malek, Sri Nurestri Abdul'},
        'wannabe_3': {'ids': [], 'others': [], 'title': 'Sabaratnam, Vikineswary'},
        'wannabe_4': {'ids': [], 'others': [], 'title': 'M, Ravishankar Ram'},
        'wannabe_5': {'ids': [], 'others': [], 'title': 'Kuppusamy, Umah Rani'},
    }
    expected_idra = {'orcid:0000-0001-6278-8559': '0601'}
    self.assertEqual(output, (expected_ardict, expected_radict, expected_idra))
def test_clean_ra_with_br_metaid(self):
    # One author is already in the triplestore, the other is not.
    # The bibliographic resource is identified by a MetaID.
    # The new author carries two identifiers.
    row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Keloids', 'author': 'Curth, W.; McSorley, J. [orcid:0000-0003-0530-4305 schema:12345]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator([])
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
    metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
    curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)

    curator.log[0] = {'id': {}}
    curator.clean_id(row)

    resolved_metaval = row['id']
    self.assertEqual(resolved_metaval, '3757')
    curator.brdict = {resolved_metaval: {'ids': ['doi:10.1001/archderm.104.1.106'], 'title': 'Multiple Keloids', 'others': []}}

    curator.clean_ra(row, 'author')
    output = (curator.ardict, curator.radict, curator.idra)
    expected_ardict = {'3757': {'author': [('9445', '6033'), ('0601', 'wannabe_0')], 'editor': [], 'publisher': []}}
    expected_radict = {
        '6033': {'ids': [], 'others': [], 'title': 'Curth, W.'},
        'wannabe_0': {'ids': ['orcid:0000-0003-0530-4305', 'schema:12345'], 'others': [], 'title': 'McSorley, J.'},
    }
    expected_idra = {'orcid:0000-0003-0530-4305': '0601', 'schema:12345': '0602'}
    self.assertEqual(output, (expected_ardict, expected_radict, expected_idra))
def test_clean_ra_with_br_wannabe(self):
    # Neither author is on the triplestore.
    # The bibliographic resource is itself a wannabe, so the wannabe counter
    # starts at 1 and the authors are expected to be wannabe_1 and wannabe_2.
    row = {'id': 'wannabe_0', 'title': 'Multiple Keloids', 'author': 'Curth, W. [orcid:0000-0002-8420-0696] ; McSorley, J. [orcid:0000-0003-0530-4305]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator([])
    curator.brdict = {'wannabe_0': {'ids': ['doi:10.1001/archderm.104.1.106'], 'title': 'Multiple Keloids', 'others': []}}
    curator.wnb_cnt = 1
    curator.clean_ra(row, 'author')
    output = (curator.ardict, curator.radict, curator.idra)
    expected_ardict = {'wannabe_0': {'author': [('0601', 'wannabe_1'), ('0602', 'wannabe_2')], 'editor': [], 'publisher': []}}
    expected_radict = {
        'wannabe_1': {'ids': ['orcid:0000-0002-8420-0696'], 'others': [], 'title': 'Curth, W.'},
        'wannabe_2': {'ids': ['orcid:0000-0003-0530-4305'], 'others': [], 'title': 'McSorley, J.'},
    }
    expected_idra = {'orcid:0000-0002-8420-0696': '0601', 'orcid:0000-0003-0530-4305': '0602'}
    self.assertEqual(output, (expected_ardict, expected_radict, expected_idra))
def test_clean_ra_with_empty_square_brackets(self):
    # The author's name is followed by an empty pair of square brackets
    # ('[ ]'): the name must be kept and no identifier recorded for it.
    row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Keloids', 'author': 'Bernacki, Edward J. [ ]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
    curator = prepareCurator([])
    curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)

    metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
    curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)

    curator.log[0] = {'id': {}}
    curator.clean_id(row)

    resolved_metaval = row['id']
    self.assertEqual(resolved_metaval, '3757')
    curator.brdict = {resolved_metaval: {'ids': ['doi:10.1001/archderm.104.1.106'], 'title': 'Multiple Keloids', 'others': []}}

    curator.clean_ra(row, 'author')
    output = (curator.ardict, curator.radict, curator.idra)
    expected_ardict = {'3757': {'author': [('9445', '6033'), ('0601', 'wannabe_0')], 'editor': [], 'publisher': []}}
    expected_radict = {
        '6033': {'ids': [], 'others': [], 'title': 'Curth, W.'},
        'wannabe_0': {'ids': [], 'others': [], 'title': 'Bernacki, Edward J.'},
    }
    self.assertEqual(output, (expected_ardict, expected_radict, {}))
def test_meta_maker(self):
    # Existing entities are expected to gain their own omid among the ids,
    # and the wannabe author to be replaced by the new metaid '0601'.
    curator = prepareCurator([])
    curator.brdict = {
        '3757': {'ids': ['doi:10.1001/archderm.104.1.106', 'pmid:29098884'], 'title': 'Multiple Keloids', 'others': []},
        '4416': {'ids': ['issn:0003-987X'], 'title': 'Archives Of Dermatology', 'others': []},
    }
    curator.radict = {
        '6033': {'ids': [], 'others': [], 'title': 'Curth, W.'},
        'wannabe_0': {'ids': ['orcid:0000-0003-0530-4305', 'schema:12345'], 'others': [], 'title': 'Mcsorley, J.'},
    }
    curator.ardict = {'3757': {'author': [('9445', '6033'), ('0601', 'wannabe_0')], 'editor': [], 'publisher': []}}
    curator.vvi = {
        '4416': {
            'issue': {},
            'volume': {
                '107': {'id': '4733', 'issue': {'1': {'id': '4734'}, '2': {'id': '4735'}, '3': {'id': '4736'}, '4': {'id': '4737'}, '5': {'id': '4738'}, '6': {'id': '4739'}}},
                '108': {'id': '4740', 'issue': {'1': {'id': '4741'}, '2': {'id': '4742'}, '3': {'id': '4743'}, '4': {'id': '4744'}}},
                '104': {'id': '4712', 'issue': {'1': {'id': '4713'}, '2': {'id': '4714'}, '3': {'id': '4715'}, '4': {'id': '4716'}, '5': {'id': '4717'}, '6': {'id': '4718'}}},
                '148': {'id': '4417', 'issue': {'12': {'id': '4418'}, '11': {'id': '4419'}}},
                '100': {'id': '4684', 'issue': {'1': {'id': '4685'}, '2': {'id': '4686'}, '3': {'id': '4687'}, '4': {'id': '4688'}, '5': {'id': '4689'}, '6': {'id': '4690'}}},
                '101': {'id': '4691', 'issue': {'1': {'id': '4692'}, '2': {'id': '4693'}, '3': {'id': '4694'}, '4': {'id': '4695'}, '5': {'id': '4696'}, '6': {'id': '4697'}}},
                '102': {'id': '4698', 'issue': {'1': {'id': '4699'}, '2': {'id': '4700'}, '3': {'id': '4701'}, '4': {'id': '4702'}, '5': {'id': '4703'}, '6': {'id': '4704'}}},
                '103': {'id': '4705', 'issue': {'1': {'id': '4706'}, '2': {'id': '4707'}, '3': {'id': '4708'}, '4': {'id': '4709'}, '5': {'id': '4710'}, '6': {'id': '4711'}}},
                '105': {'id': '4719', 'issue': {'1': {'id': '4720'}, '2': {'id': '4721'}, '3': {'id': '4722'}, '4': {'id': '4723'}, '5': {'id': '4724'}, '6': {'id': '4725'}}},
                '106': {'id': '4726', 'issue': {'6': {'id': '4732'}, '1': {'id': '4727'}, '2': {'id': '4728'}, '3': {'id': '4729'}, '4': {'id': '4730'}, '5': {'id': '4731'}}},
            },
        },
    }
    curator.meta_maker()
    output = (curator.brmeta, curator.rameta, curator.armeta)
    expected_brmeta = {
        '3757': {'ids': ['doi:10.1001/archderm.104.1.106', 'pmid:29098884', 'omid:br/3757'], 'title': 'Multiple Keloids', 'others': []},
        '4416': {'ids': ['issn:0003-987X', 'omid:br/4416'], 'title': 'Archives Of Dermatology', 'others': []},
    }
    expected_rameta = {
        '6033': {'ids': ['omid:ra/6033'], 'others': [], 'title': 'Curth, W.'},
        '0601': {'ids': ['orcid:0000-0003-0530-4305', 'schema:12345', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Mcsorley, J.'},
    }
    expected_armeta = {'3757': {'author': [('9445', '6033'), ('0601', '0601')], 'editor': [], 'publisher': []}}
    self.assertEqual(output, (expected_brmeta, expected_rameta, expected_armeta))
def test_enricher(self):
    # The row refers to its own entity and to its venue through wannabe ids;
    # the expected row carries the ids and titles stored in the *meta dicts.
    title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    curator = prepareCurator([])
    curator.data = [{'id': 'wannabe_0', 'title': title, 'author': '', 'pub_date': '2011-11-28', 'venue': 'wannabe_1', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': 'OECD [crossref:1963]', 'editor': ''}]
    curator.brmeta = {
        '0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'omid:br/0601'], 'others': ['wannabe_0'], 'title': title},
        '0602': {'ids': ['omid:br/0604'], 'others': ['wannabe_1'], 'title': 'OECD Economic Outlook'},
    }
    curator.armeta = {'0601': {'author': [], 'editor': [], 'publisher': [('0601', '0601')]}}
    curator.rameta = {'0601': {'ids': ['crossref:1963', 'omid:ra/0601'], 'others': ['wannabe_2'], 'title': 'Oecd'}}
    curator.remeta = {}
    curator.meta_maker()
    curator.enrich()
    expected_output = [{'id': 'doi:10.1787/eco_outlook-v2011-2-graph138-en omid:br/0601', 'title': title, 'author': '', 'pub_date': '2011-11-28', 'venue': 'OECD Economic Outlook [omid:br/0604]', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': 'Oecd [crossref:1963 omid:ra/0601]', 'editor': ''}]
    self.assertEqual(curator.data, expected_output)
def test_indexer(self):
    """Run ``meta_maker`` and ``indexer`` on hand-filled curator dictionaries
    and compare the index files read back from disk with the expected values."""

    def venue_structure():
        # A fresh literal is returned on every call, so the structure given to
        # the curator and the one used in the expected value share no objects.
        return {
            'issue': {},
            'volume': {
                '107': {'id': '4733', 'issue': {'1': {'id': '4734'}, '2': {'id': '4735'}, '3': {'id': '4736'}, '4': {'id': '4737'}, '5': {'id': '4738'}, '6': {'id': '4739'}}},
                '108': {'id': '4740', 'issue': {'1': {'id': '4741'}, '2': {'id': '4742'}, '3': {'id': '4743'}, '4': {'id': '4744'}}},
                '104': {'id': '4712', 'issue': {'1': {'id': '4713'}, '2': {'id': '4714'}, '3': {'id': '4715'}, '4': {'id': '4716'}, '5': {'id': '4717'}, '6': {'id': '4718'}}},
                '148': {'id': '4417', 'issue': {'12': {'id': '4418'}, '11': {'id': '4419'}}},
                '100': {'id': '4684', 'issue': {'1': {'id': '4685'}, '2': {'id': '4686'}, '3': {'id': '4687'}, '4': {'id': '4688'}, '5': {'id': '4689'}, '6': {'id': '4690'}}},
                '101': {'id': '4691', 'issue': {'1': {'id': '4692'}, '2': {'id': '4693'}, '3': {'id': '4694'}, '4': {'id': '4695'}, '5': {'id': '4696'}, '6': {'id': '4697'}}},
                '102': {'id': '4698', 'issue': {'1': {'id': '4699'}, '2': {'id': '4700'}, '3': {'id': '4701'}, '4': {'id': '4702'}, '5': {'id': '4703'}, '6': {'id': '4704'}}},
                '103': {'id': '4705', 'issue': {'1': {'id': '4706'}, '2': {'id': '4707'}, '3': {'id': '4708'}, '4': {'id': '4709'}, '5': {'id': '4710'}, '6': {'id': '4711'}}},
                '105': {'id': '4719', 'issue': {'1': {'id': '4720'}, '2': {'id': '4721'}, '3': {'id': '4722'}, '4': {'id': '4723'}, '5': {'id': '4724'}, '6': {'id': '4725'}}},
                '106': {'id': '4726', 'issue': {'6': {'id': '4732'}, '1': {'id': '4727'}, '2': {'id': '4728'}, '3': {'id': '4729'}, '4': {'id': '4730'}, '5': {'id': '4731'}}}
            }
        }

    def load_csv_index(name):
        # Read one CSV index file back as a list of row dictionaries.
        with open(os.path.join(path_index, name), 'r', encoding='utf-8') as f:
            return list(csv.DictReader(f))

    path_index = f'{OUTPUT_DIR}/index'
    path_csv = OUTPUT_DIR

    curator = prepareCurator([])
    curator.filename = '0.csv'
    curator.idra = {'orcid:0000-0003-0530-4305': '0601', 'schema:12345': '0602'}
    curator.idbr = {'doi:10.1001/2013.jamasurg.270': '2585'}
    curator.armeta = {'2585': {'author': [('9445', '0602'), ('0601', '0601')], 'editor': [], 'publisher': []}}
    curator.remeta = {}
    curator.brmeta = {
        '0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'omid:br/0601'], 'others': ['wannabe_0'], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
        '0602': {'ids': ['omid:br/0602'], 'others': ['wannabe_1'], 'title': 'OECD Economic Outlook'}
    }
    curator.vvi = {'wannabe_1': venue_structure()}

    # Registered before anything is written: the output directory is removed
    # even when the indexer or one of the reads below raises.
    self.addCleanup(shutil.rmtree, OUTPUT_DIR, ignore_errors=True)

    curator.meta_maker()
    curator.indexer(path_index, path_csv)

    index_ar = load_csv_index('index_ar.csv')
    index_id_br = load_csv_index('index_id_br.csv')
    index_id_ra = load_csv_index('index_id_ra.csv')
    index_re = load_csv_index('index_re.csv')
    with open(os.path.join(path_index, 'index_vi.json'), 'r', encoding='utf-8') as f:
        index_vi = json.load(f)

    expected_index_ar = [{'meta': '2585', 'author': '9445, 0602; 0601, 0601', 'editor': '', 'publisher': ''}]
    expected_index_id_br = [{'id': 'doi:10.1001/2013.jamasurg.270', 'meta': '2585'}]
    expected_index_id_ra = [{'id': 'orcid:0000-0003-0530-4305', 'meta': '0601'}, {'id': 'schema:12345', 'meta': '0602'}]
    expected_index_re = [{'br': '', 're': ''}]
    # In the expected index the structure is keyed by '0602' instead of 'wannabe_1'.
    expected_index_vi = {'0602': venue_structure()}

    output = (index_ar, index_id_br, index_id_ra, index_re, index_vi)
    expected_output = (expected_index_ar, expected_index_id_br, expected_index_id_ra, expected_index_re, expected_index_vi)
    self.assertEqual(output, expected_output)
def test_is_a_valid_row(self):
    """Call ``is_a_valid_row`` on every sample row and compare the returned
    values with the expected flags."""
    # Each case pairs an input row with the value expected from is_a_valid_row.
    cases = [
        ({'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, False),
        ({'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '1', 'issue': '', 'page': '', 'type': 'journal volume', 'publisher': '', 'editor': ''}, False),
        ({'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '1', 'page': '', 'type': 'journal issue', 'publisher': '', 'editor': ''}, False),
        ({'id': 'doi:10.1001/2013.jamasurg.270', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, True),
        ({'id': '', 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China', 'author': 'Deckert, Ron J. [orcid:0000-0003-2100-6412]', 'pub_date': '', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, False),
        ({'id': '', 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China', 'author': 'Deckert, Ron J. [orcid:0000-0003-2100-6412]', 'pub_date': '03-01-2020', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': 'book'}, True),
        ({'id': 'doi:10.1001/2013.jamasurg.270', 'title': '', 'author': '', 'pub_date': '', 'venue': '', 'volume': '5', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}, False),
    ]
    actual_flags = [is_a_valid_row(row) for row, _ in cases]
    expected_flags = [flag for _, flag in cases]
    self.assertEqual(actual_flags, expected_flags)
def test_get_preexisting_entities(self):
    """Curate a single row and check both ``preexisting_entities`` and the
    curated ``data`` against the expected values."""
    input_row = {
        'id': 'omid:br/2715',
        'title': 'Image Of The Year For 2012',
        'author': '',
        'pub_date': '',
        'venue': 'Archives Of Surgery [omid:br/4480]',
        'volume': '99',
        'issue': '1',
        'page': '',
        'type': 'journal article',
        'publisher': '',
        'editor': ''
    }
    curator = prepareCurator(data=[input_row])
    curator.curator()

    expected_entities = {'id/4270', 'ra/3309', 'ar/7240', 'br/4481', 'br/2715', 'br/4480', 'id/4274', 'id/2581', 'br/4487', 're/2350'}
    expected_data = [{
        'id': 'doi:10.1001/2013.jamasurg.202 omid:br/2715',
        'title': 'Image Of The Year For 2012',
        'author': '',
        'pub_date': '2012-12-01',
        'venue': 'Archives Of Surgery [issn:0004-0010 omid:br/4480]',
        'volume': '147',
        'issue': '12',
        'page': '1140-1140',
        'type': 'journal article',
        'publisher': 'American Medical Association (ama) [crossref:10 omid:ra/3309]',
        'editor': ''
    }]
    self.assertEqual((curator.preexisting_entities, curator.data), (expected_entities, expected_data))
class test_RespAgentsCurator(unittest.TestCase):
    """Tests that run the curator prepared with ``resp_agents_only=True``."""

    # Row columns, in the order used by the input rows of these tests.
    _COLUMNS = ('id', 'title', 'author', 'pub_date', 'venue', 'volume', 'issue', 'page', 'type', 'publisher', 'editor')

    @classmethod
    def _row(cls, **filled):
        """Return a row whose columns are all empty strings except the given ones."""
        row = dict.fromkeys(cls._COLUMNS, '')
        row.update(filled)
        return row

    @staticmethod
    def _state(curator):
        """Collect the curator attributes that every test of this class compares."""
        return (curator.data, curator.radict, curator.idra, curator.rameta)

    def test_curator_publishers(self):
        """Curate three rows that only carry a publisher."""
        reset()
        data = [
            self._row(publisher='American Medical Association (AMA) [crossref:10 crossref:9999]'),
            self._row(publisher='Elsevier BV [crossref:78]'),
            self._row(publisher='Wiley [crossref:311]'),
        ]
        ra_curator = prepareCurator(data=data, server=SERVER, resp_agents_only=True)
        ra_curator.curator(filename=None, path_csv=None, path_index=None)

        # The same descriptions are expected in radict and in rameta; only the keys differ.
        ama = {'ids': ['crossref:10', 'crossref:9999', 'omid:ra/3309'], 'others': [], 'title': 'American Medical Association (ama)'}
        elsevier = {'ids': ['crossref:78', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Elsevier Bv'}
        wiley = {'ids': ['crossref:311', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Wiley'}
        expected_output = (
            [
                self._row(publisher='American Medical Association (ama) [crossref:10 crossref:9999 omid:ra/3309]'),
                self._row(publisher='Elsevier Bv [crossref:78 omid:ra/0601]'),
                self._row(publisher='Wiley [crossref:311 omid:ra/0602]'),
            ],
            {'3309': ama, 'wannabe_0': elsevier, 'wannabe_1': wiley},
            {'crossref:10': '4274', 'crossref:9999': '0601', 'crossref:78': '0602', 'crossref:311': '0603'},
            {'3309': ama, '0601': elsevier, '0602': wiley},
        )
        self.assertEqual(self._state(ra_curator), expected_output)

    def test_curator(self):
        """Curate three rows that only carry an author, writing the output files."""
        reset()
        data = [
            self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412]'),
            self._row(author='Ruso, Juan M. [orcid:0000-0001-5909-6754]'),
            self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894]'),
        ]
        ra_curator = prepareCurator(data=data, server=SERVER, resp_agents_only=True)
        ra_curator.curator(
            filename='resp_agents_curator_output',
            path_csv='test/testcases/testcase_data',
            path_index='test/testcases/testcase_data/indices')

        deckert = {'ids': ['orcid:0000-0003-2100-6412', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Deckert, Ron J.'}
        ruso = {'ids': ['orcid:0000-0001-5909-6754', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Ruso, Juan M.'}
        sarmiento = {'ids': ['orcid:0000-0002-4487-6894', 'omid:ra/0603'], 'others': ['wannabe_2'], 'title': 'Sarmiento, Félix'}
        expected_output = (
            [
                self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412 omid:ra/0601]'),
                self._row(author='Ruso, Juan M. [orcid:0000-0001-5909-6754 omid:ra/0602]'),
                self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894 omid:ra/0603]'),
            ],
            {'wannabe_0': deckert, 'wannabe_1': ruso, 'wannabe_2': sarmiento},
            {'orcid:0000-0003-2100-6412': '0601', 'orcid:0000-0001-5909-6754': '0602', 'orcid:0000-0002-4487-6894': '0603'},
            {'0601': deckert, '0602': ruso, '0603': sarmiento},
        )
        self.assertEqual(self._state(ra_curator), expected_output)

    def test_curator_ra_on_ts(self):
        """Curate three authors, one of which is already on the triplestore."""
        # A responsible agent is already on the triplestore
        ts_data_path = os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data.nt')).replace('\\', '/')
        add_data_ts(server=SERVER, data_path=ts_data_path)
        self.maxDiff = None
        data = [
            self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412]'),
            self._row(author='Mehrotra, Ateev [orcid:0000-0003-2223-1582]'),
            self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894]'),
        ]
        ra_curator = prepareCurator(data=data, server=SERVER, resp_agents_only=True)
        ra_curator.curator()

        deckert = {'ids': ['orcid:0000-0003-2100-6412', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Deckert, Ron J.'}
        mehrotra = {'ids': ['orcid:0000-0003-2223-1582', 'omid:ra/3976'], 'others': [], 'title': 'Mehrotra, Ateev'}
        sarmiento = {'ids': ['orcid:0000-0002-4487-6894', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Sarmiento, Félix'}
        expected_output = (
            [
                self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412 omid:ra/0601]'),
                self._row(author='Mehrotra, Ateev [orcid:0000-0003-2223-1582 omid:ra/3976]'),
                self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894 omid:ra/0602]'),
            ],
            {'wannabe_0': deckert, '3976': mehrotra, 'wannabe_1': sarmiento},
            {'orcid:0000-0003-2100-6412': '0601', 'orcid:0000-0003-2223-1582': '4351', 'orcid:0000-0002-4487-6894': '0602'},
            {'0601': deckert, '3976': mehrotra, '0602': sarmiento},
        )
        self.assertEqual(self._state(ra_curator), expected_output)
class test_id_worker(unittest.TestCase):
    """Tests for ``id_worker`` (and ``conflict``) that share one ``ResourceFinder``
    loaded once for the whole class."""

    @classmethod
    def setUpClass(cls):
        """Load the test data on the triplestore and prefetch the resources used by the tests."""
        add_data_ts(SERVER, os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data.nt')).replace('\\', '/'))
        cls.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
        cls.finder.get_everything_about_res(
            metavals={'omid:br/3309', 'omid:br/2438', 'omid:br/0601'},
            identifiers={'doi:10.1001/2013.jamasurg.270', 'doi:10.1787/eco_outlook-v2011-2-graph138-en', 'orcid:0000-0001-6994-8412', 'doi:10.1001/archderm.104.1.106', 'pmid:29098884'},
            vvis=set())

    def test_id_worker_1(self):
        """1 EntityA is a new one."""
        curator = prepareCurator([])
        name = 'βέβαιος, α, ον'
        idslist = ['doi:10.1163/2214-8655_lgo_lgo_02_0074_ger']
        wannabe_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = (
            'wannabe_0',
            {'wannabe_0': {'ids': ['doi:10.1163/2214-8655_lgo_lgo_02_0074_ger'], 'others': [], 'title': 'βέβαιος, α, ον'}},
            {},
            {'doi:10.1163/2214-8655_lgo_lgo_02_0074_ger': '0601'},
            {},
            {}
        )
        self.assertEqual(output, expected_output)

    def test_id_worker_1_no_id(self):
        """1 EntityA is a new one and has no ids."""
        curator = prepareCurator([])
        name = 'βέβαιος, α, ον'
        idslist = []
        wannabe_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = (
            'wannabe_0',
            {'wannabe_0': {'ids': [], 'others': [], 'title': 'βέβαιος, α, ον'}},
            {},
            {},
            {},
            {}
        )
        self.assertEqual(output, expected_output)

    def test_id_worker_2_id_ts(self):
        """2 Retrieve EntityA data in triplestore to update EntityA inside CSV (identifier only)."""
        curator = prepareCurator([])
        curator.finder = self.finder
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        idslist = ['crossref:10']
        wannabe_id = curator.id_worker('editor', name, idslist, '', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = ('3309', {}, {'3309': {'ids': ['crossref:10'], 'others': [], 'title': 'American Medical Association (ama)'}}, {}, {'crossref:10': '4274'}, {})
        self.assertEqual(output, expected_output)

    def test_id_worker_2_metaid_ts(self):
        """2 Retrieve EntityA data in triplestore to update EntityA inside CSV (MetaID only)."""
        curator = prepareCurator([])
        curator.finder = self.finder
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        # MetaID only
        wannabe_id = curator.id_worker('editor', name, [], '3309', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = ('3309', {}, {'3309': {'ids': ['crossref:10'], 'others': [], 'title': 'American Medical Association (ama)'}}, {}, {'crossref:10': '4274'}, {})
        self.assertEqual(output, expected_output)

    def test_id_worker_2_id_metaid_ts(self):
        """2 Retrieve EntityA data in triplestore to update EntityA inside CSV (ID and MetaID)."""
        curator = prepareCurator([])
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        curator.finder = self.finder
        # ID and MetaID
        wannabe_id = curator.id_worker('publisher', name, ['crossref:10'], '3309', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = ('3309', {}, {'3309': {'ids': ['crossref:10'], 'others': [], 'title': 'American Medical Association (ama)'}}, {}, {'crossref:10': '4274'}, {})
        self.assertEqual(output, expected_output)

    def test_id_worker_3(self):
        """2 Retrieve EntityA data in triplestore to update EntityA inside CSV. MetaID on ts has precedence."""
        curator = prepareCurator([])
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        curator.finder = self.finder
        # ID and MetaID, but it's omid:ra/3309 on ts
        wannabe_id = curator.id_worker('publisher', name, ['crossref:10'], '33090', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = ('3309', {}, {'3309': {'ids': ['crossref:10'], 'others': [], 'title': 'American Medical Association (ama)'}}, {}, {'crossref:10': '4274'}, {})
        self.assertEqual(output, expected_output)

    def test_id_worker_conflict(self):
        """Call ``conflict`` directly and check the new entity, the log and the id dictionary."""
        # there's no meta or there was one but it didn't exist
        # There are other ids that already exist, but refer to multiple entities on ts.
        # Conflict!
        idslist = ['doi:10.1001/2013.jamasurg.270']
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
        curator = prepareCurator([])
        curator.finder = self.finder
        curator.log[0] = {'id': {}}
        id_dict = {}
        metaval = curator.conflict(idslist, name, id_dict, 'id')  # Only the conflict function is tested here, not id_worker
        output = (metaval, curator.brdict, curator.log, id_dict)
        expected_output = (
            'wannabe_0',
            {'wannabe_0': {'ids': ['doi:10.1001/2013.jamasurg.270'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'}},
            {0: {'id': {'Conflict entity': 'wannabe_0'}}},
            {'doi:10.1001/2013.jamasurg.270': '2585'}
        )
        self.assertEqual(output, expected_output)

    def test_conflict_br(self):
        """No MetaId, an identifier to which two separate br point."""
        # No MetaId, an identifier to which two separate br point: there is a conflict, and a new entity must be created
        curator = prepareCurator([])
        curator.log[0] = {'id': {}}
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
        idslist = ['doi:10.1001/2013.jamasurg.270']
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.log)
        # Two outcomes are accepted.
        # NOTE(review): presumably either of the two br may be returned - confirm.
        expected_output_1 = (
            '2719',
            {'doi:10.1001/2013.jamasurg.270': '2585'},
            {},
            {'2719': {'ids': ['doi:10.1001/2013.jamasurg.270'], 'others': [], 'title': 'Patient Satisfaction As A Possible Indicator Of Quality Surgical Care'}},
            {0: {'id': {}}}
        )
        expected_output_2 = (
            '2720',
            {'doi:10.1001/2013.jamasurg.270': '2585'},
            {},
            {'2720': {'ids': ['doi:10.1001/2013.jamasurg.270'], 'others': [], 'title': 'Pediatric Injury Outcomes In Racial/Ethnic Minorities In California'}},
            {0: {'id': {}}}
        )
        # assertIn reports the actual output on failure, unlike assertTrue on a boolean.
        self.assertIn(output, (expected_output_1, expected_output_2))

    def test_conflict_ra(self):
        """No MetaId, an identifier to which two separate ra point."""
        # No MetaId, an identifier to which two separate ra point: there is a conflict, and a new entity must be created
        idslist = ['orcid:0000-0001-6994-8412']
        name = 'Alarcon, Louis H.'
        curator = prepareCurator([])
        curator.finder = self.finder
        curator.log[0] = {'author': {}}
        meta_id = curator.id_worker('author', name, idslist, '', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)
        # Two outcomes are accepted.
        # NOTE(review): presumably either of the two ra may be returned - confirm.
        expected_output_1 = (
            '4940',
            {},
            {'orcid:0000-0001-6994-8412': '4475'},
            {},
            {'4940': {'ids': ['orcid:0000-0001-6994-8412'], 'others': [], 'title': 'Alarcon, Louis H.'}},
            {0: {'author': {}}}
        )
        expected_output_2 = (
            '1000000',
            {},
            {'orcid:0000-0001-6994-8412': '4475'},
            {},
            {'1000000': {'ids': ['orcid:0000-0001-6994-8412'], 'others': [], 'title': 'Alarcon, Louis H.'}},
            {0: {'author': {}}}
        )
        # assertIn reports the actual output on failure, unlike assertTrue on a boolean.
        self.assertIn(output, (expected_output_1, expected_output_2))

    def test_conflict_suspect_id_among_existing(self):
        """An ID is already in entity_dict under a MetaID, and another ID reveals a conflict on ts."""
        # ID already exist in entity_dict and refer to one entity having a MetaID, but there is another ID not in entity_dict that highlights a conflict on ts
        br_dict = {
            'omid:br/0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
            'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
            'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
        }
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'  # The first title must have precedence (China, not Japan)
        idslist = ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'doi:10.1001/2013.jamasurg.270']
        curator = prepareCurator(get_csv_data(REAL_DATA_CSV))
        curator.log[0] = {'id': {}}
        curator.brdict = br_dict
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)
        expected_output = (
            'wannabe_0',
            {
                'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601',
                'doi:10.1001/2013.jamasurg.270': '2585'
            },
            {},
            {
                'omid:br/0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
                'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
                'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
                'wannabe_0': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'doi:10.1001/2013.jamasurg.270'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'}
            },
            {},
            {0: {'id': {'Conflict entity': 'wannabe_0'}}}
        )
        self.assertEqual(output, expected_output)

    def test_conflict_suspect_id_among_wannabe(self):
        """An ID is already in entity_dict under a temporary id, and another ID reveals a conflict on ts."""
        # ID already exist in entity_dict and refer to one temporary, but there is another ID not in entity_dict that highlights a conflict on ts
        br_dict = {
            'wannabe_0': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
            'wannabe_2': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
            'wannabe_3': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
        }
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'  # The first title must have precedence (China, not Japan)
        idslist = ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'doi:10.1001/2013.jamasurg.270']
        curator = prepareCurator(get_csv_data(REAL_DATA_CSV))
        curator.log[0] = {'id': {}}
        curator.brdict = br_dict
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)
        # Two outcomes are accepted.
        # NOTE(review): presumably either of the two br may be returned - confirm.
        expected_output_1 = (
            '2720',
            {
                'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601',
                'doi:10.1001/2013.jamasurg.270': '2585'
            },
            {},
            {
                '2720': {'ids': ['doi:10.1001/2013.jamasurg.270', 'doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': ['wannabe_0'], 'title': 'Pediatric Injury Outcomes In Racial/Ethnic Minorities In California'},
                'wannabe_2': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
                'wannabe_3': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'}
            },
            {},
            {0: {'id': {}}}
        )
        expected_output_2 = (
            '2719',
            {
                'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601',
                'doi:10.1001/2013.jamasurg.270': '2585'
            },
            {},
            {
                '2719': {'ids': ['doi:10.1001/2013.jamasurg.270', 'doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': ['wannabe_0'], 'title': 'Patient Satisfaction As A Possible Indicator Of Quality Surgical Care'},
                'wannabe_2': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
                'wannabe_3': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'}
            },
            {},
            {0: {'id': {}}}
        )
        # assertIn reports the actual output on failure, unlike assertTrue on a boolean.
        self.assertIn(output, (expected_output_1, expected_output_2))

    def test_id_worker_4(self):
        """4 Merge data from EntityA (CSV) with data from EntityX (CSV), update both with data from EntityA (RDF)."""
        br_dict = {
            'wannabe_0': {'ids': ['doi:10.1001/archderm.104.1.106'], 'others': [], 'title': 'Multiple eloids'},
            'wannabe_1': {'ids': ['doi:10.1001/archderm.104.1.106'], 'others': [], 'title': 'Multiple Blastoids'},
        }
        name = 'Multiple Palloids'
        idslist = ['doi:10.1001/archderm.104.1.106', 'pmid:29098884']
        curator = prepareCurator([])
        curator.brdict = br_dict
        curator.wnb_cnt = 2
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.log)
        expected_output = (
            '3757',
            {'doi:10.1001/archderm.104.1.106': '3624', 'pmid:29098884': '2000000'},
            {},
            {}
        )
        self.assertEqual(output, expected_output)
967class test_id_worker_with_reset(unittest.TestCase):
def test_id_worker_2_meta_in_entity_dict(self):
    """Call ``id_worker`` twice with the same MetaID and check the state after the second call."""
    # MetaID exists among data.
    # MetaID already in entity_dict (no care about conflicts, we have a MetaID specified)
    # 2 Retrieve EntityA data to update EntityA inside CSV
    reset_server()
    populated_curator = prepareCurator(get_csv_data(REAL_DATA_CSV))
    populated_curator.curator()
    store_curated_data(populated_curator, SERVER)

    title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    doi = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'
    curator_empty = prepareCurator([])
    finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI, local_g=curator_empty.everything_everywhere_allatonce)
    finder.get_everything_about_res(metavals=set(), identifiers={doi}, vvis=set())

    worker_kwargs = {'ra_ent': False, 'br_ent': True, 'vvi_ent': False, 'publ_entity': False}
    # put metaval in entity_dict
    curator_empty.id_worker('id', title, [], '0601', **worker_kwargs)
    # metaval is in entity_dict
    meta_id = curator_empty.id_worker('id', title, [], '0601', **worker_kwargs)

    output = (meta_id, curator_empty.brdict, curator_empty.radict, curator_empty.idbr, curator_empty.idra, curator_empty.log)
    expected_output = (
        '0601',
        {'0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China', 'others': []}},
        {},
        {'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601'},
        {},
        {}
    )
    self.assertEqual(output, expected_output)
def test_conflict_existing(self):
    """An ID is already in entity_dict under several entities that have a MetaID."""
    # ID already exist in entity_dict but refer to multiple entities having a MetaID
    reset_server()
    china_title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    shared_doi = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'

    curator = prepareCurator([])
    curator.log[0] = {'id': {}}
    curator.brdict = {
        'omid:br/0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
        'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
        'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
    }
    meta_id = curator.id_worker('id', china_title, [shared_doi], '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)

    # The three initial entities are expected unchanged, next to the new 'wannabe_0'.
    expected_brdict = {
        'omid:br/0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
        'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
        'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
        'wannabe_0': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
    }
    expected_output = (
        'wannabe_0',
        {'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601'},
        {},
        expected_brdict,
        {},
        {0: {'id': {'Conflict entity': 'wannabe_0'}}}
    )
    output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)
    self.assertEqual(output, expected_output)
def test_id_worker_5(self):
    # The DOI is already present in brdict and is attached only to temporary
    # (wannabe) entities -> collective merge.
    reset_server()
    doi_138 = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'
    doi_150 = 'doi:10.1787/eco_outlook-v2011-2-graph150-en'
    title_china = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'

    curator = prepareCurator(list())
    curator.brdict = {
        'wannabe_0': {'ids': [doi_138], 'others': [], 'title': title_china},
        'wannabe_1': {'ids': [doi_150], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
        'wannabe_2': {'ids': [doi_138], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
    }
    # Three wannabe entities (0..2) are already present in brdict.
    curator.wnb_cnt = 2
    meta_id = curator.id_worker(
        'id', title_china, [doi_138], '',
        ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)

    expected_output = ('wannabe_0', {doi_138: '0601'}, {}, {})
    output = (meta_id, curator.idbr, curator.idra, curator.log)
    self.assertEqual(output, expected_output)
def test_no_conflict_existing(self):
    # The DOI is already present in brdict and is attached to a single entity.
    reset_server()
    doi_138 = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'

    curator = prepareCurator(list())
    curator.log[0] = {'id': {}}
    curator.brdict = {
        'omid:br/0601': {'ids': [doi_138], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
        'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
        'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
    }
    # The incoming title says "Japan" on purpose: the first title (China) must
    # keep precedence over the one supplied here.
    incoming_title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'
    meta_id = curator.id_worker(
        'id', incoming_title, [doi_138], '',
        ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)

    expected_output = ('omid:br/0601', {doi_138: '0601'}, {}, {0: {'id': {}}})
    output = (meta_id, curator.idbr, curator.idra, curator.log)
    self.assertEqual(output, expected_output)
def test_metaid_in_prov(self):
    # The MetaID is not found in the data, but it is found in the provenance metadata.
    reset_server()
    # Absolute path of the dump with provenance, normalised to forward slashes.
    prov_dump = os.path.join('test', 'testcases', 'ts', 'real_data_with_prov.nq')
    prov_dump = os.path.abspath(prov_dump).replace('\\', '/')
    add_data_ts(server=SERVER, data_path=prov_dump)

    curator = prepareCurator(list())
    resolved_meta_id = curator.id_worker(
        'id', '', [], '4321',
        ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=False)
    self.assertEqual(resolved_meta_id, '38013')
class testcase_01(unittest.TestCase):
    # testcase1: 2 different issues of the same venue (no volume)

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Only rows 0 and 5 of the manual dataset take part in this scenario.
        selected = [rows[0], rows[5]]
        data_curated, testcase = prepare_to_test(selected, '01')
        # Compare the two tables as a whole. Looping over the curated rows
        # only would pass silently when the curated output is empty or
        # shorter than the expected table.
        self.assertEqual(data_curated, testcase)
class testcase_02(unittest.TestCase):
    # testcase2: 2 different volumes of the same venue (no issue)

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Only rows 1 and 3 of the manual dataset take part in this scenario.
        selected = [rows[1], rows[3]]
        curated, expected = prepare_to_test(selected, '02')
        self.assertEqual(curated, expected)
class testcase_03(unittest.TestCase):
    # testcase3: 2 different issues of the same volume

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Only rows 2 and 4 of the manual dataset take part in this scenario.
        selected = [rows[2], rows[4]]
        curated, expected = prepare_to_test(selected, '03')
        self.assertEqual(curated, expected)
class testcase_04(unittest.TestCase):
    # testcase4: 2 new IDS and different date format (yyyy-mm and yyyy-mm-dd)

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Only rows 6 and 7 of the manual dataset take part in this scenario.
        selected = [rows[6], rows[7]]
        data_curated, testcase = prepare_to_test(selected, '04')
        # Compare the two tables as a whole. Looping over the curated rows
        # only would pass silently when the curated output is empty or
        # shorter than the expected table.
        self.assertEqual(data_curated, testcase)
class testcase_05(unittest.TestCase):
    # testcase5: NO ID scenario

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # A single row (index 8) is curated in this scenario.
        selected = [rows[8]]
        curated, expected = prepare_to_test(selected, '05')
        self.assertEqual(curated, expected)
class testcase_06(unittest.TestCase):
    # testcase6: ALL types test

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Rows 9..32 of the manual dataset are curated together.
        curated, expected = prepare_to_test(rows[9:33], '06')
        self.assertEqual(curated, expected)
class testcase_07(unittest.TestCase):
    # testcase7: all journal related types with an editor

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Rows 34..39 of the manual dataset are curated together.
        curated, expected = prepare_to_test(rows[34:40], '07')
        self.assertEqual(curated, expected)
class testcase_08(unittest.TestCase):
    # testcase8: all book related types with an editor

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Rows 40..42 of the manual dataset are curated together.
        curated, expected = prepare_to_test(rows[40:43], '08')
        self.assertEqual(curated, expected)
class testcase_09(unittest.TestCase):
    # testcase09: all proceeding related types with an editor

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Rows 43..44 of the manual dataset are curated together.
        curated, expected = prepare_to_test(rows[43:45], '09')
        self.assertEqual(curated, expected)
class testcase_10(unittest.TestCase):
    # testcase10: a book inside a book series and a book inside a book set

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Rows 45..48 of the manual dataset are curated together.
        curated, expected = prepare_to_test(rows[45:49], '10')
        self.assertEqual(curated, expected)
class testcase_11(unittest.TestCase):
    # testcase11: real time entity update

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # Rows 49..51 of the manual dataset are curated together.
        curated, expected = prepare_to_test(rows[49:52], '11')
        self.assertEqual(curated, expected)
class testcase_12(unittest.TestCase):
    # testcase12: clean name, title, ids

    def test(self):
        rows = get_csv_data(MANUAL_DATA_CSV)
        # A one-row slice (index 52) is curated in this scenario.
        curated, expected = prepare_to_test(rows[52:53], '12')
        self.assertEqual(curated, expected)
class testcase_13(unittest.TestCase):
    # testcase13: ID_clean massive test

    def test1(self):
        # 1--- one row with a br whose meta is specified, one row with a
        # wannabe carrying a new id, one row with a specified meta and an id
        # related to that wannabe.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[53:56], '13.1')
        self.assertEqual(curated, expected)

    def test2(self):
        # 2--- conflict with META precedence: a br has a meta_id and an id
        # related to another meta_id; the first specified meta has precedence.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[56:57], '13.2')
        self.assertEqual(curated, expected)

    def test3(self):
        # 3--- conflict: br with an id shared with 2 meta. Two expected
        # outputs ('13.3' and '13.31') are both accepted.
        rows = get_csv_data(MANUAL_DATA_CSV)
        selected = rows[57:58]
        curated, first_expected = prepare_to_test(selected, '13.3')
        # The second run is only used to load the alternative expected table.
        _, second_expected = prepare_to_test(selected, '13.31')
        accepted = (first_expected, second_expected)
        self.assertTrue(any(curated == candidate for candidate in accepted))
class testcase_14(unittest.TestCase):
    # testcase14: scenarios about responsible agents (authors).

    def _check_rows(self, name, start, stop):
        # Curate rows[start:stop] of the manual dataset and compare the result
        # with the expected table loaded for testcase `name`.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[start:stop], name)
        self.assertEqual(curated, expected)

    def test1(self):
        # Update an existing sequence: a new author plus an existing author
        # without an existing id, matched through a badly written
        # "surname, name" string.
        self._check_rows('14.1', 58, 59)

    def test2(self):
        # Same sequence in a different order, with new ids.
        self._check_rows('14.2', 59, 60)

    def test3(self):
        # RA: author with two different ids.
        self._check_rows('14.3', 60, 61)

    def test4(self):
        # One row with an ra whose meta is specified, one row with a wannabe
        # ra carrying a new id, one row with a specified meta and an id
        # related to that wannabe.
        self._check_rows('14.4', 61, 64)
class testcase_15(unittest.TestCase):
    # testcase15: venue / volume / issue scenarios against the triplestore.

    def _check_rows(self, name, start, stop):
        # Curate rows[start:stop] of the manual dataset and compare the result
        # with the expected table loaded for testcase `name`.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[start:stop], name)
        self.assertEqual(curated, expected)

    def test1(self):
        # Venue, volume and issue already exist in the triplestore.
        self._check_rows('15.1', 64, 65)

    def test2(self):
        # Venue conflict.
        self._check_rows('15.2', 65, 66)

    def test3(self):
        # The venue in the triplestore is now the br.
        self._check_rows('15.3', 66, 67)

    def test4(self):
        # The br in the triplestore is now the venue.
        self._check_rows('15.4', 67, 68)

    def test5(self):
        # The volume in the triplestore is now the br.
        self._check_rows('15.5', 71, 72)

    def test6(self):
        # The br is a volume.
        self._check_rows('15.6', 72, 73)

    def test7(self):
        # The issue in the triplestore is now the br.
        self._check_rows('15.7', 73, 74)

    def test8(self):
        # The br is an issue.
        self._check_rows('15.8', 74, 75)
class testcase_16(unittest.TestCase):
    # testcase16: date cleaning, existing re, and RA name completion.

    def _check_rows(self, name, start, stop):
        # Curate rows[start:stop] of the manual dataset and compare the result
        # with the expected table loaded for testcase `name`.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[start:stop], name)
        self.assertEqual(curated, expected)

    def test1(self):
        # Date cleaning: the input row carries the wrong date 2019-02-29.
        self._check_rows('16.1', 75, 76)

    def test2(self):
        # Existing re.
        self._check_rows('16.2', 76, 77)

    def test3(self):
        # Given name supplied for an RA that has only a family name in the
        # triplestore.
        self._check_rows('16.3', 77, 78)
# Run the whole suite when this module is executed as a script.
if '__main__' == __name__:  # pragma: no cover
    unittest.main()