Coverage for test/curator_test.py: 99%
770 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-20 08:55 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-20 08:55 +0000
1import csv
2import shutil
3import unittest
5import redis
6from oc_meta.core.creator import Creator
7from oc_meta.core.curator import *
8from oc_meta.lib.file_manager import get_csv_data
9from oc_meta.lib.finder import ResourceFinder
10from oc_meta.plugins.multiprocess.resp_agents_curator import RespAgentsCurator
11from oc_ocdm import Storer
12from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler
13from rdflib import Dataset, Graph
14from sparqlite import SPARQLClient
# SPARQL endpoint of the triplestore that reset_server() clears and add_data_ts() loads
SERVER = 'http://127.0.0.1:8805/sparql'
# Fixture and output locations, expressed relative to the 'test' directory
BASE_DIR = os.path.join('test')
MANUAL_DATA_CSV = f'{BASE_DIR}/manual_data.csv'
MANUAL_DATA_RDF = f'{BASE_DIR}/testcases/ts/testcase_ts-13.ttl'
REAL_DATA_CSV = os.path.join(BASE_DIR, 'real_data.csv')
REAL_DATA_RDF = f'{BASE_DIR}/testcases/ts/real_data.nt'
REAL_DATA_RDF_WITH_PROV = f'{BASE_DIR}/testcases/ts/real_data_with_prov.nq'
# Base IRI handed to ResourceFinder and Creator
BASE_IRI = 'https://w3id.org/oc/meta/'
CURATOR_COUNTER_DIR = f'{BASE_DIR}/curator_counter'
OUTPUT_DIR = f'{BASE_DIR}/output'
# Provenance configuration passed as prov_config to every Curator built in this module
PROV_CONFIG = f'{BASE_DIR}/prov_config.json'

# Redis configuration
REDIS_HOST = 'localhost'
REDIS_PORT = 6381
REDIS_DB = 5
def get_path(path:str) -> str:
    """Return ``path`` with every backslash replaced by a forward slash.

    The path is not made absolute; only the separators are normalised.
    """
    return path.replace('\\', '/')
def reset_redis_counters():
    """Issue FLUSHDB on the Redis database configured by REDIS_HOST/REDIS_PORT/REDIS_DB."""
    redis_client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB)
    try:
        redis_client.flushdb()
    finally:
        # This helper runs in setUp and tearDown of every test: close the client
        # explicitly instead of leaving one open connection behind per call.
        redis_client.close()
def get_counter_handler():
    """Build a RedisCounterHandler pointing at the Redis host, port and db used by the tests."""
    connection = {'host': REDIS_HOST, 'port': REDIS_PORT, 'db': REDIS_DB}
    return RedisCounterHandler(**connection)
def reset():
    """Reset the test state; currently this only flushes the Redis counters."""
    reset_redis_counters()
def reset_server(server:str=SERVER) -> None:
    """Send one ``CLEAR GRAPH`` update per known graph to the SPARQL endpoint ``server``.

    The graphs are the br/ra/re/id/ar graphs under https://w3id.org/oc/meta/
    plus http://default.graph/.
    """
    graphs_to_clear = {
        'https://w3id.org/oc/meta/br/',
        'https://w3id.org/oc/meta/ra/',
        'https://w3id.org/oc/meta/re/',
        'https://w3id.org/oc/meta/id/',
        'https://w3id.org/oc/meta/ar/',
        'http://default.graph/',
    }
    with SPARQLClient(server, timeout=60) as client:
        for graph_iri in graphs_to_clear:
            client.update(f'CLEAR GRAPH <{graph_iri}>')
def add_data_ts(server:str=SERVER, data_path:str=os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data.nt')).replace('\\', '/'), batch_size:int=100, default_graph_uri=URIRef("http://default.graph/")):
    """Reset the triplestore and load an RDF file into it with batched ``INSERT DATA`` updates.

    Args:
        server: SPARQL endpoint that is first cleared via ``reset_server`` and then written to.
        data_path: File to load. Its extension selects the parser:
            ``.nt`` (N-Triples), ``.nq`` (N-Quads) or ``.ttl`` (Turtle).
        batch_size: Maximum number of statements sent in a single update.
        default_graph_uri: Graph assigned to every triple read from a ``.nt`` or
            ``.ttl`` file; statements from ``.nq`` files keep the context of their quad.

    Raises:
        ValueError: If the file extension is not supported. Note that the
            server has already been reset when this is raised.
    """
    reset_server(server)
    f_path = get_path(data_path)

    # Extensions parsed into a plain Graph, mapped to the rdflib format name
    triple_formats = {'.nt': 'nt', '.ttl': 'turtle'}
    file_extension = os.path.splitext(f_path)[1].lower()
    if file_extension in triple_formats:
        g = Graph()
        g.parse(location=f_path, format=triple_formats[file_extension])
        quads = [(subj, pred, obj, default_graph_uri) for subj, pred, obj in g]
    elif file_extension == '.nq':
        g = Dataset(default_union=True)
        g.parse(location=f_path, format='nquads')
        quads = list(g.quads())
    else:
        raise ValueError(f"Unsupported file extension: {file_extension}")

    with SPARQLClient(server, timeout=60) as client:
        for start in range(0, len(quads), batch_size):
            statements = []
            for subj, pred, obj, ctx in quads[start:start + batch_size]:
                triple = f"{subj.n3()} {pred.n3()} {obj.n3()}"
                if ctx:
                    # Square brackets are stripped from the n3 form of the context;
                    # presumably some rdflib versions render it as "[<iri>]" - TODO confirm
                    graph_term = ctx.n3().replace('[', '').replace(']', '')
                    statements.append(f"GRAPH {graph_term} {{ {triple} }} ")
                else:
                    statements.append(f"{triple} . ")
            # Assemble the batch once with join rather than growing a string per statement
            client.update(f"INSERT DATA {{ {''.join(statements)} }}")
def store_curated_data(curator_obj:Curator, server:str) -> None:
    """Run a Creator over the curated data and indexes of ``curator_obj`` and upload the result to ``server``.

    The Creator is built with BASE_IRI, two ``None`` positional arguments and a
    fixed ORCID URL, followed by the curator's indexes; its ``creator(source=None)``
    result is wrapped in a Storer and uploaded in batches of 100.
    """
    creator_args = (
        curator_obj.data,
        curator_obj.finder,
        BASE_IRI,
        None,
        None,
        'https://orcid.org/0000-0002-8420-0696',
        curator_obj.index_id_ra,
        curator_obj.index_id_br,
        curator_obj.re_index,
        curator_obj.ar_index,
        curator_obj.VolIss,
    )
    created = Creator(*creator_args).creator(source=None)
    Storer(created).upload_all(server, base_dir=None, batch_size=100)
def _sort_index_rows(rows):
    """Sort ``rows`` in place by 'id', falling back to 'meta' and then 'br' on KeyError."""
    for field in ('id', 'meta'):
        try:
            rows.sort(key=lambda item: item[field])
        except KeyError:
            continue
        return
    # Last resort: a KeyError here propagates to the caller, as no further fallback exists
    rows.sort(key=lambda item: item['br'])


def prepare_to_test(data, name):
    """Curate ``data`` and load the expected fixtures of test case ``name`` for comparison.

    Redis counters and the triplestore are reset first; for test cases numbered
    above 12 the ``testcase_ts-13.ttl`` dataset is loaded into the triplestore.

    Args:
        data: Rows handed to ``Curator``.
        name: Test case number as a string, used to build the fixture file names.

    Returns:
        A ``(data_curated, testcase)`` pair of lists, each ordered as
        [rows, index_id_br, index_id_ra, ar_index, re_index, volume/issue index].
        In both row tables the 'id' field is replaced by its sorted list of
        tokens, and the index tables are sorted, so the two can be compared directly.
    """
    reset_redis_counters()

    reset_server(SERVER)
    if float(name) > 12:
        add_data_ts(SERVER, os.path.abspath(os.path.join('test', 'testcases', 'ts', 'testcase_ts-13.ttl')).replace('\\', '/'))

    testcase_dir = 'test/testcases/testcase_data'
    indices_dir = f'{testcase_dir}/indices/{name}'
    testcase_csv = get_path(f'{testcase_dir}/testcase_{name}_data.csv')
    testcase_id_br = get_path(f'{indices_dir}/index_id_br_{name}.csv')
    testcase_id_ra = get_path(f'{indices_dir}/index_id_ra_{name}.csv')
    testcase_ar = get_path(f'{indices_dir}/index_ar_{name}.csv')
    testcase_re = get_path(f'{indices_dir}/index_re_{name}.csv')
    testcase_vi = get_path(f'{indices_dir}/index_vi_{name}.json')

    counter_handler = get_counter_handler()
    settings = {'normalize_titles': True}
    curator_obj = Curator(data, SERVER, prov_config=PROV_CONFIG, counter_handler=counter_handler, settings=settings)
    curator_obj.curator()

    testcase_csv = get_csv_data(testcase_csv)
    # Identifier order inside the 'id' cell is irrelevant: compare sorted token lists
    for table in (testcase_csv, curator_obj.data):
        for row in table:
            row['id'] = sorted(row['id'].split())

    testcase_id_br = get_csv_data(testcase_id_br)
    testcase_id_ra = get_csv_data(testcase_id_ra)
    testcase_ar = get_csv_data(testcase_ar)
    testcase_re = get_csv_data(testcase_re)
    index_tables = (
        testcase_id_br, testcase_id_ra, testcase_ar, testcase_re,
        curator_obj.index_id_br, curator_obj.index_id_ra, curator_obj.ar_index, curator_obj.re_index,
    )
    for table in index_tables:
        _sort_index_rows(table)

    # Explicit encoding so the fixture is read the same way on every platform
    with open(testcase_vi, encoding='utf-8') as json_file:
        testcase_vi = json.load(json_file)

    testcase = [testcase_csv, testcase_id_br, testcase_id_ra, testcase_ar, testcase_re, testcase_vi]
    data_curated = [curator_obj.data, curator_obj.index_id_br, curator_obj.index_id_ra, curator_obj.ar_index,
                    curator_obj.re_index, curator_obj.VolIss]
    return data_curated, testcase
def prepareCurator(data:list, server:str=SERVER, resp_agents_only:bool=False) -> Curator:
    """Flush the Redis counters and build a curator over ``data``.

    Returns a ``RespAgentsCurator`` when ``resp_agents_only`` is true, otherwise
    a ``Curator`` configured with ``normalize_titles`` enabled. Both receive
    PROV_CONFIG and a fresh counter handler.
    """
    reset_redis_counters()
    counter_handler = get_counter_handler()
    if resp_agents_only:
        return RespAgentsCurator(data, server, prov_config=PROV_CONFIG, counter_handler=counter_handler)
    return Curator(
        data,
        server,
        prov_config=PROV_CONFIG,
        counter_handler=counter_handler,
        settings={'normalize_titles': True},
    )
152class test_Curator(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Reset the triplestore and load the default dataset of add_data_ts once for the class."""
        add_data_ts()
    def setUp(self):
        """Flush the Redis counters before each test."""
        reset_redis_counters()
    def tearDown(self):
        """Flush the Redis counters after each test."""
        reset_redis_counters()
163 def test_merge_entities_in_csv(self):
164 curator = prepareCurator(list())
165 curator.counter_handler.set_counter(4, 'id', supplier_prefix='060')
166 entity_dict = {'0601': {'ids': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China', 'others': []}}
167 id_dict = dict()
168 curator.merge_entities_in_csv(['doi:10.1787/eco_outlook-v2011-2-graph138-en'], '0601', 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China', entity_dict, id_dict)
169 expected_output = (
170 {'0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China', 'others': []}},
171 {'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0605'}
172 )
173 self.assertEqual((entity_dict, id_dict), expected_output)
175 def test_clean_id_list(self):
176 input = ['doi:10.001/B-1', 'wikidata:B1111111', 'OMID:br/060101']
177 output = Curator.clean_id_list(input, br=True)
178 expected_output = (['doi:10.001/b-1', 'wikidata:B1111111'], '060101')
179 self.assertEqual(output, expected_output)
181 def test_equalizer(self):
182 # Test equalizer with a row that contains an ID that can be resolved to an existing entity
183 row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': '', 'author': '', 'pub_date': '1972-12-01', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}
184 curator = prepareCurator(list())
185 curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
187 metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
188 curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)
190 curator.log[0] = {'id': {}}
191 curator.clean_id(row)
192 extracted_metaval = row['id']
193 self.assertEqual(extracted_metaval, '3757')
195 # Reset the row to test equalizer
196 row = {'id': '', 'title': '', 'author': '', 'pub_date': '1972-12-01', 'venue': '', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}
198 curator.rowcnt = 0
199 curator.log[0] = {
200 'id': {},
201 'author': {},
202 'venue': {},
203 'editor': {},
204 'publisher': {},
205 'page': {},
206 'volume': {},
207 'issue': {},
208 'pub_date': {},
209 'type': {},
210 'title': {}
211 }
212 curator.equalizer(row, extracted_metaval)
213 output = (curator.log, row)
215 expected_output = (
216 {0: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {}, 'editor': {}, 'publisher': {}, 'page': {}, 'volume': {}, 'issue': {}, 'pub_date': {'status': 'New value proposed'}, 'type': {}, 'title': {}}},
217 {'id': '', 'title': '', 'author': 'Curth, W. [omid:ra/6033]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416 issn:0003-987X]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': 'American Medical Association (ama) [omid:ra/3309 crossref:10]', 'editor': ''}
218 )
219 self.assertEqual(output, expected_output)
221 def test_clean_id_metaid_not_in_ts(self):
222 # A MetaId was specified, but it is not on ts. Therefore, it is invalid
223 curator = prepareCurator(list())
224 row = {'id': 'omid:br/131313', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
225 curator.log[0] = {'id': {}}
226 curator.clean_id(row)
227 expected_output = {'id': 'wannabe_0', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
228 self.assertEqual(row, expected_output)
230 def test_clean_id(self):
231 curator = prepareCurator(list())
232 row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Blasto', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}
233 curator.log[0] = {'id': {}}
234 curator.finder.get_everything_about_res(metavals=set(), identifiers={'doi:10.1001/archderm.104.1.106'}, vvis=set())
235 curator.clean_id(row)
236 expected_output = {'id': '3757', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': '', 'editor': ''}
237 self.assertEqual(row, expected_output)
239 def test_merge_duplicate_entities(self):
240 # Test merge_duplicate_entities with realistic data that includes an ID that resolves to an existing entity
241 data = [
242 {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''},
243 {'id': '', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-02', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''},
244 {'id': '', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-03', 'venue': 'Archives Of Blast [omid:br/4416]', 'volume': '105', 'issue': '2', 'page': '106-108', 'type': 'journal volume', 'publisher': '', 'editor': ''},
245 ]
246 curator = prepareCurator(list())
247 curator.data = data
248 curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
250 # Extract metavals and identifiers from each row
251 all_metavals = set()
252 all_identifiers = set()
253 all_vvis = set()
255 for row in data:
256 metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
257 all_metavals.update(metavals)
258 all_identifiers.update(identifiers)
259 all_vvis.update(vvis)
261 curator.finder.get_everything_about_res(metavals=all_metavals, identifiers=all_identifiers, vvis=all_vvis)
263 # Process each row with clean_id to get the actual metavals
264 for i, row in enumerate(data):
265 curator.log[i] = {'id': {}}
266 curator.rowcnt = i
267 curator.clean_id(row)
269 # Initialize log for merge_duplicate_entities
270 for i in range(3):
271 curator.log[i] = {
272 'id': {},
273 'author': {},
274 'venue': {},
275 'editor': {},
276 'publisher': {},
277 'page': {},
278 'volume': {},
279 'issue': {},
280 'pub_date': {},
281 'type': {}
282 }
284 # The brdict should be populated by clean_id, but we need to set up the "others" relationship
285 # The first row should have resolved to '3757', and the other rows should be wannabes
286 first_row_metaval = curator.data[0]['id'] # Should be '3757'
287 self.assertEqual(first_row_metaval, '3757')
289 # Set up the relationship between the existing entity and the wannabes
290 if first_row_metaval in curator.brdict:
291 curator.brdict[first_row_metaval]['others'].extend(['wannabe_0', 'wannabe_1'])
293 curator.merge_duplicate_entities()
294 output = (curator.data, curator.log)
296 expected_output = (
297 [
298 {'id': '3757', 'title': 'Multiple Keloids', 'author': 'Curth, W. [omid:ra/6033]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [issn:0003-987X omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': 'American Medical Association (ama) [omid:ra/3309 crossref:10]', 'editor': ''},
299 {'id': '3757', 'title': 'Multiple Keloids', 'author': 'Curth, W. [omid:ra/6033]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [issn:0003-987X omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': 'American Medical Association (ama) [omid:ra/3309 crossref:10]', 'editor': ''},
300 {'id': '3757', 'title': 'Multiple Keloids', 'author': 'Curth, W. [omid:ra/6033]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [issn:0003-987X omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': 'American Medical Association (ama) [omid:ra/3309 crossref:10]', 'editor': ''}
301 ],
302 {
303 0: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {}, 'editor': {}, 'publisher': {}, 'page': {}, 'volume': {}, 'issue': {}, 'pub_date': {}, 'type': {}},
304 1: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {'status': 'New value proposed'}, 'editor': {}, 'publisher': {}, 'page': {}, 'volume': {}, 'issue': {}, 'pub_date': {'status': 'New value proposed'}, 'type': {}},
305 2: {'id': {'status': 'Entity already exists'}, 'author': {}, 'venue': {'status': 'New value proposed'}, 'editor': {}, 'publisher': {}, 'page': {'status': 'New value proposed'}, 'volume': {'status': 'New value proposed'}, 'issue': {'status': 'New value proposed'}, 'pub_date': {'status': 'New value proposed'}, 'type': {'status': 'New value proposed'}}
306 }
307 )
308 self.assertEqual(output, expected_output)
310 def test_clean_vvi_all_data_on_ts(self):
311 # All data are already on the triplestore. They need to be retrieved and organized correctly
312 row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Keloids', 'author': '', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
313 curator = prepareCurator(list())
314 curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
316 metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
317 curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)
319 curator.log[0] = {'id': {}}
320 curator.clean_id(row)
322 curator.clean_vvi(row)
323 expected_output = {
324 "4416": {
325 "issue": {},
326 "volume": {
327 "104": {
328 "id": "4712",
329 "issue": {
330 "1": {
331 "id": "4713"
332 }
333 }
334 }
335 }
336 }
337 }
338 self.assertEqual(curator.vvi, expected_output)
340 def test_clean_vvi_new_venue(self):
341 # It is a new venue
342 row = {'id': 'wannabe_1', 'title': 'Money growth, interest rates, inflation and raw materials prices: China', 'author': '', 'pub_date': '2011-11-28', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '2', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
343 curator = prepareCurator(list())
344 curator.clean_vvi(row)
345 expected_output = {'wannabe_0': {'volume': {'2011': {'id': 'wannabe_1', 'issue': {'2': {'id': 'wannabe_2'}}}}, 'issue': {}}}
346 self.assertEqual(curator.vvi, expected_output)
348 def test_clean_vvi_volume_with_title(self):
349 # A journal volume having a title
350 row = [{'id': '', 'title': 'The volume title', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '2', 'page': '', 'type': 'journal volume', 'publisher': '', 'editor': ''}]
351 curator = prepareCurator(row)
352 curator.curator()
353 expected_output = [{'id': 'omid:br/0601', 'title': 'The Volume Title', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook [omid:br/0602]', 'volume': '', 'issue': '', 'page': '', 'type': 'journal volume', 'publisher': '', 'editor': ''}]
354 self.assertEqual(curator.data, expected_output)
356 def test_clean_vvi_invalid_volume(self):
357 # The data must be invalidated, because the resource is journal volume but an issue has also been specified
358 row = {'id': 'wannabe_1', 'title': '', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '2', 'page': '', 'type': 'journal volume', 'publisher': '', 'editor': ''}
359 curator = prepareCurator(list())
360 curator.clean_vvi(row)
361 expected_output = {'wannabe_0': {'volume': {}, 'issue': {}}}
362 self.assertEqual(curator.vvi, expected_output)
364 def test_clean_vvi_invalid_venue(self):
365 # The data must be invalidated, because the resource is journal but a volume has also been specified
366 row = {'id': 'wannabe_1', 'title': '', 'author': '', 'pub_date': '', 'venue': 'OECD Economic Outlook', 'volume': '2011', 'issue': '', 'page': '', 'type': 'journal', 'publisher': '', 'editor': ''}
367 curator = prepareCurator(list())
368 curator.clean_vvi(row)
369 expected_output = {'wannabe_0': {'volume': {}, 'issue': {}}}
370 self.assertEqual(curator.vvi, expected_output)
372 def test_clean_vvi_new_volume_and_issue(self):
373 # There is a row with vvi and no ids
374 row = {'id': '', 'title': '', 'author': '', 'pub_date': '', 'venue': 'Archives Of Surgery [omid:br/4480]', 'volume': '147', 'issue': '11', 'page': '', 'type': 'journal article', 'publisher': '', 'editor': ''}
375 curator = prepareCurator(list())
376 curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
378 metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
379 curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)
380 curator.clean_id(row)
381 curator.clean_vvi(row)
382 expected_output = {
383 "4480": {
384 "issue": {},
385 "volume": {
386 "147": {
387 "id": "4481",
388 "issue": {
389 "11": {
390 "id": "4482"
391 }
392 }
393 }
394 }
395 }
396 }
397 self.assertEqual(curator.vvi, expected_output)
399 def test_clean_ra_overlapping_surnames(self):
400 # The surname of one author is included in the surname of another.
401 row = {'id': 'wannabe_0', 'title': 'Giant Oyster Mushroom Pleurotus giganteus (Agaricomycetes) Enhances Adipocyte Differentiation and Glucose Uptake via Activation of PPARγ and Glucose Transporters 1 and 4 in 3T3-L1 Cells', 'author': 'Paravamsivam, Puvaneswari; Heng, Chua Kek; Malek, Sri Nurestri Abdul [orcid:0000-0001-6278-8559]; Sabaratnam, Vikineswary; M, Ravishankar Ram; Kuppusamy, Umah Rani', 'pub_date': '2016', 'venue': 'International Journal of Medicinal Mushrooms [issn:1521-9437]', 'volume': '18', 'issue': '9', 'page': '821-831', 'type': 'journal article', 'publisher': 'Begell House [crossref:613]', 'editor': ''}
402 curator = prepareCurator(list())
403 curator.brdict = {'wannabe_0': {'ids': ['doi:10.1615/intjmedmushrooms.v18.i9.60'], 'title': 'Giant Oyster Mushroom Pleurotus giganteus (Agaricomycetes) Enhances Adipocyte Differentiation and Glucose Uptake via Activation of PPARγ and Glucose Transporters 1 and 4 in 3T3-L1 Cells', 'others': []}}
404 curator.clean_ra(row, 'author')
405 output = (curator.ardict, curator.radict, curator.idra)
406 expected_output = (
407 {'wannabe_0': {'author': [('0601', 'wannabe_0'), ('0602', 'wannabe_1'), ('0603', 'wannabe_2'), ('0604', 'wannabe_3'), ('0605', 'wannabe_4'), ('0606', 'wannabe_5')], 'editor': [], 'publisher': []}},
408 {'wannabe_0': {'ids': [], 'others': [], 'title': 'Paravamsivam, Puvaneswari'}, 'wannabe_1': {'ids': [], 'others': [], 'title': 'Heng, Chua Kek'}, 'wannabe_2': {'ids': ['orcid:0000-0001-6278-8559'], 'others': [], 'title': 'Malek, Sri Nurestri Abdul'}, 'wannabe_3': {'ids': [], 'others': [], 'title': 'Sabaratnam, Vikineswary'}, 'wannabe_4': {'ids': [], 'others': [], 'title': 'M, Ravishankar Ram'}, 'wannabe_5': {'ids': [], 'others': [], 'title': 'Kuppusamy, Umah Rani'}},
409 {'orcid:0000-0001-6278-8559': '0601'}
410 )
411 self.assertEqual(output, expected_output)
413 def test_clean_ra_with_br_metaid(self):
414 # One author is in the triplestore, the other is not.
415 # br_metaval is a MetaID
416 # There are two ids for one author
417 row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Keloids', 'author': 'Curth, W.; McSorley, J. [orcid:0000-0003-0530-4305 schema:12345]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
418 curator = prepareCurator(list())
419 curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
420 metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
421 curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)
423 curator.log[0] = {'id': {}}
424 curator.clean_id(row)
426 resolved_metaval = row['id']
427 self.assertEqual(resolved_metaval, '3757')
428 curator.brdict = {resolved_metaval: {'ids': ['doi:10.1001/archderm.104.1.106'], 'title': 'Multiple Keloids', 'others': []}}
430 curator.clean_ra(row, 'author')
431 output = (curator.ardict, curator.radict, curator.idra)
432 expected_output = (
433 {'3757': {'author': [('9445', '6033'), ('0601', 'wannabe_0')], 'editor': [], 'publisher': []}},
434 {'6033': {'ids': [], 'others': [], 'title': 'Curth, W.'}, 'wannabe_0': {'ids': ['orcid:0000-0003-0530-4305', 'schema:12345'], 'others': [], 'title': 'McSorley, J.'}},
435 {'orcid:0000-0003-0530-4305': '0601', 'schema:12345': '0602'}
436 )
437 self.assertEqual(output, expected_output)
439 def test_clean_ra_with_br_wannabe(self):
440 # Authors not on the triplestore.
441 # br_metaval is a wannabe
442 row = {'id': 'wannabe_0', 'title': 'Multiple Keloids', 'author': 'Curth, W. [orcid:0000-0002-8420-0696] ; McSorley, J. [orcid:0000-0003-0530-4305]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
443 curator = prepareCurator(list())
444 curator.brdict = {'wannabe_0': {'ids': ['doi:10.1001/archderm.104.1.106'], 'title': 'Multiple Keloids', 'others': []}}
445 curator.wnb_cnt = 1
446 curator.clean_ra(row, 'author')
447 output = (curator.ardict, curator.radict, curator.idra)
448 expected_output = (
449 {'wannabe_0': {'author': [('0601', 'wannabe_1'), ('0602', 'wannabe_2')], 'editor': [], 'publisher': []}},
450 {'wannabe_1': {'ids': ['orcid:0000-0002-8420-0696'], 'others': [], 'title': 'Curth, W.'}, 'wannabe_2': {'ids': ['orcid:0000-0003-0530-4305'], 'others': [], 'title': 'McSorley, J.'}},
451 {'orcid:0000-0002-8420-0696': '0601', 'orcid:0000-0003-0530-4305': '0602'}
452 )
453 self.assertEqual(output, expected_output)
455 def test_clean_ra_with_empty_square_brackets(self):
456 # One author's name contains a closed square bracket.
457 row = {'id': 'doi:10.1001/archderm.104.1.106', 'title': 'Multiple Keloids', 'author': 'Bernacki, Edward J. [ ]', 'pub_date': '1971-07-01', 'venue': 'Archives Of Dermatology [omid:br/4416]', 'volume': '104', 'issue': '1', 'page': '106-107', 'type': 'journal article', 'publisher': '', 'editor': ''}
458 curator = prepareCurator(list())
459 curator.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
461 metavals, identifiers, vvis = curator.extract_identifiers_and_metavals(row, valid_dois_cache=set())
462 curator.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)
464 curator.log[0] = {'id': {}}
465 curator.clean_id(row)
467 resolved_metaval = row['id']
468 self.assertEqual(resolved_metaval, '3757')
469 curator.brdict = {resolved_metaval: {'ids': ['doi:10.1001/archderm.104.1.106'], 'title': 'Multiple Keloids', 'others': []}}
471 curator.clean_ra(row, 'author')
472 output = (curator.ardict, curator.radict, curator.idra)
473 expected_output = (
474 {'3757': {'author': [('9445', '6033'), ('0601', 'wannabe_0')], 'editor': [], 'publisher': []}},
475 {'6033': {'ids': [], 'others': [], 'title': 'Curth, W.'}, 'wannabe_0': {'ids': [], 'others': [], 'title': 'Bernacki, Edward J.'}},
476 {}
477 )
478 self.assertEqual(output, expected_output)
480 def test_meta_maker(self):
481 curator = prepareCurator(list())
482 curator.brdict = {'3757': {'ids': ['doi:10.1001/archderm.104.1.106', 'pmid:29098884'], 'title': 'Multiple Keloids', 'others': []}, '4416': {'ids': ['issn:0003-987X'], 'title': 'Archives Of Dermatology', 'others': []}}
483 curator.radict = {'6033': {'ids': [], 'others': [], 'title': 'Curth, W.'}, 'wannabe_0': {'ids': ['orcid:0000-0003-0530-4305', 'schema:12345'], 'others': [], 'title': 'Mcsorley, J.'}}
484 curator.ardict = {'3757': {'author': [('9445', '6033'), ('0601', 'wannabe_0')], 'editor': [], 'publisher': []}}
485 curator.vvi = {'4416': {'issue': {}, 'volume': {'107': {'id': '4733', 'issue': {'1': {'id': '4734'}, '2': {'id': '4735'}, '3': {'id': '4736'}, '4': {'id': '4737'}, '5': {'id': '4738'}, '6': {'id': '4739'}}}, '108': {'id': '4740', 'issue': {'1': {'id': '4741'}, '2': {'id': '4742'}, '3': {'id': '4743'}, '4': {'id': '4744'}}}, '104': {'id': '4712', 'issue': {'1': {'id': '4713'}, '2': {'id': '4714'}, '3': {'id': '4715'}, '4': {'id': '4716'}, '5': {'id': '4717'}, '6': {'id': '4718'}}}, '148': {'id': '4417', 'issue': {'12': {'id': '4418'}, '11': {'id': '4419'}}}, '100': {'id': '4684', 'issue': {'1': {'id': '4685'}, '2': {'id': '4686'}, '3': {'id': '4687'}, '4': {'id': '4688'}, '5': {'id': '4689'}, '6': {'id': '4690'}}}, '101': {'id': '4691', 'issue': {'1': {'id': '4692'}, '2': {'id': '4693'}, '3': {'id': '4694'}, '4': {'id': '4695'}, '5': {'id': '4696'}, '6': {'id': '4697'}}}, '102': {'id': '4698', 'issue': {'1': {'id': '4699'}, '2': {'id': '4700'}, '3': {'id': '4701'}, '4': {'id': '4702'}, '5': {'id': '4703'}, '6': {'id': '4704'}}}, '103': {'id': '4705', 'issue': {'1': {'id': '4706'}, '2': {'id': '4707'}, '3': {'id': '4708'}, '4': {'id': '4709'}, '5': {'id': '4710'}, '6': {'id': '4711'}}}, '105': {'id': '4719', 'issue': {'1': {'id': '4720'}, '2': {'id': '4721'}, '3': {'id': '4722'}, '4': {'id': '4723'}, '5': {'id': '4724'}, '6': {'id': '4725'}}}, '106': {'id': '4726', 'issue': {'6': {'id': '4732'}, '1': {'id': '4727'}, '2': {'id': '4728'}, '3': {'id': '4729'}, '4': {'id': '4730'}, '5': {'id': '4731'}}}}}}
486 curator.meta_maker()
487 output = (curator.brmeta, curator.rameta, curator.armeta)
488 expected_output = (
489 {'3757': {'ids': ['doi:10.1001/archderm.104.1.106', 'pmid:29098884', 'omid:br/3757'], 'title': 'Multiple Keloids', 'others': []}, '4416': {'ids': ['issn:0003-987X', 'omid:br/4416'], 'title': 'Archives Of Dermatology', 'others': []}},
490 {'6033': {'ids': ['omid:ra/6033'], 'others': [], 'title': 'Curth, W.'}, '0601': {'ids': ['orcid:0000-0003-0530-4305', 'schema:12345', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Mcsorley, J.'}},
491 {'3757': {'author': [('9445', '6033'), ('0601', '0601')], 'editor': [], 'publisher': []}}
492 )
493 self.assertEqual(output, expected_output)
495 def test_enricher(self):
496 curator = prepareCurator(list())
497 curator.data = [{'id': 'wannabe_0', 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China', 'author': '', 'pub_date': '2011-11-28', 'venue': 'wannabe_1', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': 'OECD [crossref:1963]', 'editor': ''}]
498 curator.brmeta = {
499 '0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'omid:br/0601'], 'others': ['wannabe_0'], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
500 '0602': {'ids': ['omid:br/0604'], 'others': ['wannabe_1'], 'title': 'OECD Economic Outlook'}
501 }
502 curator.armeta = {'0601': {'author': [], 'editor': [], 'publisher': [('0601', '0601')]}}
503 curator.rameta = {'0601': {'ids': ['crossref:1963', 'omid:ra/0601'], 'others': ['wannabe_2'], 'title': 'Oecd'}}
504 curator.remeta = dict()
505 curator.meta_maker()
506 curator.enrich()
507 output = curator.data
508 expected_output = [{'id': 'doi:10.1787/eco_outlook-v2011-2-graph138-en omid:br/0601', 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China', 'author': '', 'pub_date': '2011-11-28', 'venue': 'OECD Economic Outlook [omid:br/0604]', 'volume': '', 'issue': '', 'page': '', 'type': '', 'publisher': 'Oecd [crossref:1963 omid:ra/0601]', 'editor': ''}]
509 self.assertEqual(output, expected_output)
def test_indexer(self):
    """Test that indexer() correctly transforms internal dicts to list-of-dicts."""

    def build_vvi_tree():
        # Returns a brand-new tree on every call so that the expected value
        # never shares nested objects with the structure handed to the curator.
        return {
            'issue': {},
            'volume': {
                '107': {'id': '4733', 'issue': {'1': {'id': '4734'}, '2': {'id': '4735'}, '3': {'id': '4736'}, '4': {'id': '4737'}, '5': {'id': '4738'}, '6': {'id': '4739'}}},
                '108': {'id': '4740', 'issue': {'1': {'id': '4741'}, '2': {'id': '4742'}, '3': {'id': '4743'}, '4': {'id': '4744'}}},
                '104': {'id': '4712', 'issue': {'1': {'id': '4713'}, '2': {'id': '4714'}, '3': {'id': '4715'}, '4': {'id': '4716'}, '5': {'id': '4717'}, '6': {'id': '4718'}}},
                '148': {'id': '4417', 'issue': {'12': {'id': '4418'}, '11': {'id': '4419'}}},
                '100': {'id': '4684', 'issue': {'1': {'id': '4685'}, '2': {'id': '4686'}, '3': {'id': '4687'}, '4': {'id': '4688'}, '5': {'id': '4689'}, '6': {'id': '4690'}}},
                '101': {'id': '4691', 'issue': {'1': {'id': '4692'}, '2': {'id': '4693'}, '3': {'id': '4694'}, '4': {'id': '4695'}, '5': {'id': '4696'}, '6': {'id': '4697'}}},
                '102': {'id': '4698', 'issue': {'1': {'id': '4699'}, '2': {'id': '4700'}, '3': {'id': '4701'}, '4': {'id': '4702'}, '5': {'id': '4703'}, '6': {'id': '4704'}}},
                '103': {'id': '4705', 'issue': {'1': {'id': '4706'}, '2': {'id': '4707'}, '3': {'id': '4708'}, '4': {'id': '4709'}, '5': {'id': '4710'}, '6': {'id': '4711'}}},
                '105': {'id': '4719', 'issue': {'1': {'id': '4720'}, '2': {'id': '4721'}, '3': {'id': '4722'}, '4': {'id': '4723'}, '5': {'id': '4724'}, '6': {'id': '4725'}}},
                '106': {'id': '4726', 'issue': {'6': {'id': '4732'}, '1': {'id': '4727'}, '2': {'id': '4728'}, '3': {'id': '4729'}, '4': {'id': '4730'}, '5': {'id': '4731'}}}
            }
        }

    def by_id(entry):
        return entry['id']

    # Populate the curator's internal dictionaries by hand instead of running a full curation.
    curator = prepareCurator([])
    curator.filename = '0.csv'
    curator.idra = {'orcid:0000-0003-0530-4305': '0601', 'schema:12345': '0602'}
    curator.idbr = {'doi:10.1001/2013.jamasurg.270': '2585'}
    curator.armeta = {'2585': {'author': [('9445', '0602'), ('0601', '0601')], 'editor': [], 'publisher': []}}
    curator.remeta = {}
    curator.brmeta = {
        '0601': {
            'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'omid:br/0601'],
            'others': ['wannabe_0'],
            'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
        },
        '0602': {
            'ids': ['omid:br/0602'],
            'others': ['wannabe_1'],
            'title': 'OECD Economic Outlook'
        }
    }
    # The volume/issue tree is keyed by the temporary id listed in brmeta['0602']['others'].
    curator.vvi = {'wannabe_1': build_vvi_tree()}

    curator.meta_maker()
    curator.indexer()

    # Test in-memory data structures
    expected_index_ar = [{'meta': '2585', 'author': '9445, 0602; 0601, 0601', 'editor': '', 'publisher': ''}]
    expected_index_id_br = [{'id': 'doi:10.1001/2013.jamasurg.270', 'meta': '2585'}]
    expected_index_id_ra = [
        {'id': 'orcid:0000-0003-0530-4305', 'meta': '0601'},
        {'id': 'schema:12345', 'meta': '0602'}
    ]
    expected_index_re = [{'br': '', 're': ''}]
    # In the expected result the same tree is keyed by '0602' rather than 'wannabe_1'.
    expected_index_vi = {'0602': build_vvi_tree()}

    # Sort for comparison (order may vary due to dict iteration)
    curator.index_id_ra.sort(key=by_id)
    expected_index_id_ra.sort(key=by_id)

    self.assertEqual(curator.ar_index, expected_index_ar)
    self.assertEqual(curator.index_id_br, expected_index_id_br)
    self.assertEqual(curator.index_id_ra, expected_index_id_ra)
    self.assertEqual(curator.re_index, expected_index_re)
    self.assertEqual(curator.VolIss, expected_index_vi)
def test_is_a_valid_row(self):
    # Every row has the same eleven columns; only the non-empty ones are spelled out below.
    columns = ('id', 'title', 'author', 'pub_date', 'venue', 'volume', 'issue', 'page', 'type', 'publisher', 'editor')

    def make_row(**filled):
        row = dict.fromkeys(columns, '')
        row.update(filled)
        return row

    doi = 'doi:10.1001/2013.jamasurg.270'
    title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    author = 'Deckert, Ron J. [orcid:0000-0003-2100-6412]'

    # Each case pairs an input row with the verdict is_a_valid_row is expected to return for it.
    cases = [
        (make_row(), False),
        (make_row(volume='1', type='journal volume'), False),
        (make_row(issue='1', type='journal issue'), False),
        (make_row(id=doi), True),
        (make_row(title=title, author=author), False),
        (make_row(title=title, author=author, pub_date='03-01-2020', editor='book'), True),
        (make_row(id=doi, volume='5'), False),
    ]
    output = [is_a_valid_row(row) for row, _ in cases]
    expected_output = [verdict for _, verdict in cases]
    self.assertEqual(output, expected_output)
def test_get_preexisting_entities(self):
    # Curate a single row and check both the set of preexisting entities
    # collected by the curator and the resulting curated row.
    input_row = {
        'id': 'omid:br/2715',
        'title': 'Image Of The Year For 2012',
        'author': '',
        'pub_date': '',
        'venue': 'Archives Of Surgery [omid:br/4480]',
        'volume': '99',
        'issue': '1',
        'page': '',
        'type': 'journal article',
        'publisher': '',
        'editor': ''
    }
    curator = prepareCurator(data=[input_row])
    curator.curator()

    expected_entities = {'id/4270', 'ra/3309', 'ar/7240', 'br/4481', 'br/2715', 'br/4480', 'id/4274', 'id/2581', 'br/4487', 're/2350'}
    expected_rows = [{
        'id': 'doi:10.1001/2013.jamasurg.202 omid:br/2715',
        'title': 'Image Of The Year For 2012',
        'author': '',
        'pub_date': '2012-12-01',
        'venue': 'Archives Of Surgery [issn:0004-0010 omid:br/4480]',
        'volume': '147',
        'issue': '12',
        'page': '1140-1140',
        'type': 'journal article',
        'publisher': 'American Medical Association (ama) [crossref:10 omid:ra/3309]',
        'editor': ''
    }]
    actual = (curator.preexisting_entities, curator.data)
    self.assertEqual(actual, (expected_entities, expected_rows))
class test_RespAgentsCurator(unittest.TestCase):
    # Column names of a CSV row, in the order used by the input rows of these tests.
    _COLUMNS = ('id', 'title', 'author', 'pub_date', 'venue', 'volume', 'issue', 'page', 'type', 'publisher', 'editor')

    @classmethod
    def _row(cls, **filled):
        # Build a row whose columns are all empty strings except the ones given.
        row = dict.fromkeys(cls._COLUMNS, '')
        row.update(filled)
        return row

    def test_curator_publishers(self):
        reset()
        rows = [
            self._row(publisher='American Medical Association (AMA) [crossref:10 crossref:9999]'),
            self._row(publisher='Elsevier BV [crossref:78]'),
            self._row(publisher='Wiley [crossref:311]'),
        ]
        ra_curator = prepareCurator(data=rows, server=SERVER, resp_agents_only=True)
        ra_curator.curator(filename=None, path_csv=None)

        expected_rows = [
            self._row(publisher='American Medical Association (ama) [crossref:10 crossref:9999 omid:ra/3309]'),
            self._row(publisher='Elsevier Bv [crossref:78 omid:ra/0601]'),
            self._row(publisher='Wiley [crossref:311 omid:ra/0602]'),
        ]
        expected_radict = {
            '3309': {'ids': ['crossref:10', 'crossref:9999', 'omid:ra/3309'], 'others': [], 'title': 'American Medical Association (ama)'},
            'wannabe_0': {'ids': ['crossref:78', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Elsevier Bv'},
            'wannabe_1': {'ids': ['crossref:311', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Wiley'},
        }
        expected_idra = {'crossref:10': '4274', 'crossref:9999': '0601', 'crossref:78': '0602', 'crossref:311': '0603'}
        expected_rameta = {
            '3309': {'ids': ['crossref:10', 'crossref:9999', 'omid:ra/3309'], 'others': [], 'title': 'American Medical Association (ama)'},
            '0601': {'ids': ['crossref:78', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Elsevier Bv'},
            '0602': {'ids': ['crossref:311', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Wiley'},
        }
        actual = (ra_curator.data, ra_curator.radict, ra_curator.idra, ra_curator.rameta)
        self.assertEqual(actual, (expected_rows, expected_radict, expected_idra, expected_rameta))

    def test_curator(self):
        reset()
        rows = [
            self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412]'),
            self._row(author='Ruso, Juan M. [orcid:0000-0001-5909-6754]'),
            self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894]'),
        ]
        ra_curator = prepareCurator(data=rows, server=SERVER, resp_agents_only=True)
        ra_curator.curator(filename='resp_agents_curator_output', path_csv='test/testcases/testcase_data')

        expected_rows = [
            self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412 omid:ra/0601]'),
            self._row(author='Ruso, Juan M. [orcid:0000-0001-5909-6754 omid:ra/0602]'),
            self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894 omid:ra/0603]'),
        ]
        expected_radict = {
            'wannabe_0': {'ids': ['orcid:0000-0003-2100-6412', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Deckert, Ron J.'},
            'wannabe_1': {'ids': ['orcid:0000-0001-5909-6754', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Ruso, Juan M.'},
            'wannabe_2': {'ids': ['orcid:0000-0002-4487-6894', 'omid:ra/0603'], 'others': ['wannabe_2'], 'title': 'Sarmiento, Félix'},
        }
        expected_idra = {
            'orcid:0000-0003-2100-6412': '0601',
            'orcid:0000-0001-5909-6754': '0602',
            'orcid:0000-0002-4487-6894': '0603',
        }
        expected_rameta = {
            '0601': {'ids': ['orcid:0000-0003-2100-6412', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Deckert, Ron J.'},
            '0602': {'ids': ['orcid:0000-0001-5909-6754', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Ruso, Juan M.'},
            '0603': {'ids': ['orcid:0000-0002-4487-6894', 'omid:ra/0603'], 'others': ['wannabe_2'], 'title': 'Sarmiento, Félix'},
        }
        actual = (ra_curator.data, ra_curator.radict, ra_curator.idra, ra_curator.rameta)
        self.assertEqual(actual, (expected_rows, expected_radict, expected_idra, expected_rameta))

    def test_curator_ra_on_ts(self):
        # A responsible agent is already on the triplestore
        fixture_path = os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data.nt')).replace('\\', '/')
        add_data_ts(server=SERVER, data_path=fixture_path)
        self.maxDiff = None
        rows = [
            self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412]'),
            self._row(author='Mehrotra, Ateev [orcid:0000-0003-2223-1582]'),
            self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894]'),
        ]
        ra_curator = prepareCurator(data=rows, server=SERVER, resp_agents_only=True)
        ra_curator.curator()

        expected_rows = [
            self._row(author='Deckert, Ron J. [orcid:0000-0003-2100-6412 omid:ra/0601]'),
            self._row(author='Mehrotra, Ateev [orcid:0000-0003-2223-1582 omid:ra/3976]'),
            self._row(author='Sarmiento, Félix [orcid:0000-0002-4487-6894 omid:ra/0602]'),
        ]
        expected_radict = {
            'wannabe_0': {'ids': ['orcid:0000-0003-2100-6412', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Deckert, Ron J.'},
            '3976': {'ids': ['orcid:0000-0003-2223-1582', 'omid:ra/3976'], 'others': [], 'title': 'Mehrotra, Ateev'},
            'wannabe_1': {'ids': ['orcid:0000-0002-4487-6894', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Sarmiento, Félix'},
        }
        expected_idra = {
            'orcid:0000-0003-2100-6412': '0601',
            'orcid:0000-0003-2223-1582': '4351',
            'orcid:0000-0002-4487-6894': '0602',
        }
        expected_rameta = {
            '0601': {'ids': ['orcid:0000-0003-2100-6412', 'omid:ra/0601'], 'others': ['wannabe_0'], 'title': 'Deckert, Ron J.'},
            '3976': {'ids': ['orcid:0000-0003-2223-1582', 'omid:ra/3976'], 'others': [], 'title': 'Mehrotra, Ateev'},
            '0602': {'ids': ['orcid:0000-0002-4487-6894', 'omid:ra/0602'], 'others': ['wannabe_1'], 'title': 'Sarmiento, Félix'},
        }
        actual = (ra_curator.data, ra_curator.radict, ra_curator.idra, ra_curator.rameta)
        self.assertEqual(actual, (expected_rows, expected_radict, expected_idra, expected_rameta))
class test_id_worker(unittest.TestCase):
    # Tests for Curator.id_worker and Curator.conflict, run against a ResourceFinder
    # that is prepared once for the whole class in setUpClass.

    @classmethod
    def setUpClass(cls):
        # Load the fixture into the triplestore, then pre-fetch everything the tests
        # below look up, so each test can reuse cls.finder.
        fixture_path = os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data.nt')).replace('\\', '/')
        add_data_ts(SERVER, fixture_path)
        cls.finder = ResourceFinder(ts_url=SERVER, base_iri=BASE_IRI)
        cls.finder.get_everything_about_res(
            metavals={'omid:br/3309', 'omid:br/2438', 'omid:br/0601'},
            identifiers={
                'doi:10.1001/2013.jamasurg.270',
                'doi:10.1787/eco_outlook-v2011-2-graph138-en',
                'orcid:0000-0001-6994-8412',
                'doi:10.1001/archderm.104.1.106',
                'pmid:29098884'
            },
            vvis=set())

    def test_id_worker_1(self):
        # 1 EntityA is a new one
        curator = prepareCurator([])
        name = 'βέβαιος, α, ον'
        idslist = ['doi:10.1163/2214-8655_lgo_lgo_02_0074_ger']
        wannabe_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = (
            'wannabe_0',
            {'wannabe_0': {'ids': ['doi:10.1163/2214-8655_lgo_lgo_02_0074_ger'], 'others': [], 'title': 'βέβαιος, α, ον'}},
            {},
            {'doi:10.1163/2214-8655_lgo_lgo_02_0074_ger': '0601'},
            {},
            {}
        )
        self.assertEqual(output, expected_output)

    def test_id_worker_1_no_id(self):
        # 1 EntityA is a new one and has no ids
        curator = prepareCurator([])
        name = 'βέβαιος, α, ον'
        idslist = []
        wannabe_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = (
            'wannabe_0',
            {'wannabe_0': {'ids': [], 'others': [], 'title': 'βέβαιος, α, ον'}},
            {},
            {},
            {},
            {}
        )
        self.assertEqual(output, expected_output)

    def test_id_worker_2_id_ts(self):
        # 2 Retrieve EntityA data in triplestore to update EntityA inside CSV
        curator = prepareCurator([])
        curator.finder = self.finder
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        idslist = ['crossref:10']
        wannabe_id = curator.id_worker('editor', name, idslist, '', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = (
            '3309',
            {},
            {'3309': {'ids': ['crossref:10'], 'others': [], 'title': 'American Medical Association (ama)'}},
            {},
            {'crossref:10': '4274'},
            {}
        )
        self.assertEqual(output, expected_output)

    def test_id_worker_2_metaid_ts(self):
        # 2 Retrieve EntityA data in triplestore to update EntityA inside CSV
        curator = prepareCurator([])
        curator.finder = self.finder
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        # MetaID only
        wannabe_id = curator.id_worker('editor', name, [], '3309', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = (
            '3309',
            {},
            {'3309': {'ids': ['crossref:10'], 'others': [], 'title': 'American Medical Association (ama)'}},
            {},
            {'crossref:10': '4274'},
            {}
        )
        self.assertEqual(output, expected_output)

    def test_id_worker_2_id_metaid_ts(self):
        # 2 Retrieve EntityA data in triplestore to update EntityA inside CSV
        curator = prepareCurator([])
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        curator.finder = self.finder
        # ID and MetaID
        wannabe_id = curator.id_worker('publisher', name, ['crossref:10'], '3309', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = (
            '3309',
            {},
            {'3309': {'ids': ['crossref:10'], 'others': [], 'title': 'American Medical Association (ama)'}},
            {},
            {'crossref:10': '4274'},
            {}
        )
        self.assertEqual(output, expected_output)

    def test_id_worker_3(self):
        # 2 Retrieve EntityA data in triplestore to update EntityA inside CSV. MetaID on ts has precedence
        curator = prepareCurator([])
        name = 'American Medical Association (AMA)'  # *(ama) on the ts. The name on the ts must prevail
        curator.finder = self.finder
        # ID and MetaID, but it's omid:ra/3309 on ts
        wannabe_id = curator.id_worker('publisher', name, ['crossref:10'], '33090', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=True)
        output = (wannabe_id, curator.brdict, curator.radict, curator.idbr, curator.idra, curator.log)
        expected_output = (
            '3309',
            {},
            {'3309': {'ids': ['crossref:10'], 'others': [], 'title': 'American Medical Association (ama)'}},
            {},
            {'crossref:10': '4274'},
            {}
        )
        self.assertEqual(output, expected_output)

    def test_id_worker_conflict(self):
        # there's no meta or there was one but it didn't exist
        # There are other ids that already exist, but refer to multiple entities on ts.
        # Conflict!
        idslist = ['doi:10.1001/2013.jamasurg.270']
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
        curator = prepareCurator([])
        curator.finder = self.finder
        curator.log[0] = {'id': {}}
        id_dict = {}
        # Only the conflict function is tested here, not id_worker
        metaval = curator.conflict(idslist, name, id_dict, 'id')
        output = (metaval, curator.brdict, curator.log, id_dict)
        expected_output = (
            'wannabe_0',
            {'wannabe_0': {'ids': ['doi:10.1001/2013.jamasurg.270'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'}},
            {0: {'id': {'Conflict entity': 'wannabe_0'}}},
            {'doi:10.1001/2013.jamasurg.270': '2585'}
        )
        self.assertEqual(output, expected_output)

    def test_conflict_br(self):
        # No MetaId, an identifier to which two separate br point: there is a conflict, and a new entity must be created
        curator = prepareCurator([])
        curator.log[0] = {'id': {}}
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
        idslist = ['doi:10.1001/2013.jamasurg.270']
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.log)
        # Either of these two entities is an accepted outcome; the expected tuples differ
        # only in the chosen id and in the title attached to it.
        candidates = {
            '2719': 'Patient Satisfaction As A Possible Indicator Of Quality Surgical Care',
            '2720': 'Pediatric Injury Outcomes In Racial/Ethnic Minorities In California',
        }
        acceptable = [
            (
                omid,
                {'doi:10.1001/2013.jamasurg.270': '2585'},
                {},
                {omid: {'ids': ['doi:10.1001/2013.jamasurg.270'], 'others': [], 'title': title}},
                {0: {'id': {}}}
            )
            for omid, title in candidates.items()
        ]
        # assertIn reports the actual output and the candidates on failure,
        # unlike assertTrue on a boolean expression.
        self.assertIn(output, acceptable)

    def test_conflict_ra(self):
        # No MetaId, an identifier to which two separate ra point: there is a conflict, and a new entity must be created
        idslist = ['orcid:0000-0001-6994-8412']
        name = 'Alarcon, Louis H.'
        curator = prepareCurator([])
        curator.finder = self.finder
        curator.log[0] = {'author': {}}
        meta_id = curator.id_worker('author', name, idslist, '', ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)
        # Two ids are accepted for the resulting agent; everything else is identical.
        acceptable = [
            (
                omid,
                {},
                {'orcid:0000-0001-6994-8412': '4475'},
                {},
                {omid: {'ids': ['orcid:0000-0001-6994-8412'], 'others': [], 'title': 'Alarcon, Louis H.'}},
                {0: {'author': {}}}
            )
            for omid in ('4940', '1000000')
        ]
        # assertIn reports the actual output and the candidates on failure,
        # unlike assertTrue on a boolean expression.
        self.assertIn(output, acceptable)

    def test_conflict_suspect_id_among_existing(self):
        # ID already exist in entity_dict and refer to one entity having a MetaID, but there is another ID not in entity_dict that highlights a conflict on ts
        br_dict = {
            'omid:br/0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
            'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
            'omid:br/0603': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
        }
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'  # The first title must have precedence (China, not Japan)
        idslist = ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'doi:10.1001/2013.jamasurg.270']
        curator = prepareCurator(get_csv_data(REAL_DATA_CSV))
        curator.log[0] = {'id': {}}
        curator.brdict = br_dict
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)
        expected_output = (
            'wannabe_0',
            {
                'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601',
                'doi:10.1001/2013.jamasurg.270': '2585'
            },
            {},
            {
                'omid:br/0601': {
                    'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'],
                    'others': [],
                    'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
                },
                'omid:br/0602': {
                    'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'],
                    'others': [],
                    'title': 'Contributions To GDP Growth And Inflation: South Africa'
                },
                'omid:br/0603': {
                    'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'],
                    'others': [],
                    'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'
                },
                'wannabe_0': {
                    'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'doi:10.1001/2013.jamasurg.270'],
                    'others': [],
                    'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'
                }
            },
            {},
            {0: {'id': {'Conflict entity': 'wannabe_0'}}}
        )
        self.assertEqual(output, expected_output)

    def test_conflict_suspect_id_among_wannabe(self):
        # ID already exist in entity_dict and refer to one temporary, but there is another ID not in entity_dict that highlights a conflict on ts
        br_dict = {
            'wannabe_0': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'others': [], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'},
            'wannabe_2': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
            'wannabe_3': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
        }
        name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'  # The first title must have precedence (China, not Japan)
        idslist = ['doi:10.1787/eco_outlook-v2011-2-graph138-en', 'doi:10.1001/2013.jamasurg.270']
        curator = prepareCurator(get_csv_data(REAL_DATA_CSV))
        curator.log[0] = {'id': {}}
        curator.brdict = br_dict
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)

        def expected_for(omid, title):
            # Expected state when wannabe_0 has been merged into the entity `omid`.
            return (
                omid,
                {
                    'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601',
                    'doi:10.1001/2013.jamasurg.270': '2585'
                },
                {},
                {
                    omid: {
                        'ids': ['doi:10.1001/2013.jamasurg.270', 'doi:10.1787/eco_outlook-v2011-2-graph138-en'],
                        'others': ['wannabe_0'],
                        'title': title
                    },
                    'wannabe_2': {
                        'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'],
                        'others': [],
                        'title': 'Contributions To GDP Growth And Inflation: South Africa'
                    },
                    'wannabe_3': {
                        'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'],
                        'others': [],
                        'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'
                    }
                },
                {},
                {0: {'id': {}}}
            )

        acceptable = [
            expected_for('2720', 'Pediatric Injury Outcomes In Racial/Ethnic Minorities In California'),
            expected_for('2719', 'Patient Satisfaction As A Possible Indicator Of Quality Surgical Care'),
        ]
        # assertIn reports the actual output and the candidates on failure,
        # unlike assertTrue on a boolean expression.
        self.assertIn(output, acceptable)

    def test_id_worker_4(self):
        # 4 Merge data from EntityA (CSV) with data from EntityX (CSV), update both with data from EntityA (RDF)
        br_dict = {
            'wannabe_0': {'ids': ['doi:10.1001/archderm.104.1.106'], 'others': [], 'title': 'Multiple eloids'},
            'wannabe_1': {'ids': ['doi:10.1001/archderm.104.1.106'], 'others': [], 'title': 'Multiple Blastoids'},
        }
        name = 'Multiple Palloids'
        idslist = ['doi:10.1001/archderm.104.1.106', 'pmid:29098884']
        curator = prepareCurator([])
        curator.brdict = br_dict
        curator.wnb_cnt = 2
        curator.finder = self.finder
        meta_id = curator.id_worker('id', name, idslist, '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
        output = (meta_id, curator.idbr, curator.idra, curator.log)
        expected_output = (
            '3757',
            {'doi:10.1001/archderm.104.1.106': '3624', 'pmid:29098884': '2000000'},
            {},
            {}
        )
        self.assertEqual(output, expected_output)
954class test_id_worker_with_reset(unittest.TestCase):
def test_id_worker_2_meta_in_entity_dict(self):
    # MetaID exists among data.
    # MetaID already in entity_dict (no care about conflicts, we have a MetaID specified)
    # 2 Retrieve EntityA data to update EntityA inside CSV
    reset_server()

    # First curate the real data and store the result on the triplestore.
    seeded_curator = prepareCurator(get_csv_data(REAL_DATA_CSV))
    seeded_curator.curator()
    store_curated_data(seeded_curator, SERVER)

    doi = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'
    name = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    curator_empty = prepareCurator([])
    curator_empty.finder.get_everything_about_res(metavals=set(), identifiers={doi}, vvis=set())

    # put metaval in entity_dict (the return value of this first call is not needed)
    curator_empty.id_worker('id', name, [], '0601', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)
    # metaval is in entity_dict
    meta_id = curator_empty.id_worker('id', name, [], '0601', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)

    actual = (
        meta_id,
        curator_empty.brdict,
        curator_empty.radict,
        curator_empty.idbr,
        curator_empty.idra,
        curator_empty.log,
    )
    expected = (
        '0601',
        {'0601': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'], 'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China', 'others': []}},
        {},
        {'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601'},
        {},
        {},
    )
    self.assertEqual(actual, expected)
def test_conflict_existing(self):
    # ID already exist in entity_dict but refer to multiple entities having a MetaID
    reset_server()
    shared_doi = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'
    china_title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    # Two of the three entries below (0601 and 0603) carry the same DOI.
    br_dict = {
        'omid:br/0601': {'ids': [shared_doi], 'others': [], 'title': china_title},
        'omid:br/0602': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
        'omid:br/0603': {'ids': [shared_doi], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
    }
    curator = prepareCurator([])
    curator.log[0] = {'id': {}}
    curator.brdict = br_dict
    meta_id = curator.id_worker('id', china_title, [shared_doi], '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)

    actual = (meta_id, curator.idbr, curator.idra, curator.brdict, curator.radict, curator.log)
    # The expected brdict is written out in full rather than derived from br_dict,
    # because the curator holds a reference to br_dict itself.
    expected_brdict = {
        'omid:br/0601': {
            'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'],
            'others': [],
            'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
        },
        'omid:br/0602': {
            'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'],
            'others': [],
            'title': 'Contributions To GDP Growth And Inflation: South Africa'
        },
        'omid:br/0603': {
            'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'],
            'others': [],
            'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'
        },
        'wannabe_0': {
            'ids': ['doi:10.1787/eco_outlook-v2011-2-graph138-en'],
            'others': [],
            'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
        }
    }
    expected = (
        'wannabe_0',
        {'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601'},
        {},
        expected_brdict,
        {},
        {0: {'id': {'Conflict entity': 'wannabe_0'}}},
    )
    self.assertEqual(actual, expected)
def test_id_worker_5(self):
    # ID already exist in entity_dict and refer to one or more temporary entities -> collective merge
    reset_server()
    shared_doi = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'
    china_title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China'
    # wannabe_0 and wannabe_2 are both registered under the same DOI.
    br_dict = {
        'wannabe_0': {'ids': [shared_doi], 'others': [], 'title': china_title},
        'wannabe_1': {'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'], 'others': [], 'title': 'Contributions To GDP Growth And Inflation: South Africa'},
        'wannabe_2': {'ids': [shared_doi], 'others': [], 'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal'},
    }
    curator = prepareCurator([])
    curator.brdict = br_dict
    curator.wnb_cnt = 2
    meta_id = curator.id_worker('id', china_title, [shared_doi], '', ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False)

    actual = (meta_id, curator.idbr, curator.idra, curator.log)
    expected = (
        'wannabe_0',
        {'doi:10.1787/eco_outlook-v2011-2-graph138-en': '0601'},
        {},
        {},
    )
    self.assertEqual(actual, expected)
def test_no_conflict_existing(self):
    # Scenario: the incoming ID is already in the entity dictionary and
    # belongs to exactly one entity, so no conflict is expected.
    reset_server()
    known_doi = 'doi:10.1787/eco_outlook-v2011-2-graph138-en'
    curator = prepareCurator([])
    curator.log[0] = {'id': {}}
    curator.brdict = {
        'omid:br/0601': {
            'ids': [known_doi],
            'others': [],
            'title': 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: China',
        },
        'omid:br/0602': {
            'ids': ['doi:10.1787/eco_outlook-v2011-2-graph150-en'],
            'others': [],
            'title': 'Contributions To GDP Growth And Inflation: South Africa',
        },
        'omid:br/0603': {
            'ids': ['doi:10.1787/eco_outlook-v2011-2-graph18-en'],
            'others': [],
            'title': 'Official Loans To The Governments Of Greece, Ireland And Portugal',
        },
    }
    # The title already stored for the entity (China) is meant to win over
    # this one (Japan). Only the returned id, the two id indexes and the log
    # are asserted below; brdict itself is not inspected.
    incoming_title = 'Money Growth, Interest Rates, Inflation And Raw Materials Prices: Japan'
    meta_id = curator.id_worker(
        'id', incoming_title, [known_doi], '',
        ra_ent=False, br_ent=True, vvi_ent=False, publ_entity=False,
    )
    observed = (meta_id, curator.idbr, curator.idra, curator.log)
    # The log entry for row 0 must stay empty: no conflict was recorded.
    wanted = ('omid:br/0601', {known_doi: '0601'}, {}, {0: {'id': {}}})
    self.assertEqual(observed, wanted)
def test_metaid_in_prov(self):
    # Scenario: the requested MetaID is not found in the data, but it is
    # found in the provenance metadata.
    reset_server()
    # Load the dataset that ships provenance, using forward slashes so the
    # path is the same on every platform.
    prov_dump = os.path.abspath(os.path.join('test', 'testcases', 'ts', 'real_data_with_prov.nq'))
    add_data_ts(server=SERVER, data_path=prov_dump.replace('\\', '/'))
    curator = prepareCurator([])
    resolved = curator.id_worker(
        'id', '', [], '4321',
        ra_ent=True, br_ent=False, vvi_ent=False, publ_entity=False,
    )
    self.assertEqual(resolved, '38013')
class testcase_01(unittest.TestCase):
    def test(self):
        # testcase1: 2 different issues of the same venue (no volume)
        rows = get_csv_data(MANUAL_DATA_CSV)
        data_curated, testcase = prepare_to_test([rows[0], rows[5]], '01')
        # Compare lengths first: iterating only over the curated output would
        # let the test pass when it is shorter than the expected data (or
        # empty), hiding missing results.
        self.assertEqual(len(data_curated), len(testcase))
        # Check each component separately so a failure names the position
        # that differs instead of dumping the whole structure.
        for pos, (actual, expected) in enumerate(zip(data_curated, testcase)):
            with self.subTest(pos=pos):
                self.assertEqual(actual, expected)
class testcase_02(unittest.TestCase):
    def test(self):
        # testcase2: two distinct volumes belonging to the same venue (no issue)
        rows = get_csv_data(MANUAL_DATA_CSV)
        selected = [rows[1], rows[3]]
        curated, expected = prepare_to_test(selected, '02')
        self.assertEqual(curated, expected)
class testcase_03(unittest.TestCase):
    def test(self):
        # testcase3: two distinct issues belonging to the same volume
        rows = get_csv_data(MANUAL_DATA_CSV)
        selected = [rows[2], rows[4]]
        curated, expected = prepare_to_test(selected, '03')
        self.assertEqual(curated, expected)
class testcase_04(unittest.TestCase):
    def test(self):
        # testcase4: 2 new IDS and different date format (yyyy-mm and yyyy-mm-dd)
        rows = get_csv_data(MANUAL_DATA_CSV)
        data_curated, testcase = prepare_to_test([rows[6], rows[7]], '04')
        # Compare lengths first: iterating only over the curated output would
        # let the test pass when it is shorter than the expected data (or
        # empty), hiding missing results.
        self.assertEqual(len(data_curated), len(testcase))
        # Check each component separately so a failure names the position
        # that differs instead of dumping the whole structure.
        for pos, (actual, expected) in enumerate(zip(data_curated, testcase)):
            with self.subTest(pos=pos):
                self.assertEqual(actual, expected)
class testcase_05(unittest.TestCase):
    def test(self):
        # testcase5: a single row that carries no identifier at all
        rows = get_csv_data(MANUAL_DATA_CSV)
        selected = [rows[8]]
        curated, expected = prepare_to_test(selected, '05')
        self.assertEqual(curated, expected)
class testcase_06(unittest.TestCase):
    def test(self):
        # testcase6: rows 9..32 of the manual data, exercising ALL types
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[9:33], '06')
        self.assertEqual(curated, expected)
class testcase_07(unittest.TestCase):
    def test(self):
        # testcase7: every journal-related type, each with an editor
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[34:40], '07')
        self.assertEqual(curated, expected)
class testcase_08(unittest.TestCase):
    def test(self):
        # testcase8: every book-related type, each with an editor
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[40:43], '08')
        self.assertEqual(curated, expected)
class testcase_09(unittest.TestCase):
    def test(self):
        # testcase9: every proceedings-related type, each with an editor
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[43:45], '09')
        self.assertEqual(curated, expected)
class testcase_10(unittest.TestCase):
    def test(self):
        # testcase10: one book inside a book series, one book inside a book set
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[45:49], '10')
        self.assertEqual(curated, expected)
class testcase_11(unittest.TestCase):
    def test(self):
        # testcase11: entities updated in real time while rows are processed
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[49:52], '11')
        self.assertEqual(curated, expected)
class testcase_12(unittest.TestCase):
    def test(self):
        # testcase12: cleaning of names, titles and identifiers
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[52:53], '12')
        self.assertEqual(curated, expected)
class testcase_13(unittest.TestCase):
    # testcase13: extensive ID-cleaning scenarios

    def test1(self):
        # Three rows: a BR with an explicit meta id, a wannabe with a new id,
        # and a meta-specified BR whose id is tied to that wannabe.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[53:56], '13.1')
        self.assertEqual(curated, expected)

    def test2(self):
        # Conflict resolved by META precedence: the BR has a meta id plus an id
        # linked to a different meta id; the first specified meta id wins.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[56:57], '13.2')
        self.assertEqual(curated, expected)

    def test3(self):
        # Conflict: a BR whose id is shared by two meta ids. Either of the two
        # recorded outcomes ('13.3' or '13.31') is accepted.
        rows = get_csv_data(MANUAL_DATA_CSV)
        selected = rows[57:58]
        curated, first_expected = prepare_to_test(selected, '13.3')
        # The second call is only needed for its expected data.
        _, second_expected = prepare_to_test(selected, '13.31')
        matches_any = any(
            curated == candidate for candidate in (first_expected, second_expected)
        )
        self.assertTrue(matches_any)
class testcase_14(unittest.TestCase):

    def test1(self):
        # Update of an existing author sequence: one new author, plus an
        # existing author that has no id and is matched through a (badly
        # written) "surname, name" string.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[58:59], '14.1')
        self.assertEqual(curated, expected)

    def test2(self):
        # Same sequence in a different order, carrying new ids.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[59:60], '14.2')
        self.assertEqual(curated, expected)

    def test3(self):
        # RA: a single author that comes with two different ids.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[60:61], '14.3')
        self.assertEqual(curated, expected)

    def test4(self):
        # Three rows: an RA with an explicit meta id, a wannabe RA with a new
        # id, and a meta-specified RA whose id is tied to that wannabe.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[61:64], '14.4')
        self.assertEqual(curated, expected)
class testcase_15(unittest.TestCase):

    def test1(self):
        # The venue, volume and issue are all already in the triplestore.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[64:65], '15.1')
        self.assertEqual(curated, expected)

    def test2(self):
        # Venue conflict.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[65:66], '15.2')
        self.assertEqual(curated, expected)

    def test3(self):
        # What the triplestore knows as a venue now appears as the BR.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[66:67], '15.3')
        self.assertEqual(curated, expected)

    def test4(self):
        # What the triplestore knows as a BR now appears as the venue.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[67:68], '15.4')
        self.assertEqual(curated, expected)

    def test5(self):
        # What the triplestore knows as a volume now appears as the BR.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[71:72], '15.5')
        self.assertEqual(curated, expected)

    def test6(self):
        # The BR itself is a volume.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[72:73], '15.6')
        self.assertEqual(curated, expected)

    def test7(self):
        # What the triplestore knows as an issue now appears as the BR.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[73:74], '15.7')
        self.assertEqual(curated, expected)

    def test8(self):
        # The BR itself is an issue.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[74:75], '15.8')
        self.assertEqual(curated, expected)
class testcase_16(unittest.TestCase):

    def test1(self):
        # Date cleaning: the row carries the invalid date 2019-02-29.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[75:76], '16.1')
        self.assertEqual(curated, expected)

    def test2(self):
        # The resource embodiment (re) already exists.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[76:77], '16.2')
        self.assertEqual(curated, expected)

    def test3(self):
        # A given name is supplied for an RA that only has a family name in
        # the triplestore.
        rows = get_csv_data(MANUAL_DATA_CSV)
        curated, expected = prepare_to_test(rows[77:78], '16.3')
        self.assertEqual(curated, expected)
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':  # pragma: no cover
    unittest.main()