Coverage for oc_meta/plugins/multiprocess/resp_agents_creator.py: 98%
98 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-07-14 14:06 +0000
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3# Copyright (c) 2022 Arcangelo Massari <arcangelo.massari@unibo.it>
4#
5# Permission to use, copy, modify, and/or distribute this software for any purpose
6# with or without fee is hereby granted, provided that the above copyright notice
7# and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
15# SOFTWARE.
17from __future__ import annotations
19import re
21from oc_meta.core.creator import Creator
22from oc_meta.lib.finder import ResourceFinder
23from oc_meta.lib.master_of_regex import (comma_and_spaces, name_and_ids,
24 semicolon_in_people_field)
25from rdflib import Graph, URIRef
27from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler
28from oc_ocdm.graph import GraphSet
class RespAgentsCreator(Creator):
    """Create responsible-agent (RA) entities from the people fields of rows.

    For every row in ``data`` the ``author``, ``publisher`` and ``editor``
    fields are parsed, and one RA entity per OMID found is added to
    ``self.setgraph`` through ``GraphSet.add_ra``; the remaining identifiers
    of each agent are then linked through ``self.id_creator``.
    """

    def __init__(
        self,
        data: list,
        endpoint: str,
        base_iri: str,
        counter_handler: RedisCounterHandler,
        supplier_prefix: str,
        resp_agent: str,
        ra_index: dict,
        preexisting_entities: set,
        everything_everywhere_allatonce: Graph,
        settings: dict | None = None,
        meta_config_path: str | None = None,
    ):
        """Store the configuration and build the graph set and resource finder.

        Args:
            data: rows to process; each row is indexed by the keys
                ``'author'``, ``'publisher'`` and ``'editor'``.
            endpoint: triplestore URL handed to ``ResourceFinder`` as ``ts_url``.
            base_iri: prefix prepended to an OMID to build the entity IRI.
            counter_handler: counter handler passed to ``GraphSet`` as
                ``custom_counter_handler``.
            supplier_prefix: supplier prefix passed to ``GraphSet``.
            resp_agent: value passed as first argument to ``GraphSet.add_ra``.
            ra_index: index of responsible agents; it is transformed by
                ``self.indexer_id`` before being stored.
            preexisting_entities: identifiers (OMIDs without the ``omid:``
                prefix) for which a preexisting subgraph must be retrieved.
            everything_everywhere_allatonce: graph handed to
                ``ResourceFinder`` as ``local_g``.
            settings: optional settings forwarded to ``ResourceFinder``.
            meta_config_path: optional path forwarded to ``ResourceFinder``.
        """
        # NOTE(review): the parent initializer is not called (unchanged from
        # the original); confirm the inherited helpers (`indexer_id`,
        # `id_creator`) rely only on the attributes set below.
        self.url = base_iri
        self.setgraph = GraphSet(
            self.url,
            supplier_prefix=supplier_prefix,
            wanted_label=False,
            custom_counter_handler=counter_handler,
        )
        self.finder = ResourceFinder(
            ts_url=endpoint,
            base_iri=base_iri,
            local_g=everything_everywhere_allatonce,
            settings=settings,
            meta_config_path=meta_config_path,
        )
        self.resp_agent = resp_agent
        self.ra_id_schemas = {'crossref', 'orcid', 'viaf', 'wikidata'}
        self.br_id_schemas = {'doi', 'issn', 'isbn', 'pmid', 'pmcid', 'url', 'wikidata', 'wikipedia'}
        self.schemas = self.ra_id_schemas.union(self.br_id_schemas)
        self.ra_index = self.indexer_id(ra_index)
        self.preexisting_entities = preexisting_entities
        self.preexisting_graphs = dict()
        self.data = data
        self.counter_handler = counter_handler

    def creator(self, source=None):
        """Process every row of ``self.data`` and return the populated graph set.

        Args:
            source: value stored in ``self.src`` and forwarded as ``source``
                to every ``GraphSet.add_ra`` call.

        Returns:
            The ``GraphSet`` held in ``self.setgraph``.
        """
        self.src = source
        for row in self.data:
            # Read the three fields up front so that a missing key fails
            # before any entity of the row has been created.
            authors = row['author']
            publisher = row['publisher']
            editor = row['editor']
            self.author_action(authors)
            if publisher:
                self.publisher_action(publisher)
            if editor:
                self.editor_action(editor)
        return self.setgraph

    def author_action(self, authors):
        """Create one RA entity per author listed in ``authors``.

        Args:
            authors: people field split on ``semicolon_in_people_field``;
                a falsy value is ignored.
        """
        if not authors:
            return
        for author in re.split(semicolon_in_people_field, authors):
            self._register_agent(author, personal_name=True)

    def publisher_action(self, publisher):
        """Create the RA entity of the publisher described by ``publisher``.

        Args:
            publisher: single publisher entry; a falsy value is ignored.
        """
        if not publisher:
            return
        self._register_agent(publisher, personal_name=False)

    def editor_action(self, editor):
        """Create one RA entity per editor listed in ``editor``.

        Args:
            editor: people field split on ``semicolon_in_people_field``;
                a falsy value is ignored.
        """
        if not editor:
            return
        for single_editor in re.split(semicolon_in_people_field, editor):
            self._register_agent(single_editor, personal_name=True)

    def _register_agent(self, entry, *, personal_name):
        """Create the RA entity for one entry, name it and link its identifiers.

        Args:
            entry: text searched with ``name_and_ids``; group 1 is used as the
                name and group 2 as a whitespace-separated identifier list.
            personal_name: when true, a name containing a comma is split with
                ``comma_and_spaces`` into family name (first part) and given
                name (second part); otherwise the whole name is stored as is.

        Raises:
            AttributeError: if ``entry`` does not match ``name_and_ids``
                (``re.search`` returns ``None``), as in the original code.
            ValueError: if none of the identifiers contains ``omid:``, so no
                entity exists to attach the identifiers to.
        """
        parsed = re.search(name_and_ids, entry)
        name = parsed.group(1)
        identifiers = parsed.group(2).split()

        # Reset for every entry: otherwise an entry lacking an OMID would
        # reuse the entity created for a previous entry.
        agent = None
        for identifier in identifiers:
            if 'omid:' not in identifier:
                continue
            agent = self._add_ra_entity(identifier.replace('omid:', ''))
            if personal_name and ',' in name:
                name_parts = re.split(comma_and_spaces, name)
                given_name = name_parts[1]
                # A blank given name is skipped, but the family name is
                # still recorded.
                if given_name.strip():
                    agent.has_given_name(given_name)
                agent.has_family_name(name_parts[0])
            else:
                agent.has_name(name)

        if agent is None:
            raise ValueError(
                f"No 'omid:' identifier found for responsible agent: {entry!r}"
            )

        # Every identifier (the OMID included, prefix intact) is handed to
        # id_creator for the entity created above.
        for identifier in identifiers:
            self.id_creator(agent, identifier, ra=True)

    def _add_ra_entity(self, local_id):
        """Add to the graph set the RA whose IRI is ``self.url + local_id``.

        The preexisting subgraph is fetched through the finder only when
        ``local_id`` belongs to ``self.preexisting_entities``.
        """
        res = URIRef(self.url + local_id)
        preexisting_graph = None
        if local_id in self.preexisting_entities:
            preexisting_graph = self.finder.get_subgraph(res, self.preexisting_graphs)
        return self.setgraph.add_ra(
            self.resp_agent,
            source=self.src,
            res=res,
            preexisting_graph=preexisting_graph,
        )