Coverage for oc_meta/plugins/multiprocess/resp_agents_creator.py: 98%
96 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-20 08:55 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-12-20 08:55 +0000
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3# Copyright (c) 2022 Arcangelo Massari <arcangelo.massari@unibo.it>
4#
5# Permission to use, copy, modify, and/or distribute this software for any purpose
6# with or without fee is hereby granted, provided that the above copyright notice
7# and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
15# SOFTWARE.
17from __future__ import annotations
19import re
21from oc_meta.core.creator import Creator
22from oc_meta.lib.finder import ResourceFinder
23from oc_meta.lib.master_of_regex import (comma_and_spaces, name_and_ids,
24 semicolon_in_people_field)
25from rdflib import Graph, URIRef
27from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler
28from oc_ocdm.graph import GraphSet
class RespAgentsCreator(Creator):
    """Register the responsible agents named in ``data`` inside a ``GraphSet``.

    Each row of ``data`` is read through its ``'author'``, ``'publisher'`` and
    ``'editor'`` keys. Every entry found there is parsed with the
    ``name_and_ids`` pattern into a name (group 1) and a whitespace-separated
    list of identifiers (group 2); identifiers containing ``omid:`` are turned
    into the IRI of the agent (``base_iri`` + the remainder of the identifier).

    NOTE(review): the exact textual grammar of an entry is defined by
    ``oc_meta.lib.master_of_regex`` and is not visible from this class.
    """

    def __init__(self, data: list, finder: ResourceFinder, base_iri: str, counter_handler: RedisCounterHandler, supplier_prefix: str, resp_agent: str, ra_index: dict):
        """Store the collaborators and create the target ``GraphSet``.

        The attributes are assigned directly; ``Creator.__init__`` is not
        invoked by this constructor.

        :param data: rows to process; each row is indexed with the keys
            ``'author'``, ``'publisher'`` and ``'editor'``.
        :param finder: provides ``prebuilt_subgraphs`` and ``get_subgraph``,
            used to recover the graph of an agent that already exists.
        :param base_iri: prefix prepended to an OMID to obtain the agent IRI.
        :param counter_handler: forwarded to ``GraphSet`` as
            ``custom_counter_handler``.
        :param supplier_prefix: forwarded to ``GraphSet``.
        :param resp_agent: first argument of every ``GraphSet.add_ra`` call.
        :param ra_index: passed through the inherited ``indexer_id`` before
            being stored as ``self.ra_index``.
        """
        self.url = base_iri
        self.setgraph = GraphSet(self.url, supplier_prefix=supplier_prefix, wanted_label=False, custom_counter_handler=counter_handler)
        self.finder = finder
        self.resp_agent = resp_agent
        self.ra_id_schemas = {'crossref', 'orcid', 'viaf', 'wikidata'}
        self.br_id_schemas = {'doi', 'issn', 'isbn', 'pmid', 'pmcid', 'url', 'wikidata', 'wikipedia'}
        self.schemas = self.ra_id_schemas.union(self.br_id_schemas)
        self.ra_index = self.indexer_id(ra_index)
        self.data = data
        self.counter_handler = counter_handler

    def creator(self, source=None):
        """Process every row of ``self.data`` and return the filled ``GraphSet``.

        :param source: stored as ``self.src`` and forwarded as ``source`` to
            every ``GraphSet.add_ra`` call.
        :return: ``self.setgraph``.
        """
        self.src = source
        for row in self.data:
            authors = row['author']
            publisher = row['publisher']
            editor = row['editor']
            # author_action performs its own emptiness check.
            self.author_action(authors)
            if publisher:
                self.publisher_action(publisher)
            if editor:
                self.editor_action(editor)
        return self.setgraph

    def author_action(self, authors):
        """Create one agent per entry of the ``authors`` field.

        :param authors: entries separated according to
            ``semicolon_in_people_field``; a falsy value is a no-op.
        """
        self._add_people(authors)

    def publisher_action(self, publisher):
        """Create the agent described by the ``publisher`` field.

        Unlike authors and editors, the field is treated as a single entry and
        the name is always stored whole through ``has_name``.

        :param publisher: a single name-and-identifiers entry.
        """
        self._add_agent(publisher, split_name=False)

    def editor_action(self, editor):
        """Create one agent per entry of the ``editor`` field.

        :param editor: entries separated according to
            ``semicolon_in_people_field``; a falsy value is a no-op.
        """
        self._add_people(editor)

    def _add_people(self, people):
        """Split a multi-person field and register each person as an agent."""
        if not people:
            return
        for person in re.split(semicolon_in_people_field, people):
            self._add_agent(person, split_name=True)

    def _add_agent(self, entry, split_name):
        """Register the agent described by one entry and attach its identifiers.

        :param entry: text matched against ``name_and_ids``.
        :param split_name: when true and the name contains a comma, the name is
            split with ``comma_and_spaces`` into family name (first piece) and
            given name (second piece); otherwise it is stored with ``has_name``.

        Entries that do not match ``name_and_ids`` or that carry no ``omid:``
        identifier are skipped, because there is no IRI to build the agent
        from.
        """
        match = re.search(name_and_ids, entry)
        if match is None:
            return
        name = match.group(1)
        # split() without arguments never yields empty tokens, whatever the
        # amount of whitespace between identifiers.
        identifiers = match.group(2).split()
        # Reset per entry so a previous entry's agent can never be reused.
        agent = None
        for identifier in identifiers:
            if 'omid:' not in identifier:
                continue
            url = URIRef(self.url + identifier.replace('omid:', ''))
            if url in self.finder.prebuilt_subgraphs:
                preexisting_graph = self.finder.get_subgraph(url)
            else:
                preexisting_graph = None
            agent = self.setgraph.add_ra(self.resp_agent, source=self.src, res=url, preexisting_graph=preexisting_graph)
            if split_name and ',' in name:
                name_parts = re.split(comma_and_spaces, name)
                given_name = name_parts[1]
                family_name = name_parts[0]
                # The given name may be blank ("Surname,"); the family name is
                # stored in every case.
                if given_name.strip():
                    agent.has_given_name(given_name)
                agent.has_family_name(family_name)
            else:
                agent.has_name(name)
        if agent is None:
            return
        # All identifiers of the entry, the OMID included, are handed to the
        # inherited id_creator.
        for identifier in identifiers:
            self.id_creator(agent, identifier, ra=True)