Coverage for oc_meta/plugins/multiprocess/resp_agents_creator.py: 98%

98 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-07-14 14:06 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright (c) 2022 Arcangelo Massari <arcangelo.massari@unibo.it> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16 

17from __future__ import annotations 

18 

19import re 

20 

21from oc_meta.core.creator import Creator 

22from oc_meta.lib.finder import ResourceFinder 

23from oc_meta.lib.master_of_regex import (comma_and_spaces, name_and_ids, 

24 semicolon_in_people_field) 

25from rdflib import Graph, URIRef 

26 

27from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler 

28from oc_ocdm.graph import GraphSet 

29 

30 

class RespAgentsCreator(Creator):
    """Create responsible-agent (RA) entities from the people fields of rows.

    For every row in ``data`` the ``author``, ``publisher`` and ``editor``
    fields are parsed with the ``name_and_ids`` pattern (group 1 is the name,
    group 2 the whitespace-separated identifiers). Each agent is added to
    ``self.setgraph`` under the IRI built from ``base_iri`` plus the agent's
    ``omid:`` identifier, then its name and identifiers are attached.
    """

    def __init__(
        self,
        data: list,
        endpoint: str,
        base_iri: str,
        counter_handler: RedisCounterHandler,
        supplier_prefix: str,
        resp_agent: str,
        ra_index: dict,
        preexisting_entities: set,
        everything_everywhere_allatonce: Graph,
        settings: dict | None = None,
        meta_config_path: str | None = None,
    ):
        """Store the configuration and build the graph set and the finder.

        Args:
            data: rows to process; each row is indexed by ``'author'``,
                ``'publisher'`` and ``'editor'`` in :meth:`creator`.
            endpoint: passed to ``ResourceFinder`` as ``ts_url``.
            base_iri: prefix prepended to an OMID to obtain the entity IRI.
            counter_handler: passed to ``GraphSet`` as custom counter handler.
            supplier_prefix: passed to ``GraphSet``.
            resp_agent: passed as first argument to ``GraphSet.add_ra``.
            ra_index: index of RA identifiers, transformed by
                ``self.indexer_id``.
            preexisting_entities: OMIDs (without the ``omid:`` prefix) whose
                existing subgraph must be fetched before updating them.
            everything_everywhere_allatonce: passed to ``ResourceFinder`` as
                ``local_g``.
            settings: passed to ``ResourceFinder``.
            meta_config_path: passed to ``ResourceFinder``.
        """
        # NOTE(review): Creator.__init__ is not invoked; every attribute is
        # set here directly - confirm this is intentional.
        self.url = base_iri
        self.setgraph = GraphSet(
            self.url,
            supplier_prefix=supplier_prefix,
            wanted_label=False,
            custom_counter_handler=counter_handler,
        )
        self.finder = ResourceFinder(
            ts_url=endpoint,
            base_iri=base_iri,
            local_g=everything_everywhere_allatonce,
            settings=settings,
            meta_config_path=meta_config_path,
        )
        self.resp_agent = resp_agent
        self.ra_id_schemas = {'crossref', 'orcid', 'viaf', 'wikidata'}
        self.br_id_schemas = {'doi', 'issn', 'isbn', 'pmid', 'pmcid', 'url', 'wikidata', 'wikipedia'}
        self.schemas = self.ra_id_schemas.union(self.br_id_schemas)
        # indexer_id is not defined in this class; presumably inherited
        # from Creator.
        self.ra_index = self.indexer_id(ra_index)
        self.preexisting_entities = preexisting_entities
        # Passed to finder.get_subgraph on every lookup of a preexisting entity.
        self.preexisting_graphs = dict()
        self.data = data
        self.counter_handler = counter_handler

    def creator(self, source=None):
        """Process every row of ``self.data`` and return the graph set.

        Args:
            source: stored as ``self.src`` and forwarded to ``add_ra`` as
                ``source``.

        Returns:
            The ``GraphSet`` holding all the agents that were added.
        """
        self.src = source
        for row in self.data:
            # Read the three fields up front so that a row missing a key
            # fails before any entity is created for it.
            authors = row['author']
            publisher = row['publisher']
            editor = row['editor']
            self.author_action(authors)
            if publisher:
                self.publisher_action(publisher)
            if editor:
                self.editor_action(editor)
        return self.setgraph

    def author_action(self, authors):
        """Add one RA per author in the ``authors`` field.

        An empty or ``None`` field is a no-op.
        """
        if not authors:
            return
        for author in re.split(semicolon_in_people_field, authors):
            self._add_agent(author, is_person=True)

    def publisher_action(self, publisher):
        """Add the RA described by the ``publisher`` field.

        The field is treated as a single agent (it is not split) and its name
        is always stored whole. An empty or ``None`` field is a no-op.
        """
        if not publisher:
            return
        self._add_agent(publisher, is_person=False)

    def editor_action(self, editor):
        """Add one RA per editor in the ``editor`` field.

        An empty or ``None`` field is a no-op.
        """
        if not editor:
            return
        for single_editor in re.split(semicolon_in_people_field, editor):
            self._add_agent(single_editor, is_person=True)

    def _add_agent(self, agent, is_person):
        """Parse one ``name [ids]`` string, then create and populate its RA.

        Raises:
            ValueError: if ``agent`` does not match ``name_and_ids`` or
                carries no ``omid:`` identifier.
        """
        match = re.search(name_and_ids, agent)
        if match is None:
            raise ValueError(f'Cannot parse responsible agent: {agent!r}')
        name = match.group(1)
        # split() without arguments collapses runs of whitespace, so no
        # empty identifiers are produced (the editor path used split(' ')).
        identifiers = match.group(2).split()
        entity = self._get_ra_entity(agent, identifiers)
        if is_person:
            self._set_person_name(entity, name)
        else:
            entity.has_name(name)
        for identifier in identifiers:
            # id_creator is not defined in this class; presumably inherited
            # from Creator.
            self.id_creator(entity, identifier, ra=True)

    def _get_ra_entity(self, agent, identifiers):
        """Return the RA entity addressed by the ``omid:`` id in ``identifiers``.

        ``add_ra`` is called for every identifier containing ``omid:``; when
        there is more than one, the entity of the last one is returned.

        Raises:
            ValueError: if no identifier contains ``omid:``. Without this
                check the agent's data would be written onto the entity of
                the previously processed agent.
        """
        entity = None
        for identifier in identifiers:
            if 'omid:' not in identifier:
                continue
            omid = identifier.replace('omid:', '')
            uri = URIRef(self.url + omid)
            # Only entities known to exist already need their current
            # subgraph fetched before being touched.
            if omid in self.preexisting_entities:
                preexisting_graph = self.finder.get_subgraph(uri, self.preexisting_graphs)
            else:
                preexisting_graph = None
            entity = self.setgraph.add_ra(
                self.resp_agent,
                source=self.src,
                res=uri,
                preexisting_graph=preexisting_graph,
            )
        if entity is None:
            raise ValueError(f'Responsible agent without an OMID: {agent!r}')
        return entity

    def _set_person_name(self, entity, full_name):
        """Store ``full_name`` on ``entity``, split as ``family, given`` if possible.

        A name containing a comma is split with ``comma_and_spaces``: the
        first part is the family name, the second the given name (skipped
        when blank). A name without a comma is stored whole.
        """
        if ',' in full_name:
            parts = re.split(comma_and_spaces, full_name)
            given_name = parts[1]
            family_name = parts[0]
            if given_name.strip():
                entity.has_given_name(given_name)
            entity.has_family_name(family_name)
        else:
            entity.has_name(full_name)