Coverage for oc_meta/plugins/multiprocess/resp_agents_creator.py: 98%

96 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-12-20 08:55 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright (c) 2022 Arcangelo Massari <arcangelo.massari@unibo.it> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16 

17from __future__ import annotations 

18 

19import re 

20 

21from oc_meta.core.creator import Creator 

22from oc_meta.lib.finder import ResourceFinder 

23from oc_meta.lib.master_of_regex import (comma_and_spaces, name_and_ids, 

24 semicolon_in_people_field) 

25from rdflib import Graph, URIRef 

26 

27from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler 

28from oc_ocdm.graph import GraphSet 

29 

30 

class RespAgentsCreator(Creator):
    """Create responsible-agent entities for the agents named in a batch of rows.

    Every row in ``data`` is read through the keys ``'author'``, ``'publisher'``
    and ``'editor'``. Author and editor fields are split into single agents with
    ``semicolon_in_people_field``; the publisher field is treated as one agent.
    Each agent is parsed with ``name_and_ids`` into a name (group 1) and a
    whitespace-separated list of identifiers (group 2). The identifier
    containing ``omid:`` gives the entity IRI once appended to ``base_iri``.

    NOTE(review): ``indexer_id`` and ``id_creator`` are not defined in this
    class; they are presumably inherited from ``Creator`` -- confirm.
    """

    def __init__(self, data: list, finder: ResourceFinder, base_iri: str,
                 counter_handler: RedisCounterHandler, supplier_prefix: str,
                 resp_agent: str, ra_index: dict):
        """Store the collaborators and prepare the ``GraphSet`` to be filled.

        :param data: rows to process; each must provide the keys ``'author'``,
            ``'publisher'`` and ``'editor'``.
        :param finder: source of already known subgraphs, accessed through
            ``prebuilt_subgraphs`` and ``get_subgraph``.
        :param base_iri: prefix prepended to the OMID to build an entity IRI.
        :param counter_handler: counter handler forwarded to ``GraphSet``.
        :param supplier_prefix: supplier prefix forwarded to ``GraphSet``.
        :param resp_agent: responsible agent passed to every ``add_ra`` call.
        :param ra_index: raw index, transformed by ``indexer_id`` before storage.
        """
        self.url = base_iri
        self.setgraph = GraphSet(
            self.url,
            supplier_prefix=supplier_prefix,
            wanted_label=False,
            custom_counter_handler=counter_handler,
        )
        self.finder = finder
        self.resp_agent = resp_agent
        self.ra_id_schemas = {'crossref', 'orcid', 'viaf', 'wikidata'}
        self.br_id_schemas = {'doi', 'issn', 'isbn', 'pmid', 'pmcid', 'url', 'wikidata', 'wikipedia'}
        self.schemas = self.ra_id_schemas.union(self.br_id_schemas)
        self.ra_index = self.indexer_id(ra_index)
        self.data = data
        self.counter_handler = counter_handler

    def creator(self, source=None) -> GraphSet:
        """Process every row and return the populated ``GraphSet``.

        :param source: value stored in ``self.src`` and passed as ``source`` to
            every ``add_ra`` call.
        :return: ``self.setgraph`` after all rows have been handled.
        :raises ValueError: if an agent entry is malformed or has no OMID.
        """
        self.src = source
        for row in self.data:
            # Each action is a no-op when its field is empty.
            self.author_action(row['author'])
            self.publisher_action(row['publisher'])
            self.editor_action(row['editor'])
        return self.setgraph

    def _parse_agent_field(self, agent: str) -> tuple[str, list, URIRef]:
        """Split one agent entry into its name, its identifiers and its IRI.

        :param agent: a single agent entry (one author, editor or publisher).
        :return: ``(name, identifiers, url)`` where ``url`` is built from the
            identifier containing ``omid:``.
        :raises ValueError: if the entry does not match ``name_and_ids`` or
            carries no ``omid:`` identifier.
        """
        match = re.search(name_and_ids, agent)
        if match is None:
            raise ValueError(f'Cannot parse responsible agent entry: {agent!r}')
        name = match.group(1)
        identifiers = (match.group(2) or '').split()
        omids = [identifier for identifier in identifiers if 'omid:' in identifier]
        if not omids:
            # Without this check the IRI of the previously processed agent
            # would be silently reused (or the name would be unbound).
            raise ValueError(f'Responsible agent entry has no OMID: {agent!r}')
        # The last OMID wins, as in the original loop over the identifiers.
        url = URIRef(self.url + omids[-1].replace('omid:', ''))
        return name, identifiers, url

    def _create_agent(self, agent: str, split_name: bool) -> None:
        """Add one responsible agent to the graph set with name and identifiers.

        :param agent: a single agent entry.
        :param split_name: when true and the name contains a comma, the name is
            split with ``comma_and_spaces`` into family name (first part) and
            given name (second part); otherwise the whole name is set.
        :raises ValueError: propagated from ``_parse_agent_field``.
        """
        name, identifiers, url = self._parse_agent_field(agent)
        is_known = url in self.finder.prebuilt_subgraphs
        preexisting_graph = self.finder.get_subgraph(url) if is_known else None
        entity = self.setgraph.add_ra(
            self.resp_agent,
            source=self.src,
            res=url,
            preexisting_graph=preexisting_graph,
        )
        if split_name and ',' in name:
            name_parts = re.split(comma_and_spaces, name)
            family_name = name_parts[0]
            given_name = name_parts[1]
            # A blank given name (e.g. "Surname,") is not recorded.
            if given_name.strip():
                entity.has_given_name(given_name)
            entity.has_family_name(family_name)
        else:
            entity.has_name(name)
        # Every identifier, the OMID included, is handed to id_creator.
        for identifier in identifiers:
            self.id_creator(entity, identifier, ra=True)

    def author_action(self, authors):
        """Create one responsible agent per author listed in ``authors``.

        :param authors: the row's author field; nothing happens when it is empty.
        :raises ValueError: if an author entry is malformed or has no OMID.
        """
        if not authors:
            return
        for author in re.split(semicolon_in_people_field, authors):
            self._create_agent(author, split_name=True)

    def publisher_action(self, publisher):
        """Create the responsible agent for the publisher.

        The publisher name is always stored whole, even if it contains a comma.

        :param publisher: the row's publisher field; nothing happens when it is
            empty.
        :raises ValueError: if the entry is malformed or has no OMID.
        """
        if not publisher:
            return
        self._create_agent(publisher, split_name=False)

    def editor_action(self, editor):
        """Create one responsible agent per editor listed in ``editor``.

        :param editor: the row's editor field; nothing happens when it is empty.
        :raises ValueError: if an editor entry is malformed or has no OMID.
        """
        if not editor:
            return
        for single_editor in re.split(semicolon_in_people_field, editor):
            self._create_agent(single_editor, split_name=True)