Coverage for oc_meta/plugins/multiprocess/resp_agents_creator.py

1#!/usr/bin/python

2# -*- coding: utf-8 -*-

5# Permission to use, copy, modify, and/or distribute this software for any purpose

6# with or without fee is hereby granted, provided that the above copyright notice

7# and this permission notice appear in all copies.

9# THE SOFTWARE IS PROVIDED 'AS IS' AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS

15# SOFTWARE.

17from __future__ import annotations

19import re

21from oc_meta.core.creator import Creator

22from oc_meta.lib.finder import ResourceFinder

23from oc_meta.lib.master_of_regex import (comma_and_spaces, name_and_ids,

24 semicolon_in_people_field)

25from rdflib import Graph, URIRef

27from oc_ocdm.counter_handler.redis_counter_handler import RedisCounterHandler

28from oc_ocdm.graph import GraphSet

class RespAgentsCreator(Creator):
    """Create responsible-agent (RA) entities from rows of metadata.

    For every row in ``data`` the ``author``, ``publisher`` and ``editor``
    fields are parsed with the ``name_and_ids`` pattern; each entry that
    carries an ``omid:`` identifier is added to the internal ``GraphSet``
    through ``add_ra`` and then given its name and its identifiers.

    NOTE(review): ``Creator.__init__`` is not invoked; this looks deliberate
    (only the attributes set below are initialised) -- confirm against the
    base class.
    """

    def __init__(
        self,
        data: list,
        endpoint: str,
        base_iri: str,
        counter_handler: RedisCounterHandler,
        supplier_prefix: str,
        resp_agent: str,
        ra_index: dict,
        preexisting_entities: set,
        everything_everywhere_allatonce: Graph,
        settings: dict | None = None,
        meta_config_path: str | None = None,
    ):
        """Store the configuration and build the helper objects.

        Args:
            data: rows to process; each row is indexed by ``'author'``,
                ``'publisher'`` and ``'editor'``.
            endpoint: forwarded to ``ResourceFinder`` as ``ts_url``.
            base_iri: prefix prepended to an OMID to obtain the entity IRI.
            counter_handler: forwarded to ``GraphSet`` as
                ``custom_counter_handler``.
            supplier_prefix: forwarded to ``GraphSet``.
            resp_agent: passed as first argument to every ``add_ra`` call.
            ra_index: raw index, normalised through ``self.indexer_id``.
            preexisting_entities: OMIDs (without the ``omid:`` prefix) for
                which a subgraph is fetched before the entity is added.
            everything_everywhere_allatonce: forwarded to ``ResourceFinder``
                as ``local_g``.
            settings: forwarded to ``ResourceFinder``.
            meta_config_path: forwarded to ``ResourceFinder``.
        """
        self.url = base_iri
        self.setgraph = GraphSet(
            self.url,
            supplier_prefix=supplier_prefix,
            wanted_label=False,
            custom_counter_handler=counter_handler,
        )
        self.finder = ResourceFinder(
            ts_url=endpoint,
            base_iri=base_iri,
            local_g=everything_everywhere_allatonce,
            settings=settings,
            meta_config_path=meta_config_path,
        )
        self.resp_agent = resp_agent
        self.ra_id_schemas = {'crossref', 'orcid', 'viaf', 'wikidata'}
        self.br_id_schemas = {'doi', 'issn', 'isbn', 'pmid', 'pmcid', 'url', 'wikidata', 'wikipedia'}
        self.schemas = self.ra_id_schemas.union(self.br_id_schemas)
        self.ra_index = self.indexer_id(ra_index)
        self.preexisting_entities = preexisting_entities
        # Handed to finder.get_subgraph on every lookup; presumably a cache
        # of already retrieved subgraphs -- TODO confirm.
        self.preexisting_graphs = dict()
        self.data = data
        self.counter_handler = counter_handler

    def creator(self, source=None):
        """Process every row of ``self.data`` and return the ``GraphSet``.

        Args:
            source: stored in ``self.src`` and passed as ``source`` to each
                ``add_ra`` call.

        Returns:
            The ``GraphSet`` the agents were added to.
        """
        self.src = source
        for row in self.data:
            # Read the three fields up front so a missing key fails before
            # anything from this row has been added to the graph set.
            authors = row['author']
            publisher = row['publisher']
            editor = row['editor']
            self.author_action(authors)
            if publisher:
                self.publisher_action(publisher)
            if editor:
                self.editor_action(editor)
        return self.setgraph

    def author_action(self, authors):
        """Add every author listed in *authors*; a falsy value is a no-op."""
        if authors:
            self._register_people(authors)

    def publisher_action(self, publisher):
        """Add the publisher described by *publisher* (``name [ids]``).

        The entry is skipped when it does not match ``name_and_ids`` or when
        it carries no ``omid:`` identifier.
        """
        parsed = self._split_name_and_ids(publisher)
        if parsed is None:
            return
        publ_name, publ_id_list = parsed
        publ = self._agent_from_omids(publ_id_list)
        if publ is None:
            return
        publ.has_name(publ_name)
        for identifier in publ_id_list:
            self.id_creator(publ, identifier, ra=True)

    def editor_action(self, editor):
        """Add every editor listed in *editor*."""
        self._register_people(editor)

    def _register_people(self, people):
        """Add each person of a ``semicolon_in_people_field``-separated list.

        Entries that do not match ``name_and_ids`` or that carry no
        ``omid:`` identifier are skipped: without an OMID there is no entity
        to attach the name to, and falling through would either raise
        ``NameError`` or overwrite the data of the previous person.
        """
        for entry in re.split(semicolon_in_people_field, people):
            parsed = self._split_name_and_ids(entry)
            if parsed is None:
                continue
            full_name, id_list = parsed
            agent = self._agent_from_omids(id_list)
            if agent is None:
                continue
            self._assign_person_name(agent, full_name)
            for identifier in id_list:
                self.id_creator(agent, identifier, ra=True)

    @staticmethod
    def _split_name_and_ids(entry):
        """Return ``(name, [identifier, ...])`` for *entry*, or ``None`` if it does not match."""
        match = re.search(name_and_ids, entry)
        if match is None:
            return None
        # split() without a separator collapses runs of whitespace, so no
        # empty identifier is ever produced (authors, publishers and editors
        # are now tokenised the same way).
        return match.group(1), (match.group(2) or '').split()

    def _agent_from_omids(self, id_list):
        """Add an RA for each ``omid:`` identifier in *id_list* and return the last one (``None`` if there is none)."""
        agent = None
        for identifier in id_list:
            if 'omid:' not in identifier:
                continue
            omid = str(identifier).replace('omid:', '')
            res = URIRef(self.url + omid)
            # Only entities known to exist already need their current
            # subgraph handed to add_ra.
            if omid in self.preexisting_entities:
                preexisting_graph = self.finder.get_subgraph(res, self.preexisting_graphs)
            else:
                preexisting_graph = None
            agent = self.setgraph.add_ra(
                self.resp_agent,
                source=self.src,
                res=res,
                preexisting_graph=preexisting_graph,
            )
        return agent

    @staticmethod
    def _assign_person_name(agent, full_name):
        """Set family/given name for ``Family, Given`` strings, a plain name otherwise."""
        if ',' not in full_name:
            agent.has_name(full_name)
            return
        name_parts = re.split(comma_and_spaces, full_name)
        family_name = name_parts[0]
        given_name = name_parts[1]
        # The given name is optional ("Family," is accepted as family only).
        if given_name.strip():
            agent.has_given_name(given_name)
        agent.has_family_name(family_name)

Coverage for oc_meta/plugins/multiprocess/resp_agents_creator.py: 98%

98 statements