Coverage for oc_ocdm/graph/entities/bibliographic_entity.py: 89%

45 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-05-30 22:05 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16from __future__ import annotations 

17 

18from typing import TYPE_CHECKING 

19 

20from oc_ocdm.decorators import accepts_only 

21 

22if TYPE_CHECKING: 

23 from typing import List, Dict, Optional 

24 from rdflib import URIRef 

25 from oc_ocdm.graph.entities.identifier import Identifier 

26from oc_ocdm.graph.graph_entity import GraphEntity 

27 

28 

class BibliographicEntity(GraphEntity):
    """Common base class of every bibliographic entity in the OpenCitations DataModel (OCDM)."""

    def merge(self, other: BibliographicEntity, prefer_self: bool = False) -> None:
        """
        **WARNING:** ``BibliographicEntity`` **is an abstract class that cannot be instantiated at runtime.
        This method can therefore only be executed on entities generated from**
        ``BibliographicEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**

        The parent class' ``merge`` is invoked first; afterwards every identifier of ``other``
        is attached to the current entity and duplicated identifiers are collapsed.

        :param other: The entity which will be marked as to be deleted and whose properties will
          be merged into the current entity.
        :type other: BibliographicEntity
        :param prefer_self: Forwarded unchanged to the parent class' ``merge`` (defaults to False)
        :type prefer_self: bool
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        super().merge(other, prefer_self=prefer_self)

        # Every identifier owned by 'other' is also linked to 'self'.
        for inherited_id in other.get_identifiers():
            self.has_identifier(inherited_id)

        # Identifiers of a bibliographic entity must be unique with respect
        # to their (scheme, literal value) pair, hence the clean-up step.
        self.remove_duplicated_identifiers()

    # HAS IDENTIFIER
    def get_identifiers(self) -> List[Identifier]:
        """
        Getter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        :return: A list with one ``Identifier`` per URI found for the predicate
          (the list is empty when no URI is found)
        """
        identifier_uris: List[URIRef] = self._get_multiple_uri_references(GraphEntity.iri_has_identifier, 'id')
        # Each URI is turned into an Identifier object through the graph set.
        return [self.g_set.add_id(self.resp_agent, self.source, id_uri) for id_uri in identifier_uris]

    @accepts_only('id')
    def has_identifier(self, id_res: Identifier) -> None:
        """
        Setter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        `In addition to the internal dataset identifier assigned to the entity upon initial
        curation (format: [entity short name]/[local identifier]), other external third-party
        identifiers can be specified through this attribute (e.g. DOI, ORCID, PubMedID).`

        :param id_res: The value that will be set as the object of the property related to this method
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        triple = (self.res, GraphEntity.iri_has_identifier, id_res.res)
        self.g.add(triple)

    @accepts_only('id')
    def remove_identifier(self, id_res: Identifier = None) -> None:
        """
        Remover method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        **WARNING: this is a non-functional property, hence, if the parameter
        is None, any existing value will be removed!**

        :param id_res: If not None, the specific object value that will be removed from the property
          related to this method (defaults to None)
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        # A None object is handed to the graph as-is: see the warning above.
        target = None if id_res is None else id_res.res
        self.g.remove((self.res, GraphEntity.iri_has_identifier, target))

    def remove_duplicated_identifiers(self) -> None:
        """
        Utility function that scans the ``Identifier`` entities associated to the current
        bibliographic entity (through the ``datacite:hasIdentifier`` RDF predicate) and
        removes the duplicated entries.

        Two distinct ``Identifier`` entities are considered the same if they share both
        the scheme (``datacite:usesIdentifierScheme``) and the literal value (``literal:hasLiteralValue``).
        The first identifier met for a given pair is kept; each later one is merged into it.

        :return: None
        """
        # Snapshot of the current identifiers, taken before detaching them.
        candidates: List[Identifier] = self.get_identifiers()

        # Everything is detached from 'self': only the unique
        # identifiers are attached again in the loop below.
        self.remove_identifier()

        # Two-level index: scheme -> literal value -> identifier that is kept
        # (ex. kept_by_scheme[ISSN][1234-5678] <- base_iri:id/34).
        kept_by_scheme: Dict[URIRef, Dict[str, Identifier]] = {}
        for candidate in candidates:
            scheme: Optional[URIRef] = candidate.get_scheme()
            literal: Optional[str] = candidate.get_literal_value()

            # An identifier lacking its scheme or its literal value cannot be
            # compared with the others: it is skipped and not attached again.
            if scheme is None or literal is None:
                continue

            kept_by_literal = kept_by_scheme.setdefault(scheme, {})
            if literal in kept_by_literal:
                # Same scheme and literal value already met: the Identifier is dropped!
                kept_by_literal[literal].merge(candidate)
                continue

            # First occurrence of this pair: the Identifier is kept!
            kept_by_literal[literal] = candidate
            self.has_identifier(candidate)