Coverage for oc_ocdm / graph / entities / bibliographic_entity.py: 95%

43 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-08 20:23 +0000

1#!/usr/bin/python 

2 

3# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com> 

4# SPDX-FileCopyrightText: 2024 Arcangelo Massari <arcangelo.massari@unibo.it> 

5# 

6# SPDX-License-Identifier: ISC 

7 

8# -*- coding: utf-8 -*- 

9from __future__ import annotations 

10 

11from typing import TYPE_CHECKING 

12 

13from triplelite import RDFTerm 

14 

15from oc_ocdm.decorators import accepts_only 

16 

17if TYPE_CHECKING: 

18 from typing import Dict, List, Optional 

19 

20 from oc_ocdm.graph.entities.identifier import Identifier 

21from oc_ocdm.graph.graph_entity import GraphEntity 

22 

23 

class BibliographicEntity(GraphEntity):
    """Common base class for the bibliographic entities of the OpenCitations DataModel (OCDM).

    It adds to ``GraphEntity`` the handling of the ``datacite:hasIdentifier``
    predicate: getter, setter, remover, and a de-duplication utility that is
    also run at the end of every merge.
    """

    def _merge_properties(self, other: GraphEntity, prefer_self: bool) -> None:
        """
        Hook invoked by ``merge`` to bring over the properties that are specific
        to bibliographic entities.

        After delegating to the parent implementation, every identifier of
        ``other`` is attached to this entity and duplicated identifiers are
        then collapsed.

        :param other: The entity whose properties are merged into this one.
        :type other: BibliographicEntity
        :param prefer_self: Forwarded unchanged to the parent implementation
          (if True, values of the current entity are preferred).
        :type prefer_self: bool
        :raises AssertionError: if ``other`` is not a ``BibliographicEntity``
          (only when assertions are enabled)
        :return: None
        """
        super()._merge_properties(other, prefer_self)
        assert isinstance(other, BibliographicEntity)

        for donor_id in other.get_identifiers():
            self.has_identifier(donor_id)

        # Identifiers are meant to be unique per (scheme, literal value) pair,
        # so whatever has just been imported must be de-duplicated.
        self.remove_duplicated_identifiers()

    # HAS IDENTIFIER
    def get_identifiers(self) -> List[Identifier]:
        """
        Getter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        Each URI found for the predicate is turned into an ``Identifier`` through
        ``self.g_set.add_id``, using this entity's ``resp_agent`` and ``source``.

        :return: A list with one ``Identifier`` per URI found (empty if no URI is found)
        """
        linked_uris: list[str] = self._get_multiple_uri_references(GraphEntity.iri_has_identifier, 'id')
        return [self.g_set.add_id(self.resp_agent, self.source, uri) for uri in linked_uris]

    @accepts_only('id')
    def has_identifier(self, id_res: Identifier) -> None:
        """
        Setter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        `In addition to the internal dataset identifier assigned to the entity upon initial
        curation (format: [entity short name]/[local identifier]), other external third-party
        identifiers can be specified through this attribute (e.g. DOI, ORCID, PubMedID).`

        :param id_res: The identifier whose resource becomes the object of the new triple
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type (presumably enforced by
          the ``accepts_only`` decorator, which is defined elsewhere)
        :return: None
        """
        id_term = RDFTerm("uri", str(id_res.res))
        self.g.add((self.res, GraphEntity.iri_has_identifier, id_term))

    @accepts_only('id')
    def remove_identifier(self, id_res: Identifier | None = None) -> None:
        """
        Remover method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        **WARNING: this is a non-functional property, hence, if the parameter
        is None, any existing value will be removed!**

        :param id_res: The specific identifier to detach; when None (the default) the
          object position of the removed triple pattern is left as None
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type (presumably enforced by
          the ``accepts_only`` decorator, which is defined elsewhere)
        :return: None
        """
        # A None object makes the pattern match every identifier of this entity.
        target = None if id_res is None else RDFTerm("uri", str(id_res.res))
        self.g.remove((self.res, GraphEntity.iri_has_identifier, target))

    def remove_duplicated_identifiers(self) -> None:
        """
        Utility function that scans the ``Identifier`` entities attached to this
        bibliographic entity (``datacite:hasIdentifier``) and collapses duplicates.

        Two distinct ``Identifier`` entities are considered the same if they share both
        the scheme (``datacite:usesIdentifierScheme``) and the literal value
        (``literal:hasLiteralValue``). The first one encountered is kept; each later
        duplicate is merged into it and is not re-attached to this entity.

        :return: None
        """
        candidates: list[Identifier] = self.get_identifiers()

        # Start from a clean slate: everything is detached, and only the
        # identifiers that turn out to be unique are attached again.
        self.remove_identifier()

        # Maps each (scheme, literal value) pair to the identifier kept for it
        # (ex. survivors[(ISSN, 1234-5678)] <- base_iri:id/34).
        survivors: dict[tuple[str, str], Identifier] = {}
        for candidate in candidates:
            scheme = candidate.get_scheme()
            literal = candidate.get_literal_value()
            if scheme is None or literal is None:
                # Without both scheme and literal value the identifier cannot
                # be compared, and it is not re-attached to this entity.
                continue

            key = (scheme, literal)
            if key in survivors:
                survivors[key].merge(candidate)  # the Identifier is dropped!
            else:
                survivors[key] = candidate
                self.has_identifier(candidate)  # the Identifier is kept!