Coverage for oc_ocdm / graph / entities / bibliographic_entity.py: 95%

42 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-28 18:52 +0000

1#!/usr/bin/python 

2 

3# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com> 

4# SPDX-FileCopyrightText: 2024 Arcangelo Massari <arcangelo.massari@unibo.it> 

5# 

6# SPDX-License-Identifier: ISC 

7 

8# -*- coding: utf-8 -*- 

9from __future__ import annotations 

10 

11from typing import TYPE_CHECKING 

12 

13from oc_ocdm.decorators import accepts_only 

14 

15if TYPE_CHECKING: 

16 from typing import List, Dict, Optional 

17 from rdflib import URIRef 

18 from oc_ocdm.graph.entities.identifier import Identifier 

19from oc_ocdm.graph.graph_entity import GraphEntity 

20 

21 

class BibliographicEntity(GraphEntity):
    """Common base class shared by every bibliographic entity of the OpenCitations DataModel (OCDM)."""

    def _merge_properties(self, other: GraphEntity, prefer_self: bool) -> None:
        """
        Hook invoked by ``merge`` to bring over the properties that are specific to
        bibliographic entities: every identifier of ``other`` is attached to the
        current entity, then duplicated identifiers are collapsed.

        :param other: The entity whose properties will be merged into the current entity.
        :type other: BibliographicEntity
        :param prefer_self: Forwarded unchanged to the parent implementation
        :type prefer_self: bool
        :return: None
        """
        super()._merge_properties(other, prefer_self)
        # Narrows the static type of 'other' so that 'get_identifiers' is known to exist.
        assert isinstance(other, BibliographicEntity)

        for incoming_id in other.get_identifiers():
            self.has_identifier(incoming_id)

        # Identifiers of a bibliographic entity must be unique with respect to
        # their (scheme, literal value) pair, so the freshly extended list is
        # deduplicated right away.
        self.remove_duplicated_identifiers()

    # HAS IDENTIFIER
    def get_identifiers(self) -> List[Identifier]:
        """
        Getter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        :return: A list with one ``Identifier`` per URI returned for the predicate
          (the list is empty when no URI is returned)
        """
        id_uris: List[URIRef] = self._get_multiple_uri_references(GraphEntity.iri_has_identifier, 'id')
        return [self.g_set.add_id(self.resp_agent, self.source, id_uri) for id_uri in id_uris]

    @accepts_only('id')
    def has_identifier(self, id_res: Identifier) -> None:
        """
        Setter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        `In addition to the internal dataset identifier assigned to the entity upon initial
        curation (format: [entity short name]/[local identifier]), other external third-party
        identifiers can be specified through this attribute (e.g. DOI, ORCID, PubMedID).`

        :param id_res: The value that will be set as the object of the property related to this method
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        triple = (self.res, GraphEntity.iri_has_identifier, id_res.res)
        self.g.add(triple)

    @accepts_only('id')
    def remove_identifier(self, id_res: Identifier | None = None) -> None:
        """
        Remover method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        **WARNING: this is a non-functional property, hence, if the parameter
        is None, any existing value will be removed!**

        :param id_res: If not None, the specific object value that will be removed from the property
          related to this method (defaults to None)
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        # A None object is passed through to the graph as-is when no
        # specific identifier was requested.
        target = None if id_res is None else id_res.res
        self.g.remove((self.res, GraphEntity.iri_has_identifier, target))

    def remove_duplicated_identifiers(self) -> None:
        """
        Utility function that scans the ``Identifier`` entities associated to the current
        bibliographic entity (through the ``datacite:hasIdentifier`` RDF predicate) and
        removes duplicated entries.

        Two distinct ``Identifier`` entities are considered the same if they share both
        the scheme (``datacite:usesIdentifierScheme``) and the literal value (``literal:hasLiteralValue``).
        The first one encountered is kept; each later duplicate is merged into it.

        Note that all identifiers are detached first and only those exposing both a scheme
        and a literal value are re-attached: an identifier for which either getter returns
        None is left detached.

        :return: None
        """
        candidates: List[Identifier] = self.get_identifiers()
        # Start from a clean slate: only the unique identifiers get linked back.
        self.remove_identifier()

        # Two-level index: scheme -> literal value -> identifier that was kept
        # (ex. kept[ISSN][1234-5678] <- base_iri:id/34).
        kept: Dict[URIRef, Dict[str, Identifier]] = {}
        for candidate in candidates:
            scheme: Optional[URIRef] = candidate.get_scheme()
            value: Optional[str] = candidate.get_literal_value()
            if scheme is None or value is None:
                # Cannot be compared with the others: it is not re-attached.
                continue

            same_scheme = kept.setdefault(scheme, {})
            if value in same_scheme:
                # Duplicate: merged into the identifier already kept, not re-attached.
                same_scheme[value].merge(candidate)
            else:
                # First occurrence of this (scheme, value) pair: the identifier is kept.
                same_scheme[value] = candidate
                self.has_identifier(candidate)
132 id_to_be_kept.merge(identifier) # the Identifier is dropped!