Coverage for oc_ocdm / graph / entities / bibliographic_entity.py: 95%
43 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-08 20:23 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-08 20:23 +0000
1#!/usr/bin/python
3# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com>
4# SPDX-FileCopyrightText: 2024 Arcangelo Massari <arcangelo.massari@unibo.it>
5#
6# SPDX-License-Identifier: ISC
8# -*- coding: utf-8 -*-
9from __future__ import annotations
11from typing import TYPE_CHECKING
13from triplelite import RDFTerm
15from oc_ocdm.decorators import accepts_only
17if TYPE_CHECKING:
18 from typing import Dict, List, Optional
20 from oc_ocdm.graph.entities.identifier import Identifier
21from oc_ocdm.graph.graph_entity import GraphEntity
class BibliographicEntity(GraphEntity):
    """The base class for each bibliographic entity of the OpenCitations DataModel (OCDM)."""

    def _merge_properties(self, other: GraphEntity, prefer_self: bool) -> None:
        """
        Hook invoked by ``merge`` to bring over the properties that are specific
        to bibliographic entities.

        After delegating to the parent implementation, every identifier attached
        to ``other`` is attached to the current entity as well, and the resulting
        identifier set is then de-duplicated.

        :param other: The entity whose properties will be merged into the current entity.
        :type other: BibliographicEntity
        :param prefer_self: Forwarded unchanged to the parent implementation
        :type prefer_self: bool
        :return: None
        """
        super()._merge_properties(other, prefer_self)
        assert isinstance(other, BibliographicEntity)

        for incoming_id in other.get_identifiers():
            self.has_identifier(incoming_id)

        # Identifiers of a bibliographic entity must be unique with respect
        # to their (scheme, literal value) pair, so clean up after copying.
        self.remove_duplicated_identifiers()

    # HAS IDENTIFIER
    def get_identifiers(self) -> List[Identifier]:
        """
        Getter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        :return: A list holding one ``Identifier`` for each URI yielded by the lookup
          (an empty list when the lookup yields nothing)
        """
        linked_uris: List[str] = self._get_multiple_uri_references(GraphEntity.iri_has_identifier, 'id')
        # Each URI is turned into an Identifier object through the graph set.
        return [
            self.g_set.add_id(self.resp_agent, self.source, linked_uri)
            for linked_uri in linked_uris
        ]

    @accepts_only('id')
    def has_identifier(self, id_res: Identifier) -> None:
        """
        Setter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        `In addition to the internal dataset identifier assigned to the entity upon initial
        curation (format: [entity short name]/[local identifier]), other external third-party
        identifiers can be specified through this attribute (e.g. DOI, ORCID, PubMedID).`

        :param id_res: The value that will be set as the object of the property related to this method
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        id_term = RDFTerm("uri", str(id_res.res))
        self.g.add((self.res, GraphEntity.iri_has_identifier, id_term))

    @accepts_only('id')
    def remove_identifier(self, id_res: Identifier | None = None) -> None:
        """
        Remover method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        **WARNING: this is a non-functional property, hence, if the parameter
        is None, any existing value will be removed!**

        :param id_res: If not None, the specific object value that will be removed from the property
          related to this method (defaults to None)
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        if id_res is None:
            # No specific target: match any object of the predicate.
            self.g.remove((self.res, GraphEntity.iri_has_identifier, None))
            return

        self.g.remove((self.res, GraphEntity.iri_has_identifier, RDFTerm("uri", str(id_res.res))))

    def remove_duplicated_identifiers(self) -> None:
        """
        Utility function that scans the ``Identifier`` entities linked to the current
        bibliographic entity (through the ``datacite:hasIdentifier`` RDF predicate) and
        drops duplicated entries.

        Two distinct ``Identifier`` entities are considered the same if they share both
        the scheme (``datacite:usesIdentifierScheme``) and the literal value
        (``literal:hasLiteralValue``). The first identifier met for a given pair stays
        linked; each later one with the same pair is merged into it.

        Note that all identifiers are detached first and only those exposing both a
        scheme and a literal value are linked again: an identifier lacking either of
        them is therefore no longer linked once this method returns.

        :return: None
        """
        # Snapshot the current identifiers BEFORE detaching them all.
        current_ids: List[Identifier] = self.get_identifiers()
        self.remove_identifier()

        # Maps each (scheme, literal value) pair to the identifier kept for it
        # (ex. kept_by_key[(ISSN, 1234-5678)] <- base_iri:id/34).
        kept_by_key: dict[tuple[str, str], Identifier] = {}
        for candidate in current_ids:
            scheme = candidate.get_scheme()
            literal = candidate.get_literal_value()
            if scheme is None or literal is None:
                # Incomplete identifier: it cannot be compared, so it is not re-linked.
                continue

            key = (scheme, literal)
            if key not in kept_by_key:
                kept_by_key[key] = candidate
                self.has_identifier(candidate)  # the Identifier is kept!
                continue

            # A previous identifier already covers this pair.
            kept_by_key[key].merge(candidate)  # the Identifier is dropped!