Coverage for oc_ocdm / graph / entities / bibliographic_entity.py: 95%
42 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-28 18:52 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-28 18:52 +0000
1#!/usr/bin/python
3# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com>
4# SPDX-FileCopyrightText: 2024 Arcangelo Massari <arcangelo.massari@unibo.it>
5#
6# SPDX-License-Identifier: ISC
8# -*- coding: utf-8 -*-
9from __future__ import annotations
11from typing import TYPE_CHECKING
13from oc_ocdm.decorators import accepts_only
15if TYPE_CHECKING:
16 from typing import List, Dict, Optional
17 from rdflib import URIRef
18 from oc_ocdm.graph.entities.identifier import Identifier
19from oc_ocdm.graph.graph_entity import GraphEntity
class BibliographicEntity(GraphEntity):
    """Common base class shared by every bibliographic entity of the OpenCitations DataModel (OCDM)."""

    def _merge_properties(self, other: GraphEntity, prefer_self: bool) -> None:
        """
        Hook invoked by ``merge`` to carry over the properties that are specific to
        bibliographic entities.

        Once the inherited hook has run, every identifier attached to ``other`` is also
        attached to the current entity and the resulting identifiers are deduplicated.

        :param other: The entity whose properties will be merged into the current entity.
        :type other: BibliographicEntity
        :param prefer_self: If True, prefer values from the current entity for non-functional properties
        :type prefer_self: bool
        :return: None
        """
        super()._merge_properties(other, prefer_self)
        # Narrows the static type of 'other' so that 'get_identifiers' is known to exist.
        assert isinstance(other, BibliographicEntity)

        for incoming_id in other.get_identifiers():
            self.has_identifier(incoming_id)

        # Identifiers of a bibliographic entity carry special semantics: each one
        # must be unique with respect to its (scheme, literal value) pair.
        self.remove_duplicated_identifiers()

    # HAS IDENTIFIER
    def get_identifiers(self) -> List[Identifier]:
        """
        Getter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        Each URI found for the predicate is turned into an ``Identifier`` through
        ``self.g_set.add_id``.

        :return: A list containing one ``Identifier`` per value found (empty if there is none)
        """
        identifier_uris: List[URIRef] = self._get_multiple_uri_references(GraphEntity.iri_has_identifier, 'id')
        return [
            self.g_set.add_id(self.resp_agent, self.source, identifier_uri)
            for identifier_uri in identifier_uris
        ]

    @accepts_only('id')
    def has_identifier(self, id_res: Identifier) -> None:
        """
        Setter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        `In addition to the internal dataset identifier assigned to the entity upon initial
        curation (format: [entity short name]/[local identifier]), other external third-party
        identifiers can be specified through this attribute (e.g. DOI, ORCID, PubMedID).`

        :param id_res: The value that will be set as the object of the property related to this method
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        triple = (self.res, GraphEntity.iri_has_identifier, id_res.res)
        self.g.add(triple)

    @accepts_only('id')
    def remove_identifier(self, id_res: Identifier | None = None) -> None:
        """
        Remover method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        **WARNING: this is a non-functional property, hence, if the parameter
        is None, any existing value will be removed!**

        :param id_res: If not None, the specific object value that will be removed from the property
          related to this method (defaults to None)
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        # A None object term is passed to 'remove' as-is when no specific
        # identifier was requested.
        object_term = None if id_res is None else id_res.res
        self.g.remove((self.res, GraphEntity.iri_has_identifier, object_term))

    def remove_duplicated_identifiers(self) -> None:
        """
        Utility function that scans the ``Identifier`` entities associated to the current
        bibliographic entity (through the ``datacite:hasIdentifier`` RDF predicate) and
        removes duplicated entries.

        Two distinct ``Identifier`` entities are considered the same if they share both
        the scheme (``datacite:usesIdentifierScheme``) and the literal value (``literal:hasLiteralValue``).
        The first one encountered is kept; each later duplicate is merged into it.

        Identifiers for which either the scheme or the literal value is None are detached
        and not re-attached.

        :return: None
        """
        # Snapshot the current identifiers, then detach all of them from 'self':
        # only the unique ones are re-attached below.
        attached_ids: List[Identifier] = self.get_identifiers()
        self.remove_identifier()

        # Nested lookup table: scheme -> literal value -> identifier kept for that pair
        # (ex. kept_ids[ISSN][1234-5678] <- base_iri:id/34).
        kept_ids: Dict[URIRef, Dict[str, Identifier]] = {}
        for candidate in attached_ids:
            scheme: Optional[URIRef] = candidate.get_scheme()
            literal: Optional[str] = candidate.get_literal_value()
            if scheme is None or literal is None:
                # NOTE(review): incomplete identifiers are silently left detached —
                # confirm that this is the intended behaviour.
                continue

            same_scheme_ids = kept_ids.setdefault(scheme, {})
            if literal not in same_scheme_ids:
                same_scheme_ids[literal] = candidate
                self.has_identifier(candidate)  # the Identifier is kept!
            else:
                same_scheme_ids[literal].merge(candidate)  # the Identifier is dropped!