Coverage for oc_ocdm/graph/entities/bibliographic_entity.py: 89%
45 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-05-30 22:05 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-05-30 22:05 +0000
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com>
4#
5# Permission to use, copy, modify, and/or distribute this software for any purpose
6# with or without fee is hereby granted, provided that the above copyright notice
7# and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
15# SOFTWARE.
16from __future__ import annotations
18from typing import TYPE_CHECKING
20from oc_ocdm.decorators import accepts_only
22if TYPE_CHECKING:
23 from typing import List, Dict, Optional
24 from rdflib import URIRef
25 from oc_ocdm.graph.entities.identifier import Identifier
26from oc_ocdm.graph.graph_entity import GraphEntity
class BibliographicEntity(GraphEntity):
    """The base class for each bibliographic entity of the OpenCitations DataModel (OCDM)."""

    def merge(self, other: BibliographicEntity, prefer_self: bool = False) -> None:
        """
        **WARNING:** ``BibliographicEntity`` **is an abstract class that cannot be instantiated at runtime.
        As such, it's only possible to execute this method on entities generated from**
        ``BibliographicEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**

        After delegating to ``GraphEntity.merge``, every identifier returned by
        ``other.get_identifiers()`` is attached to the current entity and the
        resulting identifier set is deduplicated.

        :param other: The entity which will be marked as to be deleted and whose properties will
          be merged into the current entity.
        :type other: BibliographicEntity
        :param prefer_self: Forwarded unchanged to ``GraphEntity.merge``, which defines
          its semantics (defaults to False)
        :type prefer_self: bool
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        super().merge(other, prefer_self=prefer_self)

        for other_id in other.get_identifiers():
            self.has_identifier(other_id)

        # The special semantics associated to the identifiers
        # of a bibliographic entity requires them to be uniquely
        # defined based on their scheme and literal value:
        self.remove_duplicated_identifiers()

    # HAS IDENTIFIER
    def get_identifiers(self) -> List[Identifier]:
        """
        Getter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        Each URI found for the predicate is turned into an ``Identifier`` through
        ``self.g_set.add_id(self.resp_agent, self.source, uri)``.

        :return: A list containing one ``Identifier`` per URI found; the list is
          empty when no URI is found (this method never returns None)
        """
        uri_list: List[URIRef] = self._get_multiple_uri_references(GraphEntity.iri_has_identifier, 'id')
        return [self.g_set.add_id(self.resp_agent, self.source, uri) for uri in uri_list]

    @accepts_only('id')
    def has_identifier(self, id_res: Identifier) -> None:
        """
        Setter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        `In addition to the internal dataset identifier assigned to the entity upon initial
        curation (format: [entity short name]/[local identifier]), other external third-party
        identifiers can be specified through this attribute (e.g. DOI, ORCID, PubMedID).`

        :param id_res: The value that will be set as the object of the property related to this method
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        self.g.add((self.res, GraphEntity.iri_has_identifier, id_res.res))

    @accepts_only('id')
    def remove_identifier(self, id_res: Optional[Identifier] = None) -> None:
        """
        Remover method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        **WARNING: this is a non-functional property, hence, if the parameter
        is None, any existing value will be removed!**

        :param id_res: If not None, the specific object value that will be removed from the property
          related to this method (defaults to None)
        :type id_res: Optional[Identifier]
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        # A None object is handed to ``self.g.remove`` as-is when no specific
        # identifier is given (presumably an rdflib-style wildcard -- confirm
        # against the graph implementation).
        target = None if id_res is None else id_res.res
        self.g.remove((self.res, GraphEntity.iri_has_identifier, target))

    def remove_duplicated_identifiers(self) -> None:
        """
        Utility function that automatically scans the list of Identifier entities associated to the
        current bibliographic entity (through the ``datacite:hasIdentifier`` RDF predicate) and it removes
        duplicated entries.

        Two distinct ``Identifier`` entities are considered the same if they share both
        the scheme (``datacite:usesIdentifierScheme``) and the literal value (``literal:hasLiteralValue``).
        The first identifier met for a given scheme/literal pair is kept; every later
        one is merged into it through ``Identifier.merge``.

        **NOTE:** all identifiers are detached first and only the unique ones are
        re-attached, hence an identifier whose scheme or literal value is None is
        **not** re-associated with the current entity.

        :return: None
        """
        # Snapshot the identifiers before detaching them all from 'self':
        # only the unique ones will be re-associated below.
        id_list: List[Identifier] = self.get_identifiers()
        self.remove_identifier()

        # Nested dictionary associating the 'scheme -> literal value' pair to
        # the identifier object that was kept for it
        # (ex. kept_ids[ISSN][1234-5678] <- base_iri:id/34).
        kept_ids: Dict[URIRef, Dict[str, Identifier]] = {}
        for identifier in id_list:
            scheme: Optional[URIRef] = identifier.get_scheme()
            literal_value: Optional[str] = identifier.get_literal_value()
            if scheme is None or literal_value is None:
                # Incomplete identifiers cannot be compared: they stay detached.
                continue

            same_scheme: Dict[str, Identifier] = kept_ids.setdefault(scheme, {})
            if literal_value in same_scheme:
                # Duplicate: fold it into the identifier already kept.
                same_scheme[literal_value].merge(identifier)  # the Identifier is dropped!
            else:
                same_scheme[literal_value] = identifier
                self.has_identifier(identifier)  # the Identifier is kept!