Coverage for oc_ocdm / graph / graph_entity.py: 97%
252 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-08 20:23 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-08 20:23 +0000
1# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com>
2# SPDX-FileCopyrightText: 2022-2024 Arcangelo Massari <arcangelo.massari@unibo.it>
3# SPDX-FileCopyrightText: 2024 martasoricetti <marta.soricetti@studio.unibo.it>
4#
5# SPDX-License-Identifier: ISC
7from __future__ import annotations
9from typing import TYPE_CHECKING
11from triplelite import RDFTerm, SubgraphView, TripleLite
13from oc_ocdm.abstract_entity import AbstractEntity
14from oc_ocdm.constants import RDF_TYPE, Namespace
16if TYPE_CHECKING:
17 from typing import ClassVar, Dict, List, Optional, Self
19 from oc_ocdm.graph.graph_set import GraphSet
class GraphEntity(AbstractEntity):
    """
    Base class for the entities stored in a ``GraphSet``.

    It groups the namespaces and the IRIs (classes, properties, identifier schemes)
    used by its subclasses, and implements the bookkeeping shared by all of them:
    registration inside the ``GraphSet``, deletion/restoration/merge flags and the
    snapshot of the triples that were already present when the entity was loaded.

    As stated in the documentation of :meth:`merge`, this class is meant to be used
    only through its subclasses.
    """

    # ------------------------------------------------------------------ namespaces
    BIRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/biro/")
    C4O: ClassVar[Namespace] = Namespace("http://purl.org/spar/c4o/")
    CO: ClassVar[Namespace] = Namespace("http://purl.org/co/")
    CITO: ClassVar[Namespace] = Namespace("http://purl.org/spar/cito/")
    DATACITE: ClassVar[Namespace] = Namespace("http://purl.org/spar/datacite/")
    DCTERMS: ClassVar[Namespace] = Namespace("http://purl.org/dc/terms/")
    DEO: ClassVar[Namespace] = Namespace("http://purl.org/spar/deo/")
    DOCO: ClassVar[Namespace] = Namespace("http://purl.org/spar/doco/")
    FABIO: ClassVar[Namespace] = Namespace("http://purl.org/spar/fabio/")
    FOAF: ClassVar[Namespace] = Namespace("http://xmlns.com/foaf/0.1/")
    FR: ClassVar[Namespace] = Namespace("http://purl.org/spar/fr/")
    FRBR: ClassVar[Namespace] = Namespace("http://purl.org/vocab/frbr/core#")
    LITERAL: ClassVar[Namespace] = Namespace("http://www.essepuntato.it/2010/06/literalreification/")
    OA: ClassVar[Namespace] = Namespace("http://www.w3.org/ns/oa#")
    OCO: ClassVar[Namespace] = Namespace("https://w3id.org/oc/ontology/")
    PRISM: ClassVar[Namespace] = Namespace("http://prismstandard.org/namespaces/basic/2.0/")
    PRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/pro/")

    # ------------------------------------------------------------------------ IRIs
    # Each ``iri_*`` attribute is a term taken from one of the namespaces above.
    iri_has_subtitle: ClassVar[str] = FABIO.hasSubtitle
    iri_has_publication_date: ClassVar[str] = PRISM.publicationDate
    iri_bibliographic_reference: ClassVar[str] = BIRO.BibliographicReference
    iri_references: ClassVar[str] = BIRO.references
    iri_denotes: ClassVar[str] = C4O.denotes
    iri_has_content: ClassVar[str] = C4O.hasContent
    iri_intextref_pointer: ClassVar[str] = C4O.InTextReferencePointer
    iri_is_context_of: ClassVar[str] = C4O.isContextOf
    iri_singleloc_pointer_list: ClassVar[str] = C4O.SingleLocationPointerList
    iri_has_element: ClassVar[str] = CO.element
    iri_citation: ClassVar[str] = CITO.Citation
    iri_cites: ClassVar[str] = CITO.cites
    iri_citation_characterisation: ClassVar[str] = CITO.hasCitationCharacterisation
    iri_has_citing_entity: ClassVar[str] = CITO.hasCitingEntity
    iri_has_cited_entity: ClassVar[str] = CITO.hasCitedEntity
    iri_openalex: ClassVar[str] = DATACITE.openalex
    iri_arxiv: ClassVar[str] = DATACITE.arxiv
    iri_oci: ClassVar[str] = DATACITE.oci
    iri_doi: ClassVar[str] = DATACITE.doi
    iri_pmid: ClassVar[str] = DATACITE.pmid
    iri_pmcid: ClassVar[str] = DATACITE.pmcid
    iri_orcid: ClassVar[str] = DATACITE.orcid
    # NOTE: iri_xpath and iri_xmlid resolve to the very same IRI.
    iri_xpath: ClassVar[str] = DATACITE["local-resource-identifier-scheme"]
    iri_intrepid: ClassVar[str] = DATACITE.intrepid
    iri_xmlid: ClassVar[str] = DATACITE["local-resource-identifier-scheme"]
    iri_has_identifier: ClassVar[str] = DATACITE.hasIdentifier
    iri_identifier: ClassVar[str] = DATACITE.Identifier
    iri_isbn: ClassVar[str] = DATACITE.isbn
    iri_issn: ClassVar[str] = DATACITE.issn
    iri_url: ClassVar[str] = DATACITE.url
    iri_uses_identifier_scheme: ClassVar[str] = DATACITE.usesIdentifierScheme
    iri_title: ClassVar[str] = DCTERMS.title
    iri_caption: ClassVar[str] = DEO.Caption
    iri_discourse_element: ClassVar[str] = DEO.DiscourseElement
    iri_footnote: ClassVar[str] = DOCO.Footnote
    iri_paragraph: ClassVar[str] = DOCO.Paragraph
    iri_part: ClassVar[str] = DOCO.Part
    iri_section: ClassVar[str] = DOCO.Section
    iri_introduction: ClassVar[str] = DEO.Introduction
    iri_methods: ClassVar[str] = DEO.Methods
    iri_materials: ClassVar[str] = DEO.Materials
    iri_related_work: ClassVar[str] = DEO.RelatedWork
    iri_results: ClassVar[str] = DEO.Results
    iri_discussion: ClassVar[str] = DEO.Discussion
    iri_conclusion: ClassVar[str] = DEO.Conclusion
    iri_section_title: ClassVar[str] = DOCO.SectionTitle
    iri_sentence: ClassVar[str] = DOCO.Sentence
    iri_table: ClassVar[str] = DOCO.Table
    iri_text_chunk: ClassVar[str] = DOCO.TextChunk
    iri_abstract: ClassVar[str] = DOCO.Abstract
    iri_academic_proceedings: ClassVar[str] = FABIO.AcademicProceedings
    iri_audio_document: ClassVar[str] = FABIO.AudioDocument
    iri_book: ClassVar[str] = FABIO.Book
    iri_book_chapter: ClassVar[str] = FABIO.BookChapter
    iri_book_series: ClassVar[str] = FABIO.BookSeries
    iri_book_set: ClassVar[str] = FABIO.BookSet
    iri_computer_program: ClassVar[str] = FABIO.ComputerProgram
    iri_data_file: ClassVar[str] = FABIO.DataFile
    iri_data_management_plan: ClassVar[str] = FABIO.DataManagementPlan
    iri_editorial: ClassVar[str] = FABIO.Editorial
    iri_expression: ClassVar[str] = FABIO.Expression
    iri_expression_collection: ClassVar[str] = FABIO.ExpressionCollection
    iri_has_sequence_identifier: ClassVar[str] = FABIO.hasSequenceIdentifier
    iri_journal: ClassVar[str] = FABIO.Journal
    iri_journal_article: ClassVar[str] = FABIO.JournalArticle
    iri_journal_editorial: ClassVar[str] = FABIO.JournalEditorial
    iri_journal_issue: ClassVar[str] = FABIO.JournalIssue
    iri_journal_volume: ClassVar[str] = FABIO.JournalVolume
    iri_manifestation: ClassVar[str] = FABIO.Manifestation
    iri_newspaper: ClassVar[str] = FABIO.Newspaper
    iri_newspaper_article: ClassVar[str] = FABIO.NewspaperArticle
    iri_newspaper_editorial: ClassVar[str] = FABIO.NewspaperEditorial
    iri_newspaper_issue: ClassVar[str] = FABIO.NewspaperIssue
    # NOTE: the peer review type comes from the FR namespace, not from FABIO.
    iri_peer_review: ClassVar[str] = FR.ReviewVersion
    iri_preprint: ClassVar[str] = FABIO.Preprint
    iri_presentation: ClassVar[str] = FABIO.Presentation
    iri_proceedings_paper: ClassVar[str] = FABIO.ProceedingsPaper
    # NOTE: iri_proceedings_series and iri_series resolve to the very same IRI.
    iri_proceedings_series: ClassVar[str] = FABIO.Series
    iri_reference_book: ClassVar[str] = FABIO.ReferenceBook
    iri_reference_entry: ClassVar[str] = FABIO.ReferenceEntry
    iri_report_document: ClassVar[str] = FABIO.ReportDocument
    iri_retraction_notice: ClassVar[str] = FABIO.RetractionNotice
    iri_series: ClassVar[str] = FABIO.Series
    iri_specification_document: ClassVar[str] = FABIO.SpecificationDocument
    iri_thesis: ClassVar[str] = FABIO.Thesis
    iri_web_content: ClassVar[str] = FABIO.WebContent
    iri_agent: ClassVar[str] = FOAF.Agent
    iri_family_name: ClassVar[str] = FOAF.familyName
    iri_given_name: ClassVar[str] = FOAF.givenName
    iri_name: ClassVar[str] = FOAF.name
    iri_embodiment: ClassVar[str] = FRBR.embodiment
    iri_part_of: ClassVar[str] = FRBR.partOf
    # NOTE: iri_contains_reference and iri_contains_de resolve to the very same IRI.
    iri_contains_reference: ClassVar[str] = FRBR.part
    iri_contains_de: ClassVar[str] = FRBR.part
    iri_has_literal_value: ClassVar[str] = LITERAL.hasLiteralValue
    iri_ending_page: ClassVar[str] = PRISM.endingPage
    iri_starting_page: ClassVar[str] = PRISM.startingPage
    iri_author: ClassVar[str] = PRO.author
    iri_editor: ClassVar[str] = PRO.editor
    iri_is_held_by: ClassVar[str] = PRO.isHeldBy
    iri_publisher: ClassVar[str] = PRO.publisher
    iri_is_document_context_for: ClassVar[str] = PRO.isDocumentContextFor
    iri_role_in_time: ClassVar[str] = PRO.RoleInTime
    iri_with_role: ClassVar[str] = PRO.withRole
    iri_note: ClassVar[str] = OA.Annotation
    iri_has_body: ClassVar[str] = OA.hasBody
    iri_has_annotation: ClassVar[str] = OCO.hasAnnotation  # inverse of OA.hasTarget
    iri_has_next: ClassVar[str] = OCO.hasNext
    iri_archival_document: ClassVar[str] = FABIO.ArchivalDocument
    iri_viaf: ClassVar[str] = DATACITE.viaf
    iri_crossref: ClassVar[str] = DATACITE.crossref  # TODO: add to datacite!
    iri_datacite: ClassVar[str] = DATACITE.datacite  # TODO: add to datacite!
    iri_jid: ClassVar[str] = DATACITE.jid  # TODO: add to datacite!
    iri_wikidata: ClassVar[str] = DATACITE.wikidata  # TODO: add to datacite!
    iri_wikipedia: ClassVar[str] = DATACITE.wikipedia  # TODO: add to datacite!
    iri_has_edition: ClassVar[str] = PRISM.edition
    iri_relation: ClassVar[str] = DCTERMS.relation
    iri_has_citation_creation_date: ClassVar[str] = CITO.hasCitationCreationDate
    iri_has_citation_time_span: ClassVar[str] = CITO.hasCitationTimeSpan
    iri_digital_manifestation: ClassVar[str] = FABIO.DigitalManifestation
    iri_print_object: ClassVar[str] = FABIO.PrintObject
    iri_has_url: ClassVar[str] = FRBR.exemplar
    iri_self_citation: ClassVar[str] = CITO.SelfCitation
    iri_affiliation_self_citation: ClassVar[str] = CITO.AffiliationSelfCitation
    iri_author_network_self_citation: ClassVar[str] = CITO.AuthorNetworkSelfCitation
    iri_author_self_citation: ClassVar[str] = CITO.AuthorSelfCitation
    iri_funder_self_citation: ClassVar[str] = CITO.FunderSelfCitation
    iri_journal_self_citation: ClassVar[str] = CITO.JournalSelfCitation
    iri_journal_cartel_citation: ClassVar[str] = CITO.JournalCartelCitation
    iri_distant_citation: ClassVar[str] = CITO.DistantCitation
    iri_has_format: ClassVar[str] = DCTERMS.format

    # Maps the short name of an entity kind to the IRI of its base RDF type.
    # It is used by _get_specific_type() and merge() to tell the base type apart
    # from any more specific type attached to the same entity.
    short_name_to_type_iri: ClassVar[Dict[str, str]] = {
        'an': iri_note,
        'ar': iri_role_in_time,
        'be': iri_bibliographic_reference,
        'br': iri_expression,
        'ci': iri_citation,
        'de': iri_discourse_element,
        'id': iri_identifier,
        'pl': iri_singleloc_pointer_list,
        'ra': iri_agent,
        're': iri_manifestation,
        'rp': iri_intextref_pointer
    }

    def __init__(self, g: TripleLite, g_set: GraphSet, res_type: str, res: str | None = None,
                 resp_agent: str | None = None, source: str | None = None, count: str | None = None,
                 label: str | None = None, short_name: str = "",
                 preexisting_graph: SubgraphView | None = None) -> None:
        """
        Build the entity and, when possible, register it inside ``g_set``.

        :param g: The graph holding the triples of this entity
        :param g_set: The ``GraphSet`` this entity belongs to. When it is not None, the
          entity is registered in ``g_set.res_to_entity`` unless its resource is already there
        :param res_type: The type passed to ``_create_type`` when no preexisting graph is given
        :param res: The resource of the entity. When None, it is generated from
          ``g.identifier`` and ``count``
        :param resp_agent: The responsible agent, stored as is
        :param source: The source, stored as is
        :param count: The suffix appended to ``g.identifier`` to generate ``res``.
          Mandatory when ``res`` is None
        :param label: An optional label, created only when no preexisting graph is given
        :param short_name: The short name of the entity kind (see ``short_name_to_type_iri``)
        :param preexisting_graph: When given, the triples of ``self.g`` are replaced by the
          ones having ``res`` as subject in this graph
        :raises ValueError: if both ``res`` and ``count`` are None
        :return: None
        """
        super().__init__()
        self.g: TripleLite = g
        self.resp_agent: str | None = resp_agent
        self.source: str | None = source
        self.short_name: str = short_name
        # NOTE(review): g_set is tolerated as None below, but mark_as_to_be_deleted()
        # and merge() dereference self.g_set unconditionally -- confirm it is never None.
        self.g_set: GraphSet = g_set
        self._preexisting_triples: frozenset | SubgraphView = frozenset()
        self._merge_list: tuple[GraphEntity, ...] = ()
        # FLAGS
        self._to_be_deleted: bool = False
        self._was_merged: bool = False
        self._is_restored: bool = False

        # If res was not specified, create from scratch the URI reference for this entity,
        # otherwise use the provided one
        if res is None:
            self.res = self._generate_new_res(g, count)
        else:
            self.res = res

        if g_set is not None:
            # If not already done, register this GraphEntity instance inside the GraphSet
            if self.res not in g_set.res_to_entity:
                g_set.res_to_entity[self.res] = self

        if preexisting_graph is not None:
            # Triples inside self.g are entirely replaced by triples from preexisting_graph.
            # This has maximum priority with respect to every other self.g initializations.
            # It's fundamental that the preexisting graph gets passed as an argument of the constructor:
            # allowing the user to set this value later through a method would mean that the user could
            # set the preexisting graph AFTER having modified self.g (which would not make sense).
            self.remove_every_triple()
            self.g.add_many((self.res, p, o) for p, o in preexisting_graph.predicate_objects(self.res))
            self._preexisting_triples = preexisting_graph
        else:
            # Add mandatory information to the entity graph
            self._create_type(res_type)
            if label is not None:
                self.create_label(label)

    @staticmethod
    def _generate_new_res(g: TripleLite, count: str | None) -> str:
        """
        Generate the resource of a new entity by appending ``count`` to ``g.identifier``.

        :param g: The graph whose identifier is used as prefix
        :param count: The suffix identifying the new entity
        :raises ValueError: if ``count`` is None
        :return: The string of the generated resource
        """
        # An explicit check is used instead of an assert, which would be stripped by
        # "python -O" and would turn this error into an obscure str + None TypeError.
        if count is None:
            raise ValueError("[GraphEntity] 'count' must be provided when 'res' is None.")
        return str(g.identifier) + count

    @property
    def to_be_deleted(self) -> bool:
        """Indicates if this entity was marked as to be deleted."""
        return self._to_be_deleted

    @property
    def was_merged(self) -> bool:
        """Indicates if another entity was merged into this one since the last commit."""
        return self._was_merged

    @property
    def merge_list(self) -> tuple[GraphEntity, ...]:
        """The entities merged into this one since the last commit, in merge order."""
        return self._merge_list

    @property
    def is_restored(self) -> bool:
        """Indicates if this entity was restored after being deleted."""
        return self._is_restored

    def mark_as_restored(self) -> None:
        """
        Marks an entity as being restored after deletion.

        This state signals to the provenance system that:
        - No new invalidation time should be generated for the previous snapshot
        - The original deletion snapshot's invalidation time should be preserved
        - The entity should be treated as restored rather than newly created
        """
        self._to_be_deleted = False
        self._is_restored = True

    def mark_as_to_be_deleted(self) -> None:
        """
        Mark this entity as to be deleted and drop every triple pointing to it.

        For each entity registered in the ``GraphSet``, the triples having that entity
        as subject and this entity as object are removed from the graph of that entity.

        :return: None
        """
        # The term is loop-invariant, hence it is built only once.
        self_term = RDFTerm("uri", str(self.res))

        # Here we must REMOVE triples pointing
        # to 'self' [THIS CANNOT BE UNDONE]:
        for res, entity in self.g_set.res_to_entity.items():
            # The matches are copied into a list before removing them, so that
            # the graph is never modified while it is being iterated.
            triples_list: List[tuple] = list(entity.g.triples((res, None, self_term)))
            for triple in triples_list:
                entity.g.remove(triple)

        self._to_be_deleted = True

    def _get_specific_type(self) -> Optional[str]:
        """
        Return the first URI type of this entity which differs from its base type.

        The base type is the one associated with ``self.short_name`` in
        ``short_name_to_type_iri``.

        :raises KeyError: if ``self.short_name`` is not a key of ``short_name_to_type_iri``
        :return: The value of the specific type, or None if there is none
        """
        base_type_str = self.short_name_to_type_iri[self.short_name]
        for _, _, type_uri in self.g.triples((self.res, RDF_TYPE, None)):
            if type_uri.type == "uri" and type_uri.value != base_type_str:
                return type_uri.value
        return None

    def merge(self, other: Self, prefer_self: bool = False) -> None:
        """
        **WARNING:** ``GraphEntity`` **is an abstract class that cannot be instantiated at runtime.
        As such, it's only possible to execute this method on entities generated from**
        ``GraphEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**

        :param other: The entity which will be marked as to be deleted and whose properties will
          be merged into the current entity.
        :type other: GraphEntity
        :param prefer_self: If True, prefer values from the current entity for non-functional properties
        :type prefer_self: bool
        :raises TypeError: if the parameter is not of the same entity type
        :return: None
        """
        if not isinstance(other, GraphEntity) or other.short_name != self.short_name:
            raise TypeError(
                f"[{self.__class__.__name__}.merge] Expected entity type: {self.short_name}. "
                f"Provided: {type(other).__name__}."
            )

        # NOTE(review): there is no guard against 'other is self'. In that case the
        # call to other.mark_as_to_be_deleted() below would mark this very entity as
        # to be deleted -- confirm that callers never merge an entity with itself.

        # Both terms are loop-invariant, hence they are built only once.
        other_term = RDFTerm("uri", str(other.res))
        self_term = RDFTerm("uri", str(self.res))

        # Redirect triples pointing to 'other' to point to 'self'
        for res, entity in self.g_set.res_to_entity.items():
            # The matches are copied into a list before editing the graph, so that
            # the graph is never modified while it is being iterated.
            triples_list: List[tuple] = list(entity.g.triples((res, None, other_term)))
            for triple in triples_list:
                entity.g.remove(triple)
                new_triple = (triple[0], triple[1], self_term)
                entity.g.add(new_triple)

        self_specific_type = self._get_specific_type()
        other_specific_type = other._get_specific_type()

        # The specific type of 'other' wins, unless 'prefer_self' is set
        # and 'self' has a specific type of its own.
        final_specific_type = None
        if prefer_self and self_specific_type:
            final_specific_type = self_specific_type
        elif other_specific_type:
            final_specific_type = other_specific_type
        elif self_specific_type:
            final_specific_type = self_specific_type

        # Rewrite the types of 'self': the base type first, then the chosen specific one.
        self.g.remove((self.res, RDF_TYPE, None))
        base_type = self.short_name_to_type_iri[self.short_name]
        self.g.add((self.res, RDF_TYPE, RDFTerm("uri", base_type)))
        if final_specific_type:
            self.g.add((self.res, RDF_TYPE, RDFTerm("uri", final_specific_type)))

        label: Optional[str] = other.get_label()
        if label is not None:
            self.create_label(label)

        self._was_merged = True
        self._merge_list = (*self._merge_list, other)

        # 'other' must be deleted AFTER the redirection of
        # triples pointing to it, since mark_as_to_be_deleted
        # also removes every triple pointing to 'other'
        other.mark_as_to_be_deleted()

        self._merge_properties(other, prefer_self)

    def _merge_properties(self, other: GraphEntity, prefer_self: bool) -> None:
        """
        Hook called at the end of :meth:`merge`. It does nothing in this class.

        :param other: The entity that was merged into the current one
        :param prefer_self: The same flag received by :meth:`merge`
        :return: None
        """
        pass

    def commit_changes(self) -> None:
        """
        Consolidate the current state of the entity and reset its flags.

        If the entity was marked as to be deleted, all its triples are removed and the
        snapshot of preexisting triples is emptied. Otherwise, the snapshot becomes a
        frozen copy of the triples currently having ``self.res`` as subject.
        In both cases the restored/deleted/merged flags and the merge list are reset.

        :return: None
        """
        if self._to_be_deleted:
            self._preexisting_triples = frozenset()
            self.remove_every_triple()
        else:
            self._preexisting_triples = frozenset(self.g.triples((self.res, None, None)))
        self._is_restored = False
        self._to_be_deleted = False
        self._was_merged = False
        self._merge_list = ()