Coverage for oc_ocdm / graph / graph_entity.py: 98%
253 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-28 18:52 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-28 18:52 +0000
1# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com>
2# SPDX-FileCopyrightText: 2022-2024 Arcangelo Massari <arcangelo.massari@unibo.it>
3# SPDX-FileCopyrightText: 2024 martasoricetti <marta.soricetti@studio.unibo.it>
4#
5# SPDX-License-Identifier: ISC
7from __future__ import annotations
9from typing import TYPE_CHECKING
11from oc_ocdm.abstract_entity import AbstractEntity
12from rdflib import RDF, Graph, Namespace, URIRef
14if TYPE_CHECKING:
15 from typing import ClassVar, Dict, List, Optional, Self
17 from oc_ocdm.graph.graph_set import GraphSet
class GraphEntity(AbstractEntity):
    """
    Base class of the entities whose triples are stored in an rdflib ``Graph`` and
    that are registered in a ``GraphSet``.

    The class attributes below collect the vocabulary namespaces and the IRIs of the
    classes and properties referenced by the entities, plus ``short_name_to_type_iri``,
    which maps an entity short name to the IRI of its base ``rdf:type``.
    """

    # ------------------------------------------------------------------
    # Vocabulary namespaces
    # ------------------------------------------------------------------
    BIRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/biro/")
    C4O: ClassVar[Namespace] = Namespace("http://purl.org/spar/c4o/")
    CO: ClassVar[Namespace] = Namespace("http://purl.org/co/")
    CITO: ClassVar[Namespace] = Namespace("http://purl.org/spar/cito/")
    DATACITE: ClassVar[Namespace] = Namespace("http://purl.org/spar/datacite/")
    DCTERMS: ClassVar[Namespace] = Namespace("http://purl.org/dc/terms/")
    DEO: ClassVar[Namespace] = Namespace("http://purl.org/spar/deo/")
    DOCO: ClassVar[Namespace] = Namespace("http://purl.org/spar/doco/")
    FABIO: ClassVar[Namespace] = Namespace("http://purl.org/spar/fabio/")
    FOAF: ClassVar[Namespace] = Namespace("http://xmlns.com/foaf/0.1/")
    FR: ClassVar[Namespace] = Namespace("http://purl.org/spar/fr/")
    FRBR: ClassVar[Namespace] = Namespace("http://purl.org/vocab/frbr/core#")
    LITERAL: ClassVar[Namespace] = Namespace("http://www.essepuntato.it/2010/06/literalreification/")
    OA: ClassVar[Namespace] = Namespace("http://www.w3.org/ns/oa#")
    OCO: ClassVar[Namespace] = Namespace("https://w3id.org/oc/ontology/")
    PRISM: ClassVar[Namespace] = Namespace("http://prismstandard.org/namespaces/basic/2.0/")
    PRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/pro/")

    # ------------------------------------------------------------------
    # IRIs of the classes and properties, each built from one of the
    # namespaces above
    # ------------------------------------------------------------------
    iri_has_subtitle: ClassVar[URIRef] = FABIO.hasSubtitle
    iri_has_publication_date: ClassVar[URIRef] = PRISM.publicationDate
    iri_bibliographic_reference: ClassVar[URIRef] = BIRO.BibliographicReference
    iri_references: ClassVar[URIRef] = BIRO.references
    iri_denotes: ClassVar[URIRef] = C4O.denotes
    iri_has_content: ClassVar[URIRef] = C4O.hasContent
    iri_intextref_pointer: ClassVar[URIRef] = C4O.InTextReferencePointer
    iri_is_context_of: ClassVar[URIRef] = C4O.isContextOf
    iri_singleloc_pointer_list: ClassVar[URIRef] = C4O.SingleLocationPointerList
    iri_has_element: ClassVar[URIRef] = CO.element
    iri_citation: ClassVar[URIRef] = CITO.Citation
    iri_cites: ClassVar[URIRef] = CITO.cites
    iri_citation_characterisation: ClassVar[URIRef] = CITO.hasCitationCharacterisation
    iri_has_citing_entity: ClassVar[URIRef] = CITO.hasCitingEntity
    iri_has_cited_entity: ClassVar[URIRef] = CITO.hasCitedEntity
    iri_openalex: ClassVar[URIRef] = DATACITE.openalex
    iri_arxiv: ClassVar[URIRef] = DATACITE.arxiv
    iri_oci: ClassVar[URIRef] = DATACITE.oci
    iri_doi: ClassVar[URIRef] = DATACITE.doi
    iri_pmid: ClassVar[URIRef] = DATACITE.pmid
    iri_pmcid: ClassVar[URIRef] = DATACITE.pmcid
    iri_orcid: ClassVar[URIRef] = DATACITE.orcid
    # Item access is required here: the term name contains hyphens.
    # NOTE: iri_xpath and iri_xmlid (below) resolve to the very same IRI.
    iri_xpath: ClassVar[URIRef] = DATACITE["local-resource-identifier-scheme"]
    iri_intrepid: ClassVar[URIRef] = DATACITE.intrepid
    iri_xmlid: ClassVar[URIRef] = DATACITE["local-resource-identifier-scheme"]
    iri_has_identifier: ClassVar[URIRef] = DATACITE.hasIdentifier
    iri_identifier: ClassVar[URIRef] = DATACITE.Identifier
    iri_isbn: ClassVar[URIRef] = DATACITE.isbn
    iri_issn: ClassVar[URIRef] = DATACITE.issn
    iri_url: ClassVar[URIRef] = DATACITE.url
    iri_uses_identifier_scheme: ClassVar[URIRef] = DATACITE.usesIdentifierScheme
    # Item access instead of DCTERMS.title: presumably avoids resolving to a
    # string method of the namespace object - confirm against rdflib's docs.
    iri_title: ClassVar[URIRef] = DCTERMS["title"]
    iri_caption: ClassVar[URIRef] = DEO.Caption
    iri_discourse_element: ClassVar[URIRef] = DEO.DiscourseElement
    iri_footnote: ClassVar[URIRef] = DOCO.Footnote
    iri_paragraph: ClassVar[URIRef] = DOCO.Paragraph
    iri_part: ClassVar[URIRef] = DOCO.Part
    iri_section: ClassVar[URIRef] = DOCO.Section
    iri_introduction: ClassVar[URIRef] = DEO.Introduction
    iri_methods: ClassVar[URIRef] = DEO.Methods
    iri_materials: ClassVar[URIRef] = DEO.Materials
    iri_related_work: ClassVar[URIRef] = DEO.RelatedWork
    iri_results: ClassVar[URIRef] = DEO.Results
    iri_discussion: ClassVar[URIRef] = DEO.Discussion
    iri_conclusion: ClassVar[URIRef] = DEO.Conclusion
    iri_section_title: ClassVar[URIRef] = DOCO.SectionTitle
    iri_sentence: ClassVar[URIRef] = DOCO.Sentence
    iri_table: ClassVar[URIRef] = DOCO.Table
    iri_text_chunk: ClassVar[URIRef] = DOCO.TextChunk
    iri_abstract: ClassVar[URIRef] = DOCO.Abstract
    iri_academic_proceedings: ClassVar[URIRef] = FABIO.AcademicProceedings
    iri_audio_document: ClassVar[URIRef] = FABIO.AudioDocument
    iri_book: ClassVar[URIRef] = FABIO.Book
    iri_book_chapter: ClassVar[URIRef] = FABIO.BookChapter
    iri_book_series: ClassVar[URIRef] = FABIO.BookSeries
    iri_book_set: ClassVar[URIRef] = FABIO.BookSet
    iri_computer_program: ClassVar[URIRef] = FABIO.ComputerProgram
    iri_data_file: ClassVar[URIRef] = FABIO.DataFile
    iri_data_management_plan: ClassVar[URIRef] = FABIO.DataManagementPlan
    iri_editorial: ClassVar[URIRef] = FABIO.Editorial
    iri_expression: ClassVar[URIRef] = FABIO.Expression
    iri_expression_collection: ClassVar[URIRef] = FABIO.ExpressionCollection
    iri_has_sequence_identifier: ClassVar[URIRef] = FABIO.hasSequenceIdentifier
    iri_journal: ClassVar[URIRef] = FABIO.Journal
    iri_journal_article: ClassVar[URIRef] = FABIO.JournalArticle
    iri_journal_editorial: ClassVar[URIRef] = FABIO.JournalEditorial
    iri_journal_issue: ClassVar[URIRef] = FABIO.JournalIssue
    iri_journal_volume: ClassVar[URIRef] = FABIO.JournalVolume
    iri_manifestation: ClassVar[URIRef] = FABIO.Manifestation
    iri_newspaper: ClassVar[URIRef] = FABIO.Newspaper
    iri_newspaper_article: ClassVar[URIRef] = FABIO.NewspaperArticle
    iri_newspaper_editorial: ClassVar[URIRef] = FABIO.NewspaperEditorial
    iri_newspaper_issue: ClassVar[URIRef] = FABIO.NewspaperIssue
    iri_peer_review: ClassVar[URIRef] = FR.ReviewVersion
    iri_preprint: ClassVar[URIRef] = FABIO.Preprint
    iri_presentation: ClassVar[URIRef] = FABIO.Presentation
    iri_proceedings_paper: ClassVar[URIRef] = FABIO.ProceedingsPaper
    # NOTE: same IRI as iri_series (below)
    iri_proceedings_series: ClassVar[URIRef] = FABIO.Series
    iri_reference_book: ClassVar[URIRef] = FABIO.ReferenceBook
    iri_reference_entry: ClassVar[URIRef] = FABIO.ReferenceEntry
    iri_report_document: ClassVar[URIRef] = FABIO.ReportDocument
    iri_retraction_notice: ClassVar[URIRef] = FABIO.RetractionNotice
    iri_series: ClassVar[URIRef] = FABIO.Series
    iri_specification_document: ClassVar[URIRef] = FABIO.SpecificationDocument
    iri_thesis: ClassVar[URIRef] = FABIO.Thesis
    iri_web_content: ClassVar[URIRef] = FABIO.WebContent
    iri_agent: ClassVar[URIRef] = FOAF.Agent
    iri_family_name: ClassVar[URIRef] = FOAF.familyName
    iri_given_name: ClassVar[URIRef] = FOAF.givenName
    iri_name: ClassVar[URIRef] = FOAF.name
    iri_embodiment: ClassVar[URIRef] = FRBR.embodiment
    iri_part_of: ClassVar[URIRef] = FRBR.partOf
    # NOTE: iri_contains_reference and iri_contains_de share the same IRI
    iri_contains_reference: ClassVar[URIRef] = FRBR.part
    iri_contains_de: ClassVar[URIRef] = FRBR.part
    iri_has_literal_value: ClassVar[URIRef] = LITERAL.hasLiteralValue
    iri_ending_page: ClassVar[URIRef] = PRISM.endingPage
    iri_starting_page: ClassVar[URIRef] = PRISM.startingPage
    iri_author: ClassVar[URIRef] = PRO.author
    iri_editor: ClassVar[URIRef] = PRO.editor
    iri_is_held_by: ClassVar[URIRef] = PRO.isHeldBy
    iri_publisher: ClassVar[URIRef] = PRO.publisher
    iri_is_document_context_for: ClassVar[URIRef] = PRO.isDocumentContextFor
    iri_role_in_time: ClassVar[URIRef] = PRO.RoleInTime
    iri_with_role: ClassVar[URIRef] = PRO.withRole
    iri_note: ClassVar[URIRef] = OA.Annotation
    iri_has_body: ClassVar[URIRef] = OA.hasBody
    iri_has_annotation: ClassVar[URIRef] = OCO.hasAnnotation  # inverse of OA.hasTarget
    iri_has_next: ClassVar[URIRef] = OCO.hasNext
    iri_archival_document: ClassVar[URIRef] = FABIO.ArchivalDocument
    iri_viaf: ClassVar[URIRef] = DATACITE.viaf
    iri_crossref: ClassVar[URIRef] = DATACITE.crossref  # TODO: add to datacite!
    iri_datacite: ClassVar[URIRef] = DATACITE.datacite  # TODO: add to datacite!
    iri_jid: ClassVar[URIRef] = DATACITE.jid  # TODO: add to datacite!
    iri_wikidata: ClassVar[URIRef] = DATACITE.wikidata  # TODO: add to datacite!
    iri_wikipedia: ClassVar[URIRef] = DATACITE.wikipedia  # TODO: add to datacite!
    iri_has_edition: ClassVar[URIRef] = PRISM.edition
    iri_relation: ClassVar[URIRef] = DCTERMS.relation
    iri_has_citation_creation_date: ClassVar[URIRef] = CITO.hasCitationCreationDate
    iri_has_citation_time_span: ClassVar[URIRef] = CITO.hasCitationTimeSpan
    iri_digital_manifestation: ClassVar[URIRef] = FABIO.DigitalManifestation
    iri_print_object: ClassVar[URIRef] = FABIO.PrintObject
    iri_has_url: ClassVar[URIRef] = FRBR.exemplar
    iri_self_citation: ClassVar[URIRef] = CITO.SelfCitation
    iri_affiliation_self_citation: ClassVar[URIRef] = CITO.AffiliationSelfCitation
    iri_author_network_self_citation: ClassVar[URIRef] = CITO.AuthorNetworkSelfCitation
    iri_author_self_citation: ClassVar[URIRef] = CITO.AuthorSelfCitation
    iri_funder_self_citation: ClassVar[URIRef] = CITO.FunderSelfCitation
    iri_journal_self_citation: ClassVar[URIRef] = CITO.JournalSelfCitation
    iri_journal_cartel_citation: ClassVar[URIRef] = CITO.JournalCartelCitation
    iri_distant_citation: ClassVar[URIRef] = CITO.DistantCitation
    # Item access instead of DCTERMS.format: presumably for the same reason as
    # iri_title above - confirm against rdflib's docs.
    iri_has_format: ClassVar[URIRef] = DCTERMS["format"]

    # Maps the short name of an entity kind to the IRI of its base rdf:type.
    # _get_specific_type() and merge() index it with self.short_name, so a short
    # name missing from this table makes them raise KeyError.
    short_name_to_type_iri: ClassVar[Dict[str, URIRef]] = {
        'an': iri_note,
        'ar': iri_role_in_time,
        'be': iri_bibliographic_reference,
        'br': iri_expression,
        'ci': iri_citation,
        'de': iri_discourse_element,
        'id': iri_identifier,
        'pl': iri_singleloc_pointer_list,
        'ra': iri_agent,
        're': iri_manifestation,
        'rp': iri_intextref_pointer
    }
185 def __init__(self, g: Graph, g_set: GraphSet, res_type: URIRef, res: URIRef | None = None,
186 resp_agent: str | None = None, source: str | None = None, count: str | None = None, label: str | None = None,
187 short_name: str = "", preexisting_graph: Graph | None = None) -> None:
188 super(GraphEntity, self).__init__()
189 self.g: Graph = g
190 self.resp_agent: str | None = resp_agent
191 self.source: str | None = source
192 self.short_name: str = short_name
193 self.g_set: GraphSet = g_set
194 self.preexisting_graph: Graph = Graph(identifier=g.identifier)
195 self._merge_list: tuple[GraphEntity, ...] = ()
196 # FLAGS
197 self._to_be_deleted: bool = False
198 self._was_merged: bool = False
199 self._is_restored: bool = False
201 # If res was not specified, create from scratch the URI reference for this entity,
202 # otherwise use the provided one
203 if res is None:
204 self.res = self._generate_new_res(g, count)
205 else:
206 self.res = res
208 if g_set is not None:
209 # If not already done, register this GraphEntity instance inside the GraphSet
210 if self.res not in g_set.res_to_entity:
211 g_set.res_to_entity[self.res] = self
213 if preexisting_graph is not None:
214 # Triples inside self.g are entirely replaced by triples from preexisting_graph.
215 # This has maximum priority with respect to every other self.g initializations.
216 # It's fundamental that the preexisting graph gets passed as an argument of the constructor:
217 # allowing the user to set this value later through a method would mean that the user could
218 # set the preexisting graph AFTER having modified self.g (which would not make sense).
219 self.remove_every_triple()
220 for p, o in preexisting_graph.predicate_objects(self.res):
221 self.g.add((self.res, p, o))
222 self.preexisting_graph.add((self.res, p, o))
223 else:
224 # Add mandatory information to the entity graph
225 self._create_type(res_type)
226 if label is not None:
227 self.create_label(label)
229 @staticmethod
230 def _generate_new_res(g: Graph, count: str | None) -> URIRef:
231 assert count is not None
232 return URIRef(str(g.identifier) + count)
    @property
    def to_be_deleted(self) -> bool:
        """
        Whether the entity is currently flagged for deletion.

        The flag is raised by ``mark_as_to_be_deleted`` and cleared by
        ``mark_as_restored`` and ``commit_changes``.
        """
        return self._to_be_deleted
    @property
    def was_merged(self) -> bool:
        """
        Whether ``merge`` has merged another entity into this one.

        The flag is raised by ``merge`` and cleared by ``commit_changes``.
        """
        return self._was_merged
    @property
    def merge_list(self) -> tuple[GraphEntity, ...]:
        """
        Entities merged into this one, in the order ``merge`` received them.

        Starts empty and is emptied again by ``commit_changes``.
        """
        return self._merge_list
    @property
    def is_restored(self) -> bool:
        """
        Indicates if this entity was restored after being deleted.

        The flag is raised by ``mark_as_restored`` and cleared by ``commit_changes``.
        """
        return self._is_restored
251 def mark_as_restored(self) -> None:
252 """
253 Marks an entity as being restored after deletion.
255 This state signals to the provenance system that:
256 - No new invalidation time should be generated for the previous snapshot
257 - The original deletion snapshot's invalidation time should be preserved
258 - The entity should be treated as restored rather than newly created
259 """
260 self._to_be_deleted = False
261 self._is_restored = True
263 def mark_as_to_be_deleted(self) -> None:
264 # Here we must REMOVE triples pointing
265 # to 'self' [THIS CANNOT BE UNDONE]:
266 for res, entity in self.g_set.res_to_entity.items():
267 triples_list: List[tuple] = list(entity.g.triples((res, None, self.res)))
268 for triple in triples_list:
269 entity.g.remove(triple)
271 self._to_be_deleted = True
273 def _get_specific_type(self) -> Optional[URIRef]:
274 """
275 Get the specific type of the entity (e.g., JournalArticle), if any.
276 Excludes the base Expression type.
278 Returns:
279 The specific type URI if present, None otherwise
280 """
281 base_type = self.short_name_to_type_iri[self.short_name]
282 for _, _, type_uri in self.g.triples((self.res, RDF.type, None)):
283 if isinstance(type_uri, URIRef) and type_uri != base_type:
284 return type_uri
285 return None
287 def merge(self, other: Self, prefer_self: bool = False) -> None:
288 """
289 **WARNING:** ``GraphEntity`` **is an abstract class that cannot be instantiated at runtime.
290 As such, it's only possible to execute this method on entities generated from**
291 ``GraphEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**
293 :param other: The entity which will be marked as to be deleted and whose properties will
294 be merged into the current entity.
295 :type other: GraphEntity
296 :param prefer_self: If True, prefer values from the current entity for non-functional properties
297 :type prefer_self: bool
298 :raises TypeError: if the parameter is not of the same entity type
299 :return: None
300 """
301 if not isinstance(other, GraphEntity) or other.short_name != self.short_name:
302 raise TypeError(
303 f"[{self.__class__.__name__}.merge] Expected entity type: {self.short_name}. "
304 f"Provided: {type(other).__name__}."
305 )
307 # Redirect triples pointing to 'other' to point to 'self'
308 for res, entity in self.g_set.res_to_entity.items():
309 triples_list: List[tuple] = list(entity.g.triples((res, None, other.res)))
310 for triple in triples_list:
311 entity.g.remove(triple)
312 new_triple = (triple[0], triple[1], self.res)
313 entity.g.add(new_triple)
315 self_specific_type = self._get_specific_type()
316 other_specific_type = other._get_specific_type()
318 final_specific_type = None
319 if prefer_self and self_specific_type:
320 final_specific_type = self_specific_type
321 elif other_specific_type:
322 final_specific_type = other_specific_type
323 elif self_specific_type:
324 final_specific_type = self_specific_type
326 self.g.remove((self.res, RDF.type, None))
327 base_type = self.short_name_to_type_iri[self.short_name]
328 self.g.add((self.res, RDF.type, base_type))
329 if final_specific_type:
330 self.g.add((self.res, RDF.type, final_specific_type))
332 label: Optional[str] = other.get_label()
333 if label is not None:
334 self.create_label(label)
336 self._was_merged = True
337 self._merge_list = (*self._merge_list, other)
339 # 'other' must be deleted AFTER the redirection of
340 # triples pointing to it, since mark_as_to_be_deleted
341 # also removes every triple pointing to 'other'
342 other.mark_as_to_be_deleted()
344 self._merge_properties(other, prefer_self)
346 def _merge_properties(self, other: GraphEntity, prefer_self: bool) -> None:
347 pass
349 def commit_changes(self):
350 self.preexisting_graph = Graph(identifier=self.g.identifier)
351 if self._to_be_deleted:
352 self.remove_every_triple()
353 else:
354 for triple in self.g.triples((self.res, None, None)):
355 self.preexisting_graph.add(triple)
356 self._is_restored = False
357 self._to_be_deleted = False
358 self._was_merged = False
359 self._merge_list = tuple()