Coverage for oc_ocdm / graph / graph_entity.py: 97%

252 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-08 20:23 +0000

1# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com> 

2# SPDX-FileCopyrightText: 2022-2024 Arcangelo Massari <arcangelo.massari@unibo.it> 

3# SPDX-FileCopyrightText: 2024 martasoricetti <marta.soricetti@studio.unibo.it> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7from __future__ import annotations 

8 

9from typing import TYPE_CHECKING 

10 

11from triplelite import RDFTerm, SubgraphView, TripleLite 

12 

13from oc_ocdm.abstract_entity import AbstractEntity 

14from oc_ocdm.constants import RDF_TYPE, Namespace 

15 

16if TYPE_CHECKING: 

17 from typing import ClassVar, Dict, List, Optional, Self 

18 

19 from oc_ocdm.graph.graph_set import GraphSet 

20 

21 

class GraphEntity(AbstractEntity):
    """
    Base class for the entities stored in a ``GraphSet``.

    It holds the namespaces and IRI constants shared by its subclasses, the
    triples describing the entity (``self.g``), and the bookkeeping flags
    (to-be-deleted / merged / restored) read through the properties below.
    """

    # Namespaces of the vocabularies used to build the IRI constants below.
    BIRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/biro/")
    C4O: ClassVar[Namespace] = Namespace("http://purl.org/spar/c4o/")
    CO: ClassVar[Namespace] = Namespace("http://purl.org/co/")
    CITO: ClassVar[Namespace] = Namespace("http://purl.org/spar/cito/")
    DATACITE: ClassVar[Namespace] = Namespace("http://purl.org/spar/datacite/")
    DCTERMS: ClassVar[Namespace] = Namespace("http://purl.org/dc/terms/")
    DEO: ClassVar[Namespace] = Namespace("http://purl.org/spar/deo/")
    DOCO: ClassVar[Namespace] = Namespace("http://purl.org/spar/doco/")
    FABIO: ClassVar[Namespace] = Namespace("http://purl.org/spar/fabio/")
    FOAF: ClassVar[Namespace] = Namespace("http://xmlns.com/foaf/0.1/")
    FR: ClassVar[Namespace] = Namespace("http://purl.org/spar/fr/")
    FRBR: ClassVar[Namespace] = Namespace("http://purl.org/vocab/frbr/core#")
    LITERAL: ClassVar[Namespace] = Namespace("http://www.essepuntato.it/2010/06/literalreification/")
    OA: ClassVar[Namespace] = Namespace("http://www.w3.org/ns/oa#")
    OCO: ClassVar[Namespace] = Namespace("https://w3id.org/oc/ontology/")
    PRISM: ClassVar[Namespace] = Namespace("http://prismstandard.org/namespaces/basic/2.0/")
    PRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/pro/")

    # IRIs of the classes and properties used by the entities.
    iri_has_subtitle: ClassVar[str] = FABIO.hasSubtitle
    iri_has_publication_date: ClassVar[str] = PRISM.publicationDate
    iri_bibliographic_reference: ClassVar[str] = BIRO.BibliographicReference
    iri_references: ClassVar[str] = BIRO.references
    iri_denotes: ClassVar[str] = C4O.denotes
    iri_has_content: ClassVar[str] = C4O.hasContent
    iri_intextref_pointer: ClassVar[str] = C4O.InTextReferencePointer
    iri_is_context_of: ClassVar[str] = C4O.isContextOf
    iri_singleloc_pointer_list: ClassVar[str] = C4O.SingleLocationPointerList
    iri_has_element: ClassVar[str] = CO.element
    iri_citation: ClassVar[str] = CITO.Citation
    iri_cites: ClassVar[str] = CITO.cites
    iri_citation_characterisation: ClassVar[str] = CITO.hasCitationCharacterisation
    iri_has_citing_entity: ClassVar[str] = CITO.hasCitingEntity
    iri_has_cited_entity: ClassVar[str] = CITO.hasCitedEntity
    iri_openalex: ClassVar[str] = DATACITE.openalex
    iri_arxiv: ClassVar[str] = DATACITE.arxiv
    iri_oci: ClassVar[str] = DATACITE.oci
    iri_doi: ClassVar[str] = DATACITE.doi
    iri_pmid: ClassVar[str] = DATACITE.pmid
    iri_pmcid: ClassVar[str] = DATACITE.pmcid
    iri_orcid: ClassVar[str] = DATACITE.orcid
    # NOTE: iri_xpath and iri_xmlid resolve to the very same scheme IRI.
    iri_xpath: ClassVar[str] = DATACITE["local-resource-identifier-scheme"]
    iri_intrepid: ClassVar[str] = DATACITE.intrepid
    iri_xmlid: ClassVar[str] = DATACITE["local-resource-identifier-scheme"]
    iri_has_identifier: ClassVar[str] = DATACITE.hasIdentifier
    iri_identifier: ClassVar[str] = DATACITE.Identifier
    iri_isbn: ClassVar[str] = DATACITE.isbn
    iri_issn: ClassVar[str] = DATACITE.issn
    iri_url: ClassVar[str] = DATACITE.url
    iri_uses_identifier_scheme: ClassVar[str] = DATACITE.usesIdentifierScheme
    iri_title: ClassVar[str] = DCTERMS.title
    iri_caption: ClassVar[str] = DEO.Caption
    iri_discourse_element: ClassVar[str] = DEO.DiscourseElement
    iri_footnote: ClassVar[str] = DOCO.Footnote
    iri_paragraph: ClassVar[str] = DOCO.Paragraph
    iri_part: ClassVar[str] = DOCO.Part
    iri_section: ClassVar[str] = DOCO.Section
    iri_introduction: ClassVar[str] = DEO.Introduction
    iri_methods: ClassVar[str] = DEO.Methods
    iri_materials: ClassVar[str] = DEO.Materials
    iri_related_work: ClassVar[str] = DEO.RelatedWork
    iri_results: ClassVar[str] = DEO.Results
    iri_discussion: ClassVar[str] = DEO.Discussion
    iri_conclusion: ClassVar[str] = DEO.Conclusion
    iri_section_title: ClassVar[str] = DOCO.SectionTitle
    iri_sentence: ClassVar[str] = DOCO.Sentence
    iri_table: ClassVar[str] = DOCO.Table
    iri_text_chunk: ClassVar[str] = DOCO.TextChunk
    iri_abstract: ClassVar[str] = DOCO.Abstract
    iri_academic_proceedings: ClassVar[str] = FABIO.AcademicProceedings
    iri_audio_document: ClassVar[str] = FABIO.AudioDocument
    iri_book: ClassVar[str] = FABIO.Book
    iri_book_chapter: ClassVar[str] = FABIO.BookChapter
    iri_book_series: ClassVar[str] = FABIO.BookSeries
    iri_book_set: ClassVar[str] = FABIO.BookSet
    iri_computer_program: ClassVar[str] = FABIO.ComputerProgram
    iri_data_file: ClassVar[str] = FABIO.DataFile
    iri_data_management_plan: ClassVar[str] = FABIO.DataManagementPlan
    iri_editorial: ClassVar[str] = FABIO.Editorial
    iri_expression: ClassVar[str] = FABIO.Expression
    iri_expression_collection: ClassVar[str] = FABIO.ExpressionCollection
    iri_has_sequence_identifier: ClassVar[str] = FABIO.hasSequenceIdentifier
    iri_journal: ClassVar[str] = FABIO.Journal
    iri_journal_article: ClassVar[str] = FABIO.JournalArticle
    iri_journal_editorial: ClassVar[str] = FABIO.JournalEditorial
    iri_journal_issue: ClassVar[str] = FABIO.JournalIssue
    iri_journal_volume: ClassVar[str] = FABIO.JournalVolume
    iri_manifestation: ClassVar[str] = FABIO.Manifestation
    iri_newspaper: ClassVar[str] = FABIO.Newspaper
    iri_newspaper_article: ClassVar[str] = FABIO.NewspaperArticle
    iri_newspaper_editorial: ClassVar[str] = FABIO.NewspaperEditorial
    iri_newspaper_issue: ClassVar[str] = FABIO.NewspaperIssue
    iri_peer_review: ClassVar[str] = FR.ReviewVersion
    iri_preprint: ClassVar[str] = FABIO.Preprint
    iri_presentation: ClassVar[str] = FABIO.Presentation
    iri_proceedings_paper: ClassVar[str] = FABIO.ProceedingsPaper
    # NOTE: same IRI as iri_series below.
    iri_proceedings_series: ClassVar[str] = FABIO.Series
    iri_reference_book: ClassVar[str] = FABIO.ReferenceBook
    iri_reference_entry: ClassVar[str] = FABIO.ReferenceEntry
    iri_report_document: ClassVar[str] = FABIO.ReportDocument
    iri_retraction_notice: ClassVar[str] = FABIO.RetractionNotice
    iri_series: ClassVar[str] = FABIO.Series
    iri_specification_document: ClassVar[str] = FABIO.SpecificationDocument
    iri_thesis: ClassVar[str] = FABIO.Thesis
    iri_web_content: ClassVar[str] = FABIO.WebContent
    iri_agent: ClassVar[str] = FOAF.Agent
    iri_family_name: ClassVar[str] = FOAF.familyName
    iri_given_name: ClassVar[str] = FOAF.givenName
    iri_name: ClassVar[str] = FOAF.name
    iri_embodiment: ClassVar[str] = FRBR.embodiment
    iri_part_of: ClassVar[str] = FRBR.partOf
    # NOTE: iri_contains_reference and iri_contains_de share the same property IRI.
    iri_contains_reference: ClassVar[str] = FRBR.part
    iri_contains_de: ClassVar[str] = FRBR.part
    iri_has_literal_value: ClassVar[str] = LITERAL.hasLiteralValue
    iri_ending_page: ClassVar[str] = PRISM.endingPage
    iri_starting_page: ClassVar[str] = PRISM.startingPage
    iri_author: ClassVar[str] = PRO.author
    iri_editor: ClassVar[str] = PRO.editor
    iri_is_held_by: ClassVar[str] = PRO.isHeldBy
    iri_publisher: ClassVar[str] = PRO.publisher
    iri_is_document_context_for: ClassVar[str] = PRO.isDocumentContextFor
    iri_role_in_time: ClassVar[str] = PRO.RoleInTime
    iri_with_role: ClassVar[str] = PRO.withRole
    iri_note: ClassVar[str] = OA.Annotation
    iri_has_body: ClassVar[str] = OA.hasBody
    iri_has_annotation: ClassVar[str] = OCO.hasAnnotation  # inverse of OA.hasTarget
    iri_has_next: ClassVar[str] = OCO.hasNext
    iri_archival_document: ClassVar[str] = FABIO.ArchivalDocument
    iri_viaf: ClassVar[str] = DATACITE.viaf
    iri_crossref: ClassVar[str] = DATACITE.crossref  # TODO: add to datacite!
    iri_datacite: ClassVar[str] = DATACITE.datacite  # TODO: add to datacite!
    iri_jid: ClassVar[str] = DATACITE.jid  # TODO: add to datacite!
    iri_wikidata: ClassVar[str] = DATACITE.wikidata  # TODO: add to datacite!
    iri_wikipedia: ClassVar[str] = DATACITE.wikipedia  # TODO: add to datacite!
    iri_has_edition: ClassVar[str] = PRISM.edition
    iri_relation: ClassVar[str] = DCTERMS.relation
    iri_has_citation_creation_date: ClassVar[str] = CITO.hasCitationCreationDate
    iri_has_citation_time_span: ClassVar[str] = CITO.hasCitationTimeSpan
    iri_digital_manifestation: ClassVar[str] = FABIO.DigitalManifestation
    iri_print_object: ClassVar[str] = FABIO.PrintObject
    iri_has_url: ClassVar[str] = FRBR.exemplar
    iri_self_citation: ClassVar[str] = CITO.SelfCitation
    iri_affiliation_self_citation: ClassVar[str] = CITO.AffiliationSelfCitation
    iri_author_network_self_citation: ClassVar[str] = CITO.AuthorNetworkSelfCitation
    iri_author_self_citation: ClassVar[str] = CITO.AuthorSelfCitation
    iri_funder_self_citation: ClassVar[str] = CITO.FunderSelfCitation
    iri_journal_self_citation: ClassVar[str] = CITO.JournalSelfCitation
    iri_journal_cartel_citation: ClassVar[str] = CITO.JournalCartelCitation
    iri_distant_citation: ClassVar[str] = CITO.DistantCitation
    iri_has_format: ClassVar[str] = DCTERMS.format

    # Maps each entity short name to the IRI of its base RDF type.
    short_name_to_type_iri: ClassVar[Dict[str, str]] = {
        'an': iri_note,
        'ar': iri_role_in_time,
        'be': iri_bibliographic_reference,
        'br': iri_expression,
        'ci': iri_citation,
        'de': iri_discourse_element,
        'id': iri_identifier,
        'pl': iri_singleloc_pointer_list,
        'ra': iri_agent,
        're': iri_manifestation,
        'rp': iri_intextref_pointer,
    }

    def __init__(self, g: TripleLite, g_set: GraphSet, res_type: str, res: str | None = None,
                 resp_agent: str | None = None, source: str | None = None, count: str | None = None,
                 label: str | None = None, short_name: str = "",
                 preexisting_graph: SubgraphView | None = None) -> None:
        """
        Initialise the entity and register it inside ``g_set``.

        :param g: the graph holding the triples of this entity
        :param g_set: the GraphSet in which the entity gets registered (skipped when None)
        :param res_type: the type IRI passed to ``_create_type`` when no preexisting graph is given
        :param res: the URI of the entity; when None it is built from ``g.identifier`` and ``count``
        :param resp_agent: the responsible agent, stored as-is
        :param source: the source, stored as-is
        :param count: the suffix appended to ``g.identifier`` when ``res`` is None
        :param label: an optional label, created only when no preexisting graph is given
        :param short_name: the short name of the entity type (a key of ``short_name_to_type_iri``)
        :param preexisting_graph: when given, its triples about ``res`` replace the content of ``g``
        """
        super().__init__()
        self.g: TripleLite = g
        self.resp_agent: str | None = resp_agent
        self.source: str | None = source
        self.short_name: str = short_name
        self.g_set: GraphSet = g_set
        self._preexisting_triples: frozenset | SubgraphView = frozenset()
        self._merge_list: tuple[GraphEntity, ...] = ()
        # FLAGS
        self._to_be_deleted: bool = False
        self._was_merged: bool = False
        self._is_restored: bool = False

        # If res was not specified, create from scratch the URI reference for this entity,
        # otherwise use the provided one
        if res is None:
            self.res = self._generate_new_res(g, count)
        else:
            self.res = res

        if g_set is not None:
            # If not already done, register this GraphEntity instance inside the GraphSet
            if self.res not in g_set.res_to_entity:
                g_set.res_to_entity[self.res] = self

        if preexisting_graph is not None:
            # Triples inside self.g are entirely replaced by triples from preexisting_graph.
            # This has maximum priority with respect to every other self.g initializations.
            # It's fundamental that the preexisting graph gets passed as an argument of the constructor:
            # allowing the user to set this value later through a method would mean that the user could
            # set the preexisting graph AFTER having modified self.g (which would not make sense).
            self.remove_every_triple()
            self.g.add_many((self.res, p, o) for p, o in preexisting_graph.predicate_objects(self.res))
            self._preexisting_triples = preexisting_graph
        else:
            # Add mandatory information to the entity graph
            self._create_type(res_type)
            if label is not None:
                self.create_label(label)

    @staticmethod
    def _generate_new_res(g: TripleLite, count: str | None) -> str:
        """Build a new entity URI by appending ``count`` to the identifier of ``g``."""
        assert count is not None, "'count' is required when 'res' is not provided"
        return str(g.identifier) + count

    @property
    def to_be_deleted(self) -> bool:
        """Indicates if this entity has been marked as to be deleted."""
        return self._to_be_deleted

    @property
    def was_merged(self) -> bool:
        """Indicates if another entity was merged into this one since the last commit."""
        return self._was_merged

    @property
    def merge_list(self) -> tuple[GraphEntity, ...]:
        """The entities merged into this one since the last commit, in merge order."""
        return self._merge_list

    @property
    def is_restored(self) -> bool:
        """Indicates if this entity was restored after being deleted."""
        return self._is_restored

    def mark_as_restored(self) -> None:
        """
        Marks an entity as being restored after deletion.

        This state signals to the provenance system that:
        - No new invalidation time should be generated for the previous snapshot
        - The original deletion snapshot's invalidation time should be preserved
        - The entity should be treated as restored rather than newly created
        """
        self._to_be_deleted = False
        self._is_restored = True

    def mark_as_to_be_deleted(self) -> None:
        """
        Flag this entity as to be deleted and drop every triple pointing to it.

        For each entity registered in the GraphSet, the triples having that
        entity as subject and this entity's URI as object are removed.
        **This cannot be undone.**
        """
        # The pattern object does not change across iterations: build it once.
        self_term = RDFTerm("uri", str(self.res))
        for res, entity in self.g_set.res_to_entity.items():
            # Materialise the matches first, since the graph is mutated while looping.
            for triple in list(entity.g.triples((res, None, self_term))):
                entity.g.remove(triple)

        self._to_be_deleted = True

    def _get_specific_type(self) -> str | None:
        """
        Return the first URI type of this entity that differs from the base
        type associated with its short name, or None when there is none.
        """
        base_type_str = self.short_name_to_type_iri[self.short_name]
        for _, _, type_uri in self.g.triples((self.res, RDF_TYPE, None)):
            if type_uri.type == "uri" and type_uri.value != base_type_str:
                return type_uri.value
        return None

    def merge(self, other: Self, prefer_self: bool = False) -> None:
        """
        **WARNING:** ``GraphEntity`` **is an abstract class that cannot be instantiated at runtime.
        As such, it's only possible to execute this method on entities generated from**
        ``GraphEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**

        :param other: The entity which will be marked as to be deleted and whose properties will
            be merged into the current entity.
        :type other: GraphEntity
        :param prefer_self: If True, prefer values from the current entity for non-functional properties
        :type prefer_self: bool
        :raises TypeError: if the parameter is not of the same entity type
        :return: None
        """
        if not isinstance(other, GraphEntity) or other.short_name != self.short_name:
            raise TypeError(
                f"[{self.__class__.__name__}.merge] Expected entity type: {self.short_name}. "
                f"Provided: {type(other).__name__}."
            )

        # Redirect triples pointing to 'other' to point to 'self'.
        # Both terms are loop-invariant, hence they are built only once.
        other_term = RDFTerm("uri", str(other.res))
        self_term = RDFTerm("uri", str(self.res))
        for res, entity in self.g_set.res_to_entity.items():
            # Materialise the matches first, since the graph is mutated while looping.
            for triple in list(entity.g.triples((res, None, other_term))):
                entity.g.remove(triple)
                entity.g.add((triple[0], triple[1], self_term))

        self_specific_type = self._get_specific_type()
        other_specific_type = other._get_specific_type()

        # The specific type of 'other' wins, unless prefer_self is set and
        # 'self' has one; 'self' is also the fallback when 'other' has none.
        final_specific_type = None
        if prefer_self and self_specific_type:
            final_specific_type = self_specific_type
        elif other_specific_type:
            final_specific_type = other_specific_type
        elif self_specific_type:
            final_specific_type = self_specific_type

        # Rewrite the types from scratch: base type plus (at most) one specific type.
        self.g.remove((self.res, RDF_TYPE, None))
        base_type = self.short_name_to_type_iri[self.short_name]
        self.g.add((self.res, RDF_TYPE, RDFTerm("uri", base_type)))
        if final_specific_type:
            self.g.add((self.res, RDF_TYPE, RDFTerm("uri", final_specific_type)))

        label: str | None = other.get_label()
        if label is not None:
            self.create_label(label)

        self._was_merged = True
        self._merge_list = (*self._merge_list, other)

        # 'other' must be deleted AFTER the redirection of
        # triples pointing to it, since mark_as_to_be_deleted
        # also removes every triple pointing to 'other'
        other.mark_as_to_be_deleted()

        self._merge_properties(other, prefer_self)

    def _merge_properties(self, other: GraphEntity, prefer_self: bool) -> None:
        """Hook invoked at the end of ``merge``; it does nothing in this base class."""
        pass

    def commit_changes(self) -> None:
        """
        Consolidate the current state of the entity.

        If the entity is marked as to be deleted, the snapshot of preexisting
        triples is emptied and every triple is removed; otherwise the snapshot
        becomes the set of triples currently having this entity as subject.
        In both cases the flags and the merge list are reset afterwards.
        """
        if self._to_be_deleted:
            self._preexisting_triples = frozenset()
            self.remove_every_triple()
        else:
            self._preexisting_triples = frozenset(self.g.triples((self.res, None, None)))
        self._is_restored = False
        self._to_be_deleted = False
        self._was_merged = False
        self._merge_list = ()