Coverage for oc_ocdm/graph/graph_entity.py: 97%

250 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-05-30 22:05 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16from __future__ import annotations 

17 

18from typing import TYPE_CHECKING 

19 

20from oc_ocdm.abstract_entity import AbstractEntity 

21from rdflib import RDF, Graph, Namespace, URIRef 

22 

23if TYPE_CHECKING: 

24 from typing import ClassVar, Dict, List, Optional, Tuple 

25 

26 from oc_ocdm.graph.graph_set import GraphSet 

27 

28 

class GraphEntity(AbstractEntity):
    """
    Base class for entities whose triples are stored in an rdflib ``Graph``.

    The class body declares the namespaces and the IRIs (classes and properties)
    used to describe entities, plus ``short_name_to_type_iri``, which maps every
    entity short name to its base ``rdf:type``. Instances track their own graph,
    a snapshot of their preexisting triples and a few state flags (to be deleted,
    merged, restored).

    NOTE(review): ``remove_every_triple``, ``_create_type``, ``create_label`` and
    ``get_label`` are called below but not defined in this class; presumably they
    are inherited from ``AbstractEntity`` -- confirm.
    """

    # Namespaces used to build the IRIs below.
    BIRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/biro/")
    C4O: ClassVar[Namespace] = Namespace("http://purl.org/spar/c4o/")
    CO: ClassVar[Namespace] = Namespace("http://purl.org/co/")
    CITO: ClassVar[Namespace] = Namespace("http://purl.org/spar/cito/")
    DATACITE: ClassVar[Namespace] = Namespace("http://purl.org/spar/datacite/")
    DCTERMS: ClassVar[Namespace] = Namespace("http://purl.org/dc/terms/")
    DEO: ClassVar[Namespace] = Namespace("http://purl.org/spar/deo/")
    DOCO: ClassVar[Namespace] = Namespace("http://purl.org/spar/doco/")
    FABIO: ClassVar[Namespace] = Namespace("http://purl.org/spar/fabio/")
    FOAF: ClassVar[Namespace] = Namespace("http://xmlns.com/foaf/0.1/")
    FR: ClassVar[Namespace] = Namespace("http://purl.org/spar/fr/")
    FRBR: ClassVar[Namespace] = Namespace("http://purl.org/vocab/frbr/core#")
    LITERAL: ClassVar[Namespace] = Namespace("http://www.essepuntato.it/2010/06/literalreification/")
    OA: ClassVar[Namespace] = Namespace("http://www.w3.org/ns/oa#")
    OCO: ClassVar[Namespace] = Namespace("https://w3id.org/oc/ontology/")
    PRISM: ClassVar[Namespace] = Namespace("http://prismstandard.org/namespaces/basic/2.0/")
    PRO: ClassVar[Namespace] = Namespace("http://purl.org/spar/pro/")

    # IRIs of the classes and properties used by the entities.
    iri_has_subtitle: ClassVar[URIRef] = FABIO.hasSubtitle
    iri_has_publication_date: ClassVar[URIRef] = PRISM.publicationDate
    iri_bibliographic_reference: ClassVar[URIRef] = BIRO.BibliographicReference
    iri_references: ClassVar[URIRef] = BIRO.references
    iri_denotes: ClassVar[URIRef] = C4O.denotes
    iri_has_content: ClassVar[URIRef] = C4O.hasContent
    iri_intextref_pointer: ClassVar[URIRef] = C4O.InTextReferencePointer
    iri_is_context_of: ClassVar[URIRef] = C4O.isContextOf
    iri_singleloc_pointer_list: ClassVar[URIRef] = C4O.SingleLocationPointerList
    iri_has_element: ClassVar[URIRef] = CO.element
    iri_citation: ClassVar[URIRef] = CITO.Citation
    iri_cites: ClassVar[URIRef] = CITO.cites
    iri_citation_characterisation: ClassVar[URIRef] = CITO.hasCitationCharacterisation
    iri_has_citing_entity: ClassVar[URIRef] = CITO.hasCitingEntity
    iri_has_cited_entity: ClassVar[URIRef] = CITO.hasCitedEntity
    iri_openalex: ClassVar[URIRef] = DATACITE.openalex
    iri_arxiv: ClassVar[URIRef] = DATACITE.arxiv
    iri_oci: ClassVar[URIRef] = DATACITE.oci
    iri_doi: ClassVar[URIRef] = DATACITE.doi
    iri_pmid: ClassVar[URIRef] = DATACITE.pmid
    iri_pmcid: ClassVar[URIRef] = DATACITE.pmcid
    iri_orcid: ClassVar[URIRef] = DATACITE.orcid
    # NOTE: iri_xpath and iri_xmlid resolve to the very same IRI.
    iri_xpath: ClassVar[URIRef] = DATACITE["local-resource-identifier-scheme"]
    iri_intrepid: ClassVar[URIRef] = DATACITE.intrepid
    iri_xmlid: ClassVar[URIRef] = DATACITE["local-resource-identifier-scheme"]
    iri_has_identifier: ClassVar[URIRef] = DATACITE.hasIdentifier
    iri_identifier: ClassVar[URIRef] = DATACITE.Identifier
    iri_isbn: ClassVar[URIRef] = DATACITE.isbn
    iri_issn: ClassVar[URIRef] = DATACITE.issn
    iri_url: ClassVar[URIRef] = DATACITE.url
    iri_uses_identifier_scheme: ClassVar[URIRef] = DATACITE.usesIdentifierScheme
    # Item access instead of attribute access; presumably because ``.title``
    # would resolve to the ``str`` method rather than to a term -- confirm.
    iri_title: ClassVar[URIRef] = DCTERMS["title"]
    iri_caption: ClassVar[URIRef] = DEO.Caption
    iri_discourse_element: ClassVar[URIRef] = DEO.DiscourseElement
    iri_footnote: ClassVar[URIRef] = DOCO.Footnote
    iri_paragraph: ClassVar[URIRef] = DOCO.Paragraph
    iri_part: ClassVar[URIRef] = DOCO.Part
    iri_section: ClassVar[URIRef] = DOCO.Section
    iri_introduction: ClassVar[URIRef] = DEO.Introduction
    iri_methods: ClassVar[URIRef] = DEO.Methods
    iri_materials: ClassVar[URIRef] = DEO.Materials
    iri_related_work: ClassVar[URIRef] = DEO.RelatedWork
    iri_results: ClassVar[URIRef] = DEO.Results
    iri_discussion: ClassVar[URIRef] = DEO.Discussion
    iri_conclusion: ClassVar[URIRef] = DEO.Conclusion
    iri_section_title: ClassVar[URIRef] = DOCO.SectionTitle
    iri_sentence: ClassVar[URIRef] = DOCO.Sentence
    iri_table: ClassVar[URIRef] = DOCO.Table
    iri_text_chunk: ClassVar[URIRef] = DOCO.TextChunk
    iri_abstract: ClassVar[URIRef] = DOCO.Abstract
    iri_academic_proceedings: ClassVar[URIRef] = FABIO.AcademicProceedings
    iri_audio_document: ClassVar[URIRef] = FABIO.AudioDocument
    iri_book: ClassVar[URIRef] = FABIO.Book
    iri_book_chapter: ClassVar[URIRef] = FABIO.BookChapter
    iri_book_series: ClassVar[URIRef] = FABIO.BookSeries
    iri_book_set: ClassVar[URIRef] = FABIO.BookSet
    iri_computer_program: ClassVar[URIRef] = FABIO.ComputerProgram
    iri_data_file: ClassVar[URIRef] = FABIO.DataFile
    iri_data_management_plan: ClassVar[URIRef] = FABIO.DataManagementPlan
    iri_editorial: ClassVar[URIRef] = FABIO.Editorial
    iri_expression: ClassVar[URIRef] = FABIO.Expression
    iri_expression_collection: ClassVar[URIRef] = FABIO.ExpressionCollection
    iri_has_sequence_identifier: ClassVar[URIRef] = FABIO.hasSequenceIdentifier
    iri_journal: ClassVar[URIRef] = FABIO.Journal
    iri_journal_article: ClassVar[URIRef] = FABIO.JournalArticle
    iri_journal_editorial: ClassVar[URIRef] = FABIO.JournalEditorial
    iri_journal_issue: ClassVar[URIRef] = FABIO.JournalIssue
    iri_journal_volume: ClassVar[URIRef] = FABIO.JournalVolume
    iri_manifestation: ClassVar[URIRef] = FABIO.Manifestation
    iri_newspaper: ClassVar[URIRef] = FABIO.Newspaper
    iri_newspaper_article: ClassVar[URIRef] = FABIO.NewspaperArticle
    iri_newspaper_editorial: ClassVar[URIRef] = FABIO.NewspaperEditorial
    iri_newspaper_issue: ClassVar[URIRef] = FABIO.NewspaperIssue
    iri_peer_review: ClassVar[URIRef] = FR.ReviewVersion
    iri_preprint: ClassVar[URIRef] = FABIO.Preprint
    iri_presentation: ClassVar[URIRef] = FABIO.Presentation
    iri_proceedings_paper: ClassVar[URIRef] = FABIO.ProceedingsPaper
    # NOTE: iri_proceedings_series and iri_series resolve to the very same IRI.
    iri_proceedings_series: ClassVar[URIRef] = FABIO.Series
    iri_reference_book: ClassVar[URIRef] = FABIO.ReferenceBook
    iri_reference_entry: ClassVar[URIRef] = FABIO.ReferenceEntry
    iri_report_document: ClassVar[URIRef] = FABIO.ReportDocument
    iri_retraction_notice: ClassVar[URIRef] = FABIO.RetractionNotice
    iri_series: ClassVar[URIRef] = FABIO.Series
    iri_specification_document: ClassVar[URIRef] = FABIO.SpecificationDocument
    iri_thesis: ClassVar[URIRef] = FABIO.Thesis
    iri_web_content: ClassVar[URIRef] = FABIO.WebContent
    iri_agent: ClassVar[URIRef] = FOAF.Agent
    iri_family_name: ClassVar[URIRef] = FOAF.familyName
    iri_given_name: ClassVar[URIRef] = FOAF.givenName
    iri_name: ClassVar[URIRef] = FOAF.name
    iri_embodiment: ClassVar[URIRef] = FRBR.embodiment
    iri_part_of: ClassVar[URIRef] = FRBR.partOf
    # NOTE: iri_contains_reference and iri_contains_de resolve to the very same IRI.
    iri_contains_reference: ClassVar[URIRef] = FRBR.part
    iri_contains_de: ClassVar[URIRef] = FRBR.part
    iri_has_literal_value: ClassVar[URIRef] = LITERAL.hasLiteralValue
    iri_ending_page: ClassVar[URIRef] = PRISM.endingPage
    iri_starting_page: ClassVar[URIRef] = PRISM.startingPage
    iri_author: ClassVar[URIRef] = PRO.author
    iri_editor: ClassVar[URIRef] = PRO.editor
    iri_is_held_by: ClassVar[URIRef] = PRO.isHeldBy
    iri_publisher: ClassVar[URIRef] = PRO.publisher
    iri_is_document_context_for: ClassVar[URIRef] = PRO.isDocumentContextFor
    iri_role_in_time: ClassVar[URIRef] = PRO.RoleInTime
    iri_with_role: ClassVar[URIRef] = PRO.withRole
    iri_note: ClassVar[URIRef] = OA.Annotation
    iri_has_body: ClassVar[URIRef] = OA.hasBody
    iri_has_annotation: ClassVar[URIRef] = OCO.hasAnnotation  # inverse of OA.hasTarget
    iri_has_next: ClassVar[URIRef] = OCO.hasNext
    iri_archival_document: ClassVar[URIRef] = FABIO.ArchivalDocument
    iri_viaf: ClassVar[URIRef] = DATACITE.viaf
    iri_crossref: ClassVar[URIRef] = DATACITE.crossref  # TODO: add to datacite!
    iri_datacite: ClassVar[URIRef] = DATACITE.datacite  # TODO: add to datacite!
    iri_jid: ClassVar[URIRef] = DATACITE.jid  # TODO: add to datacite!
    iri_wikidata: ClassVar[URIRef] = DATACITE.wikidata  # TODO: add to datacite!
    iri_wikipedia: ClassVar[URIRef] = DATACITE.wikipedia  # TODO: add to datacite!
    iri_has_edition: ClassVar[URIRef] = PRISM.edition
    iri_relation: ClassVar[URIRef] = DCTERMS.relation
    iri_has_citation_creation_date: ClassVar[URIRef] = CITO.hasCitationCreationDate
    iri_has_citation_time_span: ClassVar[URIRef] = CITO.hasCitationTimeSpan
    iri_digital_manifestation: ClassVar[URIRef] = FABIO.DigitalManifestation
    iri_print_object: ClassVar[URIRef] = FABIO.PrintObject
    iri_has_url: ClassVar[URIRef] = FRBR.exemplar
    iri_self_citation: ClassVar[URIRef] = CITO.SelfCitation
    iri_affiliation_self_citation: ClassVar[URIRef] = CITO.AffiliationSelfCitation
    iri_author_network_self_citation: ClassVar[URIRef] = CITO.AuthorNetworkSelfCitation
    iri_author_self_citation: ClassVar[URIRef] = CITO.AuthorSelfCitation
    iri_funder_self_citation: ClassVar[URIRef] = CITO.FunderSelfCitation
    iri_journal_self_citation: ClassVar[URIRef] = CITO.JournalSelfCitation
    iri_journal_cartel_citation: ClassVar[URIRef] = CITO.JournalCartelCitation
    iri_distant_citation: ClassVar[URIRef] = CITO.DistantCitation
    iri_has_format: ClassVar[URIRef] = DCTERMS["format"]

    # Base rdf:type of every kind of entity, keyed by the entity short name.
    short_name_to_type_iri: ClassVar[Dict[str, URIRef]] = {
        'an': iri_note,
        'ar': iri_role_in_time,
        'be': iri_bibliographic_reference,
        'br': iri_expression,
        'ci': iri_citation,
        'de': iri_discourse_element,
        'id': iri_identifier,
        'pl': iri_singleloc_pointer_list,
        'ra': iri_agent,
        're': iri_manifestation,
        'rp': iri_intextref_pointer
    }

    def __init__(self, g: Graph, g_set: Optional[GraphSet], res: Optional[URIRef] = None,
                 res_type: Optional[URIRef] = None, resp_agent: Optional[str] = None,
                 source: Optional[str] = None, count: Optional[str] = None,
                 label: Optional[str] = None, short_name: str = "",
                 preexisting_graph: Optional[Graph] = None) -> None:
        """
        Initialise the entity and, when a ``GraphSet`` is given, register it there.

        :param g: The graph holding the triples of this entity.
        :param g_set: The ``GraphSet`` in which the entity gets registered
            (under ``res``) unless that resource is already registered. ``None``
            skips the registration.
        :param res: The URI of the entity. When ``None``, a new URI is built from
            ``g.identifier`` and ``count``.
        :param res_type: Forwarded to ``_create_type`` when no preexisting graph
            is provided.
        :param resp_agent: Stored as ``self.resp_agent``.
        :param source: Stored as ``self.source``.
        :param count: Suffix appended to ``g.identifier`` to build the URI when
            ``res`` is ``None``.
        :param label: When not ``None`` (and no preexisting graph is provided),
            forwarded to ``create_label``.
        :param short_name: Stored as ``self.short_name``; used as key of
            ``short_name_to_type_iri``.
        :param preexisting_graph: When not ``None``, the triples of ``self.g`` are
            replaced by the triples of this graph having ``res`` as subject.
        :raises TypeError: if both ``res`` and ``count`` are ``None``
        """
        super().__init__()
        self.g: Graph = g
        self.resp_agent: Optional[str] = resp_agent
        self.source: Optional[str] = source
        self.short_name: str = short_name
        self.g_set: Optional[GraphSet] = g_set
        self.preexisting_graph: Graph = Graph(identifier=g.identifier)
        self._merge_list: Tuple[GraphEntity, ...] = tuple()
        # FLAGS
        self._to_be_deleted: bool = False
        self._was_merged: bool = False
        self._is_restored: bool = False

        # If res was not specified, create from scratch the URI reference for this entity,
        # otherwise use the provided one
        if res is None:
            self.res = self._generate_new_res(g, count)
        else:
            self.res = res

        if g_set is not None:
            # If not already done, register this GraphEntity instance inside the GraphSet
            if self.res not in g_set.res_to_entity:
                g_set.res_to_entity[self.res] = self

        if preexisting_graph is not None:
            # Triples inside self.g are entirely replaced by triples from preexisting_graph.
            # This has maximum priority with respect to every other self.g initializations.
            # It's fundamental that the preexisting graph gets passed as an argument of the constructor:
            # allowing the user to set this value later through a method would mean that the user could
            # set the preexisting graph AFTER having modified self.g (which would not make sense).
            self.remove_every_triple()
            for p, o in preexisting_graph.predicate_objects(self.res):
                self.g.add((self.res, p, o))
                self.preexisting_graph.add((self.res, p, o))
        else:
            # Add mandatory information to the entity graph
            self._create_type(res_type)
            if label is not None:
                self.create_label(label)

    @staticmethod
    def _generate_new_res(g: Graph, count: str) -> URIRef:
        """
        Build the URI of a new entity by appending ``count`` to the graph identifier.

        :param g: The graph whose identifier is used as the URI prefix.
        :param count: The suffix appended to the identifier.
        :raises TypeError: if ``count`` is ``None``
        :return: The new URI reference.
        """
        if count is None:
            # Same exception type the string concatenation below would raise,
            # but with a message that points at the actual problem.
            raise TypeError("'count' must be provided when 'res' is not specified")
        return URIRef(str(g.identifier) + count)

    @property
    def to_be_deleted(self) -> bool:
        """Indicates if this entity is marked as to be deleted."""
        return self._to_be_deleted

    @property
    def was_merged(self) -> bool:
        """Indicates if other entities were merged into this one since the last commit."""
        return self._was_merged

    @property
    def merge_list(self) -> Tuple[GraphEntity, ...]:
        """The entities merged into this one since the last commit, in merge order."""
        return self._merge_list

    @property
    def is_restored(self) -> bool:
        """Indicates if this entity was restored after being deleted."""
        return self._is_restored

    def mark_as_restored(self) -> None:
        """
        Marks an entity as being restored after deletion.

        This state signals to the provenance system that:
        - No new invalidation time should be generated for the previous snapshot
        - The original deletion snapshot's invalidation time should be preserved
        - The entity should be treated as restored rather than newly created
        """
        self._to_be_deleted = False
        self._is_restored = True

    def mark_as_to_be_deleted(self) -> None:
        """
        Flag this entity as to be deleted and drop every triple pointing to it.

        For each entity registered in the ``GraphSet``, the triples having that
        entity as subject and this entity as object are removed from its graph.
        When the entity has no ``GraphSet`` (``g_set`` is ``None``) there is no
        registry to scan, so only the flag is set.

        :return: None
        """
        # Here we must REMOVE triples pointing
        # to 'self' [THIS CANNOT BE UNDONE]:
        if self.g_set is not None:
            for res, entity in self.g_set.res_to_entity.items():
                # Materialise the matches first: the graph must not be
                # modified while it is being iterated.
                triples_list: List[Tuple] = list(entity.g.triples((res, None, self.res)))
                for triple in triples_list:
                    entity.g.remove(triple)

        self._to_be_deleted = True

    def _get_specific_type(self) -> Optional[URIRef]:
        """
        Get the specific type of the entity (e.g., JournalArticle), if any.
        Excludes the base type associated with the entity's short name.

        Returns:
            The first ``rdf:type`` found in the graph that differs from the base
            type, None if there is none

        Raises:
            KeyError: if ``self.short_name`` is not a key of ``short_name_to_type_iri``
        """
        base_type = self.short_name_to_type_iri[self.short_name]
        for _, _, type_uri in self.g.triples((self.res, RDF.type, None)):
            if type_uri != base_type:
                return type_uri
        return None

    def merge(self, other: GraphEntity, prefer_self: bool = False) -> None:
        """
        **WARNING:** ``GraphEntity`` **is an abstract class that cannot be instantiated at runtime.
        As such, it's only possible to execute this method on entities generated from**
        ``GraphEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**

        Triples pointing to ``other`` are redirected to the current entity, the
        types and the label are merged, and ``other`` is finally marked as to be
        deleted.

        :param other: The entity which will be marked as to be deleted and whose properties will
         be merged into the current entity.
        :type other: GraphEntity
        :param prefer_self: If True, the specific type of the current entity (when it has one)
         wins over the specific type of ``other``; otherwise the specific type of ``other``
         (when it has one) wins.
        :type prefer_self: bool
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        # NOTE(review): nothing in this base implementation checks the type of
        # 'other'; presumably the TypeError documented above is raised by the
        # subclasses -- confirm.

        # Here we must REDIRECT triples pointing
        # to 'other' to make them point to 'self'
        # (nothing to scan when the entity has no GraphSet):
        if self.g_set is not None:
            for res, entity in self.g_set.res_to_entity.items():
                triples_list: List[Tuple] = list(entity.g.triples((res, None, other.res)))
                for triple in triples_list:
                    entity.g.remove(triple)
                    new_triple = (triple[0], triple[1], self.res)
                    entity.g.add(new_triple)

        self_specific_type = self._get_specific_type()
        other_specific_type = other._get_specific_type()

        # Choose the specific type to keep: self's one when it is preferred and
        # available, otherwise other's one, falling back on self's one.
        final_specific_type = None
        if prefer_self and self_specific_type:
            final_specific_type = self_specific_type
        elif other_specific_type:
            final_specific_type = other_specific_type
        elif self_specific_type:
            final_specific_type = self_specific_type

        # Reset the types: the base type is always present, the specific one
        # only if it was found above.
        self.g.remove((self.res, RDF.type, None))
        base_type = self.short_name_to_type_iri[self.short_name]
        self.g.add((self.res, RDF.type, base_type))
        if final_specific_type:
            self.g.add((self.res, RDF.type, final_specific_type))

        label: Optional[str] = other.get_label()
        if label is not None:
            self.create_label(label)

        self._was_merged = True
        self._merge_list = (*self._merge_list, other)

        # 'other' must be deleted AFTER the redirection of
        # triples pointing to it, since mark_as_to_be_deleted
        # also removes every triple pointing to 'other'
        other.mark_as_to_be_deleted()

    def commit_changes(self) -> None:
        """
        Make the current state of the entity the new baseline.

        ``preexisting_graph`` is rebuilt from scratch: it is left empty (and every
        triple of the entity is removed) when the entity is to be deleted, otherwise
        it receives the triples of ``self.g`` having ``self.res`` as subject.
        All the state flags and the merge list are then reset.

        :return: None
        """
        self.preexisting_graph = Graph(identifier=self.g.identifier)
        if self._to_be_deleted:
            self.remove_every_triple()
        else:
            for triple in self.g.triples((self.res, None, None)):
                self.preexisting_graph.add(triple)
        self._is_restored = False
        self._to_be_deleted = False
        self._was_merged = False
        self._merge_list = tuple()