Coverage for oc_ocdm / metadata / metadata_entity.py: 80%
106 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-28 18:52 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-28 18:52 +0000
1#!/usr/bin/python
3# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com>
4#
5# SPDX-License-Identifier: ISC
7# -*- coding: utf-8 -*-
8from __future__ import annotations
10from typing import TYPE_CHECKING, List
12from oc_ocdm.abstract_entity import AbstractEntity
13from rdflib import URIRef, Namespace, Graph
15if TYPE_CHECKING:
16 from typing import ClassVar, Dict
17 from oc_ocdm.metadata.metadata_set import MetadataSet
class MetadataEntity(AbstractEntity):
    """Base class for the metadata entities tracked by a ``MetadataSet``.

    Every instance wraps the triples having ``self.res`` as subject inside the
    graph ``self.g``, keeps a snapshot of the triples it was loaded with
    (``self.preexisting_graph``) and carries the bookkeeping flags used by the
    merge/delete workflow (``to_be_deleted``, ``was_merged``, ``merge_list``).
    """

    DCTERMS = Namespace("http://purl.org/dc/terms/")
    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    VOID = Namespace("http://rdfs.org/ns/void#")

    iri_dataset = DCAT.Dataset
    iri_datafile = DCAT.Distribution

    # Item access is used here, presumably because attribute access on a
    # rdflib ``Namespace`` (a ``str`` subclass) would resolve to ``str.title``.
    iri_title = DCTERMS["title"]
    iri_description = DCTERMS.description
    iri_issued = DCTERMS.issued
    iri_modified = DCTERMS.modified
    iri_keyword = DCAT.keyword
    iri_subject = DCAT.theme
    iri_landing_page = DCAT.landingPage
    iri_subset = VOID.subset
    iri_sparql_endpoint = VOID.sparqlEndpoint
    iri_distribution = DCAT.distribution
    iri_license = DCTERMS.license
    iri_download_url = DCAT.downloadURL
    iri_media_type = DCAT.mediaType
    # NOTE(review): the DCAT vocabulary spells this property ``byteSize``.
    # ``byte_size`` is kept as-is so that the emitted triples do not change;
    # confirm against existing data before renaming.
    iri_byte_size = DCAT.byte_size

    # Maps each entity short name to the RDF type IRI of that kind of entity.
    short_name_to_type_iri: ClassVar[Dict[str, URIRef]] = {
        '_dataset_': iri_dataset,
        'di': iri_datafile
    }

    def __init__(self, g: Graph, base_iri: str, dataset_name: str, m_set: MetadataSet,
                 res_type: URIRef, res: URIRef | None = None, resp_agent: str | None = None,
                 source: str | None = None, count: str | None = None, label: str | None = None,
                 short_name: str = "", preexisting_graph: Graph | None = None) -> None:
        """Initialise the entity and register it inside ``m_set``.

        :param g: The graph holding the triples of this entity.
        :param base_iri: The IRI prefix used to build ``self.res`` when ``res`` is not given.
        :param dataset_name: The name appended to ``base_iri`` when building ``self.res``.
        :param m_set: The ``MetadataSet`` in which the entity gets registered (skipped if ``None``).
        :param res_type: The RDF type assigned to a freshly created entity.
        :param res: The URI of the entity; when ``None`` a new one is generated.
        :param resp_agent: Stored as ``self.resp_agent``.
        :param source: Stored as ``self.source``.
        :param count: The identifier appended to the generated URI; required whenever
          ``res`` is ``None`` and ``short_name`` is not ``'_dataset_'``.
        :param label: An optional label, only applied to freshly created entities.
        :param short_name: The short name identifying the kind of entity.
        :param preexisting_graph: When given, the triples of ``self.res`` found in it
          replace whatever ``g`` currently holds for this entity.
        :raises ValueError: if a new URI must be generated but ``count`` is missing
        """
        super().__init__()
        self.g: Graph = g
        self.base_iri: str = base_iri
        self.dataset_name: str = dataset_name
        self.resp_agent: str | None = resp_agent
        self.source: str | None = source
        self.short_name: str = short_name
        self.m_set: MetadataSet = m_set
        self.preexisting_graph: Graph = Graph(identifier=g.identifier)
        self._merge_list: tuple[MetadataEntity, ...] = ()
        # FLAGS
        self._to_be_deleted: bool = False
        self._was_merged: bool = False

        # If res was not specified, create from scratch the URI reference for this entity,
        # otherwise use the provided one
        if res is None:
            base_res: str = self.base_iri + self.dataset_name
            # endswith() also copes with an empty prefix, where indexing [-1] would fail.
            if not base_res.endswith('/'):
                base_res += '/'
            self.res = self._generate_new_res(count, base_res, short_name)
        else:
            self.res = res

        # If not already done, register this MetadataEntity instance inside the MetadataSet
        if m_set is not None and self.res not in m_set.res_to_entity:
            m_set.res_to_entity[self.res] = self

        if preexisting_graph is not None:
            # Triples inside self.g are entirely replaced by triples from preexisting_graph.
            # This has maximum priority with respect to every other self.g initializations.
            # It's fundamental that the preexisting graph gets passed as an argument of the constructor:
            # allowing the user to set this value later through a method would mean that the user could
            # set the preexisting graph AFTER having modified self.g (which would not make sense).
            self.remove_every_triple()
            for p, o in preexisting_graph.predicate_objects(self.res):
                self.g.add((self.res, p, o))
                self.preexisting_graph.add((self.res, p, o))
        else:
            # Add mandatory information to the entity graph
            self._create_type(res_type)
            if label is not None:
                self.create_label(label)

    @staticmethod
    def _generate_new_res(count: str | None, base_res: str, short_name: str) -> URIRef:
        """Build the URI of a new entity.

        :param count: The identifier appended after the short name; ignored for ``'_dataset_'``.
        :param base_res: The URI prefix, expected to already end with ``'/'``.
        :param short_name: The short name identifying the kind of entity.
        :raises ValueError: if ``count`` is ``None`` and ``short_name`` is not ``'_dataset_'``
        :return: ``base_res`` itself for ``'_dataset_'``, otherwise ``base_res + short_name + "/" + count``.
        """
        if short_name == '_dataset_':
            return URIRef(base_res)
        # An explicit check (rather than an assert) keeps the validation
        # in place even when Python runs with assertions disabled.
        if count is None:
            raise ValueError(
                f"A 'count' value is required to generate the URI of a '{short_name}' entity."
            )
        return URIRef(base_res + short_name + "/" + count)

    @property
    def to_be_deleted(self) -> bool:
        """Whether the entity has been marked as to be deleted."""
        return self._to_be_deleted

    @property
    def was_merged(self) -> bool:
        """Whether another entity has been merged into this one since the last commit."""
        return self._was_merged

    @property
    def merge_list(self) -> tuple[MetadataEntity, ...]:
        """The entities merged into this one since the last commit, in merge order."""
        return self._merge_list

    def mark_as_to_be_deleted(self) -> None:
        """Flag the entity as to be deleted and drop every triple pointing to it.

        The triples are removed from the graphs of all the entities registered
        in ``self.m_set``; this cannot be undone.

        :return: None
        """
        # Here we must REMOVE triples pointing
        # to 'self' [THIS CANNOT BE UNDONE]:
        for res, entity in self.m_set.res_to_entity.items():
            # Materialise the matches first: the graph is mutated while looping.
            triples_list: List[tuple] = list(entity.g.triples((res, None, self.res)))
            for triple in triples_list:
                entity.g.remove(triple)

        self._to_be_deleted = True

    def merge(self, other: MetadataEntity) -> None:
        """
        Merge ``other`` into the current entity and mark ``other`` as to be deleted.

        **WARNING:** ``MetadataEntity`` **is an abstract class that cannot be instantiated at runtime.
        As such, it's only possible to execute this method on entities generated from**
        ``MetadataEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**

        :param other: The entity which will be marked as to be deleted and whose properties will
          be merged into the current entity.
        :type other: MetadataEntity
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        if not isinstance(other, MetadataEntity) or other.short_name != self.short_name:
            raise TypeError(
                f"[{self.__class__.__name__}.merge] Expected entity type: {self.short_name}. "
                f"Provided: {type(other).__name__}."
            )

        # Here we must REDIRECT triples pointing
        # to 'other' to make them point to 'self':
        for res, entity in self.m_set.res_to_entity.items():
            # Materialise the matches first: the graph is mutated while looping.
            triples_list: List[tuple] = list(entity.g.triples((res, None, other.res)))
            for subj, pred, _ in triples_list:
                entity.g.remove((subj, pred, other.res))
                entity.g.add((subj, pred, self.res))

        types: List[URIRef] = other.get_types()
        for cur_type in types:
            self._create_type(cur_type)

        label: str | None = other.get_label()
        if label is not None:
            self.create_label(label)

        self._was_merged = True
        self._merge_list = (*self._merge_list, other)

        # 'other' must be deleted AFTER the redirection of
        # triples pointing to it, since mark_as_to_be_deleted
        # also removes every triple pointing to 'other'
        other.mark_as_to_be_deleted()

        self._merge_properties(other)

    def _merge_properties(self, other: MetadataEntity) -> None:
        """Last step of ``merge``; does nothing in this class.

        Presumably meant to be overridden by subclasses that need to copy
        their own properties from ``other``.

        :param other: The entity being merged into the current one.
        :return: None
        """

    def commit_changes(self) -> None:
        """Take the current state of the entity as its new baseline.

        ``self.preexisting_graph`` is rebuilt from scratch: it is left empty
        (and the entity's triples are removed) when the entity was marked as
        to be deleted, otherwise it receives a copy of the triples having
        ``self.res`` as subject. All the bookkeeping flags are then reset.

        :return: None
        """
        self.preexisting_graph = Graph(identifier=self.g.identifier)
        if self._to_be_deleted:
            self.remove_every_triple()
        else:
            for triple in self.g.triples((self.res, None, None)):
                self.preexisting_graph.add(triple)
        self._to_be_deleted = False
        self._was_merged = False
        self._merge_list = ()
184 self._merge_list = ()