Coverage for oc_ocdm / metadata / metadata_entity.py: 80%

106 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-28 18:52 +0000

1#!/usr/bin/python 

2 

3# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7# -*- coding: utf-8 -*- 

8from __future__ import annotations 

9 

10from typing import TYPE_CHECKING, List 

11 

12from oc_ocdm.abstract_entity import AbstractEntity 

13from rdflib import URIRef, Namespace, Graph 

14 

15if TYPE_CHECKING: 

16 from typing import ClassVar, Dict 

17 from oc_ocdm.metadata.metadata_set import MetadataSet 

18 

19 

class MetadataEntity(AbstractEntity):
    """
    Common base for the entities that describe a dataset and its distributions.

    Every instance wraps an RDF graph (``self.g``) holding the triples whose subject
    is ``self.res``, and can be registered inside a ``MetadataSet`` so that other
    entities of the same set can be scanned for triples pointing to it.

    **WARNING:** ``MetadataEntity`` **is meant to be used through its subclasses.**
    """

    DCTERMS = Namespace("http://purl.org/dc/terms/")
    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    VOID = Namespace("http://rdfs.org/ns/void#")

    # --- Entity types -------------------------------------------------------
    iri_dataset = DCAT.Dataset
    iri_datafile = DCAT.Distribution

    # --- Predicates ---------------------------------------------------------
    # Item access is kept for 'title' (presumably because attribute access
    # could collide with ``str.title`` on the str-based Namespace -- confirm).
    iri_title = DCTERMS["title"]
    iri_description = DCTERMS.description
    iri_issued = DCTERMS.issued
    iri_modified = DCTERMS.modified
    iri_keyword = DCAT.keyword
    iri_subject = DCAT.theme
    iri_landing_page = DCAT.landingPage
    iri_subset = VOID.subset
    iri_sparql_endpoint = VOID.sparqlEndpoint
    iri_distribution = DCAT.distribution
    iri_license = DCTERMS.license
    iri_download_url = DCAT.downloadURL
    iri_media_type = DCAT.mediaType
    # The DCAT vocabulary names this property 'byteSize' (camelCase, like the
    # other DCAT terms above); 'byte_size' is not a DCAT term.
    # NOTE(review): data serialized with the previous 'dcat:byte_size' IRI will
    # not be matched by this predicate and may need a one-off migration.
    iri_byte_size = DCAT.byteSize

    # Maps each entity short name to the RDF type of the corresponding entity.
    short_name_to_type_iri: ClassVar[Dict[str, URIRef]] = {
        '_dataset_': iri_dataset,
        'di': iri_datafile
    }

    def __init__(self, g: Graph, base_iri: str, dataset_name: str, m_set: MetadataSet,
                 res_type: URIRef, res: URIRef | None = None, resp_agent: str | None = None,
                 source: str | None = None, count: str | None = None, label: str | None = None,
                 short_name: str = "", preexisting_graph: Graph | None = None) -> None:
        """
        Build the entity, mint (or adopt) its IRI and initialise its graph.

        :param g: The graph that stores the triples of this entity.
        :type g: Graph
        :param base_iri: Prefix of every IRI minted for this dataset.
        :type base_iri: str
        :param dataset_name: Name appended to ``base_iri`` when minting a new IRI.
        :type dataset_name: str
        :param m_set: The set in which this entity gets registered (skipped when ``None``).
        :type m_set: MetadataSet
        :param res_type: RDF type added to the entity when no preexisting graph is given.
        :type res_type: URIRef
        :param res: IRI to adopt for this entity; when ``None`` a new one is minted.
        :type res: URIRef | None
        :param resp_agent: Stored as-is on the instance.
        :type resp_agent: str | None
        :param source: Stored as-is on the instance.
        :type source: str | None
        :param count: Last path segment of a newly minted IRI; required when ``res`` is
          ``None`` and ``short_name`` is not ``'_dataset_'``.
        :type count: str | None
        :param label: Label added to the entity when no preexisting graph is given.
        :type label: str | None
        :param short_name: Short name identifying the kind of entity.
        :type short_name: str
        :param preexisting_graph: Graph whose triples about ``res`` replace the content of ``g``.
        :type preexisting_graph: Graph | None
        :raises ValueError: if a new IRI must be minted for a non-dataset entity and
          ``count`` is ``None``
        :return: None
        """
        super().__init__()
        self.g: Graph = g
        self.base_iri: str = base_iri
        self.dataset_name: str = dataset_name
        self.resp_agent: str | None = resp_agent
        self.source: str | None = source
        self.short_name: str = short_name
        self.m_set: MetadataSet = m_set
        self.preexisting_graph: Graph = Graph(identifier=g.identifier)
        self._merge_list: tuple[MetadataEntity, ...] = ()
        # FLAGS
        self._to_be_deleted: bool = False
        self._was_merged: bool = False

        # If res was not specified, create from scratch the URI reference for this entity,
        # otherwise use the provided one
        if res is None:
            base_res: str = self.base_iri + self.dataset_name
            # endswith() (rather than indexing [-1]) also copes with an empty base.
            if not base_res.endswith('/'):
                base_res += '/'
            self.res = self._generate_new_res(count, base_res, short_name)
        else:
            self.res = res

        # If not already done, register this MetadataEntity instance inside the MetadataSet
        if m_set is not None and self.res not in m_set.res_to_entity:
            m_set.res_to_entity[self.res] = self

        if preexisting_graph is not None:
            # Triples inside self.g are entirely replaced by triples from preexisting_graph.
            # This has maximum priority with respect to every other self.g initializations.
            # It's fundamental that the preexisting graph gets passed as an argument of the
            # constructor: allowing the user to set this value later through a method would
            # mean that the user could set the preexisting graph AFTER having modified self.g
            # (which would not make sense).
            self.remove_every_triple()
            for p, o in preexisting_graph.predicate_objects(self.res):
                self.g.add((self.res, p, o))
                self.preexisting_graph.add((self.res, p, o))
        else:
            # Add mandatory information to the entity graph
            self._create_type(res_type)
            if label is not None:
                self.create_label(label)

    @staticmethod
    def _generate_new_res(count: str | None, base_res: str, short_name: str) -> URIRef:
        """
        Mint the IRI of a new entity.

        The dataset itself (``short_name == '_dataset_'``) is identified by ``base_res``;
        every other entity by ``<base_res><short_name>/<count>``.

        :param count: Last path segment of the IRI (ignored for the dataset).
        :type count: str | None
        :param base_res: IRI prefix, expected to end with ``/``.
        :type base_res: str
        :param short_name: Short name identifying the kind of entity.
        :type short_name: str
        :raises ValueError: if ``count`` is ``None`` for a non-dataset entity
        :return: The newly minted IRI
        :rtype: URIRef
        """
        if short_name == '_dataset_':
            return URIRef(base_res)
        # Explicit check instead of ``assert``: assertions are stripped under ``python -O``.
        if count is None:
            raise ValueError(
                f"[MetadataEntity._generate_new_res] 'count' is required to mint the IRI "
                f"of an entity with short name '{short_name}'."
            )
        return URIRef(base_res + short_name + "/" + count)

    @property
    def to_be_deleted(self) -> bool:
        """``True`` if the entity was marked as to be deleted since the last commit."""
        return self._to_be_deleted

    @property
    def was_merged(self) -> bool:
        """``True`` if another entity was merged into this one since the last commit."""
        return self._was_merged

    @property
    def merge_list(self) -> tuple[MetadataEntity, ...]:
        """The entities merged into this one since the last commit, in merge order."""
        return self._merge_list

    def _rewrite_incoming_triples(self, old_target: URIRef, new_target: URIRef | None = None) -> None:
        """
        Remove, from every entity registered in ``self.m_set``, the triples whose
        object is ``old_target``; when ``new_target`` is given, re-add each of them
        with ``new_target`` as object.

        :param old_target: The object of the triples to be removed.
        :type old_target: URIRef
        :param new_target: The object replacing ``old_target`` (``None`` means plain removal).
        :type new_target: URIRef | None
        :return: None
        """
        # The constructor tolerates a missing set: in that case there is nothing to scan.
        if self.m_set is None:
            return
        for res, entity in self.m_set.res_to_entity.items():
            # Materialise the matches first: the graph is mutated inside the loop.
            matching: List[tuple] = list(entity.g.triples((res, None, old_target)))
            for triple in matching:
                entity.g.remove(triple)
                if new_target is not None:
                    entity.g.add((triple[0], triple[1], new_target))

    def mark_as_to_be_deleted(self) -> None:
        """
        Flag this entity as to be deleted and remove, from the entities of its set,
        every triple pointing to it. **The removal of those triples cannot be undone.**

        :return: None
        """
        # Here we must REMOVE triples pointing
        # to 'self' [THIS CANNOT BE UNDONE]:
        self._rewrite_incoming_triples(self.res)

        self._to_be_deleted = True

    def merge(self, other: MetadataEntity) -> None:
        """
        **WARNING:** ``MetadataEntity`` **is an abstract class that cannot be instantiated at runtime.
        As such, it's only possible to execute this method on entities generated from**
        ``MetadataEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**

        Triples of the set that point to ``other`` are redirected to the current entity,
        the types and the label of ``other`` are copied over, and ``other`` is finally
        marked as to be deleted.

        :param other: The entity which will be marked as to be deleted and whose properties will
          be merged into the current entity.
        :type other: MetadataEntity
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        if not isinstance(other, MetadataEntity) or other.short_name != self.short_name:
            raise TypeError(
                f"[{self.__class__.__name__}.merge] Expected entity type: {self.short_name}. "
                f"Provided: {type(other).__name__}."
            )

        # Here we must REDIRECT triples pointing
        # to 'other' to make them point to 'self':
        self._rewrite_incoming_triples(other.res, self.res)

        types: List[URIRef] = other.get_types()
        for cur_type in types:
            self._create_type(cur_type)

        label: str | None = other.get_label()
        if label is not None:
            self.create_label(label)

        self._was_merged = True
        self._merge_list = (*self._merge_list, other)

        # 'other' must be deleted AFTER the redirection of
        # triples pointing to it, since mark_as_to_be_deleted
        # also removes every triple pointing to 'other'
        other.mark_as_to_be_deleted()

        self._merge_properties(other)

    def _merge_properties(self, other: MetadataEntity) -> None:
        """
        Hook invoked at the end of :meth:`merge`; it does nothing in this class
        (presumably subclasses override it to copy their own properties -- confirm).

        :param other: The entity that has just been merged into the current one.
        :type other: MetadataEntity
        :return: None
        """
        pass

    def commit_changes(self) -> None:
        """
        Take the current state of the entity as its new baseline.

        ``preexisting_graph`` is rebuilt from scratch: it stays empty (and the entity's
        triples are removed) when the entity was marked as to be deleted, otherwise it
        receives a copy of every triple of ``self.g`` having ``self.res`` as subject.
        The deletion/merge flags and the merge list are then reset.

        :return: None
        """
        self.preexisting_graph = Graph(identifier=self.g.identifier)
        if self._to_be_deleted:
            self.remove_every_triple()
        else:
            for triple in self.g.triples((self.res, None, None)):
                self.preexisting_graph.add(triple)
        self._to_be_deleted = False
        self._was_merged = False
        self._merge_list = ()

182 self._to_be_deleted = False 

183 self._was_merged = False 

184 self._merge_list = ()