Coverage for oc_ocdm / metadata / metadata_entity.py: 82%

105 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-08 20:23 +0000

1#!/usr/bin/python 

2 

3# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7# -*- coding: utf-8 -*- 

8from __future__ import annotations 

9 

10from typing import TYPE_CHECKING, List 

11 

12from triplelite import RDFTerm, SubgraphView, TripleLite, rdflib_to_rdfterm 

13 

14from oc_ocdm.abstract_entity import AbstractEntity 

15from oc_ocdm.constants import Namespace 

16 

17if TYPE_CHECKING: 

18 from typing import ClassVar, Dict 

19 

20 from oc_ocdm.metadata.metadata_set import MetadataSet 

21 

22 

class MetadataEntity(AbstractEntity):
    """Common base for the metadata entities registered in a ``MetadataSet``.

    The class exposes the DCTERMS / DCAT / VOID terms used by metadata
    entities as class attributes, builds (or adopts) the resource identifier
    of the entity, registers the entity inside its ``MetadataSet`` and
    implements the shared merge / deletion / commit bookkeeping.

    As stated in :meth:`merge`, this class is meant to be used through its
    subclasses only.
    """

    # Vocabularies the metadata terms below are taken from.
    DCTERMS = Namespace("http://purl.org/dc/terms/")
    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    VOID = Namespace("http://rdfs.org/ns/void#")

    # Entity types.
    iri_dataset = DCAT.Dataset
    iri_datafile = DCAT.Distribution

    # Properties.
    iri_title = DCTERMS.title
    iri_description = DCTERMS.description
    iri_issued = DCTERMS.issued
    iri_modified = DCTERMS.modified
    iri_keyword = DCAT.keyword
    iri_subject = DCAT.theme
    iri_landing_page = DCAT.landingPage
    iri_subset = VOID.subset
    iri_sparql_endpoint = VOID.sparqlEndpoint
    iri_distribution = DCAT.distribution
    iri_license = DCTERMS.license
    iri_download_url = DCAT.downloadURL
    iri_media_type = DCAT.mediaType
    iri_byte_size = DCAT.byteSize

    # Maps the short name of an entity kind to its type term.
    short_name_to_type_iri: ClassVar[Dict[str, str]] = {
        '_dataset_': iri_dataset,
        'di': iri_datafile
    }

    def __init__(self, g: TripleLite, base_iri: str, dataset_name: str, m_set: MetadataSet,
                 res_type: str, res: str | None = None, resp_agent: str | None = None,
                 source: str | None = None, count: str | None = None, label: str | None = None,
                 short_name: str = "", preexisting_graph: SubgraphView | None = None) -> None:
        """Initialise the entity and register it inside ``m_set``.

        :param g: The graph holding the triples of this entity.
        :param base_iri: The base IRI; concatenated with ``dataset_name`` to build
          the prefix of a newly generated resource identifier.
        :param dataset_name: The dataset name appended to ``base_iri``.
        :param m_set: The ``MetadataSet`` this entity belongs to. When it is not
          ``None``, the entity is registered in ``m_set.res_to_entity`` under its
          resource identifier, unless that identifier is already registered.
        :param res_type: The type passed to ``_create_type`` when no
          ``preexisting_graph`` is given.
        :param res: An already existing resource identifier. When ``None``, a new
          one is generated from ``base_iri``, ``dataset_name``, ``short_name``
          and ``count``.
        :param resp_agent: Stored as-is on the instance.
        :param source: Stored as-is on the instance.
        :param count: The last path segment of a newly generated resource
          identifier. Required when ``res`` is ``None`` and ``short_name`` is not
          ``'_dataset_'``.
        :param label: An optional label, passed to ``create_label`` when no
          ``preexisting_graph`` is given.
        :param short_name: The short name of the entity kind.
        :param preexisting_graph: When given, the triples of this entity are
          loaded from it instead of being created from ``res_type``/``label``.
        :raises ValueError: if a new resource identifier must be generated for a
          non-dataset entity and ``count`` is ``None``.
        :return: None
        """
        super().__init__()
        self.g: TripleLite = g
        self.base_iri: str = base_iri
        self.dataset_name: str = dataset_name
        self.resp_agent: str | None = resp_agent
        self.source: str | None = source
        self.short_name: str = short_name
        self.m_set: MetadataSet = m_set
        self._preexisting_triples: frozenset | SubgraphView = frozenset()
        self._merge_list: tuple[MetadataEntity, ...] = ()
        # FLAGS
        self._to_be_deleted: bool = False
        self._was_merged: bool = False

        # If res was not specified, create from scratch the URI reference for this entity,
        # otherwise use the provided one
        if res is None:
            base_res: str = self.base_iri + self.dataset_name
            # endswith() also copes with an empty prefix, where indexing [-1] would raise.
            if not base_res.endswith('/'):
                base_res += '/'
            self.res = self._generate_new_res(count, base_res, short_name)
        else:
            self.res = res

        if m_set is not None:
            # If not already done, register this MetadataEntity instance inside the MetadataSet
            if self.res not in m_set.res_to_entity:
                m_set.res_to_entity[self.res] = self

        if preexisting_graph is not None:
            # Load this entity's triples from preexisting_graph.
            # NOTE(review): remove_every_triple() is inherited; presumably it clears
            # the triples of this entity from self.g first - confirm in AbstractEntity.
            self.remove_every_triple()
            self.g.add_many((self.res, p, rdflib_to_rdfterm(o)) for p, o in preexisting_graph.predicate_objects(self.res))
            self._preexisting_triples = preexisting_graph
        else:
            # Add mandatory information to the entity graph
            self._create_type(res_type)
            if label is not None:
                self.create_label(label)

    @staticmethod
    def _generate_new_res(count: str | None, base_res: str, short_name: str) -> str:
        """Build the resource identifier of a new entity.

        :param count: The last path segment of the identifier; ignored for the
          ``'_dataset_'`` short name, required otherwise.
        :param base_res: The identifier prefix, expected to end with ``'/'``.
        :param short_name: The short name of the entity kind.
        :raises ValueError: if ``count`` is ``None`` for a non-dataset entity.
        :return: ``base_res`` itself for ``'_dataset_'``, otherwise
          ``base_res + short_name + "/" + count``.
        """
        if short_name == '_dataset_':
            return base_res
        # An explicit check (rather than an assert) keeps the validation active
        # when Python runs with -O, where assert statements are stripped.
        if count is None:
            raise ValueError(
                f"[MetadataEntity._generate_new_res] A count is required to generate "
                f"the resource of an entity with short name '{short_name}'."
            )
        return base_res + short_name + "/" + count

    @property
    def to_be_deleted(self) -> bool:
        """Whether this entity has been marked as to be deleted."""
        return self._to_be_deleted

    @property
    def was_merged(self) -> bool:
        """Whether another entity was merged into this one since the last commit."""
        return self._was_merged

    @property
    def merge_list(self) -> tuple[MetadataEntity, ...]:
        """The entities merged into this one since the last commit, in merge order."""
        return self._merge_list

    def mark_as_to_be_deleted(self) -> None:
        """Flag this entity for deletion and drop every triple pointing to it.

        For each entity registered in the ``MetadataSet``, the triples having that
        entity as subject and this entity as object are removed from the entity's
        graph. This cannot be undone.

        :return: None
        """
        # Here we must REMOVE triples pointing
        # to 'self' [THIS CANNOT BE UNDONE]:
        for res, entity in self.m_set.res_to_entity.items():
            # Snapshot the matches first: the graph is mutated while they are processed.
            triples_list: List[tuple] = list(entity.g.triples((res, None, RDFTerm("uri", str(self.res)))))
            for triple in triples_list:
                entity.g.remove(triple)

        self._to_be_deleted = True

    def merge(self, other: MetadataEntity) -> None:
        """
        **WARNING:** ``MetadataEntity`` **is an abstract class that cannot be instantiated at runtime.
        As such, it's only possible to execute this method on entities generated from**
        ``MetadataEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**

        :param other: The entity which will be marked as to be deleted and whose properties will
          be merged into the current entity.
        :type other: MetadataEntity
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        if not isinstance(other, MetadataEntity) or other.short_name != self.short_name:
            raise TypeError(
                f"[{self.__class__.__name__}.merge] Expected entity type: {self.short_name}. "
                f"Provided: {type(other).__name__}."
            )

        # Here we must REDIRECT triples pointing
        # to 'other' to make them point to 'self':
        for res, entity in self.m_set.res_to_entity.items():
            # Snapshot the matches first: the graph is mutated while they are processed.
            triples_list: List[tuple] = list(entity.g.triples((res, None, RDFTerm("uri", str(other.res)))))
            for triple in triples_list:
                entity.g.remove(triple)
                new_triple = (triple[0], triple[1], RDFTerm("uri", str(self.res)))
                entity.g.add(new_triple)

        # Carry over the types and the label of 'other'.
        types: List[str] = other.get_types()
        for cur_type in types:
            self._create_type(cur_type)

        label: str | None = other.get_label()
        if label is not None:
            self.create_label(label)

        self._was_merged = True
        self._merge_list = (*self._merge_list, other)

        # 'other' must be deleted AFTER the redirection of
        # triples pointing to it, since mark_as_to_be_deleted
        # also removes every triple pointing to 'other'
        other.mark_as_to_be_deleted()

        # Hook for subclasses: merge the kind-specific properties of 'other'.
        self._merge_properties(other)

    def _merge_properties(self, other: MetadataEntity) -> None:
        """Merge the kind-specific properties of ``other`` into this entity.

        Called at the end of :meth:`merge`. The base implementation does nothing.

        :param other: The entity being merged into the current one.
        :return: None
        """
        pass

    def commit_changes(self) -> None:
        """Make the current state of the entity the new baseline.

        If the entity is marked as to be deleted, its baseline is emptied and
        ``remove_every_triple`` is called; otherwise the baseline becomes a frozen
        snapshot of the triples of ``self.g`` having this entity as subject.
        In both cases the deletion flag, the merge flag and the merge list are reset.

        :return: None
        """
        if self._to_be_deleted:
            self._preexisting_triples = frozenset()
            self.remove_every_triple()
        else:
            self._preexisting_triples = frozenset(self.g.triples((self.res, None, None)))
        self._to_be_deleted = False
        self._was_merged = False
        self._merge_list = ()

178 self._to_be_deleted = False 

179 self._was_merged = False 

180 self._merge_list = ()