Coverage for oc_ocdm/metadata/metadata_entity.py: 62%

103 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-05-30 22:05 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16from __future__ import annotations 

17 

18from typing import TYPE_CHECKING, Tuple, List, Optional 

19 

20from oc_ocdm.abstract_entity import AbstractEntity 

21from rdflib import URIRef, Namespace, Graph 

22 

23if TYPE_CHECKING: 

24 from typing import ClassVar, Dict 

25 from oc_ocdm.metadata.metadata_set import MetadataSet 

26 

27 

class MetadataEntity(AbstractEntity):
    """
    Common base for the entities that get registered inside a ``MetadataSet``.

    The class holds the vocabulary IRIs (DCTERMS, DCAT, VOID) used as types and
    predicates, the mapping from entity short names to RDF types, and the
    bookkeeping shared by its subclasses: generation of the resource IRI,
    registration inside the owning set, snapshotting of the preexisting triples,
    and the merge/deletion flags.
    """

    DCTERMS = Namespace("http://purl.org/dc/terms/")
    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    VOID = Namespace("http://rdfs.org/ns/void#")

    iri_dataset = DCAT.Dataset
    iri_datafile = DCAT.Distribution

    # Item access is required for 'title': rdflib's Namespace is a str subclass,
    # so the attribute form would resolve to the str.title method instead.
    iri_title = DCTERMS["title"]
    iri_description = DCTERMS.description
    iri_issued = DCTERMS.issued
    iri_modified = DCTERMS.modified
    iri_keyword = DCAT.keyword
    iri_subject = DCAT.theme
    iri_landing_page = DCAT.landingPage
    iri_subset = VOID.subset
    iri_sparql_endpoint = VOID.sparqlEndpoint
    iri_distribution = DCAT.distribution
    iri_license = DCTERMS.license
    iri_download_url = DCAT.downloadURL
    iri_media_type = DCAT.mediaType
    # The DCAT vocabulary defines this property as 'byteSize' (camelCase, like the
    # other DCAT terms above); 'byte_size' is not a term of the vocabulary.
    # NOTE(review): data serialised with the former '...dcat#byte_size' IRI will
    # need migrating -- confirm with the dataset owners.
    iri_byte_size = DCAT.byteSize

    short_name_to_type_iri: ClassVar[Dict[str, URIRef]] = {
        '_dataset_': iri_dataset,
        'di': iri_datafile
    }

    def __init__(self, g: Graph, base_iri: str, dataset_name: str, m_set: MetadataSet,
                 res: Optional[URIRef] = None, res_type: Optional[URIRef] = None,
                 resp_agent: Optional[str] = None, source: Optional[str] = None,
                 count: Optional[str] = None, label: Optional[str] = None, short_name: str = "",
                 preexisting_graph: Optional[Graph] = None) -> None:
        """
        Initialise the entity and, when a set is given, register it inside that set.

        :param g: The graph that stores the triples of this entity.
        :type g: Graph
        :param base_iri: The prefix of the IRI generated when ``res`` is not given.
        :type base_iri: str
        :param dataset_name: The segment appended to ``base_iri`` when generating the IRI.
        :type dataset_name: str
        :param m_set: The set this entity belongs to (``None`` skips the registration).
        :type m_set: MetadataSet
        :param res: The IRI of the entity; when ``None`` a new one is generated.
        :type res: URIRef
        :param res_type: The RDF type forwarded to ``_create_type`` (ignored when
          ``preexisting_graph`` is given).
        :type res_type: URIRef
        :param resp_agent: Stored as-is in ``self.resp_agent``.
        :type resp_agent: str
        :param source: Stored as-is in ``self.source``.
        :type source: str
        :param count: The last segment of the generated IRI; required when ``res`` is
          ``None`` and ``short_name`` is not ``'_dataset_'``.
        :type count: str
        :param label: The label forwarded to ``create_label`` (ignored when
          ``preexisting_graph`` is given).
        :type label: str
        :param short_name: The short name identifying the kind of entity.
        :type short_name: str
        :param preexisting_graph: A graph whose triples about ``res`` replace the
          current content of ``g`` for this entity.
        :type preexisting_graph: Graph
        :return: None
        """
        super(MetadataEntity, self).__init__()
        self.g: Graph = g
        self.base_iri: str = base_iri
        self.dataset_name: str = dataset_name
        self.resp_agent: Optional[str] = resp_agent
        self.source: Optional[str] = source
        self.short_name: str = short_name
        self.m_set: MetadataSet = m_set
        # Always start from an empty snapshot; it is filled below only when a
        # preexisting graph is actually provided.
        self.preexisting_graph: Graph = Graph(identifier=g.identifier)
        self._merge_list: Tuple[MetadataEntity, ...] = tuple()
        # FLAGS
        self._to_be_deleted: bool = False
        self._was_merged: bool = False

        # If res was not specified, create from scratch the URI reference for this entity,
        # otherwise use the provided one
        if res is None:
            base_res: str = self.base_iri + self.dataset_name
            # endswith (rather than indexing) also copes with an empty base
            if not base_res.endswith('/'):
                base_res += '/'
            self.res = self._generate_new_res(count, base_res, short_name)
        else:
            self.res = res

        if m_set is not None:
            # If not already done, register this MetadataEntity instance inside the MetadataSet
            if self.res not in m_set.res_to_entity:
                m_set.res_to_entity[self.res] = self

        if preexisting_graph is not None:
            # Triples inside self.g are entirely replaced by triples from preexisting_graph.
            # This has maximum priority with respect to every other self.g initializations.
            # It's fundamental that the preexisting graph gets passed as an argument of the constructor:
            # allowing the user to set this value later through a method would mean that the user could
            # set the preexisting graph AFTER having modified self.g (which would not make sense).
            self.remove_every_triple()
            for p, o in preexisting_graph.predicate_objects(self.res):
                self.g.add((self.res, p, o))
                self.preexisting_graph.add((self.res, p, o))
        else:
            # Add mandatory information to the entity graph
            self._create_type(res_type)
            if label is not None:
                self.create_label(label)

    @staticmethod
    def _generate_new_res(count: str, base_res: str, short_name: str) -> URIRef:
        """
        Build the IRI of a new entity.

        :param count: The last segment of the IRI (unused for ``'_dataset_'``).
        :type count: str
        :param base_res: The IRI prefix; the caller passes it already ending with ``/``.
        :type base_res: str
        :param short_name: The short name identifying the kind of entity.
        :type short_name: str
        :return: ``base_res`` itself for the ``'_dataset_'`` short name, otherwise
          ``base_res + short_name + "/" + count``.
        """
        if short_name == '_dataset_':
            return URIRef(base_res)
        return URIRef(base_res + short_name + "/" + count)

    @property
    def to_be_deleted(self) -> bool:
        """``True`` once ``mark_as_to_be_deleted`` was called and until the next commit."""
        return self._to_be_deleted

    @property
    def was_merged(self) -> bool:
        """``True`` once ``merge`` was called on this entity and until the next commit."""
        return self._was_merged

    @property
    def merge_list(self) -> Tuple[MetadataEntity, ...]:
        """The entities merged into this one since the last commit, in merge order."""
        return self._merge_list

    def mark_as_to_be_deleted(self) -> None:
        """
        Flag this entity for deletion and drop the triples that point to it.

        For every entity registered in the owning set, the triples having that
        entity as subject and this entity as object are removed from its graph.
        The removal cannot be undone.

        :return: None
        """
        # Here we must REMOVE triples pointing
        # to 'self' [THIS CANNOT BE UNDONE]:
        # (the constructor accepts m_set=None, in which case there is no
        # registry of other entities to scan)
        if self.m_set is not None:
            for res, entity in self.m_set.res_to_entity.items():
                # Materialise the matches first: the graph is modified while looping
                triples_list: List[Tuple] = list(entity.g.triples((res, None, self.res)))
                for triple in triples_list:
                    entity.g.remove(triple)

        self._to_be_deleted = True

    def merge(self, other: MetadataEntity) -> None:
        """
        **WARNING:** ``MetadataEntity`` **is an abstract class that cannot be instantiated at runtime.
        As such, it's only possible to execute this method on entities generated from**
        ``MetadataEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**

        Triples of the registered entities that point to ``other`` are redirected to the
        current entity, the types and the label of ``other`` are copied over, and ``other``
        is finally marked as to be deleted. Merging an entity with itself is a no-op.

        :param other: The entity which will be marked as to be deleted and whose properties will
          be merged into the current entity.
        :type other: MetadataEntity
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        # Merging an entity into itself would end up flagging the survivor for
        # deletion and stripping the triples pointing to it: nothing to do instead.
        if other is self:
            return

        # Here we must REDIRECT triples pointing
        # to 'other' to make them point to 'self':
        # (the constructor accepts m_set=None, in which case there is no
        # registry of other entities to scan)
        if self.m_set is not None:
            for res, entity in self.m_set.res_to_entity.items():
                # Materialise the matches first: the graph is modified while looping
                triples_list: List[Tuple] = list(entity.g.triples((res, None, other.res)))
                for subj, pred, _ in triples_list:
                    entity.g.remove((subj, pred, other.res))
                    entity.g.add((subj, pred, self.res))

        types: List[URIRef] = other.get_types()
        for cur_type in types:
            self._create_type(cur_type)

        label: Optional[str] = other.get_label()
        if label is not None:
            self.create_label(label)

        self._was_merged = True
        self._merge_list = (*self._merge_list, other)

        # 'other' must be deleted AFTER the redirection of
        # triples pointing to it, since mark_as_to_be_deleted
        # also removes every triple pointing to 'other'
        other.mark_as_to_be_deleted()

    def commit_changes(self) -> None:
        """
        Make the current state of the entity the new baseline.

        The preexisting graph is replaced by a fresh one: it stays empty (and the
        triples of the entity are removed) when the entity was marked as to be
        deleted, otherwise it receives a copy of every triple having this entity
        as subject. The deletion/merge flags and the merge list are then reset.

        :return: None
        """
        self.preexisting_graph = Graph(identifier=self.g.identifier)
        if self._to_be_deleted:
            self.remove_every_triple()
        else:
            for triple in self.g.triples((self.res, None, None)):
                self.preexisting_graph.add(triple)
        self._to_be_deleted = False
        self._was_merged = False
        self._merge_list = tuple()

180 self._was_merged = False 

181 self._merge_list = tuple()