Coverage for oc_ocdm/graph/graph_set.py: 89%

229 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-12-05 23:58 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16from __future__ import annotations 

17 

18from typing import TYPE_CHECKING 

19 

20from io import BytesIO 

21 

22from oc_ocdm.abstract_set import AbstractSet 

23from oc_ocdm.reader import Reader 

24from oc_ocdm.support.support import get_count, get_prefix, get_short_name 

25from sparqlite import SPARQLClient 

26 

27if TYPE_CHECKING: 

28 from typing import Dict, ClassVar, Tuple, Optional, List, Set 

29 

30from oc_ocdm.counter_handler.counter_handler import CounterHandler 

31from oc_ocdm.counter_handler.filesystem_counter_handler import \ 

32 FilesystemCounterHandler 

33from oc_ocdm.counter_handler.in_memory_counter_handler import \ 

34 InMemoryCounterHandler 

35from oc_ocdm.graph.entities.bibliographic.agent_role import AgentRole 

36from oc_ocdm.graph.entities.bibliographic.bibliographic_reference import \ 

37 BibliographicReference 

38from oc_ocdm.graph.entities.bibliographic.bibliographic_resource import \ 

39 BibliographicResource 

40from oc_ocdm.graph.entities.bibliographic.citation import Citation 

41from oc_ocdm.graph.entities.bibliographic.discourse_element import \ 

42 DiscourseElement 

43from oc_ocdm.graph.entities.bibliographic.pointer_list import PointerList 

44from oc_ocdm.graph.entities.bibliographic.reference_annotation import \ 

45 ReferenceAnnotation 

46from oc_ocdm.graph.entities.bibliographic.reference_pointer import \ 

47 ReferencePointer 

48from oc_ocdm.graph.entities.bibliographic.resource_embodiment import \ 

49 ResourceEmbodiment 

50from oc_ocdm.graph.entities.bibliographic.responsible_agent import \ 

51 ResponsibleAgent 

52from oc_ocdm.graph.entities.identifier import Identifier 

53from oc_ocdm.graph.graph_entity import GraphEntity 

54from rdflib import Graph, Namespace, URIRef 

55 

56 

class GraphSet(AbstractSet):
    """Registry of graph entities, keyed by the IRI of the resource they describe.

    The set keeps one named-graph IRI per entity kind (``base_iri`` followed by
    the kind's short name and a slash), a counter handler used to number newly
    created entities, and the ``res_to_entity`` mapping consulted by the
    ``add_*``/``get_*`` methods.

    NOTE(review): nothing in this class writes to ``res_to_entity``; entity
    constructors receive the set as their second argument and presumably
    register themselves there -- confirm in ``GraphEntity``.
    """

    # Labels: short name of each entity kind -> human-readable description,
    # used by ``_add`` to build the label of newly created entities.
    labels: ClassVar[Dict[str, str]] = {
        "an": "annotation",
        "ar": "agent role",
        "be": "bibliographic entry",
        "br": "bibliographic resource",
        "ci": "citation",
        "de": "discourse element",
        "id": "identifier",
        "pl": "single location pointer list",
        "ra": "responsible agent",
        "re": "resource embodiment",
        "rp": "in-text reference pointer"
    }

    def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
                 wanted_label: bool = True, custom_counter_handler: Optional[CounterHandler] = None) -> None:
        """Initialise an empty set and choose its counter handler.

        :param base_iri: prefix from which the per-kind graph IRIs are derived
        :param info_dir: when neither ``None`` nor empty (and no custom handler
            is given), a ``FilesystemCounterHandler`` is created on it
        :param supplier_prefix: prefix prepended to the counter of new entities
        :param wanted_label: whether ``_add`` builds a label for new entities
        :param custom_counter_handler: handler used in place of the default
            ones when truthy
        """
        super().__init__()
        # The following variable maps a URIRef with the related graph entity
        self.res_to_entity: Dict[URIRef, GraphEntity] = {}
        self.base_iri: str = base_iri
        self.info_dir: str = info_dir
        self.supplier_prefix: str = supplier_prefix
        self.wanted_label: bool = wanted_label
        # Graphs
        # The following structure of URL is quite important for the other classes
        # developed and should not be changed. The only part that can change is the
        # value of the base_iri
        self.g_an: str = base_iri + "an/"
        self.g_ar: str = base_iri + "ar/"
        self.g_be: str = base_iri + "be/"
        self.g_br: str = base_iri + "br/"
        self.g_ci: str = base_iri + "ci/"
        self.g_de: str = base_iri + "de/"
        self.g_id: str = base_iri + "id/"
        self.g_pl: str = base_iri + "pl/"
        self.g_ra: str = base_iri + "ra/"
        self.g_re: str = base_iri + "re/"
        self.g_rp: str = base_iri + "rp/"

        # Precedence: explicit handler, then filesystem-backed, then in-memory.
        if custom_counter_handler:
            self.counter_handler = custom_counter_handler
        elif info_dir is not None and info_dir != "":
            self.counter_handler = FilesystemCounterHandler(info_dir, supplier_prefix)
        else:
            self.counter_handler = InMemoryCounterHandler()

    def get_entity(self, res: URIRef) -> Optional[GraphEntity]:
        """Return the entity registered for ``res``, or ``None`` if there is none."""
        return self.res_to_entity.get(res)

    def _add_entity(self, entity_class, graph_url: str, short_name: str, type_iri, description: str,
                    resp_agent: str, source: Optional[str], res: Optional[URIRef],
                    preexisting_graph: Optional[Graph]) -> GraphEntity:
        """Shared implementation of the public ``add_*`` methods.

        :param entity_class: entity class to instantiate
        :param graph_url: IRI of the named graph for this entity kind
        :param short_name: two-letter short name of the entity kind
        :param type_iri: ``GraphEntity.iri_*`` constant forwarded to the constructor
        :param description: class name preceded by its article (e.g. ``"an AgentRole"``),
            interpolated in the error message
        :param resp_agent: forwarded to the entity constructor
        :param source: forwarded to the entity constructor
        :param res: IRI of an already-identified resource, or ``None`` for a new one
        :param preexisting_graph: forwarded to the entity constructor
        :raises ValueError: if ``res`` is given and its short name is not ``short_name``
        :return: the entity already registered for ``res``, otherwise a new instance
        """
        if res is not None:
            if get_short_name(res) != short_name:
                raise ValueError(f"Given res: <{res}> is inappropriate for {description} entity.")
            # Never build a second entity for a resource that is already known.
            if res in self.res_to_entity:
                return self.res_to_entity[res]
        cur_g, count, label = self._add(graph_url, short_name, res)
        return entity_class(cur_g, self, res, type_iri,
                            resp_agent, source, count, label, short_name,
                            preexisting_graph)

    # Add resources related to bibliographic entities
    def add_an(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ReferenceAnnotation:
        """Return the ReferenceAnnotation for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"an"``
        """
        return self._add_entity(ReferenceAnnotation, self.g_an, "an", GraphEntity.iri_note,
                                "a ReferenceAnnotation", resp_agent, source, res, preexisting_graph)

    def add_ar(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> AgentRole:
        """Return the AgentRole for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"ar"``
        """
        return self._add_entity(AgentRole, self.g_ar, "ar", GraphEntity.iri_role_in_time,
                                "an AgentRole", resp_agent, source, res, preexisting_graph)

    def add_be(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> BibliographicReference:
        """Return the BibliographicReference for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"be"``
        """
        return self._add_entity(BibliographicReference, self.g_be, "be",
                                GraphEntity.iri_bibliographic_reference,
                                "a BibliographicReference", resp_agent, source, res, preexisting_graph)

    def add_br(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> BibliographicResource:
        """Return the BibliographicResource for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"br"``
        """
        return self._add_entity(BibliographicResource, self.g_br, "br", GraphEntity.iri_expression,
                                "a BibliographicResource", resp_agent, source, res, preexisting_graph)

    def add_ci(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> Citation:
        """Return the Citation for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"ci"``
        """
        return self._add_entity(Citation, self.g_ci, "ci", GraphEntity.iri_citation,
                                "a Citation", resp_agent, source, res, preexisting_graph)

    def add_de(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> DiscourseElement:
        """Return the DiscourseElement for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"de"``
        """
        return self._add_entity(DiscourseElement, self.g_de, "de", GraphEntity.iri_discourse_element,
                                "a DiscourseElement", resp_agent, source, res, preexisting_graph)

    def add_id(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> Identifier:
        """Return the Identifier for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"id"``
        """
        return self._add_entity(Identifier, self.g_id, "id", GraphEntity.iri_identifier,
                                "an Identifier", resp_agent, source, res, preexisting_graph)

    def add_pl(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> PointerList:
        """Return the PointerList for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"pl"``
        """
        return self._add_entity(PointerList, self.g_pl, "pl", GraphEntity.iri_singleloc_pointer_list,
                                "a PointerList", resp_agent, source, res, preexisting_graph)

    def add_rp(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ReferencePointer:
        """Return the ReferencePointer for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"rp"``
        """
        return self._add_entity(ReferencePointer, self.g_rp, "rp", GraphEntity.iri_intextref_pointer,
                                "a ReferencePointer", resp_agent, source, res, preexisting_graph)

    def add_ra(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ResponsibleAgent:
        """Return the ResponsibleAgent for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"ra"``
        """
        return self._add_entity(ResponsibleAgent, self.g_ra, "ra", GraphEntity.iri_agent,
                                "a ResponsibleAgent", resp_agent, source, res, preexisting_graph)

    def add_re(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ResourceEmbodiment:
        """Return the ResourceEmbodiment for ``res``, creating it when not yet registered.

        :raises ValueError: if ``res`` is given and its short name is not ``"re"``
        """
        return self._add_entity(ResourceEmbodiment, self.g_re, "re", GraphEntity.iri_manifestation,
                                "a ResourceEmbodiment", resp_agent, source, res, preexisting_graph)

    def _add(self, graph_url: str, short_name: str,
             res: Optional[URIRef] = None) -> Tuple[Graph, Optional[str], Optional[str]]:
        """Create the graph for an entity and work out its counter and label.

        :param graph_url: identifier given to the new ``Graph``
        :param short_name: short name of the entity kind, used as counter key
        :param res: IRI of an already-identified resource, or ``None``
        :return: ``(graph, count, label)``. When ``res`` is given, ``count`` and
            ``label`` are both ``None``; otherwise ``count`` is the supplier
            prefix followed by the incremented counter, and ``label`` is built
            only if ``wanted_label`` is set.
        """
        cur_g: Graph = Graph(identifier=graph_url)
        self._set_ns(cur_g)

        if res is not None:
            supplier_prefix = get_prefix(res)
            try:
                res_count = int(get_count(res))
            except ValueError:
                # A non-numeric count can never exceed the stored counter.
                res_count = -1
            # Keep the stored counter at least as high as the number carried by
            # the given resource.
            if res_count > self.counter_handler.read_counter(short_name, supplier_prefix=supplier_prefix):
                self.counter_handler.set_counter(res_count, short_name, supplier_prefix=supplier_prefix)
            return cur_g, None, None

        supplier_prefix = self.supplier_prefix
        next_number = self.counter_handler.increment_counter(short_name, supplier_prefix=supplier_prefix)
        count: str = supplier_prefix + str(next_number)

        label: Optional[str] = None
        if self.wanted_label:
            label = "%s %s [%s/%s]" % (self.labels[short_name], count, short_name, count)

        return cur_g, count, label

    def get_orphans(self) -> List[GraphEntity]:
        """Return the registered entities that no registered entity points to.

        An entity counts as referenced when its IRI appears as a ``URIRef``
        object of a triple whose subject is a registered resource, looked up in
        that resource's own entity graph. The result follows registry order.
        """
        referenced_entities: Set[URIRef] = {
            obj
            for res, entity in self.res_to_entity.items()
            for obj in entity.g.objects(subject=res, predicate=None)
            if isinstance(obj, URIRef)
        }
        return [entity for res, entity in self.res_to_entity.items()
                if res not in referenced_entities]

    def remove_orphans_from_triplestore(self, ts_url: str, resp_agent: str) -> None:
        """Drop, from entities found in the triplestore, links to entities marked for deletion.

        For every registered entity whose ``to_be_deleted`` flag is set, the
        triplestore at ``ts_url`` is asked for the subjects pointing to it; the
        result is imported through ``Reader.import_entities_from_graph`` and
        every triple from an imported entity to the deleted one is removed from
        the imported entity's graph.

        :param ts_url: endpoint handed to ``SPARQLClient``
        :param resp_agent: forwarded to ``Reader.import_entities_from_graph``
        """
        with SPARQLClient(ts_url) as client:
            # Iterate over a snapshot: the import below is handed this very set
            # and presumably registers new entities in it (TODO confirm), which
            # would invalidate a live dictionary view mid-iteration.
            for entity_res, entity in list(self.res_to_entity.items()):
                if not entity.to_be_deleted:
                    continue
                query: str = f"CONSTRUCT {{?s ?p ?o}} WHERE {{?s ?p ?o ; ?p_1 <{entity_res}>}}"
                nt_bytes = client.construct(query)
                if not nt_bytes:
                    continue
                result: Graph = Graph()
                result.parse(BytesIO(nt_bytes), format='nt')
                imported_entities: List[GraphEntity] = Reader.import_entities_from_graph(self, result, resp_agent)
                for imported_entity in imported_entities:
                    imported_entity.g.remove((imported_entity.res, None, entity_res))

    def commit_changes(self):
        """Commit every registered entity and unregister those flagged for deletion.

        The ``to_be_deleted`` flag is read after the entity's own
        ``commit_changes()`` has run, exactly as before.
        """
        # Snapshot the items so that deleting from the registry cannot raise
        # "dictionary changed size during iteration".
        for res, entity in list(self.res_to_entity.items()):
            entity.commit_changes()
            if entity.to_be_deleted:
                del self.res_to_entity[res]

    def _set_ns(self, g: Graph) -> None:
        """Bind the per-kind graph prefixes and the ``GraphEntity`` namespaces on ``g``."""
        bind = g.namespace_manager.bind

        # Prefixes of the entity graphs, derived from base_iri (original order kept).
        entity_graphs = (
            ("an", self.g_an),
            ("ar", self.g_ar),
            ("be", self.g_be),
            ("ci", self.g_ci),
            ("de", self.g_de),
            ("br", self.g_br),
            ("id", self.g_id),
            ("pl", self.g_pl),
            ("ra", self.g_ra),
            ("re", self.g_re),
            ("rp", self.g_rp),
        )
        for prefix, graph_url in entity_graphs:
            bind(prefix, Namespace(graph_url))

        # Namespaces exposed as GraphEntity class attributes.
        vocabularies = (
            ("biro", GraphEntity.BIRO),
            ("co", GraphEntity.CO),
            ("c4o", GraphEntity.C4O),
            ("cito", GraphEntity.CITO),
            ("datacite", GraphEntity.DATACITE),
            ("dcterms", GraphEntity.DCTERMS),
            ("deo", GraphEntity.DEO),
            ("doco", GraphEntity.DOCO),
            ("fabio", GraphEntity.FABIO),
            ("foaf", GraphEntity.FOAF),
            ("frbr", GraphEntity.FRBR),
            ("literal", GraphEntity.LITERAL),
            ("oa", GraphEntity.OA),
            ("oco", GraphEntity.OCO),
            ("prism", GraphEntity.PRISM),
            ("pro", GraphEntity.PRO),
        )
        for prefix, namespace in vocabularies:
            bind(prefix, namespace)

    def _entities_of_type(self, entity_class) -> Tuple[GraphEntity, ...]:
        """Return, in registry order, the registered entities that are instances of ``entity_class``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, entity_class))

    def get_an(self) -> Tuple[ReferenceAnnotation, ...]:
        """Return every registered ReferenceAnnotation."""
        return self._entities_of_type(ReferenceAnnotation)

    def get_ar(self) -> Tuple[AgentRole, ...]:
        """Return every registered AgentRole."""
        return self._entities_of_type(AgentRole)

    def get_be(self) -> Tuple[BibliographicReference, ...]:
        """Return every registered BibliographicReference."""
        return self._entities_of_type(BibliographicReference)

    def get_br(self) -> Tuple[BibliographicResource, ...]:
        """Return every registered BibliographicResource."""
        return self._entities_of_type(BibliographicResource)

    def get_ci(self) -> Tuple[Citation, ...]:
        """Return every registered Citation."""
        return self._entities_of_type(Citation)

    def get_de(self) -> Tuple[DiscourseElement, ...]:
        """Return every registered DiscourseElement."""
        return self._entities_of_type(DiscourseElement)

    def get_id(self) -> Tuple[Identifier, ...]:
        """Return every registered Identifier."""
        return self._entities_of_type(Identifier)

    def get_pl(self) -> Tuple[PointerList, ...]:
        """Return every registered PointerList."""
        return self._entities_of_type(PointerList)

    def get_rp(self) -> Tuple[ReferencePointer, ...]:
        """Return every registered ReferencePointer."""
        return self._entities_of_type(ReferencePointer)

    def get_ra(self) -> Tuple[ResponsibleAgent, ...]:
        """Return every registered ResponsibleAgent."""
        return self._entities_of_type(ResponsibleAgent)

    def get_re(self) -> Tuple[ResourceEmbodiment, ...]:
        """Return every registered ResourceEmbodiment."""
        return self._entities_of_type(ResourceEmbodiment)