Coverage for oc_ocdm/graph/graph_set.py: 69%

285 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-05-30 22:05 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16from __future__ import annotations 

17 

18from typing import TYPE_CHECKING 

19 

20from oc_ocdm.abstract_set import AbstractSet 

21from oc_ocdm.reader import Reader 

22from oc_ocdm.support.support import get_count, get_prefix, get_short_name 

23from SPARQLWrapper import RDFXML, SPARQLWrapper 

24 

25if TYPE_CHECKING: 

26 from typing import Dict, ClassVar, Tuple, Optional, List, Set 

27 from rdflib import ConjunctiveGraph 

28 

29from oc_ocdm.counter_handler.counter_handler import CounterHandler 

30from oc_ocdm.counter_handler.filesystem_counter_handler import \ 

31 FilesystemCounterHandler 

32from oc_ocdm.counter_handler.in_memory_counter_handler import \ 

33 InMemoryCounterHandler 

34from oc_ocdm.graph.entities.bibliographic.agent_role import AgentRole 

35from oc_ocdm.graph.entities.bibliographic.bibliographic_reference import \ 

36 BibliographicReference 

37from oc_ocdm.graph.entities.bibliographic.bibliographic_resource import \ 

38 BibliographicResource 

39from oc_ocdm.graph.entities.bibliographic.citation import Citation 

40from oc_ocdm.graph.entities.bibliographic.discourse_element import \ 

41 DiscourseElement 

42from oc_ocdm.graph.entities.bibliographic.pointer_list import PointerList 

43from oc_ocdm.graph.entities.bibliographic.reference_annotation import \ 

44 ReferenceAnnotation 

45from oc_ocdm.graph.entities.bibliographic.reference_pointer import \ 

46 ReferencePointer 

47from oc_ocdm.graph.entities.bibliographic.resource_embodiment import \ 

48 ResourceEmbodiment 

49from oc_ocdm.graph.entities.bibliographic.responsible_agent import \ 

50 ResponsibleAgent 

51from oc_ocdm.graph.entities.identifier import Identifier 

52from oc_ocdm.graph.graph_entity import GraphEntity 

53from rdflib import Graph, Namespace, URIRef 

54 

55 

56class GraphSet(AbstractSet): 

57 # Labels 

58 labels: ClassVar[Dict[str, str]] = { 

59 "an": "annotation", 

60 "ar": "agent role", 

61 "be": "bibliographic entry", 

62 "br": "bibliographic resource", 

63 "ci": "citation", 

64 "de": "discourse element", 

65 "id": "identifier", 

66 "pl": "single location pointer list", 

67 "ra": "responsible agent", 

68 "re": "resource embodiment", 

69 "rp": "in-text reference pointer" 

70 } 

71 

def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
             wanted_label: bool = True, custom_counter_handler: CounterHandler = None) -> None:
    """Create an empty graph set whose per-type graph URLs start with ``base_iri``.

    :param base_iri: prefix of every per-type graph URL (``base_iri + "br/"`` and so on)
    :param info_dir: when neither ``None`` nor empty, and no custom handler is given,
        a FilesystemCounterHandler is built from it together with ``supplier_prefix``
    :param supplier_prefix: stored on the instance and passed to the filesystem handler
    :param wanted_label: stored on the instance; ``_add`` produces a label only when true
    :param custom_counter_handler: when truthy, used as the counter handler as-is
    """
    super(GraphSet, self).__init__()

    # Plain configuration values, kept exactly as received
    self.base_iri: str = base_iri
    self.info_dir: str = info_dir
    self.supplier_prefix: str = supplier_prefix
    self.wanted_label: bool = wanted_label

    # Registry mapping each URIRef to the graph entity that describes it
    self.res_to_entity: Dict[URIRef, GraphEntity] = {}

    # Graph URLs, one per entity type. The "<base_iri><short name>/" layout is
    # relied upon by other classes and must not change; only base_iri may vary.
    self.g_an: str = base_iri + "an/"
    self.g_ar: str = base_iri + "ar/"
    self.g_be: str = base_iri + "be/"
    self.g_br: str = base_iri + "br/"
    self.g_ci: str = base_iri + "ci/"
    self.g_de: str = base_iri + "de/"
    self.g_id: str = base_iri + "id/"
    self.g_pl: str = base_iri + "pl/"
    self.g_ra: str = base_iri + "ra/"
    self.g_re: str = base_iri + "re/"
    self.g_rp: str = base_iri + "rp/"

    # Counter handler: explicit handler first, then filesystem, else in-memory
    if custom_counter_handler:
        handler = custom_counter_handler
    elif info_dir is None or info_dir == "":
        handler = InMemoryCounterHandler()
    else:
        handler = FilesystemCounterHandler(info_dir, supplier_prefix)
    self.counter_handler = handler

103 

def get_entity(self, res: URIRef) -> Optional[GraphEntity]:
    """Return the entity registered under ``res``, or ``None`` when there is none.

    :param res: key to look up in ``res_to_entity``
    :return: the stored entity, or ``None`` if ``res`` is not a key
    """
    return self.res_to_entity.get(res)

107 

108 # Add resources related to bibliographic entities 

def add_an(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> ReferenceAnnotation:
    """Return the entity registered for ``res``, or build a new ReferenceAnnotation.

    :param resp_agent: forwarded to the ReferenceAnnotation constructor
    :param source: forwarded to the ReferenceAnnotation constructor
    :param res: optional URI; when given, its short name must be "an"
    :param preexisting_graph: forwarded to the ReferenceAnnotation constructor
    :raises ValueError: if ``res`` is given and its short name is not "an"
    """
    if res is not None:
        if get_short_name(res) != "an":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ReferenceAnnotation entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_an, "an", res)
    return ReferenceAnnotation(
        graph, self, res, GraphEntity.iri_note,
        resp_agent, source, count, label, "an",
        preexisting_graph,
    )

119 

def add_ar(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> AgentRole:
    """Return the entity registered for ``res``, or build a new AgentRole.

    :param resp_agent: forwarded to the AgentRole constructor
    :param source: forwarded to the AgentRole constructor
    :param res: optional URI; when given, its short name must be "ar"
    :param preexisting_graph: forwarded to the AgentRole constructor
    :raises ValueError: if ``res`` is given and its short name is not "ar"
    """
    if res is not None:
        if get_short_name(res) != "ar":
            raise ValueError(f"Given res: <{res}> is inappropriate for an AgentRole entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_ar, "ar", res)
    return AgentRole(
        graph, self, res, GraphEntity.iri_role_in_time,
        resp_agent, source, count, label, "ar",
        preexisting_graph,
    )

130 

def add_be(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> BibliographicReference:
    """Return the entity registered for ``res``, or build a new BibliographicReference.

    :param resp_agent: forwarded to the BibliographicReference constructor
    :param source: forwarded to the BibliographicReference constructor
    :param res: optional URI; when given, its short name must be "be"
    :param preexisting_graph: forwarded to the BibliographicReference constructor
    :raises ValueError: if ``res`` is given and its short name is not "be"
    """
    if res is not None:
        if get_short_name(res) != "be":
            raise ValueError(f"Given res: <{res}> is inappropriate for a BibliographicReference entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_be, "be", res)
    return BibliographicReference(
        graph, self, res, GraphEntity.iri_bibliographic_reference,
        resp_agent, source, count, label, "be",
        preexisting_graph,
    )

141 

def add_br(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> BibliographicResource:
    """Return the entity registered for ``res``, or build a new BibliographicResource.

    :param resp_agent: forwarded to the BibliographicResource constructor
    :param source: forwarded to the BibliographicResource constructor
    :param res: optional URI; when given, its short name must be "br"
    :param preexisting_graph: forwarded to the BibliographicResource constructor
    :raises ValueError: if ``res`` is given and its short name is not "br"
    """
    if res is not None:
        if get_short_name(res) != "br":
            raise ValueError(f"Given res: <{res}> is inappropriate for a BibliographicResource entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_br, "br", res)
    return BibliographicResource(
        graph, self, res, GraphEntity.iri_expression,
        resp_agent, source, count, label, "br",
        preexisting_graph,
    )

152 

def add_ci(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> Citation:
    """Return the entity registered for ``res``, or build a new Citation.

    :param resp_agent: forwarded to the Citation constructor
    :param source: forwarded to the Citation constructor
    :param res: optional URI; when given, its short name must be "ci"
    :param preexisting_graph: forwarded to the Citation constructor
    :raises ValueError: if ``res`` is given and its short name is not "ci"
    """
    if res is not None:
        if get_short_name(res) != "ci":
            raise ValueError(f"Given res: <{res}> is inappropriate for a Citation entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_ci, "ci", res)
    return Citation(
        graph, self, res, GraphEntity.iri_citation,
        resp_agent, source, count, label, "ci",
        preexisting_graph,
    )

163 

def add_de(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> DiscourseElement:
    """Return the entity registered for ``res``, or build a new DiscourseElement.

    :param resp_agent: forwarded to the DiscourseElement constructor
    :param source: forwarded to the DiscourseElement constructor
    :param res: optional URI; when given, its short name must be "de"
    :param preexisting_graph: forwarded to the DiscourseElement constructor
    :raises ValueError: if ``res`` is given and its short name is not "de"
    """
    if res is not None:
        if get_short_name(res) != "de":
            raise ValueError(f"Given res: <{res}> is inappropriate for a DiscourseElement entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_de, "de", res)
    return DiscourseElement(
        graph, self, res, GraphEntity.iri_discourse_element,
        resp_agent, source, count, label, "de",
        preexisting_graph,
    )

174 

def add_id(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> Identifier:
    """Return the entity registered for ``res``, or build a new Identifier.

    :param resp_agent: forwarded to the Identifier constructor
    :param source: forwarded to the Identifier constructor
    :param res: optional URI; when given, its short name must be "id"
    :param preexisting_graph: forwarded to the Identifier constructor
    :raises ValueError: if ``res`` is given and its short name is not "id"
    """
    if res is not None:
        if get_short_name(res) != "id":
            raise ValueError(f"Given res: <{res}> is inappropriate for an Identifier entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_id, "id", res)
    return Identifier(
        graph, self, res, GraphEntity.iri_identifier,
        resp_agent, source, count, label, "id",
        preexisting_graph,
    )

185 

def add_pl(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> PointerList:
    """Return the entity registered for ``res``, or build a new PointerList.

    :param resp_agent: forwarded to the PointerList constructor
    :param source: forwarded to the PointerList constructor
    :param res: optional URI; when given, its short name must be "pl"
    :param preexisting_graph: forwarded to the PointerList constructor
    :raises ValueError: if ``res`` is given and its short name is not "pl"
    """
    if res is not None:
        if get_short_name(res) != "pl":
            raise ValueError(f"Given res: <{res}> is inappropriate for a PointerList entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_pl, "pl", res)
    return PointerList(
        graph, self, res, GraphEntity.iri_singleloc_pointer_list,
        resp_agent, source, count, label, "pl",
        preexisting_graph,
    )

196 

def add_rp(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> ReferencePointer:
    """Return the entity registered for ``res``, or build a new ReferencePointer.

    :param resp_agent: forwarded to the ReferencePointer constructor
    :param source: forwarded to the ReferencePointer constructor
    :param res: optional URI; when given, its short name must be "rp"
    :param preexisting_graph: forwarded to the ReferencePointer constructor
    :raises ValueError: if ``res`` is given and its short name is not "rp"
    """
    if res is not None:
        if get_short_name(res) != "rp":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ReferencePointer entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_rp, "rp", res)
    return ReferencePointer(
        graph, self, res, GraphEntity.iri_intextref_pointer,
        resp_agent, source, count, label, "rp",
        preexisting_graph,
    )

207 

def add_ra(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> ResponsibleAgent:
    """Return the entity registered for ``res``, or build a new ResponsibleAgent.

    :param resp_agent: forwarded to the ResponsibleAgent constructor
    :param source: forwarded to the ResponsibleAgent constructor
    :param res: optional URI; when given, its short name must be "ra"
    :param preexisting_graph: forwarded to the ResponsibleAgent constructor
    :raises ValueError: if ``res`` is given and its short name is not "ra"
    """
    if res is not None:
        if get_short_name(res) != "ra":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ResponsibleAgent entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_ra, "ra", res)
    return ResponsibleAgent(
        graph, self, res, GraphEntity.iri_agent,
        resp_agent, source, count, label, "ra",
        preexisting_graph,
    )

218 

def add_re(self, resp_agent: str, source: str = None, res: URIRef = None,
           preexisting_graph: Graph = None) -> ResourceEmbodiment:
    """Return the entity registered for ``res``, or build a new ResourceEmbodiment.

    :param resp_agent: forwarded to the ResourceEmbodiment constructor
    :param source: forwarded to the ResourceEmbodiment constructor
    :param res: optional URI; when given, its short name must be "re"
    :param preexisting_graph: forwarded to the ResourceEmbodiment constructor
    :raises ValueError: if ``res`` is given and its short name is not "re"
    """
    if res is not None:
        if get_short_name(res) != "re":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ResourceEmbodiment entity.")
        if res in self.res_to_entity:
            # Already registered: reuse it rather than building a second entity
            return self.res_to_entity[res]
    graph, count, label = self._add(self.g_re, "re", res)
    return ResourceEmbodiment(
        graph, self, res, GraphEntity.iri_manifestation,
        resp_agent, source, count, label, "re",
        preexisting_graph,
    )

229 

def _add(self, graph_url: str, short_name: str, res: URIRef = None) -> Tuple[Graph, Optional[str], Optional[str]]:
    """Create the graph for a new entity and work out its counter and label.

    :param graph_url: identifier given to the newly created Graph
    :param short_name: two-letter entity type, used as counter name and ``labels`` key
    :param res: URI of an already-numbered entity, or ``None`` to allocate a new number
    :return: ``(graph, count, label)``; ``count`` and ``label`` are both ``None``
        when ``res`` is given, and ``label`` is ``None`` when ``wanted_label`` is false
    """
    new_graph: Graph = Graph(identifier=graph_url)
    self._set_ns(new_graph)

    if res is not None:
        # Existing resource: never allocate a number, only make sure the stored
        # counter is not behind the number embedded in the URI.
        prefix = get_prefix(res)
        try:
            embedded: int = int(get_count(res))
        except ValueError:
            # Non-numeric count: -1 is used so the comparison below is against -1
            embedded = -1
        stored = self.counter_handler.read_counter(short_name, supplier_prefix=prefix)
        if embedded > stored:
            self.counter_handler.set_counter(embedded, short_name, supplier_prefix=prefix)
        return new_graph, None, None

    # New resource: take the next number for this type under our own prefix
    prefix = self.supplier_prefix
    next_number = self.counter_handler.increment_counter(short_name, supplier_prefix=prefix)
    count: str = prefix + str(next_number)

    label: Optional[str] = None
    if self.wanted_label:
        label = "%s %s [%s/%s]" % (self.labels[short_name], count, short_name, count)

    return new_graph, count, label

252 

def get_orphans(self) -> List[GraphEntity]:
    """Return the entities of this set that no entity of the set points to.

    An entity counts as referenced when its URI appears as the object of a
    triple whose subject is the URI of some registered entity, in that
    entity's own graph. An entity pointing to itself is therefore referenced.

    :return: the unreferenced entities, in the iteration order of
        ``res_to_entity`` (deterministic, unlike iterating a set difference)
    """
    referenced: Set[URIRef] = set()
    for res, entity in self.res_to_entity.items():
        for obj in entity.g.objects(subject=res, predicate=None):
            # isinstance also accepts URIRef subclasses, which an exact
            # type comparison would silently ignore
            if isinstance(obj, URIRef):
                referenced.add(obj)

    # The entity is taken straight from the mapping, so no second lookup is needed
    return [entity for res, entity in self.res_to_entity.items() if res not in referenced]

269 

def remove_orphans_from_triplestore(self, ts_url: str, resp_agent: str) -> None:
    """Drop, from entities stored in the triplestore, the links to entities being deleted.

    For every registered entity flagged ``to_be_deleted``, the triplestore is
    asked (CONSTRUCT over GET, RDF/XML result) for all triples of the subjects
    that point to it; the result is imported into this set and each triple
    ``(imported entity, any predicate, deleted entity)`` is removed from the
    imported entity's graph.

    :param ts_url: endpoint URL handed to SPARQLWrapper
    :param resp_agent: forwarded to ``Reader.import_entities_from_graph``
    """
    sparql: SPARQLWrapper = SPARQLWrapper(ts_url)
    # Method and return format do not depend on the entity: set them once
    sparql.setMethod('GET')
    sparql.setReturnFormat(RDFXML)

    # Iterate over a snapshot of the registry.
    # NOTE(review): Reader.import_entities_from_graph receives this set and
    # presumably registers the imported entities in res_to_entity; growing the
    # dict while iterating its live view would raise RuntimeError. Confirm.
    for entity_res, entity in list(self.res_to_entity.items()):
        if not entity.to_be_deleted:
            continue

        query: str = f"CONSTRUCT {{?s ?p ?o}} WHERE {{?s ?p ?o ; ?p_1 <{entity_res}>}}"
        sparql.setQuery(query)

        result: ConjunctiveGraph = sparql.query().convert()
        if result is None:
            continue

        imported_entities: List[GraphEntity] = Reader.import_entities_from_graph(self, result, resp_agent)
        for imported_entity in imported_entities:
            imported_entity.g.remove((imported_entity.res, None, entity_res))

285 

def commit_changes(self) -> None:
    """Commit every registered entity and unregister those flagged for deletion.

    ``commit_changes()`` is called on each entity; afterwards, an entity whose
    ``to_be_deleted`` flag is set is removed from ``res_to_entity``.
    """
    # Iterate over a snapshot: deleting a key while iterating the live items
    # view raises "RuntimeError: dictionary changed size during iteration".
    for res, entity in list(self.res_to_entity.items()):
        entity.commit_changes()
        if entity.to_be_deleted:
            del self.res_to_entity[res]

291 

def _set_ns(self, g: Graph) -> None:
    """Bind on ``g`` the prefixes of the per-type graphs and of the vocabularies.

    :param g: graph whose namespace manager receives the bindings
    """
    # Per-type graphs: prefix "xx" is bound to a Namespace built from self.g_xx
    for short_name in ("an", "ar", "be", "ci", "de", "br", "id", "pl", "ra", "re", "rp"):
        g.namespace_manager.bind(short_name, Namespace(getattr(self, "g_" + short_name)))

    # Vocabulary namespaces exposed as GraphEntity class attributes
    vocabularies = (
        ("biro", GraphEntity.BIRO),
        ("co", GraphEntity.CO),
        ("c4o", GraphEntity.C4O),
        ("cito", GraphEntity.CITO),
        ("datacite", GraphEntity.DATACITE),
        ("dcterms", GraphEntity.DCTERMS),
        ("deo", GraphEntity.DEO),
        ("doco", GraphEntity.DOCO),
        ("fabio", GraphEntity.FABIO),
        ("foaf", GraphEntity.FOAF),
        ("frbr", GraphEntity.FRBR),
        ("literal", GraphEntity.LITERAL),
        ("oa", GraphEntity.OA),
        ("oco", GraphEntity.OCO),
        ("prism", GraphEntity.PRISM),
        ("pro", GraphEntity.PRO),
    )
    for prefix, namespace in vocabularies:
        g.namespace_manager.bind(prefix, namespace)

320 

def get_an(self) -> Tuple[ReferenceAnnotation, ...]:
    """Return every ReferenceAnnotation of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, ReferenceAnnotation))

328 

def get_ar(self) -> Tuple[AgentRole, ...]:
    """Return every AgentRole of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, AgentRole))

336 

def get_be(self) -> Tuple[BibliographicReference, ...]:
    """Return every BibliographicReference of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, BibliographicReference))

344 

def get_br(self) -> Tuple[BibliographicResource, ...]:
    """Return every BibliographicResource of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, BibliographicResource))

352 

def get_ci(self) -> Tuple[Citation, ...]:
    """Return every Citation of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, Citation))

360 

def get_de(self) -> Tuple[DiscourseElement, ...]:
    """Return every DiscourseElement of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, DiscourseElement))

368 

def get_id(self) -> Tuple[Identifier, ...]:
    """Return every Identifier of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, Identifier))

376 

def get_pl(self) -> Tuple[PointerList, ...]:
    """Return every PointerList of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, PointerList))

384 

def get_rp(self) -> Tuple[ReferencePointer, ...]:
    """Return every ReferencePointer of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, ReferencePointer))

392 

def get_ra(self) -> Tuple[ResponsibleAgent, ...]:
    """Return every ResponsibleAgent of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, ResponsibleAgent))

400 

def get_re(self) -> Tuple[ResourceEmbodiment, ...]:
    """Return every ResourceEmbodiment of this set.

    :return: the matching entities, in the iteration order of ``res_to_entity``
    """
    # Built in a single pass: growing a tuple with ``+=`` copies the partial
    # result on every match, which is quadratic in the number of matches.
    return tuple(entity for entity in self.res_to_entity.values()
                 if isinstance(entity, ResourceEmbodiment))