Coverage for oc_ocdm / graph / graph_set.py: 89%

227 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-28 18:52 +0000

1# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com> 

2# SPDX-FileCopyrightText: 2023-2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

3# 

4# SPDX-License-Identifier: ISC 

5 

6from __future__ import annotations 

7 

8from typing import TYPE_CHECKING, cast 

9 

10from io import BytesIO 

11 

12from oc_ocdm.abstract_set import AbstractSet 

13from oc_ocdm.reader import Reader 

14from oc_ocdm.support.support import get_count, get_prefix, get_short_name 

15from sparqlite import SPARQLClient 

16 

17if TYPE_CHECKING: 

18 from typing import Dict, ClassVar, Optional, List, Set 

19 

20from oc_ocdm.counter_handler.counter_handler import CounterHandler 

21from oc_ocdm.counter_handler.filesystem_counter_handler import \ 

22 FilesystemCounterHandler 

23from oc_ocdm.counter_handler.in_memory_counter_handler import \ 

24 InMemoryCounterHandler 

25from oc_ocdm.graph.entities.bibliographic.agent_role import AgentRole 

26from oc_ocdm.graph.entities.bibliographic.bibliographic_reference import \ 

27 BibliographicReference 

28from oc_ocdm.graph.entities.bibliographic.bibliographic_resource import \ 

29 BibliographicResource 

30from oc_ocdm.graph.entities.bibliographic.citation import Citation 

31from oc_ocdm.graph.entities.bibliographic.discourse_element import \ 

32 DiscourseElement 

33from oc_ocdm.graph.entities.bibliographic.pointer_list import PointerList 

34from oc_ocdm.graph.entities.bibliographic.reference_annotation import \ 

35 ReferenceAnnotation 

36from oc_ocdm.graph.entities.bibliographic.reference_pointer import \ 

37 ReferencePointer 

38from oc_ocdm.graph.entities.bibliographic.resource_embodiment import \ 

39 ResourceEmbodiment 

40from oc_ocdm.graph.entities.bibliographic.responsible_agent import \ 

41 ResponsibleAgent 

42from oc_ocdm.graph.entities.identifier import Identifier 

43from oc_ocdm.graph.graph_entity import GraphEntity 

44from rdflib import Graph, Namespace, URIRef 

45 

46 

47class GraphSet(AbstractSet[GraphEntity]): 

48 # Labels 

49 labels: ClassVar[Dict[str, str]] = { 

50 "an": "annotation", 

51 "ar": "agent role", 

52 "be": "bibliographic entry", 

53 "br": "bibliographic resource", 

54 "ci": "citation", 

55 "de": "discourse element", 

56 "id": "identifier", 

57 "pl": "single location pointer list", 

58 "ra": "responsible agent", 

59 "re": "resource embodiment", 

60 "rp": "in-text reference pointer" 

61 } 

62 

def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
             wanted_label: bool = True, custom_counter_handler: CounterHandler | None = None) -> None:
    """Create an empty graph set whose graph IRIs are derived from ``base_iri``.

    Args:
        base_iri: Prefix of the eleven per-type graph IRIs
            (``base_iri + "an/"``, ``base_iri + "ar/"``, and so on).
        info_dir: Passed to ``FilesystemCounterHandler`` when no custom
            handler is supplied and the value is neither ``None`` nor ``""``.
        supplier_prefix: Stored on the instance and passed to
            ``FilesystemCounterHandler`` together with ``info_dir``.
        wanted_label: Stored on the instance as ``self.wanted_label``.
        custom_counter_handler: Counter handler to use as-is. When ``None``,
            a ``FilesystemCounterHandler`` is created if ``info_dir`` is
            non-empty, otherwise an ``InMemoryCounterHandler``.
    """
    super(GraphSet, self).__init__()
    # The following variable maps a URIRef with the related graph entity
    self.res_to_entity: Dict[URIRef, GraphEntity] = {}
    self.base_iri: str = base_iri
    self.info_dir: str = info_dir
    self.supplier_prefix: str = supplier_prefix
    self.wanted_label: bool = wanted_label
    # Graphs
    # The following structure of URL is quite important for the other classes
    # developed and should not be changed. The only part that can change is the
    # value of the base_iri
    self.g_an: str = base_iri + "an/"
    self.g_ar: str = base_iri + "ar/"
    self.g_be: str = base_iri + "be/"
    self.g_br: str = base_iri + "br/"
    self.g_ci: str = base_iri + "ci/"
    self.g_de: str = base_iri + "de/"
    self.g_id: str = base_iri + "id/"
    self.g_pl: str = base_iri + "pl/"
    self.g_ra: str = base_iri + "ra/"
    self.g_re: str = base_iri + "re/"
    self.g_rp: str = base_iri + "rp/"

    # Compare against None explicitly: a handler object that happens to
    # evaluate as falsy must still be honoured when the caller supplies it.
    if custom_counter_handler is not None:
        self.counter_handler = custom_counter_handler
    elif info_dir is not None and info_dir != "":
        self.counter_handler = FilesystemCounterHandler(info_dir, supplier_prefix)
    else:
        self.counter_handler = InMemoryCounterHandler()

94 

def get_entity(self, res: URIRef) -> Optional[GraphEntity]:
    """Look up the entity stored under ``res``.

    Returns:
        The value mapped to ``res`` in ``self.res_to_entity``, or ``None``
        when ``res`` is not a key of that mapping.
    """
    return self.res_to_entity.get(res)

98 

99 # Add resources related to bibliographic entities 

def add_an(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> ReferenceAnnotation:
    """Return the ReferenceAnnotation for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new ReferenceAnnotation is constructed from
    them together with ``resp_agent``, ``source`` and ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "an".
    """
    if res is not None:
        if get_short_name(res) != "an":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ReferenceAnnotation entity.")
        if res in self.res_to_entity:
            return cast(ReferenceAnnotation, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_an, "an", res)
    return ReferenceAnnotation(
        graph, self, GraphEntity.iri_note, res,
        resp_agent, source, counter, entity_label, "an",
        preexisting_graph,
    )

110 

def add_ar(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> AgentRole:
    """Return the AgentRole for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new AgentRole is constructed from them
    together with ``resp_agent``, ``source`` and ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "ar".
    """
    if res is not None:
        if get_short_name(res) != "ar":
            raise ValueError(f"Given res: <{res}> is inappropriate for an AgentRole entity.")
        if res in self.res_to_entity:
            return cast(AgentRole, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_ar, "ar", res)
    return AgentRole(
        graph, self, GraphEntity.iri_role_in_time, res,
        resp_agent, source, counter, entity_label, "ar",
        preexisting_graph,
    )

121 

def add_be(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> BibliographicReference:
    """Return the BibliographicReference for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new BibliographicReference is constructed
    from them together with ``resp_agent``, ``source`` and
    ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "be".
    """
    if res is not None:
        if get_short_name(res) != "be":
            raise ValueError(f"Given res: <{res}> is inappropriate for a BibliographicReference entity.")
        if res in self.res_to_entity:
            return cast(BibliographicReference, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_be, "be", res)
    return BibliographicReference(
        graph, self, GraphEntity.iri_bibliographic_reference, res,
        resp_agent, source, counter, entity_label, "be",
        preexisting_graph,
    )

132 

def add_br(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> BibliographicResource:
    """Return the BibliographicResource for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new BibliographicResource is constructed
    from them together with ``resp_agent``, ``source`` and
    ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "br".
    """
    if res is not None:
        if get_short_name(res) != "br":
            raise ValueError(f"Given res: <{res}> is inappropriate for a BibliographicResource entity.")
        if res in self.res_to_entity:
            return cast(BibliographicResource, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_br, "br", res)
    return BibliographicResource(
        graph, self, GraphEntity.iri_expression, res,
        resp_agent, source, counter, entity_label, "br",
        preexisting_graph,
    )

143 

def add_ci(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> Citation:
    """Return the Citation for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new Citation is constructed from them
    together with ``resp_agent``, ``source`` and ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "ci".
    """
    if res is not None:
        if get_short_name(res) != "ci":
            raise ValueError(f"Given res: <{res}> is inappropriate for a Citation entity.")
        if res in self.res_to_entity:
            return cast(Citation, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_ci, "ci", res)
    return Citation(
        graph, self, GraphEntity.iri_citation, res,
        resp_agent, source, counter, entity_label, "ci",
        preexisting_graph,
    )

154 

def add_de(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> DiscourseElement:
    """Return the DiscourseElement for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new DiscourseElement is constructed from
    them together with ``resp_agent``, ``source`` and ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "de".
    """
    if res is not None:
        if get_short_name(res) != "de":
            raise ValueError(f"Given res: <{res}> is inappropriate for a DiscourseElement entity.")
        if res in self.res_to_entity:
            return cast(DiscourseElement, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_de, "de", res)
    return DiscourseElement(
        graph, self, GraphEntity.iri_discourse_element, res,
        resp_agent, source, counter, entity_label, "de",
        preexisting_graph,
    )

165 

def add_id(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> Identifier:
    """Return the Identifier for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new Identifier is constructed from them
    together with ``resp_agent``, ``source`` and ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "id".
    """
    if res is not None:
        if get_short_name(res) != "id":
            raise ValueError(f"Given res: <{res}> is inappropriate for an Identifier entity.")
        if res in self.res_to_entity:
            return cast(Identifier, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_id, "id", res)
    return Identifier(
        graph, self, GraphEntity.iri_identifier, res,
        resp_agent, source, counter, entity_label, "id",
        preexisting_graph,
    )

176 

def add_pl(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> PointerList:
    """Return the PointerList for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new PointerList is constructed from them
    together with ``resp_agent``, ``source`` and ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "pl".
    """
    if res is not None:
        if get_short_name(res) != "pl":
            raise ValueError(f"Given res: <{res}> is inappropriate for a PointerList entity.")
        if res in self.res_to_entity:
            return cast(PointerList, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_pl, "pl", res)
    return PointerList(
        graph, self, GraphEntity.iri_singleloc_pointer_list, res,
        resp_agent, source, counter, entity_label, "pl",
        preexisting_graph,
    )

187 

def add_rp(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> ReferencePointer:
    """Return the ReferencePointer for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new ReferencePointer is constructed from
    them together with ``resp_agent``, ``source`` and ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "rp".
    """
    if res is not None:
        if get_short_name(res) != "rp":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ReferencePointer entity.")
        if res in self.res_to_entity:
            return cast(ReferencePointer, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_rp, "rp", res)
    return ReferencePointer(
        graph, self, GraphEntity.iri_intextref_pointer, res,
        resp_agent, source, counter, entity_label, "rp",
        preexisting_graph,
    )

198 

def add_ra(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> ResponsibleAgent:
    """Return the ResponsibleAgent for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new ResponsibleAgent is constructed from
    them together with ``resp_agent``, ``source`` and ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "ra".
    """
    if res is not None:
        if get_short_name(res) != "ra":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ResponsibleAgent entity.")
        if res in self.res_to_entity:
            return cast(ResponsibleAgent, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_ra, "ra", res)
    return ResponsibleAgent(
        graph, self, GraphEntity.iri_agent, res,
        resp_agent, source, counter, entity_label, "ra",
        preexisting_graph,
    )

209 

def add_re(self, resp_agent: str | None, source: str | None = None, res: URIRef | None = None,
           preexisting_graph: Graph | None = None) -> ResourceEmbodiment:
    """Return the ResourceEmbodiment for ``res``, building a new one when needed.

    When ``res`` is given and is already a key of ``self.res_to_entity``, the
    stored entity is returned as-is. Otherwise ``self._add`` supplies the
    graph, count and label, and a new ResourceEmbodiment is constructed from
    them together with ``resp_agent``, ``source`` and ``preexisting_graph``.

    Raises:
        ValueError: ``res`` is given but ``get_short_name(res)`` is not "re".
    """
    if res is not None:
        if get_short_name(res) != "re":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ResourceEmbodiment entity.")
        if res in self.res_to_entity:
            return cast(ResourceEmbodiment, self.res_to_entity[res])
    graph, counter, entity_label = self._add(self.g_re, "re", res)
    return ResourceEmbodiment(
        graph, self, GraphEntity.iri_manifestation, res,
        resp_agent, source, counter, entity_label, "re",
        preexisting_graph,
    )

220 

def _add(self, graph_url: str, short_name: str, res: URIRef | None = None) -> tuple[Graph, str | None, str | None]:
    """Prepare the graph, count and label used to build an entity.

    A new ``Graph`` identified by ``graph_url`` is created and passed through
    ``self._set_ns``.

    * ``res`` is ``None``: the counter for ``short_name`` is incremented under
      ``self.supplier_prefix``; the count is that prefix followed by the new
      counter value, and a label is produced only if ``self.wanted_label``.
    * ``res`` is given: the number read from ``res`` via ``get_count`` (or -1
      when it is not an integer) replaces the stored counter if it is greater;
      count and label are both ``None``.

    Returns:
        ``(graph, count, label)``.
    """
    graph: Graph = Graph(identifier=graph_url)
    self._set_ns(graph)

    if res is None:
        prefix = self.supplier_prefix
        next_number = self.counter_handler.increment_counter(short_name, supplier_prefix=prefix)
        count: Optional[str] = prefix + str(next_number)
        label: Optional[str] = None
        if self.wanted_label:
            label = "%s %s [%s/%s]" % (self.labels[short_name], count, short_name, count)
        return graph, count, label

    prefix = get_prefix(res)
    try:
        number_in_res: int = int(get_count(res))
    except ValueError:
        # A non-numeric count never exceeds the stored counter.
        number_in_res = -1
    stored_number = self.counter_handler.read_counter(short_name, supplier_prefix=prefix)
    if number_in_res > stored_number:
        self.counter_handler.set_counter(number_in_res, short_name, supplier_prefix=prefix)
    return graph, None, None

243 

def get_orphans(self) -> List[GraphEntity]:
    """Return the entities of this set that no entity of the set points to.

    A resource counts as referenced when it appears as a ``URIRef`` object of
    a triple whose subject is an entity's own key in ``self.res_to_entity``,
    looked up in that entity's graph ``g``. Every entity whose key is not
    referenced this way is returned.

    Returns:
        The orphan entities, in the insertion order of ``self.res_to_entity``
        (deterministic, unlike iterating a set of URIs).
    """
    referenced: Set[URIRef] = set()
    for res, entity in self.res_to_entity.items():
        referenced.update(
            obj for obj in entity.g.objects(subject=res, predicate=None)
            if isinstance(obj, URIRef)
        )
    return [entity for res, entity in self.res_to_entity.items() if res not in referenced]

260 

def remove_orphans_from_triplestore(self, ts_url: str, resp_agent: str) -> None:
    """Drop, from imported remote entities, the triples pointing at deleted ones.

    For every entity of this set flagged ``to_be_deleted``, a CONSTRUCT query
    is sent to ``ts_url`` for the subjects that have that entity's URI as an
    object. A non-empty response is parsed as N-Triples and handed to
    ``Reader.import_entities_from_graph``; from each imported entity's graph,
    all triples ``(imported_entity.res, *, entity_res)`` are removed.

    Args:
        ts_url: Endpoint given to ``SPARQLClient``.
        resp_agent: Forwarded to ``Reader.import_entities_from_graph``.
    """
    with SPARQLClient(ts_url) as client:
        # Iterate over a snapshot: the importer below receives this set and
        # may register new entities in res_to_entity (not visible from here),
        # which would invalidate a live dict iterator.
        for entity_res, entity in list(self.res_to_entity.items()):
            if not entity.to_be_deleted:
                continue
            query: str = f"CONSTRUCT {{?s ?p ?o}} WHERE {{?s ?p ?o ; ?p_1 <{entity_res}>}}"
            nt_bytes = client.construct(query)
            if not nt_bytes:
                continue
            result: Graph = Graph()
            result.parse(BytesIO(nt_bytes), format='nt')
            imported_entities: List[GraphEntity] = Reader.import_entities_from_graph(self, result, resp_agent)
            for imported_entity in imported_entities:
                imported_entity.g.remove((imported_entity.res, None, entity_res))

273 

def commit_changes(self) -> None:
    """Commit every entity and forget those flagged ``to_be_deleted``.

    ``commit_changes()`` is called on each entity in ``self.res_to_entity``;
    an entity whose ``to_be_deleted`` is true afterwards is removed from the
    mapping.
    """
    # Iterate over a snapshot: deleting from the dict while iterating its
    # live view raises "dictionary changed size during iteration".
    for res, entity in list(self.res_to_entity.items()):
        entity.commit_changes()
        if entity.to_be_deleted:
            del self.res_to_entity[res]

279 

def _set_ns(self, g: Graph) -> None:
    """Bind the prefixes used by this set on ``g``'s namespace manager.

    First the eleven per-type graph IRIs of this set are bound (each wrapped
    in ``Namespace``), then the vocabulary namespaces exposed as attributes
    of ``GraphEntity``.
    """
    bind = g.namespace_manager.bind

    own_graphs = (
        ("an", self.g_an),
        ("ar", self.g_ar),
        ("be", self.g_be),
        ("ci", self.g_ci),
        ("de", self.g_de),
        ("br", self.g_br),
        ("id", self.g_id),
        ("pl", self.g_pl),
        ("ra", self.g_ra),
        ("re", self.g_re),
        ("rp", self.g_rp),
    )
    for prefix, graph_iri in own_graphs:
        bind(prefix, Namespace(graph_iri))

    vocabularies = (
        ("biro", GraphEntity.BIRO),
        ("co", GraphEntity.CO),
        ("c4o", GraphEntity.C4O),
        ("cito", GraphEntity.CITO),
        ("datacite", GraphEntity.DATACITE),
        ("dcterms", GraphEntity.DCTERMS),
        ("deo", GraphEntity.DEO),
        ("doco", GraphEntity.DOCO),
        ("fabio", GraphEntity.FABIO),
        ("foaf", GraphEntity.FOAF),
        ("frbr", GraphEntity.FRBR),
        ("literal", GraphEntity.LITERAL),
        ("oa", GraphEntity.OA),
        ("oco", GraphEntity.OCO),
        ("prism", GraphEntity.PRISM),
        ("pro", GraphEntity.PRO),
    )
    for prefix, vocabulary in vocabularies:
        bind(prefix, vocabulary)

308 

def get_an(self) -> tuple[ReferenceAnnotation, ...]:
    """Return, as a tuple, every ReferenceAnnotation held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, ReferenceAnnotation)]
    return tuple(matches)

311 

def get_ar(self) -> tuple[AgentRole, ...]:
    """Return, as a tuple, every AgentRole held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, AgentRole)]
    return tuple(matches)

314 

def get_be(self) -> tuple[BibliographicReference, ...]:
    """Return, as a tuple, every BibliographicReference held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, BibliographicReference)]
    return tuple(matches)

317 

def get_br(self) -> tuple[BibliographicResource, ...]:
    """Return, as a tuple, every BibliographicResource held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, BibliographicResource)]
    return tuple(matches)

320 

def get_ci(self) -> tuple[Citation, ...]:
    """Return, as a tuple, every Citation held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, Citation)]
    return tuple(matches)

323 

def get_de(self) -> tuple[DiscourseElement, ...]:
    """Return, as a tuple, every DiscourseElement held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, DiscourseElement)]
    return tuple(matches)

326 

def get_id(self) -> tuple[Identifier, ...]:
    """Return, as a tuple, every Identifier held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, Identifier)]
    return tuple(matches)

329 

def get_pl(self) -> tuple[PointerList, ...]:
    """Return, as a tuple, every PointerList held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, PointerList)]
    return tuple(matches)

332 

def get_rp(self) -> tuple[ReferencePointer, ...]:
    """Return, as a tuple, every ReferencePointer held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, ReferencePointer)]
    return tuple(matches)

335 

def get_ra(self) -> tuple[ResponsibleAgent, ...]:
    """Return, as a tuple, every ResponsibleAgent held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, ResponsibleAgent)]
    return tuple(matches)

338 

def get_re(self) -> tuple[ResourceEmbodiment, ...]:
    """Return, as a tuple, every ResourceEmbodiment held in ``self.res_to_entity``."""
    matches = [candidate for candidate in self.res_to_entity.values()
               if isinstance(candidate, ResourceEmbodiment)]
    return tuple(matches)