Coverage for oc_ocdm / graph / graph_set.py: 87%

199 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-08 20:23 +0000

1# SPDX-FileCopyrightText: 2020-2022 Simone Persiani <iosonopersia@gmail.com> 

2# SPDX-FileCopyrightText: 2023-2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

3# 

4# SPDX-License-Identifier: ISC 

5 

6from __future__ import annotations 

7 

8from io import BytesIO 

9from typing import TYPE_CHECKING, cast 

10 

11from rdflib import Graph 

12from triplelite import RDFTerm, SubgraphView, TripleLite, from_rdflib 

13 

14from oc_ocdm.abstract_set import AbstractSet 

15from oc_ocdm.counter_handler.counter_handler import CounterHandler 

16from oc_ocdm.counter_handler.filesystem_counter_handler import FilesystemCounterHandler 

17from oc_ocdm.counter_handler.in_memory_counter_handler import InMemoryCounterHandler 

18from oc_ocdm.graph.entities.bibliographic.agent_role import AgentRole 

19from oc_ocdm.graph.entities.bibliographic.bibliographic_reference import BibliographicReference 

20from oc_ocdm.graph.entities.bibliographic.bibliographic_resource import BibliographicResource 

21from oc_ocdm.graph.entities.bibliographic.citation import Citation 

22from oc_ocdm.graph.entities.bibliographic.discourse_element import DiscourseElement 

23from oc_ocdm.graph.entities.bibliographic.pointer_list import PointerList 

24from oc_ocdm.graph.entities.bibliographic.reference_annotation import ReferenceAnnotation 

25from oc_ocdm.graph.entities.bibliographic.reference_pointer import ReferencePointer 

26from oc_ocdm.graph.entities.bibliographic.resource_embodiment import ResourceEmbodiment 

27from oc_ocdm.graph.entities.bibliographic.responsible_agent import ResponsibleAgent 

28from oc_ocdm.graph.entities.identifier import Identifier 

29from oc_ocdm.graph.graph_entity import GraphEntity 

30from oc_ocdm.support.sparql import sparql_construct 

31from oc_ocdm.support.support import get_count, get_prefix, get_short_name 

32 

33if TYPE_CHECKING: 

34 from typing import ClassVar, Dict, List, Optional, Set 

35 

36 

37 

class GraphSet(AbstractSet[GraphEntity]):
    """Factory and registry for the graph entities of a dataset.

    The set keeps a mapping from resource IRI to entity (``res_to_entity``),
    the named-graph IRIs derived from ``base_iri`` (one per entity short
    name) and a counter handler used to mint numeric identifiers for new
    entities.
    """

    # Human-readable name for each entity short name; used by ``_add`` to
    # build the label of newly minted entities.
    labels: ClassVar[Dict[str, str]] = {
        "an": "annotation",
        "ar": "agent role",
        "be": "bibliographic entry",
        "br": "bibliographic resource",
        "ci": "citation",
        "de": "discourse element",
        "id": "identifier",
        "pl": "single location pointer list",
        "ra": "responsible agent",
        "re": "resource embodiment",
        "rp": "in-text reference pointer"
    }

    def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
                 wanted_label: bool = True, custom_counter_handler: CounterHandler | None = None) -> None:
        """Initialise an empty set.

        Args:
            base_iri: Prefix of every named-graph IRI; the two-letter short
                name followed by ``/`` is appended verbatim, so the value is
                used exactly as given.
            info_dir: Directory handed to ``FilesystemCounterHandler`` when
                no custom handler is supplied. An empty string (or ``None``)
                selects ``InMemoryCounterHandler`` instead.
            supplier_prefix: Prefix prepended to the counters of newly
                minted entities.
            wanted_label: Whether ``_add`` builds a label for newly minted
                entities.
            custom_counter_handler: Counter handler to use in place of the
                default ones; it is used only when truthy.
        """
        super().__init__()
        # Maps a resource IRI to the related graph entity.
        self.res_to_entity: Dict[str, GraphEntity] = {}
        self.base_iri: str = base_iri
        self.info_dir: str = info_dir
        self.supplier_prefix: str = supplier_prefix
        self.wanted_label: bool = wanted_label
        # Graphs
        # The following structure of URL is quite important for the other classes
        # developed and should not be changed. The only part that can change is the
        # value of the base_iri
        self.g_an: str = base_iri + "an/"
        self.g_ar: str = base_iri + "ar/"
        self.g_be: str = base_iri + "be/"
        self.g_br: str = base_iri + "br/"
        self.g_ci: str = base_iri + "ci/"
        self.g_de: str = base_iri + "de/"
        self.g_id: str = base_iri + "id/"
        self.g_pl: str = base_iri + "pl/"
        self.g_ra: str = base_iri + "ra/"
        self.g_re: str = base_iri + "re/"
        self.g_rp: str = base_iri + "rp/"

        # Counter handler precedence: explicit handler, then filesystem
        # (when a directory is given), then in-memory.
        if custom_counter_handler:
            self.counter_handler = custom_counter_handler
        elif info_dir is not None and info_dir != "":
            self.counter_handler = FilesystemCounterHandler(info_dir, supplier_prefix)
        else:
            self.counter_handler = InMemoryCounterHandler()

    def get_entity(self, res: str) -> Optional[GraphEntity]:
        """Return the entity registered under ``res``, or ``None`` if absent."""
        return self.res_to_entity.get(res)

    def _get_registered(self, res: str | None, short_name: str, entity_desc: str) -> GraphEntity | None:
        """Validate ``res`` for an ``add_*`` call and look it up in the set.

        Args:
            res: Resource IRI passed to the ``add_*`` method, or ``None``.
            short_name: Short name that ``get_short_name(res)`` must yield.
            entity_desc: Class name preceded by its article (for example
                ``"an AgentRole"``); interpolated into the error message.

        Returns:
            The entity already registered under ``res``; ``None`` when
            ``res`` is ``None`` or not registered.

        Raises:
            ValueError: If ``res`` is given and its short name differs from
                ``short_name``.
        """
        if res is None:
            return None
        if get_short_name(res) != short_name:
            raise ValueError(f"Given res: <{res}> is inappropriate for {entity_desc} entity.")
        return self.res_to_entity.get(res)

    # Add resources related to bibliographic entities
    def add_an(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> ReferenceAnnotation:
        """Return the ``ReferenceAnnotation`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"an"``.
        """
        existing = self._get_registered(res, "an", "a ReferenceAnnotation")
        if existing is not None:
            return cast(ReferenceAnnotation, existing)
        cur_g, count, label = self._add(self.g_an, "an", res)
        return ReferenceAnnotation(cur_g, self, GraphEntity.iri_note, res,
                                   resp_agent, source, count, label, "an",
                                   preexisting_graph)

    def add_ar(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> AgentRole:
        """Return the ``AgentRole`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"ar"``.
        """
        existing = self._get_registered(res, "ar", "an AgentRole")
        if existing is not None:
            return cast(AgentRole, existing)
        cur_g, count, label = self._add(self.g_ar, "ar", res)
        return AgentRole(cur_g, self, GraphEntity.iri_role_in_time, res,
                         resp_agent, source, count, label, "ar",
                         preexisting_graph)

    def add_be(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> BibliographicReference:
        """Return the ``BibliographicReference`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"be"``.
        """
        existing = self._get_registered(res, "be", "a BibliographicReference")
        if existing is not None:
            return cast(BibliographicReference, existing)
        cur_g, count, label = self._add(self.g_be, "be", res)
        return BibliographicReference(cur_g, self, GraphEntity.iri_bibliographic_reference, res,
                                      resp_agent, source, count, label, "be",
                                      preexisting_graph)

    def add_br(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> BibliographicResource:
        """Return the ``BibliographicResource`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"br"``.
        """
        existing = self._get_registered(res, "br", "a BibliographicResource")
        if existing is not None:
            return cast(BibliographicResource, existing)
        cur_g, count, label = self._add(self.g_br, "br", res)
        return BibliographicResource(cur_g, self, GraphEntity.iri_expression, res,
                                     resp_agent, source, count, label, "br",
                                     preexisting_graph)

    def add_ci(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> Citation:
        """Return the ``Citation`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"ci"``.
        """
        existing = self._get_registered(res, "ci", "a Citation")
        if existing is not None:
            return cast(Citation, existing)
        cur_g, count, label = self._add(self.g_ci, "ci", res)
        return Citation(cur_g, self, GraphEntity.iri_citation, res,
                        resp_agent, source, count, label, "ci",
                        preexisting_graph)

    def add_de(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> DiscourseElement:
        """Return the ``DiscourseElement`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"de"``.
        """
        existing = self._get_registered(res, "de", "a DiscourseElement")
        if existing is not None:
            return cast(DiscourseElement, existing)
        cur_g, count, label = self._add(self.g_de, "de", res)
        return DiscourseElement(cur_g, self, GraphEntity.iri_discourse_element, res,
                                resp_agent, source, count, label, "de",
                                preexisting_graph)

    def add_id(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> Identifier:
        """Return the ``Identifier`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"id"``.
        """
        existing = self._get_registered(res, "id", "an Identifier")
        if existing is not None:
            return cast(Identifier, existing)
        cur_g, count, label = self._add(self.g_id, "id", res)
        return Identifier(cur_g, self, GraphEntity.iri_identifier, res,
                          resp_agent, source, count, label, "id",
                          preexisting_graph)

    def add_pl(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> PointerList:
        """Return the ``PointerList`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"pl"``.
        """
        existing = self._get_registered(res, "pl", "a PointerList")
        if existing is not None:
            return cast(PointerList, existing)
        cur_g, count, label = self._add(self.g_pl, "pl", res)
        return PointerList(cur_g, self, GraphEntity.iri_singleloc_pointer_list, res,
                           resp_agent, source, count, label, "pl",
                           preexisting_graph)

    def add_rp(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> ReferencePointer:
        """Return the ``ReferencePointer`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"rp"``.
        """
        existing = self._get_registered(res, "rp", "a ReferencePointer")
        if existing is not None:
            return cast(ReferencePointer, existing)
        cur_g, count, label = self._add(self.g_rp, "rp", res)
        return ReferencePointer(cur_g, self, GraphEntity.iri_intextref_pointer, res,
                                resp_agent, source, count, label, "rp",
                                preexisting_graph)

    def add_ra(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> ResponsibleAgent:
        """Return the ``ResponsibleAgent`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"ra"``.
        """
        existing = self._get_registered(res, "ra", "a ResponsibleAgent")
        if existing is not None:
            return cast(ResponsibleAgent, existing)
        cur_g, count, label = self._add(self.g_ra, "ra", res)
        return ResponsibleAgent(cur_g, self, GraphEntity.iri_agent, res,
                                resp_agent, source, count, label, "ra",
                                preexisting_graph)

    def add_re(self, resp_agent: str | None, source: str | None = None, res: str | None = None,
               preexisting_graph: SubgraphView | None = None) -> ResourceEmbodiment:
        """Return the ``ResourceEmbodiment`` registered under ``res`` or build a new one.

        Raises:
            ValueError: If ``res`` is given and its short name is not ``"re"``.
        """
        existing = self._get_registered(res, "re", "a ResourceEmbodiment")
        if existing is not None:
            return cast(ResourceEmbodiment, existing)
        cur_g, count, label = self._add(self.g_re, "re", res)
        return ResourceEmbodiment(cur_g, self, GraphEntity.iri_manifestation, res,
                                  resp_agent, source, count, label, "re",
                                  preexisting_graph)

    def _add(self, graph_url: str, short_name: str, res: str | None = None) -> tuple[TripleLite, str | None, str | None]:
        """Create the graph for a new entity and keep the counters in sync.

        Args:
            graph_url: Identifier given to the new ``TripleLite`` graph.
            short_name: Entity short name; selects the counter and the label.
            res: IRI of an already-identified resource, or ``None`` to mint
                a new identifier.

        Returns:
            ``(graph, count, label)``. When ``res`` is given, ``count`` and
            ``label`` are both ``None``. Otherwise ``count`` is the set's
            supplier prefix followed by the incremented counter, and
            ``label`` is built from it only if ``wanted_label`` is true.
        """
        cur_g = TripleLite(identifier=graph_url)

        if res is not None:
            # The resource already carries its own number: never mint one,
            # only push the stored counter forward if it lags behind.
            supplier_prefix = get_prefix(res)
            try:
                res_count = int(get_count(res))
            except ValueError:
                # Non-numeric count: -1 keeps the comparison below from
                # updating the counter (assuming stored counters are >= 0).
                res_count = -1
            if res_count > self.counter_handler.read_counter(short_name, supplier_prefix=supplier_prefix):
                self.counter_handler.set_counter(res_count, short_name, supplier_prefix=supplier_prefix)
            return cur_g, None, None

        supplier_prefix = self.supplier_prefix
        next_number = self.counter_handler.increment_counter(short_name, supplier_prefix=supplier_prefix)
        count: str = supplier_prefix + str(next_number)

        label: Optional[str] = None
        if self.wanted_label:
            label = f"{self.labels[short_name]} {count} [{short_name}/{count}]"

        return cur_g, count, label

    def get_orphans(self) -> List[GraphEntity]:
        """Return the registered entities that no registered entity points to.

        An entity counts as referenced when its IRI is the ``uri``-typed
        object of a triple whose subject is a registered resource, read from
        that resource's own graph (self-references included). The result
        follows the iteration order of ``res_to_entity``.
        """
        referenced_entities: Set[str] = set()
        for res, entity in self.res_to_entity.items():
            for obj in entity.g.objects(subject=res, predicate=None):
                if obj.type == "uri":
                    referenced_entities.add(obj.value)

        return [
            entity
            for res, entity in self.res_to_entity.items()
            if res not in referenced_entities
        ]

    def remove_orphans_from_triplestore(self, ts_url: str, resp_agent: str) -> None:
        """Drop references to deleted entities from data fetched at ``ts_url``.

        For every registered entity flagged ``to_be_deleted``, a SPARQL
        CONSTRUCT query retrieves all triples of the subjects that point to
        it. The result is parsed as N-Triples, imported into this set via
        ``Reader.import_entities_from_graph`` and, on each imported entity,
        the triples whose object is the deleted entity's IRI are removed
        from the entity's graph.

        Args:
            ts_url: Endpoint passed to ``sparql_construct``.
            resp_agent: Responsible agent forwarded to the reader.
        """
        # Iterate over a snapshot: the reader receives this set and may
        # register new entities in it (not verifiable from this file), which
        # would otherwise invalidate the live dictionary iterator.
        for entity_res, entity in list(self.res_to_entity.items()):
            if not entity.to_be_deleted:
                continue
            query: str = f"CONSTRUCT {{?s ?p ?o}} WHERE {{?s ?p ?o ; ?p_1 <{entity_res}>}}"
            nt_bytes = sparql_construct(ts_url, query)
            if not nt_bytes:
                continue
            # Imported lazily, as in the original code (presumably to avoid
            # a circular import between reader and graph set -- TODO confirm).
            from oc_ocdm.reader import Reader
            rdflib_g = Graph()
            rdflib_g.parse(BytesIO(nt_bytes), format='nt')
            graphs = from_rdflib(rdflib_g)
            imported_entities: List[GraphEntity] = Reader.import_entities_from_graph(self, graphs[0], resp_agent)
            deleted_term = RDFTerm("uri", str(entity_res))
            for imported_entity in imported_entities:
                imported_entity.g.remove((imported_entity.res, None, deleted_term))

    def commit_changes(self) -> None:
        """Commit every registered entity and unregister the deleted ones.

        Each entity's ``commit_changes`` is called first; entities whose
        ``to_be_deleted`` flag is still set afterwards are removed from
        ``res_to_entity``.
        """
        # Snapshot the items: deleting from a dict while iterating its live
        # view raises "RuntimeError: dictionary changed size during iteration".
        for res, entity in list(self.res_to_entity.items()):
            entity.commit_changes()
            if entity.to_be_deleted:
                del self.res_to_entity[res]

    def get_an(self) -> tuple[ReferenceAnnotation, ...]:
        """Return every registered ``ReferenceAnnotation``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ReferenceAnnotation))

    def get_ar(self) -> tuple[AgentRole, ...]:
        """Return every registered ``AgentRole``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, AgentRole))

    def get_be(self) -> tuple[BibliographicReference, ...]:
        """Return every registered ``BibliographicReference``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, BibliographicReference))

    def get_br(self) -> tuple[BibliographicResource, ...]:
        """Return every registered ``BibliographicResource``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, BibliographicResource))

    def get_ci(self) -> tuple[Citation, ...]:
        """Return every registered ``Citation``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Citation))

    def get_de(self) -> tuple[DiscourseElement, ...]:
        """Return every registered ``DiscourseElement``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, DiscourseElement))

    def get_id(self) -> tuple[Identifier, ...]:
        """Return every registered ``Identifier``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, Identifier))

    def get_pl(self) -> tuple[PointerList, ...]:
        """Return every registered ``PointerList``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, PointerList))

    def get_rp(self) -> tuple[ReferencePointer, ...]:
        """Return every registered ``ReferencePointer``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ReferencePointer))

    def get_ra(self) -> tuple[ResponsibleAgent, ...]:
        """Return every registered ``ResponsibleAgent``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ResponsibleAgent))

    def get_re(self) -> tuple[ResourceEmbodiment, ...]:
        """Return every registered ``ResourceEmbodiment``."""
        return tuple(entity for entity in self.res_to_entity.values() if isinstance(entity, ResourceEmbodiment))