Coverage for lode / reader / reader.py: 17%

95 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-03-25 15:05 +0000

1# reader.py - ORCHESTRATOR GENERICO 

2from lode.reader.loader import Loader 

3from lode.reader.config_manager import get_configuration 

4from lode.models import * 

5 

class Reader:
    """
    Generic RDF Reader/Orchestrator.

    Responsibilities:
    - Calls the loader to parse input RDF (via rdflib)
    - Orchestrates Python model population phases
    - Delegates specific logics for extraction and population to other modules
    """

    # Container types a cache entry may use to hold several instances.
    # Anything else stored in the cache is treated as a single instance.
    _COLLECTION_TYPES = (set, frozenset, list, tuple)

    def __init__(self):
        # Maps an identifier (or a "LITERAL::"-prefixed string key) to the
        # instance(s) created for it.
        self._instance_cache = {}
        self._logic = None  # Specialised logic (OWL, SKOS, RDF, RDFS)
        self._graph = None
        self._configuration = None

    def load_instances(self, graph_path: str, read_as: str, imported=None, closure=None):
        """Load an RDF graph and run the extraction phases on it.

        Args:
            graph_path: Location of the RDF input, handed to ``Loader``.
            read_as: Name of the configuration to use, handed to
                ``get_configuration``.
            imported: Forwarded unchanged to ``Loader``.
            closure: Forwarded unchanged to ``Loader``.
        """
        # 1. Generic parse
        loader = Loader(graph_path, imported=imported, closure=closure)
        self._graph = loader.get_graph()

        # 2. Select the strategy
        self._configuration = get_configuration(read_as)

        # 3. Create the specialised logic; it shares this reader's cache
        self._logic = self._configuration.create_logic(self._graph, self._instance_cache)

        # 4. Run the phases
        self._extract_instances()

    @classmethod
    def _as_collection(cls, entry):
        """Return ``entry`` if it is a known container, else wrap it in a list."""
        if isinstance(entry, cls._COLLECTION_TYPES):
            return entry
        return [entry]

    def get_instance(self, uri: str, instance_type=None):
        """Look up the instance(s) cached for a URI.

        Cache keys are compared through ``str(key)``, so the lookup works
        whether the key is a plain string or another identifier object.

        Args:
            uri: URI to look for, as a string.
            instance_type: Optional class used to pick a single instance.

        Returns:
            ``None`` when the URI is not cached. Without ``instance_type``,
            the raw cache entry. With ``instance_type``, the first cached
            instance that is an instance of it, or ``None`` if none matches.
        """
        uri_identifier = None

        for identifier in self._instance_cache:
            if str(identifier) == uri:
                uri_identifier = identifier
                break

        if uri_identifier is None:
            return None

        instances = self._instance_cache[uri_identifier]

        if instance_type is None:
            return instances

        # A cache entry may be a single instance rather than a collection;
        # normalise it so iteration never fails (or walks a str char by char).
        for instance in self._as_collection(instances):
            if isinstance(instance, instance_type):
                return instance

        return None

    def get_instances(self) -> dict:
        """Group every cached instance by its class name.

        Entries whose key is a string starting with ``"LITERAL::"`` are
        skipped.

        Returns:
            A dict mapping class name to the list of instances of that class.
        """
        grouped = {}

        for uri_identifier, instances in self._instance_cache.items():
            if isinstance(uri_identifier, str) and uri_identifier.startswith("LITERAL::"):
                continue

            for instance in self._as_collection(instances):
                grouped.setdefault(instance.__class__.__name__, []).append(instance)

        return grouped

    def get_triples_for_instance(self, instance):
        """Return the RDF triples associated with a Python instance.

        Returns an empty set when no logic is loaded, the logic exposes no
        ``_triples_map``, or the instance has no recorded triples.
        """
        return self.get_all_triples_map().get(instance, set())

    def get_all_triples_map(self) -> dict:
        """Return the full instance -> triples map held by the logic.

        Returns an empty dict when no logic is loaded or the logic exposes no
        ``_triples_map``. Otherwise the logic's own map object is returned
        (not a copy).
        """
        if self._logic and hasattr(self._logic, '_triples_map'):
            return self._logic._triples_map
        return {}

    # function reused by the api to push instances
    def to_dict(self, instance) -> dict:
        """Serialise a Python instance to a plain dict, including its RDF triples.

        Args:
            instance: Object to serialise. Its ``has_identifier`` attribute,
                when present and truthy, becomes the ``"uri"`` value.

        Returns:
            A dict with the keys ``"instance"``, ``"type"``, ``"uri"``,
            ``"properties"`` (attribute name -> ``str(value)``) and
            ``"rdf_triples"`` (list of subject/predicate/object dicts).
        """
        # Tolerate objects without the attribute instead of raising.
        identifier = getattr(instance, "has_identifier", None)

        result = {
            "instance": str(instance),
            "type": type(instance).__name__,
            "uri": str(identifier) if identifier else None,
            "properties": {},
            "rdf_triples": [],
        }

        # Serialise the instance attributes
        for attr_name in instance.__dict__:
            value = getattr(instance, attr_name)
            # Drop None and empty lists/sets from the properties
            if value is None:
                continue
            if isinstance(value, (list, set)) and not value:
                continue
            result["properties"][attr_name] = str(value)

        # Add the associated RDF triples
        for s, p, o in self.get_triples_for_instance(instance):
            result["rdf_triples"].append({
                "subject": str(s),
                "predicate": str(p),
                "object": str(o),
            })

        return result

    def _serialize_value(self, value):
        """Convert a value to a JSON-friendly form.

        Primitives are returned unchanged, lists/sets are converted element by
        element, objects with a ``uri`` attribute become ``str(value.uri)``,
        and anything else becomes ``str(value)``.
        """
        if isinstance(value, (str, int, float, bool)):
            return value
        if isinstance(value, (list, set)):
            return [self._serialize_value(v) for v in value]
        if hasattr(value, 'uri'):
            return str(value.uri)
        return str(value)

    def get_viewer(self):
        """Return the viewer created by the current configuration.

        Raises:
            ValueError: If no configuration has been loaded yet.
        """
        if not self._configuration:
            raise ValueError("No configuration loaded. Call load_instances() first.")

        return self._configuration.create_viewer(self)

    def clear_cache(self):
        """Empty the instance cache and, if a logic is loaded, its cache too."""
        self._instance_cache.clear()
        if self._logic:
            self._logic.clear_cache()

    # ==================== EXTRACTION (ORCHESTRATION) ====================

    def _extract_instances(self):
        """Run the extraction phases, in order, on the loaded logic."""
        # PHASE 0: pre-create datatypes (common to all formats) -- disabled
        # self._phase0_create_datatypes()

        # PHASES 1-4: delegated to the specific logic
        self._logic.phase1_classify_from_predicates()
        self._logic.phase2_create_from_types()
        self._logic.phase3_populate_properties()
        self._logic.phase4_process_group_axioms()

        # PHASE 5: fallback (common)
        self._logic.phase5_fallback()

        # PHASE 6: statements (RDF only)
        self._logic.phase6_create_statements()

    # Disabled phase 0, kept for reference.
    # def _phase0_create_datatypes(self):
    #     """Pre-create all Datatype instances (common to all formats)"""
    #     print("\n--- FASE 0: Datatypes ---")
    #
    #     created = 0
    #
    #     # 1. URI XSD
    #     for s, p, o in self._graph:
    #         if isinstance(s, URIRef) and str(s).startswith(str(XSD)):
    #             if s not in self._instance_cache:
    #                 self._logic.create_empty_instance(s, Datatype)
    #                 created += 1
    #
    #         if isinstance(o, URIRef) and str(o).startswith(str(XSD)):
    #             if o not in self._instance_cache:
    #                 self._logic.create_empty_instance(o, Datatype)
    #                 created += 1
    #
    #     # 2. rdfs:Literal
    #     if RDFS.Literal not in self._instance_cache:
    #         self._logic.create_empty_instance(RDFS.Literal, Datatype)
    #         created += 1
    #
    #     # 3. BNode Datatypes
    #     for bnode in self._graph.subjects(RDF.type, RDFS.Datatype):
    #         if isinstance(bnode, BNode) and bnode not in self._instance_cache:
    #             self._logic.create_empty_instance(bnode, Datatype)
    #             created += 1
    #
    #     print(f" Creati {created} datatypes")

    def _phase5_fallback(self):
        """Create fallback instances for graph subjects not yet in the cache.

        Does nothing (beyond printing a notice) when the configuration
        provides no fallback class.

        NOTE(review): ``_extract_instances`` calls ``self._logic.phase5_fallback()``
        rather than this method; confirm whether this one is still needed.
        """
        fallback_class = self._configuration.get_fallback_class()
        if not fallback_class:
            print(" Nessun fallback configurato")
            return

        # De-duplicate subjects so each uncached resource is handled once
        for subj in set(self._graph.subjects()):
            if subj not in self._instance_cache:
                self._logic.get_or_create(subj, fallback_class)

218 

219 # def get_ontology_metadata(self) -> Model: 

220 # """ 

221 # Extracts metadata and returns a populated Model object. 

222 # """ 

223 # if self._graph is None: 

224 # return Model() 

225 

226 # # 1. Create the Model instance 

227 # ontology_model = Model() 

228 

229 # # 2. Find the owl:Ontology node 

230 # ontology_node = self._graph.value(predicate=RDF.type, object=OWL.Ontology) 

231 

232 # if ontology_node: 

233 # # --- IDENTIFIER (IRI) --- 

234 # ontology_model.set_has_identifier(str(ontology_node)) 

235 

236 # # --- TITLE (Label) --- 

237 # title = ( 

238 # self._graph.value(ontology_node, DCTERMS.title) or 

239 # self._graph.value(ontology_node, DC.title) or 

240 # self._graph.value(ontology_node, RDFS.label) 

241 # ) 

242 # if title: 

243 # ontology_model.set_has_label(str(title)) 

244 

245 # # --- DESCRIPTION (Comment) --- 

246 # comment = ( 

247 # self._graph.value(ontology_node, DCTERMS.description) or 

248 # self._graph.value(ontology_node, RDFS.comment) 

249 # ) 

250 # if comment: 

251 # ontology_model.set_has_comment(str(comment)) 

252 

253 # # --- VERSION INFO --- 

254 # version_info = self._graph.value(ontology_node, OWL.versionInfo) 

255 # if version_info: 

256 # ontology_model.set_has_version_info(str(version_info)) 

257 

258 # # --- VERSION IRI --- 

259 # version_iri = self._graph.value(ontology_node, OWL.versionIRI) 

260 # if version_iri: 

261 # v_model = Model() 

262 # v_model.set_has_identifier(str(version_iri)) 

263 # ontology_model.set_has_version(v_model) 

264 

265 # # --- IMPORTS --- 

266 # for imported_iri in self._graph.objects(ontology_node, OWL.imports): 

267 # imported_model = Model() 

268 # imported_model.set_has_identifier(str(imported_iri)) 

269 # ontology_model.set_imports(imported_model) 

270 

271 # # --- CREATORS (Custom handling) --- 

272 # for pred in [DCTERMS.creator, DC.creator]: 

273 # for creator in self._graph.objects(ontology_node, pred): 

274 # ontology_model.set_has_creator(creator) 

275 

276 

277 # # --- CONTRIBUTORS (Custom handling) --- 

278 # for pred in [DCTERMS.contributor, DC.contributor]: 

279 # for contributor in self._graph.objects(ontology_node, pred): 

280 # ontology_model.set_has_contributor(contributor) 

281 

282 # return ontology_model