Coverage for lode / reader / logic / base_logic.py: 64%

298 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-03-25 15:05 +0000

1# base_logic.py 

2from abc import ABC, abstractmethod 

3from rdflib import Graph, URIRef, Node, Literal as RDFlibLiteral, BNode 

4from rdflib.namespace import RDF, RDFS, OWL, SKOS, XSD 

5from rdflib.collection import Collection as RDFLibCollection 

6 

7from lode.models import * 

8 

9# ========== ALLOWED CLASSES PER FORMATO ========== 

10 

11# ALLOWED_CLASSES = { 

12# 'RDF': { 

13# Statement, Property, Container, Datatype, Literal, Resource, Concept 

14# }, 

15# 'OWL': { 

16# Statement, Literal, Relation, Container, 

17# Concept, Attribute, Datatype, 

18# Individual, Model, Annotation, 

19# TruthFunction, Value, OneOf, Quantifier, Cardinality, PropertyConceptRestriction, 

20# Collection, Restriction, Resource 

21# }, 

22# 'SKOS': { 

23# Collection, Literal, Resource, 

24# Concept, Model, Datatype 

25# } 

26# } 

27 

28 

29class BaseLogic(ABC): 

30 """ 

31 Logica base comune per parsing RDF. 

32 """ 

33 

def __init__(self, graph: Graph, instance_cache: dict, strategy):
    """
    Bind the logic to its graph, its instance cache and its format strategy.

    Parameters
    ----------
    graph          : graph whose triples the phase methods iterate over.
    instance_cache : dict used as the node -> instances cache; it is stored
                     by reference, not copied.
    strategy       : object providing `config`, `CLASSES` and
                     `get_property_mapping()`.

    Raises
    ------
    AttributeError : propagated from `_validate_handlers` when the config
                     names a handler this instance does not define.
    """
    # Collaborators handed in by the caller.
    self.graph = graph
    self._instance_cache = instance_cache
    self._strategy = strategy

    # State derived from the strategy and its config.
    self._property_mapping = strategy.get_property_mapping()
    self._allowed_classes = self._get_allowed_classes()
    # instance -> set of (subject, predicate, object) triples already consumed
    self._triples_map = {}
    # Namespaces come from the config key 'namespaces'.
    self._allowed_namespaces = self._get_allowed_namespaces()

    # Last step: every handler declared in the config must exist on self.
    self._validate_handlers()

45 

46 # ========== METODI ASTRATTI ========== 

47 

48 def _get_allowed_classes(self) -> set: 

49 class_names = self._strategy.config.get('allowed_classes', []) 

50 return {self._strategy.CLASSES[name] for name in class_names if name in self._strategy.CLASSES} 

51 

52 def _get_allowed_namespaces(self) -> set: 

53 """ 

54 Reads namespaces from config YAML key 'namespaces'. 

55 Subclasses do NOT need to override this anymore. 

56 """ 

57 return set(self._strategy.config.get('namespaces', [])) 

58 

@abstractmethod
def phase1_classify_from_predicates(self):
    """
    Phase 1 hook; concrete logics must implement it.

    NOTE(review): going by the name, this classifies resources from the
    predicates they appear with - confirm against the subclasses.
    """

62 

@abstractmethod
def phase2_create_from_types(self):
    """
    Phase 2 hook; concrete logics must implement it.

    NOTE(review): going by the name, this creates instances from the
    resources' declared types - confirm against the subclasses.
    """

66 

@abstractmethod
def phase3_populate_properties(self):
    """
    Phase 3 hook; concrete logics must implement it.

    NOTE(review): going by the name, this fills in the properties of the
    instances created earlier - confirm against the subclasses.
    """

70 

@abstractmethod
def phase4_process_group_axioms(self):
    """
    Phase 4 hook; concrete logics must implement it.

    NOTE(review): going by the name, this handles axioms that span a group
    of resources - confirm against the subclasses.
    """

74 

@abstractmethod
def phase5_fallback(self):
    """
    Phase 5 hook; concrete logics must implement it.

    NOTE(review): going by the name, this is the catch-all for resources
    the earlier phases did not handle - confirm against the subclasses.
    """

78 

def phase6_create_statements(self):
    """
    Create a Statement for every graph triple that is not yet mapped.

    Triples whose predicate is rdf:first, rdf:rest, rdf:nil,
    owl:distinctMembers or owl:members are skipped, and so are triples that
    `_is_triple_mapped` reports as already recorded.
    """
    skipped_predicates = (
        RDF.first, RDF.rest, RDF.nil, OWL.distinctMembers, OWL.members,
    )
    for subj, pred, obj in self.graph:
        if pred in skipped_predicates:
            continue
        if not self._is_triple_mapped(subj, pred, obj):
            self._create_statement_for_triple(subj, pred, obj)

86 

87 # ========== VALIDAZIONE CONFIG -> LOGIC ========== 

88 

89 def _validate_handlers(self): 

90 """ 

91 Fail-fast: verifica che tutti gli handler dichiarati nel config 

92 esistano come metodi su questa istanza Logic. 

93 Solleva AttributeError subito, prima di qualsiasi parsing. 

94 """ 

95 errors = [] 

96 

97 for uri_str, cfg in self._strategy.config.get('mapper', {}).items(): 

98 handler_name = cfg.get('handler') 

99 if handler_name and not hasattr(self, handler_name): 

100 errors.append( 

101 f"mapper['{uri_str}'].handler='{handler_name}' " 

102 f"not found on {type(self).__name__}" 

103 ) 

104 

105 for uri_str, handler_name in self._strategy.config.get('enricher', {}).items(): 

106 if isinstance(handler_name, str) and not hasattr(self, handler_name): 

107 errors.append( 

108 f"enricher['{uri_str}']='{handler_name}' " 

109 f"not found on {type(self).__name__}" 

110 ) 

111 

112 if errors: 

113 raise AttributeError( 

114 f"Config/Logic contract violations in {type(self).__name__}:\n" 

115 + "\n".join(f" - {e}" for e in errors) 

116 ) 

117 

118 # ========== RISOLUZIONE CLASSI AMMESSE ========== 

119 

def _resolve_allowed_class(self, python_class: type, id: Node = None) -> type:
    """
    Map a requested class onto one the current format allows.

    Resolution order:
      1. `python_class` itself, when it is in `_allowed_classes`;
      2. whatever `_pre_resolve_hook(python_class, id)` returns, if truthy;
      3. the first ancestor in `python_class.__mro__` that is allowed;
      4. `Resource`, after printing a warning.

    Parameters
    ----------
    python_class : class the caller would like to instantiate.
    id           : node being resolved; only forwarded to the hook.
    """
    if python_class in self._allowed_classes:
        return python_class

    # Give subclasses a chance to decide before the MRO walk.
    hooked = self._pre_resolve_hook(python_class, id)
    if hooked:
        return hooked

    ancestor = next(
        (base for base in python_class.__mro__[1:] if base in self._allowed_classes),
        None,
    )
    if ancestor is not None:
        return ancestor

    print(f" [WARN] {python_class.__name__} -> Resource (fallback finale)")
    return Resource

143 

def _pre_resolve_hook(self, python_class: type, id: Node) -> type | None:
    """
    Optional extension point consulted by `_resolve_allowed_class`.

    It is called after the direct membership check and before the MRO walk.
    Subclasses may override it to return a class of their choosing; the
    base implementation returns None, which hands control back to the
    default resolution.
    """
    return None

151 

152 # ========== UTILITIES ========== 

153 

154 def _traverse_hierarchy( 

155 self, 

156 start: object, 

157 next_getter: str, 

158 direction: str = "up", # "up" | "down" | "both" 

159 collect: callable = None, # (node) -> value | None — ferma e ritorna quando non None 

160 visit_all: callable = None, # (node) -> None — visita ogni nodo senza fermarsi 

161 ) -> object | None: 

162 """ 

163 Generic BFS traversal along a property hierarchy. 

164 

165 Parameters 

166 ---------- 

167 start : starting node (Python model instance) 

168 next_getter : name of the getter that returns the next nodes in the 

169 'up' direction (e.g. 'get_is_sub_property_of'). 

170 For 'down' the cache is scanned for instances that list 

171 `start` as one of their `next_getter` targets. 

172 direction : 'up' - follow next_getter only 

173 'down' - scan cache for reverse links only 

174 'both' - up first, then down 

175 collect : callable(node) -> value | None 

176 Called on every visited node (including start). 

177 When it returns a non-None value the traversal stops 

178 immediately and that value is returned. 

179 visit_all : callable(node) -> None 

180 Called on every visited node; traversal never stops early. 

181 Mutually exclusive with collect (collect takes priority). 

182 

183 Returns the first non-None value from collect, or None if visit_all is used. 

184 """ 

185 visited = set() 

186 queue = [start] 

187 result = None 

188 

189 while queue: 

190 current = queue.pop(0) 

191 if id(current) in visited: 

192 continue 

193 visited.add(id(current)) 

194 

195 # --- collect / visit --- 

196 if collect: 

197 value = collect(current) 

198 if value is not None: 

199 return value 

200 elif visit_all: 

201 visit_all(current) 

202 

203 # --- enqueue next nodes --- 

204 if direction in ("up", "both"): 

205 getter = getattr(current, next_getter, None) 

206 if getter: 

207 nexts = getter() 

208 if nexts: 

209 if not isinstance(nexts, list): 

210 nexts = [nexts] 

211 queue.extend(nexts) 

212 

213 if direction in ("down", "both"): 

214 for instances_set in self._instance_cache.values(): 

215 for inst in instances_set: 

216 if inst is current or id(inst) in visited: 

217 continue 

218 getter = getattr(inst, next_getter, None) 

219 if getter: 

220 parents = getter() or [] 

221 if not isinstance(parents, list): 

222 parents = [parents] 

223 if any(p is current for p in parents): 

224 queue.append(inst) 

225 

226 return None 

227 

228 # def is_in_range_or_domain_of_property(self, property_getter, property_getter_inverse, property_instance): 

229 

230 # if isinstance(object, (Concept, Individual, Datatype)): 

231 # obj_inst = property_instance.property_getter() 

232 # obj_inst.property_getter_inverse() 

233 

234 

def _create_literal(self, rdflib_literal):
    """
    Return the cached Literal model for an rdflib literal, creating it once.

    The cache key is built from the lexical value plus, when present, the
    language tag and the datatype. Keying on the lexical value alone made
    literals that differ only by language or datatype (e.g. "x"@en and
    "x"@it) share one model, so the second one came back with the first
    one's language/datatype. Plain literals keep the key
    `LITERAL::<value>` they had before.

    Parameters
    ----------
    rdflib_literal : rdflib literal; its `language` and `datatype`
                     attributes are read.

    Returns
    -------
    The Literal model instance stored in the instance cache under the key.
    """
    language = rdflib_literal.language
    datatype = rdflib_literal.datatype

    literal_key = f"LITERAL::{rdflib_literal}"
    if language:
        literal_key += f"@{language}"
    if datatype:
        literal_key += f"^^{datatype}"

    cached = self._instance_cache.get(literal_key)
    if cached:
        return next(iter(cached))

    literal = Literal()
    literal.set_has_value(str(rdflib_literal))

    if language:
        literal.set_has_language(language)

    if datatype:
        # get_or_create may return None; in that case no type is set.
        dt = self.get_or_create(datatype, Datatype)
        if dt:
            literal.set_has_type(dt)

    self._instance_cache[literal_key] = {literal}
    return literal

253 

def _is_rdf_collection(self, node: Node) -> bool:
    """Return True when the graph holds at least one (node, rdf:first, *) triple."""
    first_pattern = (node, RDF.first, None)
    return first_pattern in self.graph

256 

257 def _instance_matches_target(self, instance, target_classes: list) -> bool: 

258 instance_class = instance.__class__ 

259 if instance_class in target_classes: 

260 return True 

261 for parent_class in instance_class.__mro__[1:]: 

262 if parent_class in target_classes: 

263 return True 

264 return False 

265 

266 def _apply_setters(self, instance, setters_config, obj): 

267 for setter_item in setters_config: 

268 if isinstance(setter_item, dict): 

269 for setter_name, value_type in setter_item.items(): 

270 if not hasattr(instance, setter_name): 

271 continue 

272 setter = getattr(instance, setter_name) 

273 if value_type == 'Literal': 

274 try: 

275 setter(self._create_literal(obj)) 

276 except: 

277 continue 

278 elif isinstance(value_type, bool): 

279 setter(value_type) 

280 elif isinstance(value_type, str): 

281 setter(value_type) 

282 elif isinstance(value_type, type): 

283 obj_instance = self.get_or_create(obj, value_type) 

284 if obj_instance: 

285 setter(obj_instance) 

286 else: 

287 setter(obj) 

288 else: 

289 if hasattr(instance, setter_item): 

290 getattr(instance, setter_item)() 

291 

def _handle_collection_object(self, instance, predicate, collection_uri):
    """
    Resolve the members of an RDF collection and hand them to a setter.

    Literal members become Literal models via `_create_literal`; every other
    member is resolved with `get_or_create(member, Resource)` and dropped
    when that returns a falsy value. The resulting list is then passed to
    the setters configured for `predicate` in the property mapping.

    Any exception is caught and printed; nothing is returned.

    NOTE(review): the listing this was recovered from had lost its
    indentation. The `break` is assumed to sit inside the `hasattr` branch,
    i.e. per setter dict only the first setter found on `instance` is
    called - confirm against the real file.
    """
    try:
        items = []
        for member in RDFLibCollection(self.graph, collection_uri):
            if isinstance(member, RDFlibLiteral):
                items.append(self._create_literal(member))
                continue
            resolved = self.get_or_create(member, Resource)
            if resolved:
                items.append(resolved)

        mapping_entry = self._property_mapping.get(predicate, {})
        for setter_item in mapping_entry.get('setters', []):
            if not isinstance(setter_item, dict):
                continue
            for setter_name in setter_item:
                if hasattr(instance, setter_name):
                    getattr(instance, setter_name)(items)
                    break
    except Exception as e:
        print(f"Errore Collection: {e}")

313 

def clear_cache(self):
    """Empty the instance cache in place (the dict object itself is kept)."""
    cache = self._instance_cache
    cache.clear()

316 

317 # ========== LOGIC CORE ========== 

318 

def get_or_create(self, id: Node, python_class: type = None, populate: bool = True):
    """
    Return the model instance for a graph node, creating it when needed.

    Steps, in order:
      1. rdflib literals are delegated to `_create_literal`.
      2. URIs in the XSD namespace are forced to `Datatype`.
      3. The requested class is narrowed with `_resolve_allowed_class`.
      4. URIs that start with one of the allowed namespaces yield None,
         except owl:Thing, owl:Nothing and rdfs:Literal.
      5. When an Individual is requested for a cached node that is not
         typed owl:NamedIndividual, an existing non-Individual instance is
         returned instead.
      6. Cached blank nodes return one of their cached instances; cached
         URIs return the first cached instance of `python_class`.
      7. Otherwise a new instance is created, cached, given its identifier
         and - when `populate` is true - populated.

    Parameters
    ----------
    id           : graph node (URI, blank node or literal).
    python_class : desired model class.
    populate     : whether to call `populate_instance` on a new instance.

    Returns
    -------
    The instance, or None when the node is filtered out or any exception
    occurs (the exception is printed, not raised).
    """
    try:
        if isinstance(id, RDFlibLiteral):
            return self._create_literal(id)

        is_uri = isinstance(id, URIRef)

        if is_uri and str(id).startswith(str(XSD)):
            python_class = Datatype

        if python_class:
            python_class = self._resolve_allowed_class(python_class, id)

        if is_uri:
            uri_str = str(id)
            in_allowed_namespace = any(
                uri_str.startswith(ns) for ns in self._allowed_namespaces
            )
            if in_allowed_namespace and id not in (OWL.Thing, OWL.Nothing, RDFS.Literal):
                return None

        # Individual punning: do not shadow an existing non-Individual type.
        if python_class == Individual and id in self._instance_cache:
            declared_individual = (id, RDF.type, OWL.NamedIndividual) in self.graph
            if not declared_individual:
                for existing in self._instance_cache[id]:
                    if not isinstance(existing, Individual):
                        return existing

        if id in self._instance_cache:
            if isinstance(id, BNode):
                return next(iter(self._instance_cache[id]))
            if is_uri:
                for candidate in self._instance_cache[id]:
                    if isinstance(candidate, python_class):
                        return candidate

        instance = python_class()
        self._instance_cache.setdefault(id, set()).add(instance)
        instance.set_has_identifier(str(id))

        if populate:
            self.populate_instance(instance, id)

        return instance

    except Exception as e:
        print(f"Cannot create {python_class.__name__ if python_class else 'Unknown'} for {id}: {e}")
        return None

367 

def populate_instance(self, instance, uri: Node):
    """
    Fill `instance` from the triples that have `uri` as their subject.

    The identifier is set first (through the setter for URIs, by direct
    attribute assignment for blank nodes). Then, for every
    (predicate, object) pair of `uri`:
      * predicates whose namespace is not in `_allowed_namespaces` are
        skipped;
      * predicates without an entry in the property mapping are skipped;
      * entries with `target_classes` are skipped unless the instance
        matches one of them;
      * a configured `handler` is called as
        handler(instance, uri, predicate, obj, None);
      * configured `setters` are applied through `_apply_setters`;
      * an object that is an RDF collection is passed to
        `_handle_collection_object`.
    Each step that succeeds records the triple in `_triples_map[instance]`.

    NOTE(review): the listing this was recovered from had lost its
    indentation. Two placements are assumed and must be confirmed against
    the real file: (1) each `continue` after a failed handler / setters
    call belongs to its `except` branch, so a successful step falls through
    to the next one; (2) the collection check sits inside the
    "predicate is mapped" branch.
    """
    if isinstance(uri, URIRef):
        instance.set_has_identifier(str(uri))
    elif isinstance(uri, BNode):
        instance.has_identifier = str(uri)

    self._triples_map.setdefault(instance, set())

    for predicate, obj in self.graph.predicate_objects(uri):
        predicate_str = str(predicate)
        # Namespace = everything up to and including the last '#', or the
        # last '/' when the URI contains no '#'.
        separator = '#' if '#' in predicate_str else '/'
        predicate_namespace = predicate_str.rsplit(separator, 1)[0] + separator

        if predicate_namespace not in self._allowed_namespaces:
            continue
        if predicate not in self._property_mapping:
            continue

        config = self._property_mapping[predicate]
        triple = (uri, predicate, obj)

        target_classes = config.get('target_classes', [])
        if target_classes and not self._instance_matches_target(instance, target_classes):
            continue

        if 'handler' in config:
            handler_name = config['handler']
            # Existence is already guaranteed by _validate_handlers.
            handler = getattr(self, handler_name)
            try:
                handler(instance, uri, predicate, obj, None)
                self._triples_map[instance].add(triple)
            except Exception as e:
                print(f" Errore handler {handler_name}: {e}")
                continue

        if 'setters' in config:
            try:
                self._apply_setters(instance, config['setters'], obj)
                self._triples_map[instance].add(triple)
            except Exception as e:
                print(f" Errore setters: {e}")
                continue

        if self._is_rdf_collection(obj):
            self._handle_collection_object(instance, predicate, obj)
            self._triples_map[instance].add(triple)

417 

418 # ========== HELPERS ========== 

419 

420 def _is_triple_mapped(self, subj, pred, obj) -> bool: 

421 if subj not in self._instance_cache: 

422 return False 

423 instances = self._instance_cache[subj] 

424 instances_list = instances if isinstance(instances, set) else [instances] 

425 for instance in instances_list: 

426 if instance in self._triples_map: 

427 if (subj, pred, obj) in self._triples_map[instance]: 

428 return True 

429 return False 

430 

def _convert_collection_to_container(self, collection_uri):
    """
    Return a Container model for the RDF collection rooted at `collection_uri`.

    A Container already cached for that node is reused. Otherwise a new one
    is created, identified by the node's string form, registered in the
    instance cache and then filled: literal members go through
    `_create_literal`, the others through `get_or_create(member, Resource)`
    (falsy results are dropped).

    Errors while reading the collection are printed; the container is
    returned regardless, possibly without members.
    """
    for cached in self._instance_cache.get(collection_uri, ()):
        if isinstance(cached, Container):
            return cached

    container = Container()
    container.set_has_identifier(str(collection_uri))
    # Register before resolving members, as the original does.
    self._instance_cache.setdefault(collection_uri, set()).add(container)

    try:
        members = []
        for member in RDFLibCollection(self.graph, collection_uri):
            if isinstance(member, RDFlibLiteral):
                members.append(self._create_literal(member))
                continue
            resolved = self.get_or_create(member, Resource)
            if resolved:
                members.append(resolved)
        container.set_has_members(members)
    except Exception as e:
        print(f"Errore Collection: {e}")

    return container

459 

def _create_statement_for_triple(self, subj, pred, obj):
    """
    Build a Statement model for one triple and register it.

    The statement is identified by a fresh blank node and records the
    triple in `_triples_map`. Subject and predicate are resolved with
    `get_or_create` (as Resource and Property). The object becomes a
    Container when it is an RDF collection, a Literal model when it is an
    rdflib literal, and a Resource otherwise. Parts that resolve to a falsy
    value are left unset. Finally the statement is added to the instance
    cache under its blank node.
    """
    statement = Statement()
    stmt_bnode = BNode()
    statement.set_has_identifier(str(stmt_bnode))

    self._triples_map.setdefault(statement, set()).add((subj, pred, obj))

    subject_model = self.get_or_create(subj, Resource)
    if subject_model:
        statement.set_has_subject(subject_model)

    predicate_model = self.get_or_create(pred, Property)
    if predicate_model:
        statement.set_has_predicate(predicate_model)

    if self._is_rdf_collection(obj):
        object_model = self._convert_collection_to_container(obj)
    elif isinstance(obj, RDFlibLiteral):
        object_model = self._create_literal(obj)
    else:
        object_model = self.get_or_create(obj, Resource)

    if object_model:
        statement.set_has_object(object_model)

    self._instance_cache.setdefault(stmt_bnode, set()).add(statement)