Coverage for lode / reader / logic / base_logic.py: 64%
298 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-03-25 15:05 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-03-25 15:05 +0000
1# base_logic.py
2from abc import ABC, abstractmethod
3from rdflib import Graph, URIRef, Node, Literal as RDFlibLiteral, BNode
4from rdflib.namespace import RDF, RDFS, OWL, SKOS, XSD
5from rdflib.collection import Collection as RDFLibCollection
7from lode.models import *
9# ========== ALLOWED CLASSES PER FORMATO ==========
11# ALLOWED_CLASSES = {
12# 'RDF': {
13# Statement, Property, Container, Datatype, Literal, Resource, Concept
14# },
15# 'OWL': {
16# Statement, Literal, Relation, Container,
17# Concept, Attribute, Datatype,
18# Individual, Model, Annotation,
19# TruthFunction, Value, OneOf, Quantifier, Cardinality, PropertyConceptRestriction,
20# Collection, Restriction, Resource
21# },
22# 'SKOS': {
23# Collection, Literal, Resource,
24# Concept, Model, Datatype
25# }
26# }
29class BaseLogic(ABC):
30 """
31 Logica base comune per parsing RDF.
32 """
def __init__(self, graph: Graph, instance_cache: dict, strategy):
    """
    Store the collaborators and derive the per-format settings from the strategy.

    Parameters
    ----------
    graph : the rdflib graph this logic reads triples from.
    instance_cache : dict used as the instance cache; it is stored by
        reference (not copied), so the caller sees every mutation.
    strategy : object exposing ``get_property_mapping()``, ``config`` and
        ``CLASSES`` (all three are read by this class).

    Raises
    ------
    AttributeError
        Raised by ``_validate_handlers`` when the config names a handler
        that does not exist on this instance.
    """
    self.graph = graph
    self._instance_cache = instance_cache
    self._strategy = strategy
    self._property_mapping = strategy.get_property_mapping()
    # The helpers below read self._strategy, so it must already be assigned.
    self._allowed_classes = self._get_allowed_classes()
    # instance -> set of (subject, predicate, object) triples already consumed for it
    self._triples_map = {}
    # Namespaces now driven by config YAML (key: 'namespaces')
    self._allowed_namespaces = self._get_allowed_namespaces()
    # Validate all handlers declared in config exist on this instance
    self._validate_handlers()
46 # ========== METODI ASTRATTI ==========
def _get_allowed_classes(self) -> set:
    """
    Build the set of model classes enabled by the config key 'allowed_classes'.

    Each configured name is looked up in ``self._strategy.CLASSES``; names
    that are not registered there are skipped.

    Returns
    -------
    set
        The resolved classes; empty when the key is missing or empty.
    """
    # `or ()` also covers a key that is present but has no value
    # (a bare `allowed_classes:` in YAML loads as None, which is not iterable).
    class_names = self._strategy.config.get('allowed_classes') or ()
    return {
        self._strategy.CLASSES[name]
        for name in class_names
        if name in self._strategy.CLASSES
    }
def _get_allowed_namespaces(self) -> set:
    """
    Read the namespaces from the config key 'namespaces'.

    Subclasses do not need to override this anymore.

    Returns
    -------
    set
        The configured namespace strings; empty when the key is missing
        or empty.
    """
    # `or ()` also covers a key that is present but has no value
    # (a bare `namespaces:` in YAML loads as None, and set(None) raises).
    return set(self._strategy.config.get('namespaces') or ())
@abstractmethod
def phase1_classify_from_predicates(self):
    """
    Phase 1 hook. The base class defines no behavior; concrete subclasses
    must implement it.

    NOTE(review): judging by the name, this presumably classifies resources
    from the predicates they are used with - confirm against the subclasses.
    """
@abstractmethod
def phase2_create_from_types(self):
    """
    Phase 2 hook. The base class defines no behavior; concrete subclasses
    must implement it.

    NOTE(review): judging by the name, this presumably creates instances
    from declared types - confirm against the subclasses.
    """
@abstractmethod
def phase3_populate_properties(self):
    """
    Phase 3 hook. The base class defines no behavior; concrete subclasses
    must implement it.

    NOTE(review): judging by the name, this presumably fills in the
    properties of already created instances - confirm against the subclasses.
    """
@abstractmethod
def phase4_process_group_axioms(self):
    """
    Phase 4 hook. The base class defines no behavior; concrete subclasses
    must implement it.

    NOTE(review): judging by the name, this presumably handles axioms that
    span a group of resources - confirm against the subclasses.
    """
@abstractmethod
def phase5_fallback(self):
    """
    Phase 5 hook. The base class defines no behavior; concrete subclasses
    must implement it.

    NOTE(review): judging by the name, this is presumably the fallback for
    whatever the earlier phases left unhandled - confirm against the subclasses.
    """
def phase6_create_statements(self):
    """
    Create a Statement for every graph triple that has not been mapped.

    Triples whose predicate is in the skip list below are ignored, and so
    are triples that `_is_triple_mapped` reports as already consumed.
    """
    # Predicates that never get a Statement of their own.
    skipped_predicates = [RDF.first, RDF.rest, RDF.nil, OWL.distinctMembers, OWL.members]

    for triple in self.graph:
        subj, pred, obj = triple
        if pred in skipped_predicates:
            continue
        if not self._is_triple_mapped(subj, pred, obj):
            self._create_statement_for_triple(subj, pred, obj)
87 # ========== VALIDAZIONE CONFIG -> LOGIC ==========
def _validate_handlers(self):
    """
    Fail fast when the config references a handler this instance lacks.

    Checks every ``handler`` entry under the config key 'mapper' and every
    string value under the config key 'enricher'. All violations are
    collected first and reported together.

    Raises
    ------
    AttributeError
        When at least one declared handler is not an attribute of ``self``.
    """
    config = self._strategy.config
    problems = []

    for uri_str, cfg in config.get('mapper', {}).items():
        handler_name = cfg.get('handler')
        if not handler_name or hasattr(self, handler_name):
            continue
        problems.append(
            f"mapper['{uri_str}'].handler='{handler_name}' "
            f"not found on {type(self).__name__}"
        )

    for uri_str, handler_name in config.get('enricher', {}).items():
        # Non-string enricher values are not checked here.
        if not isinstance(handler_name, str) or hasattr(self, handler_name):
            continue
        problems.append(
            f"enricher['{uri_str}']='{handler_name}' "
            f"not found on {type(self).__name__}"
        )

    if not problems:
        return

    raise AttributeError(
        f"Config/Logic contract violations in {type(self).__name__}:\n"
        + "\n".join(f" - {e}" for e in problems)
    )
118 # ========== RISOLUZIONE CLASSI AMMESSE ==========
def _resolve_allowed_class(self, python_class: type, id: Node = None) -> type:
    """
    Map a Python class onto one that the current format allows.

    Resolution order:
      1. `python_class` itself, when it is in `_allowed_classes`;
      2. whatever `_pre_resolve_hook` returns, when that is truthy;
      3. the first ancestor in the MRO that is in `_allowed_classes`;
      4. `Resource`, after printing a warning.

    Parameters
    ----------
    python_class : the class that was requested.
    id : node being resolved; only forwarded to `_pre_resolve_hook`.
    """
    if python_class in self._allowed_classes:
        return python_class

    # Give subclasses the chance to decide before any downcast happens.
    hooked = self._pre_resolve_hook(python_class, id)
    if hooked:
        return hooked

    ancestor = next(
        (base for base in python_class.__mro__[1:] if base in self._allowed_classes),
        None,
    )
    if ancestor is not None:
        return ancestor

    print(f" [WARN] {python_class.__name__} -> Resource (fallback finale)")
    return Resource
def _pre_resolve_hook(self, python_class: type, id: Node) -> type | None:
    """
    Optional extension point called by `_resolve_allowed_class` before it
    walks the MRO.

    Subclasses may override it to supply their own resolution without
    touching `_resolve_allowed_class`. The base implementation ignores both
    arguments and returns None, which hands control back to the default
    behavior.
    """
    return None
152 # ========== UTILITIES ==========
154 def _traverse_hierarchy(
155 self,
156 start: object,
157 next_getter: str,
158 direction: str = "up", # "up" | "down" | "both"
159 collect: callable = None, # (node) -> value | None — ferma e ritorna quando non None
160 visit_all: callable = None, # (node) -> None — visita ogni nodo senza fermarsi
161 ) -> object | None:
162 """
163 Generic BFS traversal along a property hierarchy.
165 Parameters
166 ----------
167 start : starting node (Python model instance)
168 next_getter : name of the getter that returns the next nodes in the
169 'up' direction (e.g. 'get_is_sub_property_of').
170 For 'down' the cache is scanned for instances that list
171 `start` as one of their `next_getter` targets.
172 direction : 'up' - follow next_getter only
173 'down' - scan cache for reverse links only
174 'both' - up first, then down
175 collect : callable(node) -> value | None
176 Called on every visited node (including start).
177 When it returns a non-None value the traversal stops
178 immediately and that value is returned.
179 visit_all : callable(node) -> None
180 Called on every visited node; traversal never stops early.
181 Mutually exclusive with collect (collect takes priority).
183 Returns the first non-None value from collect, or None if visit_all is used.
184 """
185 visited = set()
186 queue = [start]
187 result = None
189 while queue:
190 current = queue.pop(0)
191 if id(current) in visited:
192 continue
193 visited.add(id(current))
195 # --- collect / visit ---
196 if collect:
197 value = collect(current)
198 if value is not None:
199 return value
200 elif visit_all:
201 visit_all(current)
203 # --- enqueue next nodes ---
204 if direction in ("up", "both"):
205 getter = getattr(current, next_getter, None)
206 if getter:
207 nexts = getter()
208 if nexts:
209 if not isinstance(nexts, list):
210 nexts = [nexts]
211 queue.extend(nexts)
213 if direction in ("down", "both"):
214 for instances_set in self._instance_cache.values():
215 for inst in instances_set:
216 if inst is current or id(inst) in visited:
217 continue
218 getter = getattr(inst, next_getter, None)
219 if getter:
220 parents = getter() or []
221 if not isinstance(parents, list):
222 parents = [parents]
223 if any(p is current for p in parents):
224 queue.append(inst)
226 return None
228 # def is_in_range_or_domain_of_property(self, property_getter, property_getter_inverse, property_instance):
230 # if isinstance(object, (Concept, Individual, Datatype)):
231 # obj_inst = property_instance.property_getter()
232 # obj_inst.property_getter_inverse()
def _create_literal(self, rdflib_literal):
    """
    Return the model Literal for an rdflib literal, creating it on first use.

    The cache key is built from the lexical value plus the language tag or
    datatype when present. Keying on the lexical value alone made literals
    that differ only in language or datatype (for example "chat"@en and
    "chat"@fr) share one cached instance, so the second one came back with
    the first one's language/type. Keys of plain literals are unchanged.

    Parameters
    ----------
    rdflib_literal : object with the rdflib literal interface used here:
        ``str()`` gives the lexical value, ``.language`` and ``.datatype``
        give the optional language tag and datatype.

    Returns
    -------
    The cached or newly created Literal instance.
    """
    language = rdflib_literal.language
    datatype = rdflib_literal.datatype

    literal_key = f"LITERAL::{rdflib_literal}"
    if language:
        literal_key += f"@{language}"
    if datatype:
        literal_key += f"^^{datatype}"

    if literal_key in self._instance_cache:
        return next(iter(self._instance_cache[literal_key]))

    literal = Literal()
    literal.set_has_value(str(rdflib_literal))

    if language:
        literal.set_has_language(language)

    if datatype:
        dt = self.get_or_create(datatype, Datatype)
        if dt:
            literal.set_has_type(dt)

    # Registered only after the datatype lookup, same order as before.
    self._instance_cache[literal_key] = {literal}
    return literal
def _is_rdf_collection(self, node: Node) -> bool:
    """
    Tell whether `node` is the subject of at least one rdf:first triple
    in the graph.
    """
    first_link_pattern = (node, RDF.first, None)
    return first_link_pattern in self.graph
def _instance_matches_target(self, instance, target_classes: list) -> bool:
    """
    Check whether the instance's class, or any class in its MRO, is listed
    in `target_classes`.
    """
    lineage = instance.__class__.__mro__
    return any(klass in target_classes for klass in lineage)
def _apply_setters(self, instance, setters_config, obj):
    """
    Apply the configured setters to `instance` for the object value `obj`.

    Each entry of `setters_config` is either:
      * a dict ``{setter_name: value_type}`` - the setter is called with a
        value chosen by `value_type`:
          - ``'Literal'``    -> ``self._create_literal(obj)``
          - a bool or a str  -> `value_type` itself (a constant)
          - a class          -> ``self.get_or_create(obj, value_type)``,
                                only when that returns something truthy
          - anything else    -> `obj` unchanged
      * anything else - treated as the name of a zero-argument method,
        which is called when the instance has it.

    Setters the instance does not have are skipped.
    """
    for setter_item in setters_config:
        if not isinstance(setter_item, dict):
            if hasattr(instance, setter_item):
                getattr(instance, setter_item)()
            continue

        for setter_name, value_type in setter_item.items():
            if not hasattr(instance, setter_name):
                continue
            setter = getattr(instance, setter_name)

            if value_type == 'Literal':
                try:
                    setter(self._create_literal(obj))
                except Exception:
                    # Best effort: an object that cannot become a literal is
                    # skipped. Only Exception is caught, so KeyboardInterrupt
                    # and SystemExit still propagate (a bare except hid them).
                    continue
            elif isinstance(value_type, (bool, str)):
                setter(value_type)
            elif isinstance(value_type, type):
                obj_instance = self.get_or_create(obj, value_type)
                if obj_instance:
                    setter(obj_instance)
            else:
                setter(obj)
def _handle_collection_object(self, instance, predicate, collection_uri):
    """
    Resolve the members of an RDF collection and hand them, as one list, to
    the setters configured for `predicate`.

    Literal members become model literals via `_create_literal`; every other
    member is resolved with `get_or_create(item, Resource)` and kept only
    when that returns something truthy.

    Any exception raised while reading the collection or calling a setter is
    caught, printed and otherwise ignored; nothing is returned.

    NOTE(review): block nesting was reconstructed from a listing that lost
    its indentation - confirm the placement of `break` against the real file.
    """
    try:
        collection = RDFLibCollection(self.graph, collection_uri)
        items = []
        for item in collection:
            if isinstance(item, RDFlibLiteral):
                items.append(self._create_literal(item))
            else:
                item_instance = self.get_or_create(item, Resource)
                if item_instance:
                    items.append(item_instance)

        # A predicate without a mapping simply has no setters to call.
        config = self._property_mapping.get(predicate, {})
        for setter_item in config.get('setters', []):
            # Only dict entries are considered here; bare setter names are ignored.
            if isinstance(setter_item, dict):
                for setter_name in setter_item:
                    if hasattr(instance, setter_name):
                        getattr(instance, setter_name)(items)
                        # Leaves only the inner loop: the remaining names of
                        # this dict are skipped, later dict entries still run.
                        break
    except Exception as e:
        print(f"Errore Collection: {e}")
def clear_cache(self):
    """
    Empty the instance cache and the per-instance triple bookkeeping.

    Both dicts are cleared in place, so an `instance_cache` shared with the
    caller stays the same object. `_triples_map` is keyed by the cached
    instances; clearing only the cache kept every dropped instance (and its
    triples) alive there.
    """
    self._instance_cache.clear()
    self._triples_map.clear()
317 # ========== LOGIC CORE ==========
def get_or_create(self, id: Node, python_class: type = None, populate: bool = True):
    """
    Return the model instance for `id`, creating and caching it when needed.

    Steps, in order:
      1. rdflib literals are delegated to `_create_literal`.
      2. URIs that start with the XSD namespace force `python_class` to Datatype.
      3. A truthy `python_class` is narrowed by `_resolve_allowed_class`.
      4. A URI that starts with one of `_allowed_namespaces` yields None,
         except OWL.Thing, OWL.Nothing and RDFS.Literal.
      5. A request for Individual on an already cached id returns an existing
         non-Individual instance, unless the graph types the id as
         owl:NamedIndividual.
      6. Cache lookup: a blank node returns the first cached instance; a URI
         returns the first cached instance that is an instance of `python_class`.
      7. Otherwise a new `python_class()` is created, cached, given its
         identifier and (when `populate` is true) populated.

    Parameters
    ----------
    id : rdflib node to resolve.
    python_class : model class wanted for the node.
    populate : when true, `populate_instance` is run on a newly created instance.

    Returns
    -------
    The instance, or None when the id is filtered out in step 4 or when any
    exception is raised (the exception is printed, not propagated).

    NOTE(review): with `python_class` left as None, steps 6-7 raise TypeError
    (`isinstance(obj, None)` / calling None), which the handler below turns
    into a printed message and a None result.
    NOTE(review): block nesting was reconstructed from a listing that lost
    its indentation - confirm against the real file.
    """
    try:

        if isinstance(id, RDFlibLiteral):
            return self._create_literal(id)

        if isinstance(id, URIRef) and str(id).startswith(str(XSD)):
            python_class = Datatype

        if python_class:
            python_class = self._resolve_allowed_class(python_class, id)

        if isinstance(id, URIRef):
            uri_str = str(id)
            for ns in self._allowed_namespaces:
                # Checked after the XSD override above, so an XSD URI is also
                # dropped here when the XSD namespace is configured.
                if uri_str.startswith(ns) and id not in (OWL.Thing, OWL.Nothing, RDFS.Literal):
                    return None

        # Individual punning: do not override existing non-Individual types
        if python_class == Individual and id in self._instance_cache:
            is_named_individual = (id, RDF.type, OWL.NamedIndividual) in self.graph
            if not is_named_individual:
                for existing in self._instance_cache[id]:
                    if not isinstance(existing, Individual):
                        return existing

        if id in self._instance_cache:
            if isinstance(id, BNode):
                # Blank nodes: the first cached instance wins, whatever its class.
                return next(iter(self._instance_cache[id]))
            if isinstance(id, URIRef):
                for obj in self._instance_cache[id]:
                    if isinstance(obj, python_class):
                        return obj

        instance = python_class()
        if id not in self._instance_cache:
            self._instance_cache[id] = set()
        # Cached before populating, so lookups made while populating find it.
        self._instance_cache[id].add(instance)
        instance.set_has_identifier(str(id))

        if populate:
            self.populate_instance(instance, id)

        return instance

    except Exception as e:
        print(f"Cannot create {python_class.__name__ if python_class else 'Unknown'} for {id}: {e}")
        return None
def populate_instance(self, instance, uri: Node):
    """
    Fill `instance` from the triples that have `uri` as subject.

    For each (predicate, object) pair of `uri`:
      * the pair is skipped unless the predicate's namespace is in
        `_allowed_namespaces`;
      * the pair is skipped unless the predicate has an entry in
        `_property_mapping` whose 'target_classes' (when non-empty) match
        the instance;
      * the entry's 'handler' (if any) is called, then its 'setters' (if
        any) are applied, then an RDF collection object is expanded via
        `_handle_collection_object`.

    Every triple consumed this way is recorded in `_triples_map[instance]`.
    An exception in the handler or the setters is printed and ends the
    processing of that pair.

    NOTE(review): block nesting was reconstructed from a listing that lost
    its indentation; in particular the collection branch is assumed to sit
    inside the `predicate in self._property_mapping` block - confirm.
    """
    if isinstance(uri, URIRef):
        instance.set_has_identifier(str(uri))
    elif isinstance(uri, BNode):
        # Blank nodes assign the attribute directly instead of using the setter.
        # NOTE(review): presumably the setter is bypassed on purpose - confirm.
        instance.has_identifier = str(uri)

    if instance not in self._triples_map:
        self._triples_map[instance] = set()

    for predicate, obj in self.graph.predicate_objects(uri):
        predicate_str = str(predicate)
        # Namespace = everything up to and including the last '#',
        # or the last '/' when the URI contains no '#'.
        predicate_namespace = (
            predicate_str.rsplit('#', 1)[0] + '#'
            if '#' in predicate_str
            else predicate_str.rsplit('/', 1)[0] + '/'
        )

        if predicate_namespace not in self._allowed_namespaces:
            continue

        if predicate in self._property_mapping:
            config = self._property_mapping[predicate]

            target_classes = config.get('target_classes', [])
            if target_classes and not self._instance_matches_target(instance, target_classes):
                continue

            if 'handler' in config:
                handler_name = config['handler']
                # handler existence already guaranteed by _validate_handlers
                # NOTE(review): _validate_handlers checks config['mapper'];
                # presumably this mapping is derived from it - confirm.
                handler = getattr(self, handler_name)
                try:
                    handler(instance, uri, predicate, obj, None)
                    self._triples_map[instance].add((uri, predicate, obj))
                except Exception as e:
                    print(f" Errore handler {handler_name}: {e}")
                    # A failed handler also skips the setters and the
                    # collection handling for this pair.
                    continue

            if 'setters' in config:
                try:
                    self._apply_setters(instance, config['setters'], obj)
                    self._triples_map[instance].add((uri, predicate, obj))
                except Exception as e:
                    print(f" Errore setters: {e}")
                    continue

            if self._is_rdf_collection(obj):
                self._handle_collection_object(instance, predicate, obj)
                self._triples_map[instance].add((uri, predicate, obj))
418 # ========== HELPERS ==========
def _is_triple_mapped(self, subj, pred, obj) -> bool:
    """
    Report whether any cached instance of `subj` has already recorded the
    triple (subj, pred, obj) in `_triples_map`.

    Returns False when `subj` has no cache entry at all.
    """
    if subj not in self._instance_cache:
        return False

    cached = self._instance_cache[subj]
    # A cache value that is not a set is treated as a single instance.
    candidates = cached if isinstance(cached, set) else [cached]
    triple = (subj, pred, obj)
    return any(
        triple in self._triples_map.get(candidate, ())
        for candidate in candidates
    )
def _convert_collection_to_container(self, collection_uri):
    """
    Return the Container for an RDF collection, building it on first use.

    A Container already cached under `collection_uri` is returned as is.
    Otherwise a new one is created and cached, and its members are read
    from the graph: literal members via `_create_literal`, all others via
    `get_or_create(item, Resource)` (kept only when truthy).

    An exception while reading the members is printed; the container is
    still returned.
    """
    if collection_uri in self._instance_cache:
        already_built = next(
            (entry for entry in self._instance_cache[collection_uri]
             if isinstance(entry, Container)),
            None,
        )
        if already_built is not None:
            return already_built

    container = Container()
    container.set_has_identifier(str(collection_uri))
    self._instance_cache.setdefault(collection_uri, set()).add(container)

    try:
        members = []
        for item in RDFLibCollection(self.graph, collection_uri):
            if isinstance(item, RDFlibLiteral):
                members.append(self._create_literal(item))
                continue
            resolved = self.get_or_create(item, Resource)
            if resolved:
                members.append(resolved)
        container.set_has_members(members)
    except Exception as e:
        print(f"Errore Collection: {e}")

    return container
def _create_statement_for_triple(self, subj, pred, obj):
    """
    Build a Statement for one triple and register it in the cache.

    The statement is identified by a newly constructed BNode, records the
    triple in `_triples_map`, and receives subject, predicate and object
    only when the corresponding lookup returns something truthy. The
    object is a Container for RDF collections, a literal for rdflib
    literals and a Resource otherwise. Nothing is returned.
    """
    statement = Statement()
    stmt_bnode = BNode()
    statement.set_has_identifier(str(stmt_bnode))

    self._triples_map.setdefault(statement, set()).add((subj, pred, obj))

    subject_inst = self.get_or_create(subj, Resource)
    if subject_inst:
        statement.set_has_subject(subject_inst)

    predicate_inst = self.get_or_create(pred, Property)
    if predicate_inst:
        statement.set_has_predicate(predicate_inst)

    # The collection check comes first, before the literal check.
    if self._is_rdf_collection(obj):
        object_inst = self._convert_collection_to_container(obj)
    elif isinstance(obj, RDFlibLiteral):
        object_inst = self._create_literal(obj)
    else:
        object_inst = self.get_or_create(obj, Resource)

    if object_inst:
        statement.set_has_object(object_inst)

    # Cached last, after subject, predicate and object have been resolved.
    self._instance_cache.setdefault(stmt_bnode, set()).add(statement)