Coverage for lode / reader / loader.py: 19%
77 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-03-25 15:05 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-03-25 15:05 +0000
"""
Reader - Loading and validation of RDF files
"""
from typing import Any, Dict, Optional
from urllib.parse import urlparse

import requests
from rdflib import Graph

import lode.reader.modules as modules
from lode.exceptions import ArtefactLoadError, ArtefactNotFoundError
class Loader:
    """Load an RDF graph from a local file or from an HTTP(S) URL.

    The parsed graph is stored on ``self.graph``. After every load the
    optional ``imported`` / ``closure`` post-processing steps from
    ``lode.reader.modules`` are applied to it.
    """

    # Parser names tried, in order, for a URL payload when the HTTP
    # Content-Type is missing, unrecognised, or turns out to be wrong.
    _URL_FALLBACK_FORMATS = ("xml", "turtle", "json-ld", "nt", "n3")

    # Parser names tried, in order, for a local file.
    _LOCAL_FILE_FORMATS = ("xml", "turtle", "n3", "nt", "json-ld")

    def __init__(self, file_path: Optional[str] = None, imported=None, closure=None):
        """Create a loader and, when ``file_path`` is truthy, load it at once.

        Args:
            file_path: Local path or http(s) URL of the RDF source.
            imported: Truthy to run ``modules.apply_imported`` after loading.
            closure: Truthy to run ``modules.apply_closure`` after loading.
        """
        self.graph = Graph()
        self._imported = imported
        self._closure = closure

        if file_path:
            self.load(file_path)

    # ----------------------------------------------------------
    # MAIN LOAD METHOD
    # ----------------------------------------------------------
    def load(self, source: str) -> None:
        """Load RDF from a local file or from a URL with content negotiation.

        Raises:
            ArtefactNotFoundError: The URL could not be fetched or did not
                answer with HTTP 200.
            ArtefactLoadError: The URL payload could not be parsed as RDF.
        """
        if self._is_url(source):
            self._load_from_url_with_content_negotiation(source)
        else:
            # NOTE(review): the status dict returned here is not inspected,
            # so an unreadable local file leaves an empty graph without any
            # error. Kept as-is to preserve the current contract of load().
            self._load_from_local_file(source)

        self._apply_modules()

    # ----------------------------------------------------------
    # MODULES MAIN HANDLER
    # ----------------------------------------------------------
    def _apply_modules(self) -> None:
        """Apply at most one post-processing module to ``self.graph``.

        ``closure`` wins when both flags are set. The previous code used a
        plain ``if`` for the second branch, so with both flags set it ran
        ``apply_closure`` and then also ``apply_imported`` on the result.
        """
        if self._closure:
            self.graph = modules.apply_closure(self.graph)
        elif self._imported:
            self.graph = modules.apply_imported(self.graph)

    # ----------------------------------------------------------
    # CONTENT NEGOTIATION FOR URLS
    # ----------------------------------------------------------
    def _load_from_url_with_content_negotiation(self, url: str) -> None:
        """Fetch ``url`` asking for RDF serialisations and parse the answer.

        The format derived from the response Content-Type is tried first;
        if it is unknown or fails, every fallback format is tried in turn.
        On success ``self.graph`` holds the parsed graph.

        Raises:
            ArtefactNotFoundError: Network failure or non-200 HTTP status.
            ArtefactLoadError: No parser accepted the payload.
        """
        headers = {
            "Accept": (
                "text/turtle, application/rdf+xml, application/ld+json, "
                "application/n-triples, application/n-quads, */*;q=0.1"
            )
        }

        # Only the network call can raise RequestException, so keep the
        # try body down to that single statement.
        try:
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as e:
            raise ArtefactNotFoundError(
                "Network error fetching artefact",
                context={"url": url, "original_error": str(e)}
            ) from e

        if response.status_code != 200:
            raise ArtefactNotFoundError(
                "Cannot load provided Semantic Artefact",
                context={"url": url, "http_status": response.status_code}
            )

        content_type = response.headers.get("Content-Type", "").lower()

        # Honour a charset the server declared explicitly; otherwise hand the
        # raw bytes to the parser. Without a declared charset requests decodes
        # text/* bodies as ISO-8859-1, which corrupts UTF-8 Turtle/N-Triples.
        if "charset=" in content_type:
            payload = response.text
        else:
            payload = response.content

        # Format guessed from HTTP Content-Type goes first, then the rest.
        guessed_format = self._guess_format_from_content_type(content_type)
        candidates = [guessed_format] if guessed_format else []
        candidates.extend(
            fmt for fmt in self._URL_FALLBACK_FORMATS if fmt != guessed_format
        )

        for fmt in candidates:
            # Fresh graph per attempt so a parser that fails midway cannot
            # leak partial triples into the next attempt.
            attempt = Graph()
            try:
                attempt.parse(data=payload, format=fmt)
            except Exception:
                continue
            self.graph = attempt
            return

        self.graph = Graph()
        raise ArtefactLoadError(
            "Could not parse RDF after content negotiation",
            context={"url": url, "formats_tried": list(self._URL_FALLBACK_FORMATS)}
        )

    # ----------------------------------------------------------
    # LOCAL FILE LOADING
    # ----------------------------------------------------------
    def _load_from_local_file(self, path: str) -> Dict[str, Any]:
        """Parse ``path`` by trying each known RDF format in order.

        Returns:
            A dict with a boolean ``"success"`` and a human-readable
            ``"message"``. No exception is raised when every format fails.
        """
        for fmt in self._LOCAL_FILE_FORMATS:
            try:
                self.graph = Graph()
                self.graph.parse(path, format=fmt)
            except Exception:
                # Wrong format guess or unreadable file: try the next parser.
                continue
            return {
                "success": True,
                "message": f"{len(self.graph)} triples loaded (format: {fmt})"
            }

        return {
            "success": False,
            "message": f"Could not load {path} with any known RDF format"
        }

    # ----------------------------------------------------------
    # HELPERS
    # ----------------------------------------------------------
    def _is_url(self, s: str) -> bool:
        """Return True when ``s`` parses with an ``http`` or ``https`` scheme."""
        try:
            return urlparse(s).scheme in ("http", "https")
        except (ValueError, TypeError, AttributeError):
            # Malformed input (e.g. an unbalanced IPv6 bracket) or a non-string.
            return False

    def _guess_format_from_content_type(self, content_type: str) -> Optional[str]:
        """Map a lower-cased Content-Type header value to a parser name.

        Matching is by substring, so parameters such as ``; charset=utf-8``
        are tolerated and any media type containing ``json`` is treated as
        JSON-LD. Returns ``None`` when nothing matches.
        """
        if "text/turtle" in content_type or "application/x-turtle" in content_type:
            return "turtle"
        if "application/rdf+xml" in content_type:
            return "xml"
        # "application/ld+json" is covered by the broader "json" substring.
        if "json" in content_type:
            return "json-ld"
        if "application/n-triples" in content_type:
            return "nt"
        return None

    def get_graph(self) -> Graph:
        """Return the graph held by this loader."""
        return self.graph