Coverage for lode / reader / loader.py: 19%

77 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-03-25 15:05 +0000

1""" 

2Reader - Caricamento e validazione file RDF 

3""" 

4import requests 

5from rdflib import Graph 

6from typing import Dict, Optional 

7from urllib.parse import urlparse 

8 

9import lode.reader.modules as modules 

10from lode.exceptions import ArtefactLoadError, ArtefactNotFoundError 

11 

12 

class Loader:
    """Load an RDF artefact from a local file or an HTTP(S) URL into an rdflib graph.

    After a successful load, the optional ``imported`` / ``closure`` modules
    are applied to the graph (see ``_apply_modules``). The resulting graph is
    available as ``self.graph`` or through ``get_graph()``.
    """

    # rdflib parser names tried, in order, when the HTTP Content-Type is
    # missing, unrecognised, or turns out not to match the payload.
    _URL_FALLBACK_FORMATS = ("xml", "turtle", "json-ld", "nt", "n3")

    # rdflib parser names tried, in order, for local files.
    _LOCAL_FORMATS = ("xml", "turtle", "n3", "nt", "json-ld")

    # (Content-Type fragment, rdflib parser name) pairs; the first fragment
    # found in the header wins. The bare "json" fragment covers both
    # application/ld+json and JSON-LD served as plain application/json.
    _CONTENT_TYPE_FORMATS = (
        ("text/turtle", "turtle"),
        ("application/x-turtle", "turtle"),
        ("application/rdf+xml", "xml"),
        ("json", "json-ld"),
        ("application/n-triples", "nt"),
    )

    _HTTP_TIMEOUT_SECONDS = 10

    def __init__(self, file_path: Optional[str] = None, imported=None, closure=None):
        """Create a loader and, when ``file_path`` is given, load it immediately.

        Args:
            file_path: Local path or http(s) URL of the artefact. When falsy,
                nothing is loaded and the graph stays empty.
            imported: Truthy to run ``modules.apply_imported`` after loading.
            closure: Truthy to run ``modules.apply_closure`` after loading.
        """
        self.graph = Graph()
        self._imported = imported
        self._closure = closure

        if file_path:
            self.load(file_path)

    # ----------------------------------------------------------
    # MAIN LOAD METHOD
    # ----------------------------------------------------------
    def load(self, source: str) -> None:
        """Load RDF from a local file or from a URL with content negotiation.

        Args:
            source: An http(s) URL, or anything else, which is treated as a
                local file path.

        Raises:
            ArtefactNotFoundError: The URL could not be fetched, or the
                server did not answer with HTTP 200.
            ArtefactLoadError: The content could not be parsed with any of
                the known RDF formats.
        """
        if self._is_url(source):
            self._load_from_url_with_content_negotiation(source)
        else:
            outcome = self._load_from_local_file(source)
            if not outcome["success"]:
                # The status dict used to be dropped here, so an unreadable
                # file silently produced an empty graph; fail the same way
                # the URL path does instead.
                raise ArtefactLoadError(outcome["message"], context={"path": source})

        self._apply_modules()

    # ----------------------------------------------------------
    # MODULES MAIN HANDLER
    # ----------------------------------------------------------
    def _apply_modules(self) -> None:
        """Run the requested post-load modules on ``self.graph``.

        The closure module runs first when ``closure`` is set, then the
        imported module when ``imported`` is set; with both flags set, both
        run in that order. With neither flag set the graph is left untouched.
        """
        if self._closure:
            self.graph = modules.apply_closure(self.graph)
        if self._imported:
            self.graph = modules.apply_imported(self.graph)

    # ----------------------------------------------------------
    # CONTENT NEGOTIATION FOR URLS
    # ----------------------------------------------------------
    def _load_from_url_with_content_negotiation(self, url: str) -> None:
        """Fetch ``url`` asking for RDF serialisations and parse the response.

        The format announced in the response Content-Type is tried first;
        if it is unknown or parsing fails, the remaining fallback formats
        are tried in order.

        Raises:
            ArtefactNotFoundError: Network failure or a non-200 response.
            ArtefactLoadError: No format could parse the response body.
        """
        headers = {
            "Accept": (
                "text/turtle, application/rdf+xml, application/ld+json, "
                "application/n-triples, application/n-quads, */*;q=0.1"
            )
        }

        # Only the request itself can raise RequestException, so keep the
        # guarded region that small.
        try:
            response = requests.get(
                url, headers=headers, timeout=self._HTTP_TIMEOUT_SECONDS
            )
        except requests.RequestException as e:
            raise ArtefactNotFoundError(
                "Network error fetching artefact",
                context={"url": url, "original_error": str(e)},
            ) from e

        if response.status_code != 200:
            raise ArtefactNotFoundError(
                "Cannot load provided Semantic Artefact",
                context={"url": url, "http_status": response.status_code},
            )

        content_type = response.headers.get("Content-Type", "")
        guessed_format = self._guess_format_from_content_type(content_type)

        # Announced format first, then every fallback that is not the same
        # parser again.
        candidates = [fmt for fmt in self._URL_FALLBACK_FORMATS if fmt != guessed_format]
        if guessed_format:
            candidates.insert(0, guessed_format)

        if self._parse_first_matching(candidates, data=response.text) is None:
            raise ArtefactLoadError(
                "Could not parse RDF after content negotiation",
                context={"url": url, "formats_tried": list(self._URL_FALLBACK_FORMATS)},
            )

    # ----------------------------------------------------------
    # LOCAL FILE LOADING
    # ----------------------------------------------------------
    def _load_from_local_file(self, path: str) -> Dict[str, object]:
        """Parse the file at ``path``, trying each known RDF format in turn.

        Returns:
            A dict with a boolean ``"success"`` and a human-readable
            ``"message"``. On success ``self.graph`` holds the parsed graph;
            on failure ``self.graph`` is left as it was.
        """
        used_format = self._parse_first_matching(self._LOCAL_FORMATS, source=path)

        if used_format is None:
            return {
                "success": False,
                "message": f"Could not load {path} with any known RDF format",
            }

        return {
            "success": True,
            "message": f"{len(self.graph)} triples loaded (format: {used_format})",
        }

    # ----------------------------------------------------------
    # HELPERS
    # ----------------------------------------------------------
    def _parse_first_matching(self, formats, **parse_kwargs) -> Optional[str]:
        """Install the first graph that parses and return the format used.

        Args:
            formats: Iterable of rdflib parser names, tried in order.
            **parse_kwargs: Input selector forwarded to ``Graph.parse``
                (``source=...`` for a path, ``data=...`` for a string).

        Returns:
            The format that parsed successfully, or ``None`` if none did, in
            which case ``self.graph`` is not modified.
        """
        for fmt in formats:
            # Fresh graph per attempt so triples emitted before a parser
            # gives up never leak into the final result.
            candidate = Graph()
            try:
                candidate.parse(format=fmt, **parse_kwargs)
            except Exception:
                # Each parser raises its own error types on a mismatch, so a
                # broad catch is intended; a failure just means "next format".
                continue
            self.graph = candidate
            return fmt
        return None

    def _is_url(self, s: str) -> bool:
        """Return True when ``s`` parses as a URL with an http or https scheme."""
        try:
            return urlparse(s).scheme in ("http", "https")
        except Exception:
            # urlparse raises on some malformed inputs; treat those as paths.
            return False

    def _guess_format_from_content_type(self, content_type: str) -> Optional[str]:
        """Map an HTTP Content-Type header value to an rdflib parser name.

        Matching is by case-insensitive substring, so parameters such as
        ``; charset=utf-8`` are ignored.

        Returns:
            The parser name, or ``None`` for an empty or unrecognised value.
        """
        if not content_type:
            return None

        normalised = content_type.lower()
        for fragment, parser_name in self._CONTENT_TYPE_FORMATS:
            if fragment in normalised:
                return parser_name
        return None

    def get_graph(self) -> "Graph":
        """Return the graph currently held by this loader."""
        return self.graph

152 

153 

154