Coverage for ramose / api_manager.py: 100%

131 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-15 15:58 +0000

1# SPDX-FileCopyrightText: 2018-2021 Silvio Peroni <silvio.peroni@unibo.it> 

2# SPDX-FileCopyrightText: 2020-2021 Marilena Daquino <marilena.daquino2@unibo.it> 

3# SPDX-FileCopyrightText: 2022 Davide Brembilla 

4# SPDX-FileCopyrightText: 2024 Ivan Heibi <ivan.heibi2@unibo.it> 

5# SPDX-FileCopyrightText: 2025 Sergei Slinkin 

6# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

7# 

8# SPDX-License-Identifier: ISC 

9 

10import csv 

11from collections import OrderedDict 

12from importlib import import_module 

13from pathlib import Path 

14from re import findall, match, sub 

15from sys import maxsize, path 

16from urllib.parse import urlsplit 

17 

18from ramose._constants import PARAM_NAME 

19from ramose.cache import ResultCache 

20from ramose.hash_format import HashFormatHandler, parse_custom_params, parse_disable_params 

21from ramose.operation import Operation 

22 

23 

class APIManager:
    """Registry of the API operations declared in one or more Hash Format configuration files.

    The manager reads every configuration file once, at construction time, and afterwards resolves
    the URL of an incoming API call to the operation that has to serve it (see ``get_op``).
    """

    # Fixing max size for CSV
    @staticmethod
    def __max_size_csv():
        """Raise the field size limit of the ``csv`` module to the largest value it accepts.

        The attempt starts from ``sys.maxsize`` and the candidate is divided by ten every time
        ``csv.field_size_limit`` rejects it with an ``OverflowError``.
        """
        max_int = maxsize
        while True:
            try:
                csv.field_size_limit(max_int)
                break
            except OverflowError:  # pragma: no cover
                # Floor division keeps the candidate an exact int (no round trip through float).
                max_int //= 10

    def __init__(self, conf_files, endpoint_override=None, cache_dir=None, cache_ttl=86400):
        """Load the API configuration files and index every operation they declare.

        Each configuration file is read with ``HashFormatHandler().read``, which is expected to yield a
        sequence of dictionaries. The first dictionary describes the API itself (``url``, ``base``,
        ``endpoint`` and the optional ``engine``, ``sources``, ``disable_params``, ``addon`` and ``method``
        fields); every following dictionary describes one operation of that API.

        The result is stored in ``self.all_conf``, which has one entry per API, keyed by its base URL:

            {
                "/api/v1": {
                    "conf": {
                        "/api/v1/references/(.+)": {
                            "sparql": "PREFIX ...",
                            "method": "get",
                            ...
                        },
                        ...
                    },
                    "tp": ...,
                    "conf_json": ...,
                    "base_url": "/api/v1",
                    "website": ...,
                    "addon": ...,
                    "sparql_http_method": ...,
                    "sources_map": ...,
                    "engine": ...,
                    "disable_params": ...,
                },
                ...
            }

        In particular, each key of the inner "conf" dictionary is the full URL of an API operation in which
        the parameters have been replaced by the regular expressions defining them (see ``nor_api_url``).
        It is used to understand which operation should be called once an API call is done. The object
        associated as value of this key is the transformation of the related operation defined in the input
        Hash Format file into a dictionary.

        The other fields hold the API-level settings: the endpoint to query ("tp"), the add-on module with
        the functions referenced by the operations ("addon"), the HTTP method to use for making the request
        to the endpoint ("sparql_http_method"), the named sources, the engine and the disabled parameters.

        Args:
            conf_files: iterable with the paths of the configuration files to load.
            endpoint_override: when truthy, it is used instead of the ``endpoint`` declared in every
                configuration file (useful for staging/production environments).
            cache_dir: when truthy, a ``ResultCache`` is created on it and handed to every operation;
                otherwise operations receive ``cache=None``.
            cache_ttl: value handed to every operation as ``default_cache_ttl`` (presumably expressed in
                seconds, to be confirmed against ``Operation``).

        Side effects:
            The csv field size limit is raised, and for every API declaring an ``addon`` the directory
            containing it is added to ``sys.path`` and the module is imported.
        """
        APIManager.__max_size_csv()

        self._cache = ResultCache(cache_dir) if cache_dir else None
        self._cache_ttl = cache_ttl

        self.all_conf = OrderedDict()
        self.base_url = []
        for conf_file in conf_files:
            conf = OrderedDict()
            tp = None
            conf_json = HashFormatHandler().read(conf_file)
            base_url = None
            addon = None
            website = ""
            sparql_http_method = "get"
            sources_map = {}
            engine = "sparql"
            disable_params_api: set[str] = set()
            for item in conf_json:
                if base_url is None:
                    # The first section of the file describes the API as a whole.
                    base_url = item["url"]
                    self.base_url.append(item["url"])
                    website = item["base"]
                    tp = endpoint_override or item["endpoint"]

                    # Engine selection at API level (optional)
                    if "engine" in item:
                        engine = item["engine"].strip().lower()

                    # Optional: named sources registry
                    if "sources" in item:
                        sources_map.update(APIManager._parse_sources(item["sources"]))

                    if "disable_params" in item:
                        disable_params_api = parse_disable_params(item["disable_params"])

                    if "addon" in item:
                        # The add-on location is relative to the configuration file declaring it.
                        addon_path = (Path(conf_file).parent / item["addon"]).resolve()
                        addon_dir = str(addon_path.parent)
                        # Do not grow sys.path with duplicates when several managers are created.
                        if addon_dir not in path:
                            path.append(addon_dir)
                        addon = import_module(addon_path.name)

                    if "method" in item:
                        sparql_http_method = item["method"].strip().lower()
                else:
                    # Every following section describes one operation of the API.
                    conf[APIManager.nor_api_url(item, base_url)] = item

            self.all_conf[base_url] = {
                "conf": conf,
                "tp": tp,
                "conf_json": conf_json,
                "base_url": base_url,
                "website": website,
                "addon": addon,
                "sparql_http_method": sparql_http_method,
                "sources_map": sources_map,
                "engine": engine,
                "disable_params": disable_params_api,
            }

        self._operation_prefixes = APIManager._build_operation_prefixes(self.all_conf)

    @staticmethod
    def _parse_sources(raw):
        """Parse a ``name=url; name=url`` string into a ``{name: url}`` dictionary.

        Empty entries are ignored and names and URLs are stripped. An entry without ``=`` raises
        ``ValueError``.
        """
        sources = {}
        for raw_pair in raw.split(";"):
            pair = raw_pair.strip()
            if not pair:
                continue
            name, url = pair.split("=", 1)
            sources[name.strip()] = url.strip()
        return sources

    @staticmethod
    def _parse_format_map(op_conf):
        """Build the ``{format: function name}`` map declared by the ``format`` field of an operation.

        The field may be a single string or a list of strings, each one shaped as
        ``format,function; format,function``. Empty entries are ignored. An entry without a comma
        raises ``ValueError``. An operation without the field yields an empty dictionary.
        """
        format_map = {}
        if "format" in op_conf:
            declared = op_conf["format"]
            for chunk in declared if isinstance(declared, list) else [declared]:
                for raw_part in chunk.split(";"):
                    part = raw_part.strip()
                    if not part:
                        continue
                    fmt, func = part.split(",", 1)
                    format_map[fmt.strip()] = func.strip()
        return format_map

    @staticmethod
    def _build_operation_prefixes(all_conf):
        """Collect the literal URL prefix of every operation having at least one parameter.

        For each operation whose URL template contains a ``{``, a tuple
        ``(base URL + template up to the first "{", base URL, operation dictionary)`` is produced.
        The tuples are returned sorted from the longest prefix to the shortest one, so that the most
        specific operation is found first when scanning the list.
        """
        prefixes = []
        for base, api_data in all_conf.items():
            for item in api_data["conf"].values():
                template = item["url"]
                brace_pos = template.find("{")
                if brace_pos != -1:
                    prefixes.append((base + template[:brace_pos], base, item))
        prefixes.sort(key=lambda entry: len(entry[0]), reverse=True)
        return prefixes

    @staticmethod
    def nor_api_url(i, b=""):
        """This method takes an API operation object and an optional base URL (e.g. "/api/v1") as input
        and returns the URL composed by the base URL plus the API URL normalised according to specific rules. In
        particular, these normalisation rules take the operation URL (e.g. "#url /citations/{oci}") and the
        specification of the shape of all the parameters between brackets in the URL (e.g. "#oci str([0-9]+-[0-9]+)"),
        and return a new operation URL where the parameters have been substituted with the regular expressions
        defining them (e.g. "/citations/([0-9]+-[0-9]+)"). A parameter with no shape specification is handled
        as "str(.+)". This URL will be used by RAMOSE for matching the particular API calls with the specific
        operation to execute."""
        result = i["url"]

        for term in findall(PARAM_NAME, result):
            shape = i.get(term, "str(.+)")
            # Keep only the parenthesised regular expression, dropping the type name in front of it.
            result = result.replace(f"{{{term}}}", sub(r"^[^\(]+(\(.+\))$", r"\1", shape))

        return f"{b}{result}"

    def best_match(self, u):
        """This method takes the URL of an API call in input and finds the API operation URL and the related
        configuration that best match with the API call, if any.

        The query string of the URL, if present, is ignored. The method returns the tuple
        ``(API configuration, operation URL pattern)`` of the first operation whose pattern matches the
        whole URL, or ``(None, None)`` when no operation matches."""
        cur_u = sub(r"\?.*$", "", u)
        for base_url, conf in self.all_conf.items():
            # An API loaded from a configuration file with no sections is registered under None:
            # it declares no operation and must not break the lookup of the other APIs.
            if base_url is None or not cur_u.startswith(base_url):
                continue
            for pat in conf["conf"]:
                if match(f"^{pat}$", cur_u):
                    return conf, pat
        return None, None

    def get_op(self, op_complete_url):
        """This method returns a new object of type Operation which represents the operation specified by
        the input URL (parameter 'op_complete_url'). In case no operation can be found by checking
        the configuration files available in the APIManager, a tuple with an HTTP error code, a message
        and the content type of the message ("text/plain") is returned instead:

        * code 400 when the URL starts with the literal part of a parametrised operation but the value
          provided for the parameter is missing or does not match the shape requested;
        * code 404 in any other case."""
        url_parsed = urlsplit(op_complete_url)
        op_url = url_parsed.path

        conf, op = self.best_match(op_url)
        if conf is not None:
            op_conf = conf["conf"][op]

            # The engine declared by the operation, if any, wins over the one declared by the API.
            op_engine = conf.get("engine", "sparql")
            if "engine" in op_conf:
                op_engine = op_conf["engine"].strip().lower()

            # Build op-level format map from the operation block
            op_format_map = APIManager._parse_format_map(op_conf)

            custom_params_map = parse_custom_params(op_conf["custom_params"]) if "custom_params" in op_conf else {}

            # Parameters disabled at API level add up to those disabled by the operation.
            api_disabled = conf["disable_params"]
            op_disabled = parse_disable_params(op_conf["disable_params"]) if "disable_params" in op_conf else set()
            effective_disabled = api_disabled | op_disabled

            return Operation(
                op_complete_url,
                op,
                op_conf,
                conf["tp"],
                conf["sparql_http_method"],
                conf["addon"],
                op_format_map,
                conf.get("sources_map", {}),
                op_engine,
                custom_params_map,
                effective_disabled,
                cache=self._cache,
                default_cache_ttl=self._cache_ttl,
            )

        # No operation matches: check whether the call addresses a known operation with a bad value.
        for prefix, base_url, item in self._operation_prefixes:
            if not op_url.startswith(prefix):
                continue
            template = item["url"]
            param_names = findall(PARAM_NAME, template)
            if not param_names:
                # The template has a "{" but no recognisable parameter: nothing meaningful to report.
                continue
            first_param = param_names[0]
            full_template = base_url + template
            param_value = op_url[len(prefix) :]
            if not param_value:
                msg = f"HTTP status code 400: the operation '{full_template}' requires a value for parameter '{first_param}'"
            else:
                msg = f"HTTP status code 400: the value '{param_value}' is not valid for parameter '{first_param}' in operation '{full_template}'"
            if "call" in item:
                msg += f". Example: {base_url}{item['call']}"
            return 400, msg, "text/plain"

        return 404, "HTTP status code 404: the operation requested does not exist", "text/plain"

233 

234 return 404, "HTTP status code 404: the operation requested does not exist", "text/plain"