Coverage for ramose / api_manager.py: 100%
131 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-15 15:58 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-15 15:58 +0000
1# SPDX-FileCopyrightText: 2018-2021 Silvio Peroni <silvio.peroni@unibo.it>
2# SPDX-FileCopyrightText: 2020-2021 Marilena Daquino <marilena.daquino2@unibo.it>
3# SPDX-FileCopyrightText: 2022 Davide Brembilla
4# SPDX-FileCopyrightText: 2024 Ivan Heibi <ivan.heibi2@unibo.it>
5# SPDX-FileCopyrightText: 2025 Sergei Slinkin
6# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
7#
8# SPDX-License-Identifier: ISC
10import csv
11from collections import OrderedDict
12from importlib import import_module
13from pathlib import Path
14from re import findall, match, sub
15from sys import maxsize, path
16from urllib.parse import urlsplit
18from ramose._constants import PARAM_NAME
19from ramose.cache import ResultCache
20from ramose.hash_format import HashFormatHandler, parse_custom_params, parse_disable_params
21from ramose.operation import Operation
class APIManager:
    """Registry of the API operations declared in one or more Hash Format configuration files.

    An instance reads every configuration file once (see ``__init__``) and is then used to resolve
    the URL of an incoming API call to the operation that must serve it (see ``best_match`` and
    ``get_op``)."""

    # Fixing max size for CSV
    @staticmethod
    def __max_size_csv():
        """Raise the field size limit of the ``csv`` module to the largest value it accepts.

        It starts from ``sys.maxsize`` and, every time ``csv.field_size_limit`` refuses the value
        with an ``OverflowError``, retries with a tenth of it."""
        max_int = maxsize
        while True:
            try:
                csv.field_size_limit(max_int)
                break
            except OverflowError:  # pragma: no cover
                # Integer floor division keeps the candidate an exact int (no float round trip).
                max_int //= 10

    @staticmethod
    def _parse_pairs(raw, pair_sep, field_name):
        """Parse a ';'-separated list of '<key><pair_sep><value>' entries into a dictionary.

        Entries that are empty after stripping are ignored, keys and values are stripped, and each
        entry is split on the first occurrence of 'pair_sep' only (so the value may contain the
        separator itself). 'field_name' is used only to build the error message.

        A ValueError is raised when an entry does not contain 'pair_sep'."""
        pairs = {}
        for chunk in raw.split(";"):
            entry = chunk.strip()
            if not entry:
                continue
            key, found, value = entry.partition(pair_sep)
            if not found:
                msg = f"malformed entry '{entry}' in field '{field_name}': expected '<key>{pair_sep}<value>'"
                raise ValueError(msg)
            pairs[key.strip()] = value.strip()
        return pairs

    @staticmethod
    def _parse_format_map(fm_val):
        """Build the dictionary mapping a format name to the name of the function handling it.

        'fm_val' is the value of the 'format' field of an operation: either one string or a list of
        strings, each being a ';'-separated list of '<format>,<function>' entries. Later entries
        override earlier ones having the same format name."""
        fm_list = fm_val if isinstance(fm_val, list) else [fm_val]
        format_map = {}
        for fm in fm_list:
            format_map.update(APIManager._parse_pairs(fm, ",", "format"))
        return format_map

    def __init__(self, conf_files, endpoint_override=None, cache_dir=None, cache_ttl=86400):
        """This is the constructor of the APIManager class. It takes in input a list of API configuration files, each
        defined according to the Hash Format and following a particular structure, and stores all the operations
        defined within a dictionary. Optionally, an endpoint_override parameter can be provided to override the
        SPARQL endpoint defined in the configuration files (useful for staging/production environments).
        The structure of each item in the dictionary of the operations is defined as follows:

        {
            "/api/v1/references/(.+)": {
                "sparql": "PREFIX ...",
                "method": "get",
                ...
            },
            ...
        }

        In particular, each key in the dictionary identifies the full URL pattern of a particular API operation,
        and it is used to understand which operation should be called once an API call is done. The object
        associated as value of this key is the related operation block read from the input Hash Format file.

        The first block of each configuration file describes the API itself: its 'url' is used as base URL, and
        its optional fields ('engine', 'sources', 'disable_params', 'addon', 'method') are stored, together with
        the operations, in 'self.all_conf' under that base URL. Every following block is an operation.

        If 'cache_dir' is truthy, a ResultCache is created on it, otherwise no cache is used. 'cache_ttl' is
        handed to every Operation as its default cache TTL (presumably expressed in seconds - to be confirmed
        against ResultCache/Operation)."""
        APIManager.__max_size_csv()

        self._cache = ResultCache(cache_dir) if cache_dir else None
        self._cache_ttl = cache_ttl

        self.all_conf = OrderedDict()
        self.base_url = []
        for conf_file in conf_files:
            conf = OrderedDict()
            tp = None
            conf_json = HashFormatHandler().read(conf_file)
            base_url = None
            addon = None
            website = ""
            sparql_http_method = "get"
            sources_map = {}
            engine = "sparql"
            disable_params_api: set[str] = set()
            for item in conf_json:
                if base_url is None:
                    # First block of the file: API-level settings.
                    base_url = item["url"]
                    self.base_url.append(item["url"])
                    website = item["base"]
                    tp = endpoint_override or item["endpoint"]

                    # Engine selection at API level (optional)
                    if "engine" in item:
                        engine = item["engine"].strip().lower()

                    # Optional: named sources registry
                    if "sources" in item:
                        sources_map.update(APIManager._parse_pairs(item["sources"], "=", "sources"))

                    if "disable_params" in item:
                        disable_params_api = parse_disable_params(item["disable_params"])

                    if "addon" in item:
                        # The addon is located relative to the configuration file declaring it.
                        addon_path = (Path(conf_file).parent / item["addon"]).resolve()
                        addon_dir = str(addon_path.parent)
                        # Do not let sys.path grow with duplicates when several configuration
                        # files (or several APIManager instances) share the same addon directory.
                        if addon_dir not in path:
                            path.append(addon_dir)
                        addon = import_module(addon_path.name)

                    if "method" in item:
                        sparql_http_method = item["method"].strip().lower()
                else:
                    # Any following block is an operation, indexed by its normalised URL pattern.
                    conf[APIManager.nor_api_url(item, base_url)] = item

            self.all_conf[base_url] = {
                "conf": conf,
                "tp": tp,
                "conf_json": conf_json,
                "base_url": base_url,
                "website": website,
                "addon": addon,
                "sparql_http_method": sparql_http_method,
                "sources_map": sources_map,
                "engine": engine,
                "disable_params": disable_params_api,
            }

        self._operation_prefixes = APIManager._build_operation_prefixes(self.all_conf)

    @staticmethod
    def _build_operation_prefixes(all_conf):
        """Return the list of (prefix, base URL, operation) tuples used to explain unmatched calls.

        For every operation whose URL template contains a '{', the prefix is the base URL followed
        by the part of the template preceding the first '{'. Operations without any '{' are left
        out. The list is sorted from the longest prefix to the shortest one, so that the most
        specific operation is found first when scanning it."""
        prefixes = []
        for base, api_data in all_conf.items():
            for item in api_data["conf"].values():
                template = item["url"]
                brace_pos = template.find("{")
                if brace_pos != -1:
                    prefixes.append((base + template[:brace_pos], base, item))
        prefixes.sort(key=lambda entry: len(entry[0]), reverse=True)
        return prefixes

    @staticmethod
    def nor_api_url(i, b=""):
        """This method takes an API operation object and an optional base URL (e.g. "/api/v1") as input
        and returns the URL composed by the base URL plus the API URL normalised according to specific rules. In
        particular, these normalisation rules take the operation URL (e.g. "#url /citations/{oci}") and the
        specification of the shape of all the parameters between brackets in the URL (e.g. "#oci str([0-9]+-[0-9]+)"),
        and return a new operation URL where the parameters have been substituted with the regular expressions
        defining them (e.g. "/citations/([0-9]+-[0-9]+)"). A parameter with no specification in the operation
        object is treated as "str(.+)". This URL will be used by RAMOSE for matching the
        particular API calls with the specific operation to execute."""
        result = i["url"]

        for term in findall(PARAM_NAME, result):
            shape = i.get(term, "str(.+)")
            # Keep only the parenthesised regular expression, dropping the type name in front of
            # it; a specification that does not follow that shape is used unchanged.
            result = result.replace(f"{{{term}}}", sub(r"^[^\(]+(\(.+\))$", r"\1", shape))

        return f"{b}{result}"

    def best_match(self, u):
        """This method takes the URL of an API call in input and finds the API operation URL pattern and the
        related configuration that match the API call. The query string, if any, is ignored when matching.

        It returns the tuple (configuration, pattern) of the first match found, where the configuration is
        the item of 'self.all_conf' the operation belongs to, or (None, None) if no operation matches."""
        cur_u = sub(r"\?.*$", "", u)
        for base_url in self.all_conf:
            if u.startswith(base_url):
                conf = self.all_conf[base_url]
                for pat in conf["conf"]:
                    if match(f"^{pat}$", cur_u):
                        return conf, pat
        return None, None

    def get_op(self, op_complete_url):
        """This method returns a new object of type Operation which represents the operation specified by
        the input URL (parameter 'op_complete_url'). In case no operation can be found by checking
        the configuration files available in the APIManager, a tuple (HTTP status code, message, content
        type) is returned instead: status 400 when the path starts with the prefix of a known operation but
        the value of its parameter is missing or not valid, status 404 otherwise."""
        url_parsed = urlsplit(op_complete_url)
        op_url = url_parsed.path

        conf, op = self.best_match(op_url)
        if conf is not None:
            op_conf = conf["conf"][op]
            # The engine declared by the operation, if any, wins over the API-level one.
            op_engine = conf.get("engine", "sparql")
            if "engine" in op_conf:
                op_engine = op_conf["engine"].strip().lower()

            # Build op-level format map from the operation block
            op_format_map = APIManager._parse_format_map(op_conf["format"]) if "format" in op_conf else {}

            custom_params_map = parse_custom_params(op_conf["custom_params"]) if "custom_params" in op_conf else {}

            # Parameters disabled at API level and at operation level add up.
            api_disabled = conf["disable_params"]
            op_disabled = parse_disable_params(op_conf["disable_params"]) if "disable_params" in op_conf else set()
            effective_disabled = api_disabled | op_disabled

            return Operation(
                op_complete_url,
                op,
                op_conf,
                conf["tp"],
                conf["sparql_http_method"],
                conf["addon"],
                op_format_map,
                conf.get("sources_map", {}),
                op_engine,
                custom_params_map,
                effective_disabled,
                cache=self._cache,
                default_cache_ttl=self._cache_ttl,
            )

        # No operation matches: look for the most specific operation sharing its prefix with the
        # requested path, so as to return a meaningful 400 error instead of a generic 404.
        for prefix, base_url, item in self._operation_prefixes:
            if op_url.startswith(prefix):
                template = item["url"]
                full_template = base_url + template
                param_value = op_url[len(prefix) :]
                param_names = findall(PARAM_NAME, template)
                if not param_names:
                    # The template contains a '{' but no parameter name recognised by PARAM_NAME:
                    # there is nothing to report about it, so try the next candidate.
                    continue
                if not param_value:
                    msg = f"HTTP status code 400: the operation '{full_template}' requires a value for parameter '{param_names[0]}'"
                else:
                    msg = f"HTTP status code 400: the value '{param_value}' is not valid for parameter '{param_names[0]}' in operation '{full_template}'"
                if "call" in item:
                    msg += f". Example: {base_url}{item['call']}"
                return 400, msg, "text/plain"

        return 404, "HTTP status code 404: the operation requested does not exist", "text/plain"