Coverage for ramose.py: 52%

1145 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-04 15:04 +0000

1# SPDX-FileCopyrightText: 2018-2021 essepuntato <essepuntato@gmail.com> 

2# SPDX-FileCopyrightText: 2020-2021 marilena <marilena.daquino2@unibo.it> 

3# SPDX-FileCopyrightText: 2022 dbrembilla <davide.brembilla98@gmail.com> 

4# SPDX-FileCopyrightText: 2024 Ivan Heibi <ivanhb.ita@gmail.com> 

5# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it> 

6# 

7# SPDX-License-Identifier: ISC 

8 

9 

10from abc import abstractmethod 

11from re import search, DOTALL, findall, sub, match, split 

12from requests import get, post, put, delete 

13from requests.exceptions import RequestException 

14from requests import Session as _RequestsSession 

15_http_session = _RequestsSession() 

16from csv import DictReader, reader, writer 

17from json import dumps 

18from io import StringIO 

19from sys import exc_info, maxsize, path 

20from collections import OrderedDict 

21from markdown import markdown 

22from importlib import import_module 

23from urllib.parse import parse_qs, urlsplit, quote, unquote 

24from operator import add, itemgetter, gt, eq, lt 

25from dateutil.parser import parse 

26from datetime import datetime 

27from isodate import parse_duration 

28from argparse import ArgumentParser 

29import json 

30import logging 

31import pysparql_anything 

32import re 

33import time 

34import yaml 

35from os.path import abspath, dirname, basename 

36from os import path as pt 

37from os import sep, getcwd 

38from itertools import product 

39 

40 

# Matches one "<type>(<field>)" pair in a '#field_type' configuration value,
# capturing the type name (group 1) and the field name (group 2).
FIELD_TYPE_RE = r"([^\(\s]+)\(([^\)]+)\)"
# Matches one "{param}" placeholder in an operation URL template,
# capturing the parameter name.
PARAM_NAME = r"{([^{}\(\)]+)}"
# Default timeout, in seconds, for outgoing HTTP requests.
DEFAULT_HTTP_TIMEOUT = 60

44 

45 

class HashFormatHandler(object):
    """Reader for files stored in Hash Format (see
    https://github.com/opencitations/ramose#Hashformat-configuration-file). A Hash Format
    file (.hf) is a specification file that includes information structured using the following
    syntax:

    ```
    #<field_name_1> <field_value_1>
    #<field_name_1> <field_value_2>
    #<field_name_3> <field_value_3>
    [...]
    #<field_name_n> <field_value_n>
    ```"""

    def read(self, file_path):
        """Parse the Hash Format document stored at 'file_path' and return its
        content as a list of dictionaries, one per object described in the file."""
        objects = []

        with open(file_path, "r", newline=None) as hf_file:
            separator_key = None
            current = None
            field = None
            for line in hf_file:
                matched = search(r"^#([^\s]+)\s(.+)$", line, DOTALL)
                if matched is not None:
                    field, value = matched.group(1), matched.group(2)
                    # Only process entries with both a name and a content
                    if field and value:
                        # The first field name seen acts as the object separator
                        if separator_key is None:
                            separator_key = field
                        # A new occurrence of the separator starts a new object;
                        # flush the previous one (if any) into the result first
                        if field == separator_key:
                            if current is not None:
                                objects.append(current)
                            current = {}
                        current[field] = value
                elif current:
                    # Continuation line: extend the value of the last seen field
                    current[field] += line
            # Flush the last object collected, if any
            if current:
                objects.append(current)

        # Strip the trailing newline(s) kept by the DOTALL match and by the
        # continuation-line concatenation above
        for obj in objects:
            for key in obj:
                obj[key] = obj[key].rstrip()

        return objects

class DocumentationHandler(object):
    def __init__(self, api_manager):
        """Base structure for rendering a human-readable documentation of all
        the operations described in the configuration files handled by the
        APIManager given as input."""
        # Keep the parsed configuration of every API handled by the manager.
        self.conf_doc = api_manager.all_conf

    @abstractmethod
    def get_documentation(self, *args, **dargs):
        """Return a string with the human-readable documentation of the
        operations made available by the input APIManager."""
        pass  # pragma: no cover

    @abstractmethod
    def store_documentation(self, file_path, *args, **dargs):
        """Store in 'file_path' the human-readable documentation of the
        operations made available by the input APIManager."""
        pass  # pragma: no cover

    @abstractmethod
    def get_index(self, *args, **dargs):
        """Return a string with the index of all the configuration files
        handled by the input APIManager."""
        pass  # pragma: no cover

class HTMLDocumentationHandler(DocumentationHandler):
    # HTML documentation: START
    def __title(self, conf):
        """Return the API title declared in the specification (the first
        element of 'conf_json' holds the API-level metadata)."""
        api_metadata = conf["conf_json"][0]
        return api_metadata["title"]

139 def __htmlmetadescription(self, conf): 

140 """This method returns the HTML meta-description tag defined in the API specification.""" 

141 desc = conf["conf_json"][0].get("html_meta_description") 

142 if desc: 

143 return '<meta name="description" content="%s"/>' % desc 

144 return "" # pragma: no cover 

145 

    def __sidebar(self, conf):
        """This method builds the sidebar of the API documentation"""
        result = ""

        # The first element of 'conf_json' holds the API-level metadata;
        # the following elements describe the single operations.
        i = conf["conf_json"][0]
        result += """
<h4>%s</h4>
<ul id="sidebar_menu" class="sidebar_menu">
    <li><a class="btn active" href="#description">DESCRIPTION</a></li>
    <li><a class="btn" href="#parameters">PARAMETERS</a></li>
    <li><a class="btn" href="#operations">OPERATIONS</a>
        <ul class="sidebar_submenu">%s</ul>
    </li>
    <li><a class="btn active" href="/">HOME</a></li>
</ul>
""" % \
            (i["title"], "".join(["<li><a class='btn' href='#%s'>%s</a></li>" % (op["url"], op["url"])
                                  for op in conf["conf_json"][1:]]))
        return result

    def __header(self, conf):
        """This method builds the header of the API documentation (title,
        version, API URL, contacts, license, description, and the shared
        parameters section), rendered from Markdown to HTML."""
        result = ""

        # API-level metadata lives in the first element of 'conf_json'.
        i = conf["conf_json"][0]
        result += """
<a id='toc'></a>
# %s

**Version:** %s <br/>
**API URL:** <a href="%s">%s</a><br/>
**Contact:** %s<br/>
**License:** %s<br/>



## <a id="description"></a>Description [back to top](#toc)

%s

%s""" % \
            (i["title"], i["version"], i["base"] + i["url"], i["base"] + i["url"], i["contacts"], i["license"],
             i["description"], self.__parameters())
        return markdown(result)

    def __parameters(self):
        """Return the Markdown-rendered section documenting the filtering
        parameters (require, filter, sort, format, json) shared by all the
        API operations."""
        result = """## <a id="parameters"></a>Parameters [back to top](#toc)

Parameters can be used to filter and control the results returned by the API. They are passed as normal HTTP parameters in the URL of the call. They are:

1. `require=<field_name>`: all the rows that have an empty value in the `<field_name>` specified are removed from the result set - e.g. `require=given_name` removes all the rows that do not have any string specified in the `given_name` field.

2. `filter=<field_name>:<operator><value>`: only the rows compliant with `<value>` are kept in the result set. The parameter `<operation>` is not mandatory. If `<operation>` is not specified, `<value>` is interpreted as a regular expression, otherwise it is compared by means of the specified operation. Possible operators are "=", "<", and ">". For instance, `filter=title:semantics?` returns all the rows that contain the string "semantic" or "semantics" in the field `title`, while `filter=date:>2016-05` returns all the rows that have a `date` greater than May 2016.

3. `sort=<order>(<field_name>)`: sort in ascending (`<order>` set to "asc") or descending (`<order>` set to "desc") order the rows in the result set according to the values in `<field_name>`. For instance, `sort=desc(date)` sorts all the rows according to the value specified in the field `date` in descending order.

4. `format=<format_type>`: the final table is returned in the format specified in `<format_type>` that can be either "csv" or "json" - e.g. `format=csv` returns the final table in CSV format. This parameter has higher priority of the type specified through the "Accept" header of the request. Thus, if the header of a request to the API specifies `Accept: text/csv` and the URL of such request includes `format=json`, the final table is returned in JSON.

5. `json=<operation_type>("<separator>",<field>,<new_field_1>,<new_field_2>,...)`: in case a JSON format is requested in return, tranform each row of the final JSON table according to the rule specified. If `<operation_type>` is set to "array", the string value associated to the field name `<field>` is converted into an array by splitting the various textual parts by means of `<separator>`. For instance, considering the JSON table `[ { "names": "Doe, John; Doe, Jane" }, ... ]`, the execution of `array("; ",names)` returns `[ { "names": [ "Doe, John", "Doe, Jane" ], ... ]`. Instead, if `<operation_type>` is set to "dict", the string value associated to the field name `<field>` is converted into a dictionary by splitting the various textual parts by means of `<separator>` and by associating the new fields `<new_field_1>`, `<new_field_2>`, etc., to these new parts. For instance, considering the JSON table `[ { "name": "Doe, John" }, ... ]`, the execution of `dict(", ",name,fname,gname)` returns `[ { "name": { "fname": "Doe", "gname": "John" }, ... ]`.

It is possible to specify one or more filtering operation of the same kind (e.g. `require=given_name&require=family_name`). In addition, these filtering operations are applied in the order presented above - first all the `require` operation, then all the `filter` operations followed by all the `sort` operation, and finally the `format` and the `json` operation (if applicable). It is worth mentioning that each of the aforementioned rules is applied in order, and it works on the structure returned after the execution of the previous rule.

Example: `<api_operation_url>?require=doi&filter=date:>2015&sort=desc(date)`."""
        return markdown(result)

    def __operations(self, conf):
        """This method returns the description of all the operations defined in the API."""
        result = """## Operations [back to top](#toc)
The operations that this API implements are:
"""
        ops = "\n"

        # One entry in the summary list ('result') and one detail card ('ops')
        # per operation described in the configuration file.
        for op in conf["conf_json"][1:]:
            params = []
            for p in findall(PARAM_NAME, op["url"]):
                # Defaults when the parameter has no "type(regex)" spec.
                p_type = "str"
                p_shape = ".+"
                if p in op:
                    p_type, p_shape = findall(
                        r"^\s*([^\(]+)\((.+)\)\s*$", op[p])[0]

                params.append(
                    "<em>%s</em>: type <em>%s</em>, regular expression shape <code>%s</code>" % (p, p_type, p_shape))
            # Summary bullet linking to the operation's detail card.
            result += "\n* [%s](#%s): %s" % (op["url"],
                                             op["url"], op["description"].split("\n")[0])
            # Detail card: methods, parameters, result field types, example
            # call, and exemplar JSON output.
            ops += """<div id="%s">
<h3>%s <a href="#operations">back to operations</a></h3>

%s

<p class="attr"><strong>Accepted HTTP method(s)</strong> <span class="attr_val method">%s</span></p>
<p class="attr params"><strong>Parameter(s)</strong> <span class="attr_val">%s</span></p>
<p class="attr"><strong>Result fields type</strong><span class="attr_val">%s</span></p>
<p class="attr"><strong>Example</strong><span class="attr_val"><a target="_blank" href="%s">%s</a></span></p>
<p class="ex attr"><strong>Exemplar output (in JSON)</strong></p>
<pre><code>%s</code></pre></div>""" % (op["url"], op["url"], markdown(op["description"]),
                                      ", ".join(
                                          split(r"\s+", op["method"].strip())), "</li><li>".join(params),
                                      ", ".join(["%s <em>(%s)</em>" % (f, t) for t, f in
                                                 findall(FIELD_TYPE_RE, op["field_type"])]),
                                      conf["website"] + conf["base_url"] + op["call"], op["call"], op["output_json"])
        return markdown(result) + ops

    def __footer(self):
        """This method returns the footer of the API documentation."""
        result = """This API and the related documentation has been created with <a href="https://github.com/opencitations/ramose" target="_blank">RAMOSE</a>, the *Restful API Manager Over SPARQL Endpoints*, developed by <a href="http://orcid.org/0000-0003-0530-4305" target="_blank">Silvio Peroni</a> and <a href="https://marilenadaquino.github.io">Marilena Daquino</a>."""
        return markdown(result)

    def __css(self):
        """Return the CSS stylesheet embedded inline in every generated HTML
        page (both the per-API documentation and the dashboard index)."""
        return """
        @import url('https://fonts.googleapis.com/css2?family=Karla:wght@300;400&display=swap');
        @media screen and (max-width: 850px) {
            aside { display: none; }
            main, #operations, .dashboard, body>footer {margin-left: 15% !important;}
            #operations > ul:nth-of-type(1) li { display:block !important; max-width: 100% !important; }
            h3 a[href] {display:block !important; float: none !important; font-size: 0.5em !important;}
            a {overflow: hidden; text-overflow: ellipsis;}
            .info_api, .api_calls {display: block !important; max-width: 100% !important;}
        }

        * {
            font-family: 'Karla', Geneva, sans-serif;
        }

        body {
            margin: 3% 15% 7% 0px;
            line-height: 1.5em;
            letter-spacing: 0.02em;
            font-size : 1em;
            font-weight:300;
            color: #303030;
            text-align: justify;
            background-color: #edf0f2;
        }

        aside {
            height : 100%;
            width: 20%;
            position: fixed;
            z-index: 1;
            top: 0;
            left: 0;
            /*background-color: #404040;*/
            overflow-x: hidden;
            background-color: white;
            box-shadow:0px 10px 30px 0px rgba(133,66,189,0.1);
        }
        p strong {
            text-transform: uppercase;
            font-size: 0.9em;
        }
        aside h4 {
            padding: 20px 9%;
            margin: 0px !important;
            color: #9931FC;
            text-align: left !important;
        }

        .sidebar_menu , .sidebar_submenu {
            list-style-type: none;
            padding-left:0px !important;
            margin-top: 10px;

        }

        .sidebar_menu > li {
            padding: 2% 0px;
            border-top : solid 0.7px grey;
        }

        .sidebar_menu a {
            padding: 1% 9%;
            background-image: none !important;
            color: grey;
            display: block;
        }

        .sidebar_menu a:hover {
            border-left: solid 5px rgba(154, 49, 252,.5);
            font-weight: 400;
        }

        .sidebar_submenu > li {
            padding-left:0px !important;
            background-color:#edf0f2;
            font-size: 0.8em;
        }

        main , #operations , .dashboard, body>footer {
            margin-left: 33%;
        }
        .dashboard {text-align: center;}
        main h1+p , .info_api{

            padding-left: 3%;
            font-size: 0.9em;
            line-height: 1.4em;
        }

        main h1+p {border-left: solid 5px rgba(154, 49, 252,.5);}

        #operations h3 {
            color: #9931FC;
            margin-bottom: 0px;
            padding: 10px;
        }

        #operations > ul:nth-of-type(1) {
            padding-left: 0px !important;
            text-align: center;
        }

        #operations > ul:nth-of-type(1) li {
            background-color: white;
            text-align: left;
            display: inline-block;
            overflow: hidden;
            text-overflow: ellipsis;
            max-width: 35%;
            height: 200px;
            padding:4%;
            margin: 1% 2% 1% 0px;
            border-radius: 10px;
            box-shadow: 0px 10px 30px 0px rgba(133,66,189,0.1);
            vertical-align:top;
        }

        #operations > div {
            background-color: white;
            margin-top: 20px;
            padding: 2%;
            border-radius: 18px;
            box-shadow: 0px 10px 30px 0px rgba(133,66,189,0.1);
        }

        #operations > div > * {
            padding: 0px 2%;
        }

        #operations > div ul, .params+ul{
            list-style-type: none;
            font-size: 0.85em;
        }
        #operations > div ul:nth-of-type(1) li, .params+ul li {
            margin: 10px 0px;
        }

        #operations > div ul:nth-of-type(1) li em, .params+ul li em {
            font-style: normal;
            font-weight: 400;
            color: #9931FC;
            border-left: solid 2px #9931FC;
            padding:5px;
        }

        .attr {
            border-top: solid 1px rgba(133,66,189,0.1);
            padding: 2% !important;
            display:block;
            vertical-align: top;
            font-size: 0.8em;
            text-align: left;
        }

        .attr strong {
            width: 30%;
            color: grey;
            font-weight: 400;
            font-style: normal;
            display:inline-block;
            vertical-align: top;
        }

        .attr_val {
            max-width: 50%;
            display:inline-table;
            height: 100%;
            vertical-align: top;
        }

        .method {
            text-transform: uppercase;
        }

        .params {
            margin-bottom: 0;
        }

        pre {
            background-color: #f0f0f5;
            padding: 10px;
            margin-top: 0;
            margin-bottom: 0;
            border-radius: 0 0 14px 14px;
            font-family: monospace !important;
            overflow: scroll;
            line-height: 1.2em;
            height: 250px;
        }

        pre code {
            font-family: monospace !important;
        }

        p.ex {
            background-color: #f0f0f5;
            margin-bottom: 0px;
            padding-top: 5px;
            padding-bottom: 5px;
        }

        h2:first-of-type {
            margin-bottom: 15px;
        }

        ol:first-of-type {
            margin-top: 0;
        }

        :not(pre) > code {
            background-color: #f0f0f5;
            color: #8585ad;
            padding: 0 2px 0 2px;
            border-radius: 3px;
            font-family : monospace;
            font-size: 1.2em !important;
        }

        /**:not(div) > p {
            margin-left: 1.2%;
        }*/

        h1 {font-size: 2.5em;}
        h1, h2 {
            text-transform: uppercase;
        }

        h1, h2, h3, h4, h5, h6 {
            line-height: 1.2em;
            padding-top:1em;
            text-align: left !important;
            font-weight:400;
        }

        h2 ~ h2, section > h2 {

            padding-top: 5px;
            margin-top: 40px;
        }

        h2 a[href], h3 a[href] {
            background-image: none;
            text-transform:uppercase;
            padding: 1px 3px 1px 3px;
            font-size: 12pt;
            float: right;
            position:relative;
            top: -3px;
        }

        h2 a[href]::before , h3 a[href]::before {
            content: " \u2191";
            width: 20px;
            height: 20px;
            display:inline-block;
            color: #9931FC;
            text-align:center;
            margin-right: 10px;
        }

        /*h3 a[href] {
            color:white
            background-image: none;
            text-transform:uppercase;
            padding: 1px 3px 1px 3px;
            font-size: 8pt !important;
            border: 1px solid #9931FC;
            float: right;
            position:relative;
            top: -11px;
            right: -11px;
            border-radius: 0 14px 0 0;
        }*/

        p {
            overflow-wrap: break-word;
            word-wrap: break-word;
        }

        a {
            color : black;
            text-decoration: none;
            background-image: -webkit-gradient(linear,left top, left bottom,color-stop(50%, transparent),color-stop(0, rgba(154, 49, 252,.5)));
            background-image: linear-gradient(180deg,transparent 50%,rgba(154, 49, 252,.5) 0);
            background-position-y: 3px;
            background-position-x: 0px;
            background-repeat: no-repeat;
            -webkit-transition: .15s ease;
            transition: .15s ease;
        }

        a:hover {
            color: #282828;
            background-position: top 6px right 0px;
            background-image: -webkit-gradient(linear,left top, left bottom,color-stop(60%, transparent),color-stop(0, #9931FC));
            background-image: linear-gradient(180deg,transparent 60%,#9931FC 0);
        }

        footer {
            margin-top: 20px;
            border-top: 1px solid lightgrey;
            text-align: center;
            color: grey;
            font-size: 9pt;
        }
        /* dashboard */

        .info_api {
            max-width: 35%;
            border-radius: 15px;
            text-align: left;
            vertical-align: top;
            background-color: #9931FC;
            color: white;
        }

        .info_api, .api_calls {
            display: inline-block;
            text-align: left;
            height: 200px;
            padding:4%;
            margin: 1% 2% 1% 0px;
            border-radius: 10px;
            box-shadow: 0px 10px 30px 0px rgba(133,66,189,0.1);
            vertical-align:top;
        }

        .api_calls {
            max-width: 40%;
            background-color: white;
            scroll-behavior: smooth;
            overflow: auto;
            overflow-y: scroll;
            scrollbar-color: #9931FC rgb(154, 49, 252);
            border-radius: 10px;
        }
        .api_calls div {padding-bottom:2%;}

        .api_calls:hover {
            overflow-y: scroll;
        }
        .api_calls h4, .info_api h2 {padding-top: 0px !important; margin-top: 0px !important;}
        .api_calls div p {
            padding: 0.2em 0.5em;
            border-top: solid 1px #F8F8F8;
        }

        .date_log , .method_log {
            color: grey;
            font-size: 0.8em;

        }
        .method_log {margin-left: 15px;}
        .date_log {display:inline-grid;}

        .group_log:nth-child(odd) {
            margin-right:5px;
            font-size: 0.9em;
        }

        .group_log:nth-child(even) {
            display: inline-grid;
            vertical-align: top;
        }
        .status_log {padding-right:15px;}
        .status_log::before {
            content: '';
            display: inline-block;
            width: 1em;
            height: 1em;
            vertical-align: middle;
            -moz-border-radius: 50%;
            -webkit-border-radius: 50%;
            border-radius: 50%;
            background-color: grey;
            margin-right: 0.8em;
        }

        .code_200::before {
            background-color: #00cc00;
        }

        .code_404::before {
            background-color: #cccc00;
        }

        .code_500::before {
            background-color: #cc0000;
        }

        """

654 def __css_path(self, css_path=None): 

655 """Add link to a css file if specified in argument -css""" 

656 return """<link rel="stylesheet" type="text/css" href='"""+css_path+"""'>""" if css_path else "" 

657 

    def logger_ramose(self):  # pragma: no cover
        """This method adds logging info to a local file ('ramose.log'),
        mirroring every record on the console as well."""
        # Shared format for both handlers attached to the root logger.
        logFormatter = logging.Formatter(
            "[%(asctime)s] [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s")
        rootLogger = logging.getLogger()

        # Persist records to 'ramose.log' in the current working directory;
        # this file is later parsed by __parse_logger_ramose for the dashboard.
        fileHandler = logging.FileHandler("ramose.log")
        fileHandler.setFormatter(logFormatter)
        rootLogger.addHandler(fileHandler)

        # Mirror the same records on the console.
        consoleHandler = logging.StreamHandler()
        consoleHandler.setFormatter(logFormatter)
        rootLogger.addHandler(consoleHandler)

    def __parse_logger_ramose(self):
        """This method reads logging info stored into a local file, so as to be browsed in the dashboard.
        Returns: the html including the list of URLs of current working APIs and basic logging info """
        try:
            with open("ramose.log") as l_f:
                logs = ''.join(l_f.readlines())
        except FileNotFoundError:
            # No log file yet: render the dashboard with an empty call history.
            logs = ""
        # De-duplicate log lines while keeping the most recent occurrence
        # first (the set records what has already been seen).
        rev_list = set()
        rev_list_add = rev_list.add
        rev_list = [x for x in list(reversed(logs.splitlines())) if not (
            x in rev_list or rev_list_add(x))]

        html = """
        <p></p>
        <aside>
        <h4>RAMOSE API DASHBOARD</h4>
        <ul id="sidebar_menu" class="sidebar_menu">"""

        # One sidebar entry per configured API.
        for api_url, api_dict in self.conf_doc.items():
            html += """
            <li><a class="btn active" href="%s">%s</a></li>
            """ % (api_url, api_dict["conf_json"][0]["title"])

        html += """
        </ul>
        </aside>
        <header class="dashboard">
        <h1>API MONITORING</h1>"""

        # One dashboard card per API: documentation/endpoint links plus its
        # latest (non-debug) calls extracted from the log.
        for api_url, api_dict in self.conf_doc.items():
            clean_list = [
                l for l in rev_list if api_url in l and "debug" not in l]
            api_logs_list = ''.join(["<p>"+self.clean_log(l, api_url)
                                     + "</p>" for l in clean_list if self.clean_log(l, api_url) != ''])
            api_title = api_dict["conf_json"][0]["title"]
            html += """
            <div class="info_api">
            <h2>%s</h2>
            <a id="view_doc" href="%s">VIEW DOCUMENTATION</a><br/>
            <a href="%s">GO TO SPARQL ENDPOINT</a><br/>
            </div>
            <div class="api_calls">
            <h4>Last calls</h4>
            <div>
            %s
            </div>

            </div>
            """ % (api_title, api_url, api_dict["tp"], api_logs_list)
        return html

    def get_documentation(self, css_path=None, base_url=None):
        """This method generates the HTML documentation of an API described in
        a configuration file. Returns a (status, html) pair where the status
        is always 200."""
        # When no base URL is given, document the first configured API.
        if base_url is None:
            first_key = next(iter(self.conf_doc))
            conf = self.conf_doc[first_key]
        else:
            conf = self.conf_doc['/'+base_url]

        return 200, """<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
    <head>
        <title>%s</title>
        %s
        <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
        <meta name="viewport" content="width=device-width" />
        <style>%s</style>
        %s
    </head>
    <body>
        <aside>%s</aside>
        <main>%s</main>
        <section id="operations">%s</section>
        <footer>%s</footer>
    </body>
</html>""" % (
            self.__title(conf),
            self.__htmlmetadescription(conf),
            self.__css(),
            self.__css_path(css_path),
            self.__sidebar(conf),
            self.__header(conf),
            self.__operations(conf),
            self.__footer()
        )

    def get_index(self, css_path=None):
        """This method generates the index (dashboard) of all the HTML
        documentations that can be created from the configuration file."""

        return """
        <!doctype html>
        <html lang="en">
        <head>
        <meta charset="utf-8">
        <title>RAMOSE</title>
        <meta name="description" content="Documentation of RAMOSE API Manager">
        <style>%s</style>
        %s
        </head>
        <body>
        %s
        <footer>%s</footer>
        </body>
        </html>
        """ % (self.__css(), self.__css_path(css_path), self.__parse_logger_ramose(), self.__footer())

781 def store_documentation(self, file_path, css_path=None): 

782 """This method stores the HTML documentation of an API in a file.""" 

783 _, html = self.get_documentation(css_path) 

784 with open(file_path, "w") as f: 

785 f.write(html) 

786 

787 def clean_log(self, l, api_url): 

788 """This method parses logs lines into structured data.""" 

789 if "- - " not in l: 

790 return '' 

791 s = l.split("- - ", 1)[1] 

792 date = s[s.find("[")+1:s.find("]")] 

793 method = s.split('"')[1::2][0].split()[0] 

794 cur_call = s.split('"')[1::2][0].split()[1].strip() 

795 status = sub(r"\D+", "", s.split('"', 2)[2]) 

796 if cur_call != api_url+'/': 

797 full_str = "<span class='group_log'><span class='status_log code_"+status+"'>"+status+"</span>"+"<span class='date_log'>"+date+"</span><span class='method_log'>" + \ 

798 method+"</span></span>"+"<span class='group_log'><span class='call_log'><a href='" + \ 

799 cur_call+"' target='_blank'>"+cur_call+"</a></span></span>" 

800 else: 

801 full_str = '' 

802 return full_str 

803 

804 

class OpenAPIDocumentationHandler(DocumentationHandler):
    """
    Export RAMOSE .hf configuration(s) to an OpenAPI 3.0 YAML specification.

    Notes:
    - OpenAPI is a surface contract. RAMOSE implementation details are preserved as vendor extensions.
    - Extra RAMOSE config fields from Tables 1-2 are kept as x-ramose-* where OpenAPI has no native field.
    """

    # -------------------------
    # Small utilities
    # -------------------------

817 def _normalize_base_url(self, base_url): 

818 if base_url is None: 

819 return None 

820 return base_url[1:] if base_url.startswith("/") else base_url 

821 

822 def _get_conf(self, base_url=None): 

823 if base_url is None: 

824 first_key = next(iter(self.conf_doc)) 

825 return self.conf_doc[first_key] 

826 base_url = self._normalize_base_url(base_url) 

827 return self.conf_doc["/" + base_url] 

828 

829 def _schema_for_ramose_type(self, t): 

830 t = (t or "str").strip().lower() 

831 if t == "int": 

832 return {"type": "integer"} 

833 if t == "float": 

834 return {"type": "number"} 

835 if t == "datetime": 

836 return {"type": "string", "format": "date-time"} 

837 if t == "duration": 

838 # OpenAPI doesn't standardize duration; still useful as hint. 

839 return {"type": "string", "format": "duration"} 

840 return {"type": "string"} 

841 

842 def _parse_param_type_shape(self, s): 

843 # expected "type(regex)" 

844 try: 

845 t, shape = findall(r"^\s*([^\(]+)\((.+)\)\s*$", s)[0] 

846 return t.strip(), shape.strip() 

847 except Exception: 

848 return "str", ".+" 

849 

850 def _guess_contact(self, contacts_value): 

851 """ 

852 Table 1: '#contacts <contact_url>' but in practice it's often an email. 

853 Prefer OpenAPI contact.email when it looks like an email. 

854 """ 

855 if not contacts_value: 

856 return None 

857 c = str(contacts_value).strip() 

858 if "@" in c and " " not in c and "/" not in c: 

859 return {"email": c} 

860 return {"name": c} 

861 

862 def _clean_text(self, v): 

863 """ 

864 Normalize text coming from .hf parsing so Swagger/ YAML render nicely: 

865 - remove wrapping quotes if they were included as part of the value 

866 - turn literal '\\n' into real newlines 

867 - trim whitespace 

868 """ 

869 if v is None: 

870 return None 

871 s = str(v).strip() 

872 # Strip wrapping quotes if parser stored them as part of the value 

873 if len(s) >= 2 and ((s[0] == s[-1] == '"') or (s[0] == s[-1] == "'")): 

874 s = s[1:-1].strip() 

875 # Convert literal backslash-n sequences to actual newlines 

876 s = s.replace("\\n", "\n") 

877 return s 

878 

879 def _param_hint_from_preprocess(self, preprocess_str, param_name): 

880 """ 

881 Table 2: preprocess functions like 'lower(doi) --> split_dois(dois)'. 

882 Not formalizable in OpenAPI, but helpful as a hint. 

883 """ 

884 if not preprocess_str: 

885 return "" 

886 s = str(preprocess_str) 

887 # Any function call mentioning the param inside (...)? 

888 if re.search(r"\([^)]*\b" + re.escape(param_name) + r"\b[^)]*\)", s): 

889 return f"Note: input is pre-processed by RAMOSE: {s}" 

890 return "" 

891 

892 def _try_parse_output_json(self, output_json_value): 

893 """ 

894 Table 2: '#output_json <ex_response>' (JSON example). 

895 """ 

896 if not output_json_value: 

897 return None 

898 try: 

899 return json.loads(output_json_value) 

900 except Exception: 

901 return None 

902 

903 # ------------------------- 

904 # Formats / media-types 

905 # ------------------------- 

906 def _collect_format_tokens(self, conf): 

907 # always supported by RAMOSE docs 

908 formats = {"csv", "json"} 

909 for op in conf["conf_json"][1:]: 

910 if "format" in op: 

911 fm_val = op["format"] 

912 fm_list = fm_val if isinstance(fm_val, list) else [fm_val] 

913 for fm in fm_list: 

914 for part in str(fm).split(";"): 

915 part = part.strip() 

916 if not part: 

917 continue 

918 # expected "fmt,func" 

919 fmt = part.split(",", 1)[0].strip() 

920 if fmt: 

921 formats.add(fmt) 

922 return sorted(formats) 

923 

    def _media_type_for_format(self, fmt):
        """Map a RAMOSE format token (e.g. 'json', 'csv', 'ttl') to its IANA
        media type; returns None for unknown tokens."""
        fmt = (fmt or "").strip().lower()
        mapping = {
            "json": "application/json",
            "csv": "text/csv",
            "xml": "application/xml",
            "rdfxml": "application/rdf+xml",
            "rdf+xml": "application/rdf+xml",
            "ttl": "text/turtle",
            "turtle": "text/turtle",
            "nt": "application/n-triples",
            "ntriples": "application/n-triples",
            "n-triples": "application/n-triples",
            "nq": "application/n-quads",
            "n-quads": "application/n-quads",
            "trig": "application/trig",
        }
        return mapping.get(fmt, None)

943 def _build_response_content(self, ok_schema, formats_enum, ok_example=None, err_schema_ref=None): 

944 """ 

945 Build OpenAPI 'content' dict for responses based on supported formats. 

946 JSON gets structured schema. Others are represented as string payloads. 

947 If err_schema_ref is provided, also returns an error-content dict. 

948 """ 

949 content = OrderedDict() 

950 

951 # JSON: structured 

952 content["application/json"] = {"schema": ok_schema} 

953 if ok_example is not None: 

954 content["application/json"]["examples"] = {"example": {"value": ok_example}} 

955 

956 # CSV: textual 

957 content["text/csv"] = {"schema": {"type": "string"}} 

958 

959 # Other formats discovered in .hf (#format) 

960 for fmt in formats_enum or []: 

961 mt = self._media_type_for_format(fmt) 

962 if mt is None or mt in content: 

963 continue 

964 if mt in ("application/json", "text/csv"): 

965 continue 

966 content[mt] = {"schema": {"type": "string"}} 

967 

968 if err_schema_ref: 

969 err_content = OrderedDict() 

970 err_content["application/json"] = {"schema": {"$ref": err_schema_ref}} 

971 err_content["text/csv"] = {"schema": {"type": "string"}} 

972 for fmt in formats_enum or []: 

973 mt = self._media_type_for_format(fmt) 

974 if mt is None or mt in err_content: 

975 continue 

976 if mt in ("application/json", "text/csv"): 

977 continue 

978 err_content[mt] = {"schema": {"type": "string"}} 

979 return content, err_content 

980 

981 return content 

982 

983 # ------------------------- 

984 # Examples from #call 

985 # ------------------------- 

986 def _extract_param_examples_from_call(self, path_template, call_value): 

987 """ 

988 Given a template like '/metadata/{dois}' and a call like 

989 '/metadata/10.1/abc__10.2/xyz', return {'dois': '10.1/abc__10.2/xyz'}. 

990 

991 IMPORTANT: RAMOSE allows slashes inside the last param because it routes 

992 everything via <path:api_url>. OpenAPI tooling typically expects these 

993 slashes to be URL-encoded in examples. 

994 """ 

995 if not call_value: 

996 return {} 

997 

998 call_path = str(call_value).split("?", 1)[0].strip() 

999 

1000 if not path_template.startswith("/"): 

1001 path_template = "/" + path_template 

1002 if not call_path.startswith("/"): 

1003 call_path = "/" + call_path 

1004 

1005 parts = path_template.split("/") 

1006 re_parts = [] 

1007 

1008 # Allow '/' inside the LAST parameter segment (captures the rest of the path) 

1009 last_index = len(parts) - 1 

1010 

1011 for i, part in enumerate(parts): 

1012 if part.startswith("{") and part.endswith("}"): 

1013 name = part[1:-1] 

1014 if i == last_index: 

1015 # last param: capture everything to end, including slashes 

1016 re_parts.append(r"(?P<%s>.+)" % name) 

1017 else: 

1018 # middle params: standard segment (no slash) 

1019 re_parts.append(r"(?P<%s>[^/]+)" % name) 

1020 else: 

1021 re_parts.append(re.escape(part)) 

1022 

1023 pat = "^" + "/".join(re_parts) + "$" 

1024 m = re.match(pat, call_path) 

1025 if not m: 

1026 return {} 

1027 return {k: v for k, v in m.groupdict().items() if v is not None} 

1028 

1029 # ------------------------- 

1030 # Schema from field_type 

1031 # ------------------------- 

1032 def _build_row_schema_from_field_type(self, field_type_str): 

1033 props = OrderedDict() 

1034 for t, f in findall(FIELD_TYPE_RE, field_type_str or ""): 

1035 props[f] = self._schema_for_ramose_type(t) 

1036 return {"type": "object", "properties": props} 

1037 

1038 # ------------------------- 

1039 # Main builder 

1040 # ------------------------- 

    def _build_openapi(self, base_url=None):
        """Build an OpenAPI 3.0.3 document (as nested OrderedDicts) from the
        parsed RAMOSE configuration.

        :param base_url: optional base URL forwarded to self._get_conf
            (defined outside this view).
        :return: the specification, ready to be converted to builtins and
            serialised to YAML (see get_documentation).
        """
        conf = self._get_conf(base_url)
        # conf_json[0] holds the API-level metadata (Table 1 of the .hf file);
        # the remaining items each describe one operation (Table 2).
        api_meta = conf["conf_json"][0]
        formats_enum = self._collect_format_tokens(conf)

        spec = OrderedDict()
        spec["openapi"] = "3.0.3"

        # info
        spec["info"] = OrderedDict(
            [
                ("title", api_meta.get("title", "RAMOSE API")),
                ("version", api_meta.get("version", "0.0.0")),
            ]
        )
        if "description" in api_meta:
            spec["info"]["description"] = api_meta["description"]
        if "license" in api_meta:
            spec["info"]["license"] = {"name": api_meta["license"]}
        if "contacts" in api_meta:
            # _guess_contact (outside this view) may return a falsy value,
            # in which case no contact object is emitted.
            contact_obj = self._guess_contact(api_meta.get("contacts"))
            if contact_obj:
                spec["info"]["contact"] = contact_obj

        # servers: concatenation of the '#base' and '#url' Table 1 fields
        base = api_meta.get("base", "")
        root = api_meta.get("url", "")
        spec["servers"] = [{"url": f"{base}{root}"}]

        # Preserve additional Table 1 fields as vendor extensions
        if "endpoint" in api_meta:
            spec["x-ramose-endpoint"] = api_meta.get("endpoint")
        if "addon" in api_meta:
            spec["x-ramose-addon"] = api_meta.get("addon")
        if "method" in api_meta:
            # Table 1: method used to send request to SPARQL endpoint
            spec["x-ramose-sparql-method"] = api_meta.get("method")

        # components
        spec["components"] = {"schemas": {}, "parameters": {}}

        # Error payload shape shared by all operations (see RAMOSE error responses).
        spec["components"]["schemas"]["Error"] = {
            "type": "object",
            "properties": {"error": {"type": "integer"}, "message": {"type": "string"}},
            "required": ["error", "message"],
        }

        # Common query params (as in HTML docs)
        spec["components"]["parameters"]["require"] = {
            "name": "require",
            "in": "query",
            "description": "Remove rows that have an empty value in the specified field. Repeatable.",
            "required": False,
            "style": "form",
            "explode": True,
            "schema": {"type": "array", "items": {"type": "string"}},
        }
        spec["components"]["parameters"]["filter"] = {
            "name": "filter",
            "in": "query",
            "description": (
                "Filter rows. Repeatable.\n\n"
                "Syntax: `field:opvalue` where `op` is one of `=`, `<`, `>`.\n"
                "If `op` is omitted, `value` is treated as a regex."
            ),
            "required": False,
            "style": "form",
            "explode": True,
            "schema": {"type": "array", "items": {"type": "string"}},
        }
        spec["components"]["parameters"]["sort"] = {
            "name": "sort",
            "in": "query",
            "description": "Sort rows. Syntax: asc(field) or desc(field). Repeatable.",
            "required": False,
            "style": "form",
            "explode": True,
            "schema": {"type": "array", "items": {"type": "string"}},
        }
        spec["components"]["parameters"]["format"] = {
            "name": "format",
            "in": "query",
            "description": "Force output format (overrides Accept header).",
            "required": False,
            "schema": {"type": "string", "enum": formats_enum},
        }
        spec["components"]["parameters"]["json"] = {
            "name": "json",
            "in": "query",
            "description": (
                "Transform JSON output rows. Repeatable.\n\n"
                "Syntax:\n"
                "- `array(\"<sep>\", field)`\n"
                "- `dict(\"<sep>\", field, new_field_1, new_field_2, ...)`\n\n"
                "Where `<sep>` is a string separator (e.g. `,` or `__`)."
            ),
            "required": False,
            "style": "form",
            "explode": True,
            "schema": {"type": "array", "items": {"type": "string"}},
        }

        # Every operation gets the same five query parameters by reference.
        common_param_refs = [
            {"$ref": "#/components/parameters/require"},
            {"$ref": "#/components/parameters/filter"},
            {"$ref": "#/components/parameters/sort"},
            {"$ref": "#/components/parameters/format"},
            {"$ref": "#/components/parameters/json"},
        ]

        # paths
        spec["paths"] = OrderedDict()
        tag_name = api_meta.get("title", "RAMOSE API")

        for op in conf["conf_json"][1:]:
            raw_path = op.get("url", "")
            if not raw_path.startswith("/"):
                raw_path = "/" + raw_path

            if raw_path not in spec["paths"]:
                spec["paths"][raw_path] = OrderedDict()

            # path parameters: one per '{name}' placeholder in the URL template
            path_params = []
            for p in findall(PARAM_NAME, raw_path):
                t = "str"
                shape = ".+"
                if p in op:
                    # The operation may declare 'type(shape)' for this parameter;
                    # _parse_param_type_shape is defined outside this view.
                    t, shape = self._parse_param_type_shape(op[p])

                schema = self._schema_for_ramose_type(t)
                if schema.get("type") == "string" and shape:
                    schema["pattern"] = shape

                param_obj = {
                    "name": p,
                    "in": "path",
                    "required": True,
                    "schema": schema,
                }

                hint = self._param_hint_from_preprocess(op.get("preprocess"), p)
                if hint:
                    param_obj["description"] = hint

                path_params.append(param_obj)

            # Examples from Table 2 '#call'
            call_examples = self._extract_param_examples_from_call(raw_path, op.get("call"))
            for param in path_params:
                nm = param.get("name")
                if nm in call_examples:
                    # Encode slashes etc. so Swagger UI / generated clients behave correctly
                    # ('safe' lists characters NOT to escape; '_' is effectively listed twice,
                    # which is harmless).
                    param["example"] = quote(call_examples[nm], safe="-._~__")
                    if "__" in call_examples[nm] and "description" not in param:
                        param["description"] = "Multiple values can be provided separated by '__'."

            # response schema: array of row objects
            row_schema = self._build_row_schema_from_field_type(op.get("field_type", ""))
            ok_schema = {"type": "array", "items": row_schema}
            ok_example = self._try_parse_output_json(op.get("output_json"))

            ok_content, err_content = self._build_response_content(
                ok_schema=ok_schema,
                formats_enum=formats_enum,
                ok_example=ok_example,
                err_schema_ref="#/components/schemas/Error",
            )

            # methods can be space-separated in RAMOSE
            methods = split(r"\s+", op.get("method", "get").strip())
            for m in [mm for mm in methods if mm]:
                m = m.lower()

                # Summary: first line of the raw description (not cleaned).
                summary = ""
                if "description" in op and op["description"]:
                    summary = op["description"].split("\n")[0].strip()

                # Build a nicer description (and optionally include SPARQL as a markdown code block)
                desc = self._clean_text(op.get("description")) or ""
                spr = self._clean_text(op.get("sparql"))

                if spr:
                    desc += "\n\n---\n\n### RAMOSE SPARQL\n\n```sparql\n" + spr + "\n```"

                op_obj = OrderedDict(
                    [
                        ("tags", [tag_name]),
                        ("summary", summary),
                        ("description", desc),
                        ("parameters", path_params + common_param_refs),
                        (
                            "responses",
                            OrderedDict(
                                [
                                    (
                                        "200",
                                        {
                                            "description": "Successful response",
                                            "content": ok_content,
                                        },
                                    ),
                                    (
                                        "default",
                                        {
                                            "description": "Error",
                                            "content": err_content,
                                        },
                                    ),
                                ]
                            ),
                        ),
                    ]
                )

                # Option B: keep RAMOSE-specific stuff under one vendor extension object
                ramose_ext = OrderedDict()

                pre = self._clean_text(op.get("preprocess"))
                post_val = self._clean_text(op.get("postprocess"))
                call = self._clean_text(op.get("call"))

                if pre:
                    ramose_ext["preprocess"] = pre
                if post_val:
                    ramose_ext["postprocess"] = post_val
                if call:
                    ramose_ext["call"] = call

                # Instead of embedding the giant SPARQL here (which makes the YAML hard to read),
                # we indicate where it is rendered.
                if spr:
                    ramose_ext["sparql_in_description"] = True

                if ramose_ext:
                    op_obj["x-ramose"] = ramose_ext

                # Assign the operation
                spec["paths"][raw_path][m] = op_obj

        return spec

1282 

1283 # ------------------------- 

1284 # PyYAML compatibility 

1285 # ------------------------- 

1286 def _to_builtin(self, obj): 

1287 """Recursively convert OrderedDict (and other non-builtin containers) 

1288 to plain Python builtins so that yaml.safe_dump can serialize it.""" 

1289 if isinstance(obj, OrderedDict): 

1290 obj = dict(obj) 

1291 if isinstance(obj, dict): 

1292 return {k: self._to_builtin(v) for k, v in obj.items()} 

1293 if isinstance(obj, (list, tuple, set)): 

1294 return [self._to_builtin(v) for v in obj] 

1295 return obj 

1296 

1297 def _dump_yaml(self, spec): 

1298 """ 

1299 Dump OpenAPI spec to YAML with nice formatting: 

1300 - multiline strings become block scalars (|) 

1301 - keys keep insertion order (sort_keys=False) 

1302 """ 

1303 class _RamoseYamlDumper(yaml.SafeDumper): 

1304 pass 

1305 

1306 def _str_presenter(dumper, data): 

1307 if "\n" in data: 

1308 return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") 

1309 return dumper.represent_scalar("tag:yaml.org,2002:str", data) 

1310 

1311 _RamoseYamlDumper.add_representer(str, _str_presenter) 

1312 return yaml.dump(spec, Dumper=_RamoseYamlDumper, sort_keys=False, allow_unicode=True) 

1313 

1314 def get_documentation(self, base_url=None): 

1315 spec = self._build_openapi(base_url=base_url) 

1316 spec = self._to_builtin(spec) 

1317 yml = self._dump_yaml(spec) 

1318 return 200, yml 

1319 

1320 def store_documentation(self, file_path, base_url=None): 

1321 yml = self.get_documentation(base_url=base_url)[1] 

1322 with open(file_path, "w", encoding="utf8") as f: 

1323 f.write(yml) 

1324 

1325 def get_index(self, *args, **dargs): 

1326 # Not used by the current UI. Keep a minimal placeholder. 

1327 return "OpenAPI exporter available." 

1328 

1329 

class DataType(object):
    """Registry of the data types usable in a RAMOSE configuration file, with
    converters from the string representation to the related Python value."""

    def __init__(self):
        """Create the registry mapping each type name to its converter."""
        self.func = {
            "str": DataType.str,
            "int": DataType.int,
            "float": DataType.float,
            "duration": DataType.duration,
            "datetime": DataType.datetime
        }

    def get_func(self, name_str):
        """Return the converter registered under 'name_str' (None if unknown)."""
        return self.func.get(name_str)

    @staticmethod
    def duration(s):
        """Convert an XML Schema duration string (see
        https://www.w3.org/TR/xmlschema11-2/#duration) into a comparable
        datetime; None/empty input maps to a very high duration (2000 years)."""
        iso_value = "P2000Y" if s is None or s == "" else s
        return datetime(1983, 1, 15) + parse_duration(iso_value)

    @staticmethod
    def datetime(s):
        """Convert an ISO 8601 string (https://en.wikipedia.org/wiki/ISO_8601)
        into a datetime; None/empty input maps to the lowest date considered
        (0001-01-01)."""
        default = datetime(1, 1, 1, 0, 0)
        source = "0001-01-01" if s is None or s == "" else s
        return parse(source, default=default)

    @staticmethod
    def str(s):
        """Convert the input to a lowercase string; None maps to ''."""
        return "" if s is None else str(s).lower()

    @staticmethod
    def int(s):
        """Convert the input to an int; None/empty maps to a very low value."""
        return -maxsize if s is None or s == "" else int(s)

    @staticmethod
    def float(s):
        """Convert the input to a float; None/empty maps to a very low value."""
        return float(-maxsize) if s is None or s == "" else float(s)

1405 

1406 

1407class Operation(object): 

    def __init__(self, op_complete_url, op_key, i, tp, sparql_http_method, addon,
                 format=None, sources_map=None, allow_inline_endpoints=False, engine="sparql"):
        """Materialise an API operation to be run against a SPARQL endpoint (or,
        depending on configuration, through the SPARQL.Anything engine).

        :param op_complete_url: full URL of the call to the operation.
        :param op_key: the particular URL shape (template) representing the operation.
        :param i: the definition (in JSON) of the operation.
        :param tp: URL of the triplestore to contact.
        :param sparql_http_method: HTTP method for the SPARQL request ('get' or 'post').
        :param addon: path of the Python file defining additional functions usable
            by the operation.
        :param format: mapping of format names to the functions converting CSV data
            into those formats.
        :param sources_map: mapping of named sources to endpoint URLs referenced by
            @@with directives.
        :param allow_inline_endpoints: whether @@endpoint directives may override
            endpoints inline.
        :param engine: identifier of the execution backend.
        """
        self.url_parsed = urlsplit(op_complete_url)
        # Path component only (query string is handled separately by callers).
        self.op_url = self.url_parsed.path
        self.op = op_key
        self.i = i
        self.tp = tp
        self.sparql_http_method = sparql_http_method
        self.addon = addon
        self.format = format or {}
        self.sources_map = sources_map or {}
        self.allow_inline_endpoints = allow_inline_endpoints
        self.engine = engine
        # NOTE(review): presumably a lazily-created SPARQL.Anything engine
        # instance — None until first use; confirm against the engine code.
        self._sa_engine = None

        # Comparison operators supported by the 'filter' query parameter
        # (see handling_params).
        self.operation = {
            "=": eq,
            "<": lt,
            ">": gt
        }

        # Converters used to type values for comparisons and sorting.
        self.dt = DataType()

1444 

1445 # START: Ancillary methods 

1446 @staticmethod 

1447 def get_content_type(ct): 

1448 """It returns the mime type of a given textual representation of a format, being it either 

1449 'csv' or 'json.""" 

1450 content_type = ct 

1451 

1452 if ct == "csv": 

1453 content_type = "text/csv" 

1454 elif ct == "json": 

1455 content_type = "application/json" 

1456 

1457 return content_type 

1458 

1459 def conv(self, s, query_string, c_type="text/csv"): 

1460 """This method takes a string representing a CSV document and converts it in the requested format according 

1461 to what content type is specified as input.""" 

1462 

1463 content_type = Operation.get_content_type(c_type) 

1464 

1465 # Overwrite if requesting a particular format via the URL 

1466 if "format" in query_string: 

1467 req_formats = query_string["format"] 

1468 

1469 for req_format in req_formats: 

1470 content_type = Operation.get_content_type(req_format) 

1471 

1472 if req_format in self.format: 

1473 converter_func = getattr(self.addon, self.format[req_format]) 

1474 return converter_func(s), content_type 

1475 

1476 # If a non built-in format was requested but no converter ran, 

1477 # force CSV Content-Type instead of echoing the requested token. 

1478 if content_type not in ("text/csv", "application/json"): 

1479 content_type = "text/csv" 

1480 

1481 if "application/json" in content_type: 

1482 with StringIO(s) as f: 

1483 r = [] 

1484 for i in DictReader(f): 

1485 r.append(dict(i)) 

1486 

1487 # See if any restructuring of the final JSON is required 

1488 r = Operation.structured(query_string, r) 

1489 

1490 return dumps(r, ensure_ascii=False, indent=4), content_type 

1491 else: 

1492 return s, content_type 

1493 

1494 @staticmethod 

1495 def pv(i, r=None): 

1496 """This method returns the plain value of a particular item 'i' of the result returned by the SPARQL query. 

1497 

1498 In case 'r' is specified (i.e. a row containing a set of results), then 'i' must be the index of the item 

1499 within that row.""" 

1500 if r is None: 

1501 return i[1] 

1502 else: 

1503 return Operation.pv(r[i]) 

1504 

1505 @staticmethod 

1506 def tv(i, r=None): 

1507 """This method returns the typed value of a particular item 'i' of the result returned by the SPARQL query. 

1508 The type associated to that value is actually specified by means of the particular configuration provided 

1509 in the specification file of the API - field 'field_type'. 

1510 

1511 In case 'r' is specified (i.e. a row containing a set of results), then 'i' must be the index of the item 

1512 within that row.""" 

1513 if r is None: 

1514 return i[0] 

1515 else: 

1516 return Operation.tv(r[i]) 

1517 

1518 @staticmethod 

1519 def do_overlap(r1, r2): 

1520 """This method returns a boolean that says if the two ranges (i.e. two pairs of integers) passed as inputs 

1521 actually overlap one with the other.""" 

1522 r1_s, r1_e = r1 

1523 r2_s, r2_e = r2 

1524 

1525 return r1_s <= r2_s <= r1_e or r2_s <= r1_s <= r2_e 

1526 

1527 @staticmethod 

1528 def get_item_in_dict(d_or_l, key_list, prev=None): 

1529 """This method takes as input a dictionary or a list of dictionaries and browses it until the value 

1530 specified following the chain indicated in 'key_list' is not found. It returns a list of all the 

1531 values that matched with such search.""" 

1532 if prev is None: 

1533 res = [] 

1534 else: 

1535 res = prev.copy() 

1536 

1537 if type(d_or_l) is dict: 

1538 d_list = [d_or_l] 

1539 if type(d_or_l) is list: 

1540 d_list = d_or_l 

1541 

1542 for d in d_list: 

1543 key_list_len = len(key_list) 

1544 

1545 if key_list_len >= 1: 

1546 key = key_list[0] 

1547 if key in d: 

1548 if key_list_len == 1: 

1549 res.append(d[key]) 

1550 else: 

1551 res = Operation.get_item_in_dict(d[key], key_list[1:], res) 

1552 

1553 return res 

1554 

1555 @staticmethod 

1556 def add_item_in_dict(d_or_l, key_list, item, idx): 

1557 """This method takes as input a dictionary or a list of dictionaries, browses it until the value 

1558 specified following the chain indicated in 'key_list' is not found, and then substitutes it with 'item'. 

1559 In case the final object retrieved is a list, it selects the object in position 'idx' before the 

1560 substitution.""" 

1561 key_list_len = len(key_list) 

1562 

1563 if key_list_len >= 1: 

1564 key = key_list[0] 

1565 

1566 if type(d_or_l) is list: 

1567 if key_list_len == 1: 

1568 d_or_l[idx][key] = item 

1569 else: 

1570 for i in d_or_l: 

1571 Operation.add_item_in_dict(i, key_list, item, idx) 

1572 else: 

1573 if key in d_or_l: 

1574 if key_list_len == 1: 

1575 d_or_l[key] = item 

1576 else: 

1577 Operation.add_item_in_dict(d_or_l[key], key_list[1:], item, idx) 

1578 

    @staticmethod
    def structured(params, json_table):
        """This method checks if there are particular transformation rules specified in 'params' for a JSON output,
        and converts each row of the input table ('json_table') according to these rules.
        There are two specific rules that can be applied:

        1. array("<separator>",<field>): it converts the string value associated to the field name '<field>' into
        an array by splitting the various textual parts by means of '<separator>'. For instance, consider the
        following JSON structure:

        [
            { "names": "Doe, John; Doe, Jane" },
            { "names": "Doe, John; Smith, John" }
        ]

        Executing the rule 'array("; ",names)' returns the following new JSON structure:

        [
            { "names": [ "Doe, John", "Doe, Jane" ] },
            { "names": [ "Doe, John", "Smith, John" ] }
        ]

        2. dict("<separator>",<field>,<new_field_1>,<new_field_2>,...): it converts the string value associated to
        the field name '<field>' into a dictionary by splitting the various textual parts by means of
        '<separator>' and by associating the new fields '<new_field_1>', '<new_field_2>', etc., to these new
        parts. For instance, consider the following JSON structure:

        [
            { "name": "Doe, John" },
            { "name": "Smith, John" }
        ]

        Executing the rule 'dict(", ",name,family_name,given_name)' returns the following new JSON structure:

        [
            { "name": { "family_name": "Doe", "given_name": "John" } },
            { "name": { "family_name": "Smith", "given_name": "John" } }
        ]

        Each of the specified rules is applied in order, and it works on the JSON structure returned after
        the execution of the previous rule. The field may be a dotted path (e.g. 'author.name'): the rule is
        then applied to the nested value(s) reached by following that chain of keys. The table is modified in
        place and also returned."""
        if "json" in params:
            fields = params["json"]
            for field in fields:
                # Each rule has the shape: op("<separator>", field[, new_field, ...])
                ops = findall(r'([a-z]+)\(("[^"]+"),([^\)]+)\)', field)
                for op_type, s, es in ops:
                    separator = sub('"(.+)"', "\\1", s)  # strip the wrapping quotes
                    entries = [i.strip() for i in es.split(",")]
                    keys = entries[0].split(".")  # dotted path to the target field

                    for row in json_table:
                        v_list = Operation.get_item_in_dict(row, keys)
                        for idx, v in enumerate(v_list):
                            if op_type == "array":
                                if type(v) is str:
                                    # Empty string maps to an empty array, not [''].
                                    Operation.add_item_in_dict(row, keys,
                                                               v.split(separator) if v != "" else [], idx)
                            elif op_type == "dict":
                                new_fields = entries[1:]
                                # maxsplit keeps any extra separators inside the last new field
                                new_fields_max_split = len(new_fields) - 1
                                if type(v) is str:
                                    new_values = v.split(
                                        separator, new_fields_max_split)
                                    Operation.add_item_in_dict(row, keys,
                                                               dict(
                                                                   zip(new_fields, new_values)) if v != "" else {},
                                                               idx)
                                elif type(v) is list:
                                    # A list value (e.g. produced by a previous 'array'
                                    # rule) gets each element converted to a dict.
                                    new_list = []
                                    for i in v:
                                        new_values = i.split(separator, new_fields_max_split)
                                        new_list.append(dict(zip(new_fields, new_values)))
                                    Operation.add_item_in_dict(row, keys, new_list, idx)

        return json_table

1654 # END: Ancillary methods 

1655 

1656 # START: Processing methods 

1657 def preprocess(self, par_dict, op_item, addon): 

1658 """This method takes the a dictionary of parameters with the current typed values associated to them and 

1659 the item of the API specification defining the behaviour of that operation, and preprocesses the parameters 

1660 according to the functions specified in the '#preprocess' field (e.g. "#preprocess lower(doi)"), which is 

1661 applied to the specified parameters as input of the function in consideration (e.g. 

1662 "/api/v1/citations/10.1108/jd-12-2013-0166", converting the DOI in lowercase). 

1663 

1664 It is possible to run multiple functions sequentially by concatenating them with "-->" in the API 

1665 specification document. In this case the output of the function f_i will becomes the input operation URL 

1666 of the function f_i+1. 

1667 

1668 Finally, it is worth mentioning that all the functions specified in the "#preprocess" field must return 

1669 a tuple of values defining how the particular value passed in the dictionary must be changed.""" 

1670 result = par_dict 

1671 

1672 if "preprocess" in op_item: 

1673 

1674 for pre in [sub(r"\s+", "", i) for i in op_item["preprocess"].split(" --> ")]: 

1675 func_name = sub(r"^([^\(\)]+)\(.+$", r"\1", pre).strip() 

1676 params_name = sub(r"^.+\(([^\(\)]+)\).*", r"\1", pre).split(",") 

1677 

1678 param_list = () 

1679 for param_name in params_name: 

1680 param_list += (result[param_name],) 

1681 

1682 # run function 

1683 func = getattr(addon, func_name) 

1684 res = func(*param_list) 

1685 

1686 # substitute res to the current parameter in result 

1687 for idx in range(len(res)): 

1688 result[params_name[idx]] = res[idx] 

1689 

1690 return result 

1691 

1692 def postprocess(self, res, op_item, addon): 

1693 """This method takes the result table returned by running the SPARQL query in an API operation (specified 

1694 as input) and change some of such results according to the functions specified in the '#postprocess' 

1695 field (e.g. "#postprocess remove_date("2018")"). These functions can take parameters as input, while the first 

1696 unspecified parameters will be always the result table. It is worth mentioning that this result table (i.e. 

1697 a list of tuples) actually contains, in each cell, a tuple defining the plain value as well as the typed 

1698 value for enabling better comparisons and operations if needed. An example of this table of result is shown as 

1699 follows: 

1700 

1701 [ 

1702 ("id", "date"), 

1703 ("my_id_1", "my_id_1"), (datetime(2018, 3, 2), "2018-03-02"), 

1704 ... 

1705 ] 

1706 

1707 Note that the typed value and the plain value of each cell can be selected by using the methods "tv" and "pv" 

1708 respectively. In addition, it is possible to run multiple functions sequentially by concatenating them 

1709 with "-->" in the API specification document. In this case the output of the function f_i will becomes 

1710 the input result table of the function f_i+1.""" 

1711 result = res 

1712 

1713 if "postprocess" in op_item: 

1714 for post in [i.strip() for i in op_item["postprocess"].split(" --> ")]: 

1715 func_name = sub(r"^([^\(\)]+)\(.+$", r"\1", post).strip() 

1716 param_str = sub(r"^.+\(([^\(\)]*)\).*", r"\1", post) 

1717 if param_str == "": 

1718 params_values = () 

1719 else: 

1720 params_values = next(reader(param_str.splitlines(), skipinitialspace=True)) 

1721 

1722 func = getattr(addon, func_name) 

1723 func_params = (result,) + tuple(params_values) 

1724 result, do_type_fields = func(*func_params) 

1725 if do_type_fields: 

1726 result = self.type_fields(result, op_item) 

1727 

1728 return result 

1729 

    def handling_params(self, params, table):
        """This method is used for filtering the results that are returned after the post-processing
        phase. In particular, it is possible to:

        1. [require=<field_name>] exclude all the rows that have an empty value in the field specified - e.g.
        "require=doi" removes all the rows that do not have any string specified in the "doi" field;

        2. [filter=<field_name>:<operator><value>] consider only the rows where the string in the input field
        is compliant with the value specified. If no operation is specified, the value is interpreted as a
        regular expression, otherwise it is compared according to the particular type associated to that field.
        Possible operators are "=", "<", and ">" - e.g. "filter=title:semantics?" returns all the rows that contain
        the string "semantic" or "semantics" in the field title, while "filter=date:>2016-05" returns all the rows
        that have a date greater than May 2016;

        3. [sort=<order>(<field_name>)] sort all the results according to the value and type of the particular
        field specified in input. It is possible to sort the rows either in ascending ("asc") or descending
        ("desc") order - e.g. "sort=desc(date)" sort all the rows according to the value specified in the
        field "date" in descending order.

        Note that these filtering operations are applied in the order presented above - first the "require", then
        the "filter", and finally the "sort". It is possible to specify one or more filtering operation of the
        same kind (e.g. "require=doi&require=title").

        'table' is a list whose first element is the header row; each data cell is a
        (typed_value, plain_value) pair - see 'tv' and 'pv'. The filtered table
        (header included) is returned.
        """
        header = table[0]
        result = table[1:]

        # "exclude" is handled identically to "require" (an accepted alias).
        if "exclude" in params or "require" in params:
            fields = params["exclude"] if "exclude" in params else params["require"]
            for field in fields:
                field_idx = header.index(field)
                tmp_result = []
                for row in result:
                    value = Operation.pv(field_idx, row)
                    # Keep only rows whose plain value is neither None nor "".
                    if value is not None and value != "":
                        tmp_result.append(row)
                result = tmp_result

        if "filter" in params:
            fields = params["filter"]
            for field in fields:
                field_name, field_value = field.split(":", 1)

                try:
                    field_idx = header.index(field_name)
                    flag = field_value[0]
                    if flag in ("<", ">", "="):
                        # Typed comparison: convert the literal with the same
                        # DataType converter as the cell's typed value.
                        value = field_value[1:].lower()
                        tmp_result = []
                        for row in result:
                            v_result = Operation.tv(field_idx, row)
                            v_to_compare = self.dt.get_func(type(v_result).__name__)(value)

                            if self.operation[flag](v_result, v_to_compare):
                                tmp_result.append(row)
                        result = tmp_result

                    else:
                        # No operator: treat the whole value as a case-insensitive regex
                        # matched against the plain value.
                        result = list(filter(
                            lambda i: search(field_value.lower(),
                                             Operation.pv(field_idx, i).lower()), result))
                except ValueError:
                    # Unknown field name (header.index failed) or a comparison
                    # literal the type converter rejects: the filter is skipped.
                    pass  # do nothing

        if "sort" in params:
            # NOTE(review): the sort keys are processed in reverse-sorted order —
            # presumably so that the primary key ends up applied last (stable
            # sort); confirm against the documented multi-sort behaviour.
            fields = sorted(params["sort"], reverse=True)
            field_names = []
            order = []
            for field in fields:
                # Accept either "asc(field)"/"desc(field)" or a bare field name
                # (which defaults to ascending order).
                order_names = findall(r"^(desc|asc)\(([^\(\)]+)\)$", field)
                if order_names:
                    order.append(order_names[0][0])
                    field_names.append(order_names[0][1])
                else:
                    order.append("asc")
                    field_names.append(field)

            for idx in range(len(field_names)):
                field_name = field_names[idx]
                try:
                    desc_order = False
                    if idx < len(order):
                        field_order = order[idx].lower().strip()
                        desc_order = True if field_order == "desc" else False

                    field_idx = header.index(field_name)
                    # Rows sort on the whole (typed, plain) cell tuple of the column.
                    result = sorted(result, key=itemgetter(field_idx), reverse=desc_order)
                except ValueError:
                    # Unknown sort field: that sort key is skipped.
                    pass  # do nothing

        return [header] + result

1820 

1821 def type_fields(self, res, op_item): 

1822 """It creates a version of the results 'res' that adds, to each value of the fields, the same value interpreted 

1823 with the type specified in the specification file (field 'field_type'). Note that 'str' is used as default in 

1824 case no further specifications are provided.""" 

1825 result = [] 

1826 cast_func = {} 

1827 header = res[0] 

1828 for heading in header: 

1829 cast_func[heading] = DataType.str 

1830 

1831 if "field_type" in op_item: 

1832 for f, p in findall(FIELD_TYPE_RE, op_item["field_type"]): 

1833 cast_func[p] = self.dt.get_func(f) 

1834 

1835 for row in res[1:]: 

1836 new_row = [] 

1837 for idx in range(len(header)): 

1838 heading = header[idx] 

1839 cur_value = row[idx] 

1840 if type(cur_value) is tuple: 

1841 cur_value = cur_value[1] 

1842 new_row.append((cast_func[heading](cur_value), cur_value)) 

1843 result.append(new_row) 

1844 

1845 return [header] + result 

1846 

1847 def remove_types(self, res): 

1848 """This method takes the results 'res' that include also the typed value and returns a version of such 

1849 results without the types that is ready to be stored on the file system.""" 

1850 result = [res[0]] 

1851 

1852 for row in res[1:]: 

1853 result.append(tuple(Operation.pv(idx, row) for idx in range(len(row)))) 

1854 

1855 return result 

1856 

1857 @staticmethod 

1858 def _is_directive(line): 

1859 return line.strip().startswith("@@") 

1860 

    def _parse_steps(self, text, default_endpoint, params):
        """Parse the '#sparql' text of a multi-source operation into a list of steps.

        Lines starting with '@@' are directives; every run of non-directive lines
        between them is accumulated into a single SPARQL query step. Returned step
        tuples:
        - ("QUERY", endpoint_url, query_text)
        - ("JOIN", left_var, right_var, how)       # how in {"inner","left"}
        - ("REMOVE", [vars])
        - ("VALUES_INJECT", [vars])                # @@values ?var1 ?var2 ...
        - ("FOREACH_SETUP", alias, var_name)       # @@values ?var:alias
        - ("FOREACH_MARK", alias, delay_seconds)   # @@foreach alias [delay]

        '@@with <name>' (resolved through self.sources_map) and '@@endpoint <url>'
        (only if self.allow_inline_endpoints) do not emit a step: they change the
        endpoint bound to the QUERY steps that follow.

        :param text: the raw '#sparql' block, possibly containing @@ directives.
        :param default_endpoint: endpoint used until a @@with/@@endpoint directive.
        :param params: mapping used to substitute "[[name]]" placeholders in each
            emitted query.
        :raises ValueError: for unknown directives or malformed arguments.
        """
        steps = []
        cur_query = []
        current_endpoint = default_endpoint

        def flush_query():
            # Close the query accumulated so far, substitute the [[...]] params,
            # and emit it bound to whichever endpoint is currently active.
            # Blank-only accumulations are discarded without emitting a step.
            if cur_query:
                q = "\n".join(cur_query).strip()
                if not q:
                    cur_query.clear()
                    return
                # parameter substitution [[...]]
                for p, v in params.items():
                    q = q.replace(f"[[{p}]]", str(v))
                steps.append(("QUERY", current_endpoint, q))
                cur_query.clear()

        for raw in text.splitlines():
            line = raw.rstrip("\n")
            if not self._is_directive(line):
                cur_query.append(line)
                continue

            # directive line -> first close any pending query
            flush_query()

            body = line.strip()[2:].strip()  # remove leading @@
            # NOTE(review): a bare "@@" line makes parts empty and parts[0] raise
            # IndexError here - presumably never produced by valid configs; verify.
            parts = body.split()
            cmd = parts[0].lower()

            if cmd == "with":
                # switch to a named endpoint declared in #sources
                name = parts[1]
                if name not in self.sources_map:
                    raise ValueError(f"Unknown source '{name}' in @@with; declare it in #sources.")
                current_endpoint = self.sources_map[name]

            elif cmd == "endpoint":
                # switch to an explicit endpoint URL (opt-in via configuration)
                url = parts[1]
                if not self.allow_inline_endpoints:
                    raise ValueError("@@endpoint not allowed (enable #allow_inline_endpoints).")
                current_endpoint = url

            elif cmd == "join":
                left = parts[1]
                right = parts[2]
                how = "inner"  # default join type when no "type=" argument is given
                if len(parts) >= 4 and parts[3].startswith("type="):
                    how = parts[3].split("=", 1)[1].lower()
                steps.append(("JOIN", left, right, how))

            elif cmd == "remove":
                vars_ = parts[1:]
                steps.append(("REMOVE", vars_))

            elif cmd == "values":
                # syntax:
                #   @@values ?var1 ?var2 ...   -> VALUES_INJECT into the next query
                #   @@values ?var:alias       -> FOREACH_SETUP (for @@foreach)
                tokens = parts[1:]
                if not tokens:
                    raise ValueError("@@values needs at least one variable")

                alias_specs = [t for t in tokens if ":" in t]
                if alias_specs:
                    # We only support exactly one ?var:alias pair for now
                    if len(tokens) != 1 or len(alias_specs) != 1:
                        raise ValueError(
                            "@@values with alias supports exactly one ?var:alias pair"
                        )
                    var_token = alias_specs[0]
                    var_name, alias = var_token.split(":", 1)
                    steps.append(("FOREACH_SETUP", alias, var_name))
                else:
                    vars_ = tokens
                    steps.append(("VALUES_INJECT", vars_))

            elif cmd == "foreach":
                # syntax: @@foreach alias [delay_seconds]
                if len(parts) < 2:
                    raise ValueError("@@foreach requires an alias name")
                alias = parts[1]
                delay = 0.0
                if len(parts) >= 3:
                    try:
                        delay = float(parts[2])
                    except ValueError:
                        raise ValueError(f"Invalid delay value in @@foreach: {parts[2]!r}")
                steps.append(("FOREACH_MARK", alias, delay))

            else:
                raise ValueError(f"Unknown directive @@{cmd}")

        # emit whatever query text trails the last directive
        flush_query()
        return steps

1966 

1967 def _run_sparql_dicts(self, endpoint_url, query_text): 

1968 """Run a SELECT query against a SPARQL endpoint and return a list of dict rows. 

1969 

1970 This always requests CSV and parses it via DictReader, to stay consistent 

1971 with RAMOSE's legacy pipeline. 

1972 """ 

1973 try: 

1974 if self.sparql_http_method == "get": 

1975 r = _http_session.get( 

1976 endpoint_url + "?query=" + quote(query_text), 

1977 headers={ 

1978 "Accept": "text/csv", 

1979 "User-Agent": "RAMOSE/2.0.0", 

1980 }, 

1981 timeout=DEFAULT_HTTP_TIMEOUT, 

1982 ) 

1983 else: 

1984 r = _http_session.post( 

1985 endpoint_url, 

1986 data=query_text, 

1987 headers={ 

1988 "Accept": "text/csv", 

1989 "Content-Type": "application/sparql-query", 

1990 "User-Agent": "RAMOSE/2.0.0", 

1991 }, 

1992 timeout=DEFAULT_HTTP_TIMEOUT, 

1993 ) 

1994 except RequestException as e: 

1995 raise RuntimeError(f"SPARQL request failed: {e}") from e 

1996 

1997 r.encoding = "utf-8" 

1998 if r.status_code != 200: 

1999 raise RuntimeError(f"SPARQL {r.status_code}: {r.reason}") 

2000 text = r.content.decode("utf-8-sig", errors="replace") 

2001 list_of_lines = text.splitlines() 

2002 return list(DictReader(list_of_lines)) 

2003 

    def _run_sparql_anything_dicts(self, query_text, values=None):
        """
        Execute a SPARQL Anything SELECT query via PySPARQL-Anything and return
        a list of dicts (one per row), in the same shape as _run_sparql_dicts.

        :param query_text: full SPARQL (Anything) query string
            (typically containing SERVICE <x-sparql-anything:...>).
        :param values: optional dict of template parameters for the query
            (name -> value), passed to SPARQL Anything's `values=`.
        :return: list of dict rows; the exact keys depend on which of the
            normalisation branches below applies to the engine's output.
        """
        # Lazily create and cache the engine on the instance so we don't
        # re-initialise the JVM on every call.
        engine = getattr(self, "_sa_engine", None)
        if engine is None:
            engine = pysparql_anything.SparqlAnything()
            self._sa_engine = engine

        # Build kwargs for PySPARQL-Anything
        kwargs = {"query": query_text}
        if values:
            # SPARQL Anything expects a dict[str, str]
            kwargs["values"] = {str(k): str(v) for k, v in values.items()}

        # Ask PySPARQL-Anything for a Python dict structure
        result = engine.select(output_type=dict, **kwargs)

        # --- Normalisation to list[dict] -----------------------------------
        # The engine's output shape varies; the branches below cover, in order:
        # lists, non-dict scalars, the standard SPARQL JSON result set, a
        # column-oriented mapping, and finally a single-row dict.

        # 1) If it's already a list of dicts, just return it.
        if isinstance(result, list):
            if result and isinstance(result[0], dict):
                return result
            # list but not dicts (tuples, etc.): coerce each element to a dict
            return [dict(row) for row in result]

        # 2) If it's not a dict at all, just wrap it as a single-row result.
        if not isinstance(result, dict):
            return [dict(result=result)]

        # 3) Try standard SPARQL JSON ResultSet shape:
        #    { "head": {"vars": [...]}, "results": { "bindings": [...] } }
        head = result.get("head")
        results = result.get("results")
        if isinstance(head, dict) and isinstance(results, dict) and "bindings" in results:
            vars_ = head.get("vars") or []
            rows = []
            for b in results.get("bindings", []):
                row = {}
                for v in vars_:
                    cell = b.get(v)
                    if isinstance(cell, dict):
                        # standard SPARQL JSON: { "type": "...", "value": "..." , ... }
                        row[v] = cell.get("value")
                    else:
                        # unbound variable (None) or already-plain value
                        row[v] = cell
                rows.append(row)
            return rows

        # 4) Otherwise assume it is a mapping column_name -> list-of-values (or scalars)
        rows = []
        cols = list(result.keys())

        # Find maximum column length, if columns are lists/tuples
        max_len = 0
        for c in cols:
            v = result[c]
            if isinstance(v, (list, tuple)):
                max_len = max(max_len, len(v))

        if max_len:
            for i in range(max_len):
                row = {}
                for c in cols:
                    v = result[c]
                    if isinstance(v, (list, tuple)):
                        # shorter columns are padded with None
                        row[c] = v[i] if i < len(v) else None
                    else:
                        # scalar: repeat in every row
                        row[c] = v
                rows.append(row)
            return rows

        # 5) Fallback: treat the dict as a single-row result
        return [result]

2085 

2086 def _run_query_dicts(self, endpoint_url, query_text): 

2087 """ 

2088 Dispatch query execution to the appropriate backend, with support 

2089 for per-query engine selection in multi-source mode. 

2090 

2091 Rules: 

2092 - If endpoint_url is the special string "sparql-anything" (case-insensitive), 

2093 then always use SPARQL.ANYTHING (PySPARQL-Anything) for this query. 

2094 - Otherwise, fall back to the operation-level engine: 

2095 * engine == "sparql-anything" -> SPARQL.ANYTHING 

2096 * else -> standard HTTP SPARQL 

2097 """ 

2098 

2099 # Per-query override: @@endpoint sparql-anything 

2100 if endpoint_url and str(endpoint_url).strip().lower() == "sparql-anything": 

2101 return self._run_sparql_anything_dicts(query_text) 

2102 

2103 # Default behaviour: operation-level engine 

2104 if self.engine == "sparql-anything": 

2105 return self._run_sparql_anything_dicts(query_text) 

2106 else: 

2107 return self._run_sparql_dicts(endpoint_url, query_text) 

2108 

2109 def _inject_values_clause(self, query_text, vars_, acc_rows): 

2110 # build distinct tuples for requested vars from the accumulator 

2111 cols = [v.lstrip("?") for v in vars_] 

2112 tuples, seen = [], set() 

2113 for row in (acc_rows or []): 

2114 tup = tuple(row.get(c, "") for c in cols) 

2115 if all(tup) and tup not in seen: 

2116 seen.add(tup) 

2117 tuples.append(tup) 

2118 if not tuples: 

2119 return query_text # nothing to inject 

2120 

2121 # format literals vs IRIs 

2122 def fmt(x): 

2123 s = str(x) 

2124 if s.startswith("http://") or s.startswith("https://"): 

2125 return f"<{s}>" 

2126 return '"' + s.replace('\\', '\\\\').replace('"', '\\"') + '"' 

2127 

2128 head = "VALUES (" + " ".join(vars_) + ") {\n" 

2129 body = "\n".join(" (" + " ".join(fmt(v) for v in tup) + ")" for tup in tuples) 

2130 tail = "\n}\n" 

2131 

2132 i = query_text.find("{") 

2133 if i == -1: 

2134 # no WHERE brace: put VALUES at top (legal SPARQL) 

2135 return head + body + tail + query_text 

2136 j = i + 1 

2137 return query_text[:j] + "\n" + head + body + tail + query_text[j:] 

2138 

2139 @staticmethod 

2140 def _drop_columns(rows, vars_): 

2141 if not rows: 

2142 return rows 

2143 vars_set = set(v.lstrip("?") for v in vars_) 

2144 out = [] 

2145 for r in rows: 

2146 out.append({k: v for k, v in r.items() if k not in vars_set and ("?" + k) not in vars_set}) 

2147 return out 

2148 

2149 def _norm_join_key(self, v): 

2150 if v is None: 

2151 return None 

2152 s = str(v).strip() 

2153 # unify scheme for w3id IRIs (and similar) 

2154 if s.startswith("http://"): 

2155 s = "https://" + s[len("http://"):] 

2156 # drop a single trailing slash for stability 

2157 if s.endswith("/"): 

2158 s = s[:-1] 

2159 return s 

2160 

2161 def _join(self, left_rows, right_rows, lkey, rkey, how="inner"): 

2162 """ 

2163 Merge two row sets on lkey (from left_rows) and rkey (from right_rows). 

2164 - lkey/rkey may be passed as '?var' or 'var' -> we normalize to bare names. 

2165 - Keys are normalized with _norm_join_key (e.g., http -> https, trim slash). 

2166 - When 'left', all left rows are preserved even if no match on the right. 

2167 - Right-hand columns are copied into the merged row; collisions are avoided. 

2168 """ 

2169 # 1) Normalize column names (strip leading '?') 

2170 lcol = lkey.lstrip("?") 

2171 rcol = rkey.lstrip("?") 

2172 

2173 left_rows = left_rows or [] 

2174 right_rows = right_rows or [] 

2175 

2176 # 2) Build an index for right_rows on normalized rcol values 

2177 rindex = {} 

2178 for r in right_rows: 

2179 rk = self._norm_join_key(r.get(rcol)) 

2180 if rk is None: 

2181 continue 

2182 rindex.setdefault(rk, []).append(r) 

2183 

2184 # determine right columns to copy (excluding the join key) 

2185 right_cols = [c for c in (right_rows[0].keys() if right_rows else []) if c != rcol] 

2186 

2187 out = [] 

2188 for l in left_rows: 

2189 lk = self._norm_join_key(l.get(lcol)) 

2190 matches = rindex.get(lk, []) 

2191 if matches: 

2192 for r in matches: 

2193 merged = dict(l) 

2194 for c in right_cols: 

2195 rv = r.get(c) 

2196 if rv is None: 

2197 continue 

2198 if c not in merged or merged[c] in ("", None): 

2199 merged[c] = rv 

2200 else: 

2201 alt = f"{c}_r" 

2202 if alt not in merged or merged[alt] in ("", None): 

2203 merged[alt] = rv 

2204 out.append(merged) 

2205 else: 

2206 if how == "left": 

2207 out.append(dict(l)) 

2208 # inner: drop 

2209 return out 

2210 

2211 @staticmethod 

2212 def _header_from_field_type(op_item, acc): 

2213 # Respect #field_type order if provided, else derive from data 

2214 if "field_type" in op_item: 

2215 # FIELD_TYPE_RE is global in this file 

2216 return [f for (_, f) in findall(FIELD_TYPE_RE, op_item["field_type"])] 

2217 # fallback to keys of first row 

2218 return list(acc[0].keys()) if acc else [] 

2219 

2220 @staticmethod 

2221 def _to_csv_rows(header, acc): 

2222 rows = [header] 

2223 for d in acc: 

2224 rows.append([d.get(h, "") for h in header]) 

2225 return rows 

2226 

    def exec(self, method="get", content_type="application/json"):
        """This method takes in input the HTTP method to use for the call
        and the content type to return, and execute the operation as indicated
        in the specification file, by running (in the following order):

        1. the methods to preprocess the query;
        2. the SPARQL query related to the operation called, by using the parameters indicated in the URL;
        3. the specification of all the types of the various rows returned;
        4. the methods to postprocess the result;
        5. the application of the filter to remove, filter, sort the result;
        6. the removal of the types added at the step 3, so as to have a data structure ready to be returned;
        7. the conversion in the format requested by the user.

        :param method: the HTTP method of the incoming call (checked against the
            methods the operation declares in its '#method' entry).
        :param content_type: the content type requested by the caller, forwarded
            to self.conv for the final serialisation.
        :return: a (status_code, body, content_type) triple.
        """
        str_method = method.lower()
        m = self.i["method"].split()  # HTTP methods allowed by this operation

        if str_method in m:
            try:
                # Extract each URL parameter and cast it according to the type
                # declared in the specification ('str' values and undeclared
                # parameters are kept as raw strings).
                par_dict = {}
                par_man = match(self.op, self.op_url).groups()
                for idx, par in enumerate(findall("{([^{}]+)}", self.i["url"])):
                    try:
                        par_type = self.i[par].split("(")[0]
                        if par_type == "str":
                            par_value = par_man[idx]
                        else:
                            par_value = self.dt.get_func(par_type)(par_man[idx])
                    except KeyError:
                        # no shape declared for this parameter: keep the raw string
                        par_value = par_man[idx]
                    par_dict[par] = par_value

                if self.addon is not None:
                    self.preprocess(par_dict, self.i, self.addon)

                sparql_text = self.i["sparql"]

                if "@@" not in sparql_text:
                    # Fast path: single-query (legacy behavior)

                    if self.engine == "sparql-anything":
                        # SPARQL Anything backend: substitute the [[param]]
                        # placeholders, run the query through PySPARQL-Anything,
                        # then feed the rows into the standard pipeline.
                        query = sparql_text
                        for param in par_dict:
                            query = query.replace("[[%s]]" % param, str(par_dict[param]))
                        rows = self._run_sparql_anything_dicts(query)
                        header = self._header_from_field_type(self.i, rows or [])
                        csv_rows = self._to_csv_rows(header, rows or [])
                        res = self.type_fields(csv_rows, self.i)
                        if self.addon is not None:
                            res = self.postprocess(res, self.i, self.addon)
                        q_string = parse_qs(quote(self.url_parsed.query, safe="&="))
                        res = self.handling_params(q_string, res)
                        res = self.remove_types(res)
                        s_res = StringIO()
                        writer(s_res).writerows(res)
                        body, ctype = self.conv(s_res.getvalue(), q_string, content_type)
                        return 200, body, ctype

                    # Handle in case the parameters are lists, we need to generate all possible combinations
                    par_dict = {p_k: [par_dict[p_k]] if not isinstance(par_dict[p_k], list) else par_dict[p_k] for p_k in par_dict}
                    combinations = product(*par_dict.values())

                    parameters_comb = []
                    for combination in combinations:
                        parameters_comb.append(dict(zip(list(par_dict.keys()), list(combination))))

                    # the __parameters_comb__ variable is a list of dictionaries,
                    # each dictionary stores a possible combination of parameter values
                    #
                    # Example: {"id":"5","area":["A1","A2"]} -> [ {"id":"5","area":"A1"}, {"id":"5","area":"A2"} ]
                    # Example: {"id":"5","area":"A1"} -> [ {"id":"5","area":"A1"} ]

                    # iterate over __parameters_comb__

                    list_of_res = []
                    include_header_line = True
                    for par_dict in parameters_comb:

                        query = self.i["sparql"]
                        for param in par_dict:
                            query = query.replace("[[%s]]" % param, str(par_dict[param]))

                        # GET and POST are sync
                        # TODO: use threads to make it parallel

                        if self.sparql_http_method == "get":
                            r = _http_session.get(self.tp + "?query=" + quote(query),
                                                  headers={"Accept": "text/csv"}, timeout=DEFAULT_HTTP_TIMEOUT)
                        else:
                            r = _http_session.post(self.tp, data=query, headers={"Accept": "text/csv",
                                                   "Content-Type": "application/sparql-query"}, timeout=DEFAULT_HTTP_TIMEOUT)
                        r.encoding = "utf-8"

                        sc = r.status_code
                        if sc == 200:
                            # This line has been added to avoid a strange behaviour of the 'splitlines' method in
                            # presence of strange characters (non-UTF8).
                            list_of_lines = [line.decode("utf-8") for line in r.text.encode("utf-8").splitlines()]

                        else:
                            # any non-200 aborts the whole call immediately
                            return sc, "HTTP status code %s: %s" % (sc, r.reason), "text/plain"

                        # each res will have a list of list_of_line
                        # include the header of the first result only
                        if not include_header_line:
                            list_of_lines = list_of_lines[1:]
                        include_header_line = False

                        # list_of_res Example:
                        # [ ["id,val","01,a","02,b"] , ["id,val","05,u","08,p"] ]
                        list_of_res += list_of_lines

                    #
                    # ----- DELEGATE to POST PROCESSING operations
                    # return 200, "HTTP print for debug %s: %s" % (200, list_of_res), "text/plain"

                    res = self.type_fields(list(reader(list_of_res)), self.i)
                    if self.addon is not None:
                        res = self.postprocess(res, self.i, self.addon)
                    q_string = parse_qs(quote(self.url_parsed.query, safe="&="))
                    res = self.handling_params(q_string, res)
                    res = self.remove_types(res)
                    s_res = StringIO()
                    writer(s_res).writerows(res)
                    return (sc,) + self.conv(s_res.getvalue(), q_string, content_type)

                else:
                    # Multi-source path: @@ directives present
                    try:
                        steps = self._parse_steps(sparql_text, self.tp, par_dict)

                        acc = None  # accumulator: list of dict rows
                        pending_join = None  # set by a JOIN step, consumed by the next QUERY
                        pending_values_vars = None  # set by VALUES_INJECT, consumed by the next QUERY

                        foreach_sources = {}  # alias -> column name (without '?')
                        pending_foreach = None  # (alias, delay_seconds)

                        for st in steps:
                            tag = st[0]

                            if tag == "QUERY":
                                _, endpoint_url, qtxt = st
                                if not qtxt or not qtxt.strip():
                                    continue  # defensive: skip any empty query steps

                                # FOREACH mode: run one query per value
                                if pending_foreach is not None:
                                    alias, delay = pending_foreach

                                    if alias not in foreach_sources:
                                        raise ValueError(
                                            f"@@foreach refers to unknown alias '{alias}'. "
                                            f"Declare it with @@values ?var:{alias} before @@foreach."
                                        )

                                    source_col = foreach_sources[alias]  # e.g. "br"

                                    # Collect distinct non-empty values from the accumulator
                                    values = []
                                    seen = set()
                                    for row in (acc or []):
                                        v = row.get(source_col)
                                        if v and v not in seen:
                                            seen.add(v)
                                            values.append(v)

                                    all_rows = []
                                    for idx_val, val in enumerate(values):
                                        # Substitute [[alias]] in the query text
                                        q_one = qtxt.replace(f"[[{alias}]]", str(val))
                                        sub_rows = self._run_query_dicts(endpoint_url, q_one)
                                        if sub_rows:
                                            all_rows.extend(sub_rows)
                                        # Sleep between calls if requested (not after the last)
                                        if delay and idx_val + 1 < len(values):
                                            time.sleep(delay)

                                    rows = all_rows
                                    # FOREACH applies only to this single QUERY
                                    pending_foreach = None
                                    # In FOREACH mode we ignore any pending VALUES_INJECT
                                    pending_values_vars = None

                                else:
                                    # Normal multi-source behaviour
                                    if pending_values_vars:
                                        # acc is the current accumulator rows
                                        qtxt = self._inject_values_clause(qtxt, pending_values_vars, acc)
                                        pending_values_vars = None  # only affects this single query
                                    rows = self._run_query_dicts(endpoint_url, qtxt)

                                if acc is None:
                                    # first query defines the accumulator
                                    acc = rows
                                else:
                                    if pending_join:
                                        lvar, rvar, how = pending_join
                                        acc = self._join(acc, rows, lvar, rvar, how)
                                        pending_join = None
                                    else:
                                        raise ValueError(
                                            "Multiple QUERY steps without an explicit @@join directive"
                                        )

                            elif tag == "JOIN":
                                # default join type is "inner" when not specified
                                pending_join = (st[1], st[2], st[3] if len(st) > 3 and st[3] else "inner")

                            elif tag == "REMOVE":
                                _, vars_ = st
                                acc = self._drop_columns(acc or [], vars_)

                            elif tag == "VALUES_INJECT":
                                # st = ("VALUES_INJECT", ["?br", ...])
                                pending_values_vars = st[1]

                            elif tag == "FOREACH_SETUP":
                                # st = ("FOREACH_SETUP", alias, var_name)
                                _, alias, var_name = st
                                foreach_sources[alias] = var_name.lstrip("?")

                            elif tag == "FOREACH_MARK":
                                # st = ("FOREACH_MARK", alias, delay)
                                _, alias, delay = st
                                pending_foreach = (alias, delay)

                            else:
                                raise RuntimeError(f"Unknown step tag {tag}")

                        # Convert merged dict rows -> CSV rows; then run the usual pipeline
                        header = self._header_from_field_type(self.i, acc or [])
                        csv_rows = self._to_csv_rows(header, acc or [])

                        res = self.type_fields(csv_rows, self.i)
                        if self.addon is not None:
                            res = self.postprocess(res, self.i, self.addon)
                        q_string = parse_qs(quote(self.url_parsed.query, safe="&="))
                        res = self.handling_params(q_string, res)
                        res = self.remove_types(res)
                        s_res = StringIO()
                        writer(s_res).writerows(res)
                        body, ctype = self.conv(s_res.getvalue(), q_string, content_type)
                        return 200, body, ctype

                    except ValueError as ve:
                        # configuration/plan errors map to a client error
                        sc = 400
                        return sc, f"HTTP status code {sc}: {ve}", "text/plain"
                    except RuntimeError as re_err:
                        # backend/transport errors map to a bad-gateway error
                        sc = 502
                        return sc, f"HTTP status code {sc}: {re_err}", "text/plain"

            except TimeoutError:
                exc_type, exc_obj, exc_tb = exc_info()
                sc = 408
                return sc, "HTTP status code %s: request timeout - %s: %s (line %s)" % \
                       (sc, exc_type.__name__, exc_obj,
                        exc_tb.tb_lineno), "text/plain"
            except TypeError:
                exc_type, exc_obj, exc_tb = exc_info()
                sc = 400
                return sc, "HTTP status code %s: " \
                           "parameter in the request not compliant with the type specified - %s: %s (line %s)" % \
                       (sc, exc_type.__name__, exc_obj,
                        exc_tb.tb_lineno), "text/plain"
            except:
                # last-resort catch-all so the API always answers with a status code
                exc_type, exc_obj, exc_tb = exc_info()
                sc = 500
                return sc, "HTTP status code %s: something unexpected happened - %s: %s (line %s)" % \
                       (sc, exc_type.__name__, exc_obj,
                        exc_tb.tb_lineno), "text/plain"
        else:
            sc = 405
            return sc, "HTTP status code %s: '%s' method not allowed" % (sc, str_method), "text/plain"

2498 # END: Processing methods 

2499 

2500 

2501class APIManager(object): 

2502 # Fixing max size for CSV 

2503 @staticmethod 

2504 def __max_size_csv(): 

2505 from sys import maxsize 

2506 import csv 

2507 maxInt = maxsize 

2508 while True: 

2509 try: 

2510 csv.field_size_limit(maxInt) 

2511 break 

2512 except OverflowError: # pragma: no cover 

2513 maxInt = int(maxInt/10) 

2514 

2515 # Constructor: START 

    def __init__(self, conf_files, endpoint_override=None):
        """This is the constructor of the APIManager class. It takes in input a list of API configuration files, each
        defined according to the Hash Format and following a particular structure, and stores all the operations
        defined within a dictionary. Optionally, an endpoint_override parameter can be provided to override the
        SPARQL endpoint defined in the configuration files (useful for staging/production environments).
        The structure of each item in the dictionary of the operations is defined as follows:

        {
            "/api/v1/references/(.+)": {
                "sparql": "PREFIX ...",
                "method": "get",
                ...
            },
            ...
        }

        In particular, each key in the dictionary identifies the full URL of a particular API operation, and it is
        used so as to understand which operation should be called once an API call is done. The object associated
        as value of this key is the transformation of the related operation defined in the input Hash Format file
        into a dictionary.

        In addition, it also defines additional structure, such as the functions to be used for interpreting the
        values returned by a SPARQL query, some operations that can be used for filtering the results, and the
        HTTP methods to call for making the request to the SPARQL endpoint specified in the configuration file.

        :param conf_files: list of paths to Hash Format configuration files.
        :param endpoint_override: optional SPARQL endpoint URL replacing the one in
            each configuration file.
        """
        APIManager.__max_size_csv()

        # base_url -> full configuration dict for that API
        self.all_conf = OrderedDict()
        # flat list of every API's base URL, in configuration-file order
        self.base_url = []
        for conf_file in conf_files:
            conf = OrderedDict()
            tp = None
            conf_json = HashFormatHandler().read(conf_file)
            base_url = None
            addon = None
            sources_map = {}  # named endpoints usable via @@with
            allow_inline_endpoints = False
            engine = "sparql"  # default backend unless the config says otherwise
            for item in conf_json:
                if base_url is None:
                    # First item of the file: API-level settings.
                    base_url = item["url"]
                    self.base_url.append(item["url"])
                    website = item["base"]
                    tp = endpoint_override if endpoint_override else item["endpoint"]

                    # Engine selection at API level (optional)
                    if "engine" in item:
                        engine = item["engine"].strip().lower()

                    # Optional: named sources registry
                    if "sources" in item:
                        # expected: "name1=url1; name2=url2"
                        for pair in item["sources"].split(";"):
                            pair = pair.strip()
                            if not pair:
                                continue
                            name, url = pair.split("=", 1)
                            sources_map[name.strip()] = url.strip()

                    # Optional: allow explicit @@endpoint <url> in #sparql
                    if "allow_inline_endpoints" in item:
                        allow_inline_endpoints = str(item["allow_inline_endpoints"]).strip().lower() in ("true", "1", "yes", "y")

                    if "addon" in item:
                        # make the add-on module importable, then import it
                        addon_abspath = abspath(dirname(conf_file) + sep + item["addon"])
                        path.append(dirname(addon_abspath))
                        addon = import_module(basename(addon_abspath))
                    sparql_http_method = "post"  # default HTTP verb for SPARQL calls
                    if "method" in item:
                        sparql_http_method = item["method"].strip().lower()
                else:
                    # Every subsequent item describes one API operation, keyed by
                    # its normalised matching URL.
                    conf[APIManager.nor_api_url(item, base_url)] = item

            self.all_conf[base_url] = {
                "conf": conf,
                "tp": tp,
                "conf_json": conf_json,
                "base_url": base_url,
                "website": website,
                "addon": addon,
                "sparql_http_method": sparql_http_method,
                "sources_map": sources_map,
                "allow_inline_endpoints": allow_inline_endpoints,
                "engine": engine,
            }

2600 # Constructor: END 

2601 

2602 # START: Ancillary methods 

2603 @staticmethod 

2604 def nor_api_url(i, b=""): 

2605 """This method takes an API operation object and an optional base URL (e.g. "/api/v1") as input 

2606 and returns the URL composed by the base URL plus the API URL normalised according to specific rules. In 

2607 particular, these normalisation rules takes the operation URL (e.g. "#url /citations/{oci}") and the 

2608 specification of the shape of all the parameters between brackets in the URL (e.g. "#oci str([0-9]+-[0-9]+)"), 

2609 and returns a new operation URL where the parameters have been substituted with the regular expressions 

2610 defining them (e.g. "/citations/([0-9]+-[0-9]+)"). This URL will be used by RAMOSE for matching the 

2611 particular API calls with the specific operation to execute.""" 

2612 result = i["url"] 

2613 

2614 for term in findall(PARAM_NAME, result): 

2615 try: 

2616 t = i[term] 

2617 except KeyError: 

2618 t = "str(.+)" 

2619 result = result.replace("{%s}" % term, "%s" % sub(r"^[^\(]+(\(.+\))$", r"\1", t)) 

2620 

2621 return "%s%s" % (b, result) 

2622 

2623 def best_match(self, u): 

2624 """This method takes an URL of an API call in input and find the API operation URL and the related 

2625 configuration that best match with the API call, if any.""" 

2626 #u = u.decode('UTF8') if isinstance(u, (bytes, bytearray)) else u 

2627 cur_u = sub(r"\?.*$", "", u) 

2628 result = None, None 

2629 for base_url in self.all_conf: 

2630 if u.startswith(base_url): 

2631 conf = self.all_conf[base_url] 

2632 for pat in conf["conf"]: 

2633 if match("^%s$" % pat, cur_u): 

2634 result = conf, pat 

2635 break 

2636 return result 

2637 # END: Ancillary methods 

2638 

2639 # START: Processing methods 

2640 def get_op(self, op_complete_url): 

2641 """This method returns a new object of type Operation which represent the operation specified by 

2642 the input URL (parameter 'op_complete_url)'. In case no operation can be found according by checking 

2643 the configuration files available in the APIManager, a tuple with an HTTP error code and a message 

2644 is returned instead.""" 

2645 url_parsed = urlsplit(op_complete_url) 

2646 op_url = url_parsed.path 

2647 

2648 conf, op = self.best_match(op_url) 

2649 if op is not None: 

2650 op_conf = conf["conf"][op] 

2651 op_engine = conf.get("engine", "sparql") 

2652 if "engine" in op_conf: 

2653 op_engine = op_conf["engine"].strip().lower() 

2654 

2655 # Build op-level format map from the operation block 

2656 op_format_map = {} 

2657 if "format" in op_conf: 

2658 fm_val = op_conf["format"] 

2659 fm_list = fm_val if isinstance(fm_val, list) else [fm_val] 

2660 for fm in fm_list: 

2661 for part in fm.split(";"): 

2662 part = part.strip() 

2663 if not part: 

2664 continue 

2665 fmt, func = part.split(",", 1) 

2666 op_format_map[fmt.strip()] = func.strip() 

2667 

2668 return Operation( 

2669 op_complete_url, 

2670 op, 

2671 op_conf, 

2672 conf["tp"], 

2673 conf["sparql_http_method"], 

2674 conf["addon"], 

2675 op_format_map, 

2676 conf.get("sources_map", {}), 

2677 conf.get("allow_inline_endpoints", False), 

2678 op_engine, 

2679 ) 

2680 else: 

2681 sc = 404 

2682 return sc, "HTTP status code %s: the operation requested does not exist" % sc, "text/plain" 

2683 # END: Processing methods 

2684 

2685 

# Command-line entry point: parse the arguments, load the API specification
# file(s) into an APIManager, then either serve the API(s) through a Flask
# test webserver (-w), export documentation (-d / --openapi), or execute a
# single API call (-c) and print/save the response.
if __name__ == "__main__":
    arg_parser = ArgumentParser("ramose.py", description="The 'Restful API Manager Over SPARQL Endpoints' (a.k.a. "
                                                         "'RAMOSE') is an application that allows one to expose a "
                                                         "Restful API interface, according to a particular "
                                                         "specification document, to interact with a SPARQL endpoint.")

    arg_parser.add_argument("-s", "--spec", dest="spec", required=True, nargs='+',
                            help="The file(s) in hash format containing the specification of the API(s).")
    arg_parser.add_argument("-m", "--method", dest="method", default="get",
                            help="The method to use to make a request to the API.")
    arg_parser.add_argument("-c", "--call", dest="call",
                            help="The URL to call for querying the API.")
    arg_parser.add_argument("-f", "--format", dest="format", default="application/json",
                            help="The format in which to get the response.")
    arg_parser.add_argument("-d", "--doc", dest="doc", default=False, action="store_true",
                            help="Say to generate the HTML documentation of the API (if it is specified, all "
                                 "the arguments '-m', '-c', and '-f' won't be considered).")
    arg_parser.add_argument("--openapi", dest="openapi", default=False, action="store_true",
                            help="Export the API specification to OpenAPI 3.0 YAML.")
    arg_parser.add_argument("--api-base", dest="api_base", default=None,
                            help="When exporting docs/OpenAPI with multiple specs loaded, choose which API base URL to export.")
    arg_parser.add_argument("-o", "--output", dest="output",
                            help="A file where to store the response.")
    arg_parser.add_argument("-w", "--webserver", dest="webserver", default=False,
                            help="The host:port where to deploy a Flask webserver for testing the API.")
    arg_parser.add_argument("-css", "--css", dest="css",
                            help="The path of a .css file for styling the API documentation (to be specified either with '-w' or with '-d' and '-o' arguments).")

    args = arg_parser.parse_args()
    am = APIManager(args.spec)
    dh = HTMLDocumentationHandler(am)
    oah = OpenAPIDocumentationHandler(am)

    css_path = args.css if args.css else None

    if args.webserver:
        try:
            # NOTE(review): 'logging' is already imported at module level and
            # 'send_from_directory' appears unused in this block.
            import logging
            from flask import Flask, request, make_response, send_from_directory
            from werkzeug.exceptions import HTTPException

            # logs
            dh.logger_ramose()

            # web server: split "-w host:port"; default to 127.0.0.1:8080 when
            # no colon is present. NOTE(review): a bare IPv6 literal would be
            # split at its last colon — confirm only host:port is expected.
            host_name = args.webserver.rsplit(':', 1)[0] if ':' in args.webserver else '127.0.0.1'
            port = args.webserver.rsplit(':', 1)[1] if ':' in args.webserver else '8080'

            app = Flask(__name__)

            # This is due to Flask routing rules that do not accept URLs without the starting slash
            # but ramose calls start with the slash, hence we remove it if the flag args.webserver is added
            if args.call:
                args.call = args.call[1:]

            # routing
            @app.route('/')
            def home():
                # Landing page: index of all loaded APIs.
                index = dh.get_index(css_path)
                return index

            @app.route('/<path:api_url>')
            def doc(api_url):
                # Catch-all route: serves the OpenAPI export, the HTML
                # documentation of an API base URL, or executes an API call.
                # Default response is the index page with a 404 status.
                res, status = dh.get_index(css_path), 404
                # --- OpenAPI export endpoint ---
                # Example: /api/v1/openapi.yaml (or .yml)
                if api_url.endswith("openapi.yaml") or api_url.endswith("openapi.yml"):
                    base = api_url.rsplit("/", 1)[0]  # e.g. "api/v1"
                    if "/" + base in am.all_conf:
                        status, yml = oah.get_documentation(base_url=base)
                        response = make_response(yml, status)
                        response.headers.set("Content-Type", "application/yaml")
                        response.headers.set("Access-Control-Allow-Origin", "*")
                        response.headers.set("Access-Control-Allow-Credentials", "true")
                        return response
                    else:
                        return res, status
                # --- end OpenAPI export endpoint ---
                # Requests under a known API base URL are either documentation
                # pages (exact base match) or actual API calls (longer paths).
                if any(api_u in '/'+api_url for api_u, api_dict in am.all_conf.items()):
                    # documentation
                    if any(api_u == '/'+api_url for api_u,api_dict in am.all_conf.items()):
                        status, res = dh.get_documentation(css_path, api_url)
                        return res, status
                    # api calls
                    else:
                        cur_call = '/'+api_url
                        # NOTE(review): 'format' shadows the builtin of the same name.
                        format = request.args.get('format')
                        content_type = "text/csv" if format is not None and "csv" in format else "application/json"

                        # Rebuild the full call (path + decoded query string) and resolve it.
                        op = am.get_op(cur_call+'?'+unquote(request.query_string.decode('utf8')))
                        if type(op) is Operation:  # Operation found
                            status, res, c_type = op.exec(content_type=content_type)
                        else:  # HTTP error
                            status, res, c_type = op

                        if status == 200:
                            response = make_response(res, status)
                            response.headers.set('Content-Type', c_type)
                        else:
                            # The API Manager returns a text/plain message when there is an error.
                            # Now set to return the header requested by the user
                            if content_type == "text/csv":
                                si = StringIO()
                                cw = writer(si)
                                cw.writerows([["error","message"], [str(status),str(res)]])
                                response = make_response(si.getvalue(), status)
                                response.headers.set("Content-Disposition", "attachment", filename="error.csv")
                            else:
                                m_res = {"error": status, "message": res}
                                mes = dumps(m_res)
                                response = make_response(mes, status)
                            response.headers.set('Content-Type', content_type)  # overwrite text/plain

                        # allow CORS anyway
                        response.headers.set('Access-Control-Allow-Origin', '*')
                        response.headers.set('Access-Control-Allow-Credentials', 'true')

                        return response
                else:
                    return res, status

            # NOTE(review): debug=True enables the Werkzeug debugger/reloader;
            # this server is for testing only (as the '-w' help text states) —
            # do not expose it in production.
            app.run(host=str(host_name), debug=True, port=str(port))

        except Exception as e:
            # NOTE(review): this reduces any startup/runtime error to a one-line
            # message; consider logging the full traceback instead.
            exc_type, exc_obj, exc_tb = exc_info()
            fname = pt.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print("[ERROR]", exc_type, fname, exc_tb.tb_lineno)

    else:
        # run locally via shell
        if args.openapi:
            # get_documentation returns (status, body); append the content type.
            res = oah.get_documentation(base_url=args.api_base) + ("application/yaml", )
        elif args.doc:
            res = dh.get_documentation(css_path) + ("text/html", )
        else:
            op = am.get_op(args.call)
            if type(op) is Operation:  # Operation found
                res = op.exec(args.method, args.format)
            else:  # HTTP error
                res = op

        # 'res' is a (status, body, content type) triple in every branch above.
        if args.output is None:
            print("# Response HTTP code: %s\n# Body:\n%s\n# Content-type: %s" % res)
        else:
            with open(args.output, "w") as f:
                f.write(res[1])