Coverage for ramose / hash_format.py: 98%
45 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-15 15:58 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-15 15:58 +0000
1# SPDX-FileCopyrightText: 2018-2021 Silvio Peroni <silvio.peroni@unibo.it>
2# SPDX-FileCopyrightText: 2020-2021 Marilena Daquino <marilena.daquino2@unibo.it>
3# SPDX-FileCopyrightText: 2022 Davide Brembilla
4# SPDX-FileCopyrightText: 2024 Ivan Heibi <ivan.heibi2@unibo.it>
5# SPDX-FileCopyrightText: 2025 Sergei Slinkin
6# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
7#
8# SPDX-License-Identifier: ISC
10from pathlib import Path
11from re import DOTALL, search
# Names of the built-in parameters; parse_disable_params expands "*" to this set.
BUILTIN_PARAMS = frozenset({"require", "filter", "sort", "format", "json", "page", "page_size"})


def parse_disable_params(raw: str) -> set[str]:
    """Return the parameter names listed in a "disable" specification.

    ``raw`` is either the wildcard ``*`` (surrounding whitespace allowed),
    which selects every name in ``BUILTIN_PARAMS``, or a comma-separated list
    of names. Whitespace around each name is removed and empty items are
    dropped, so an empty string yields an empty set.

    The result is always a fresh, mutable ``set``.
    """
    spec = raw.strip()
    if spec != "*":
        tokens = (token.strip() for token in spec.split(","))
        return {token for token in tokens if token}
    # Copy so that callers get a mutable set and never the shared frozenset.
    return set(BUILTIN_PARAMS)
def parse_custom_params(raw: str) -> dict[str, dict[str, str]]:
    """Parse a custom-parameter declaration into a mapping keyed by name.

    ``raw`` holds zero or more entries separated by ``;``. Each entry has the
    form ``name,handler,phase[,description]``. Only the first three commas
    split an entry, so the description may itself contain commas. Blank
    entries are skipped and surrounding whitespace is stripped from every
    field.

    Returns:
        A dict mapping each parameter name to a dict with the keys
        ``"handler"``, ``"phase"`` and ``"description"`` (an empty string when
        the entry has no description). If a name occurs more than once, the
        last entry wins.

    Raises:
        ValueError: if an entry has fewer than three comma-separated fields.
    """
    result: dict[str, dict[str, str]] = {}
    for raw_part in raw.split(";"):
        part = raw_part.strip()
        if not part:
            continue

        fields = part.split(",", 3)
        if len(fields) < 3:
            # Fail with a message that points at the offending entry instead
            # of the generic "not enough values to unpack" error.
            raise ValueError(
                f"Invalid custom parameter definition {part!r}: "
                "expected 'name,handler,phase[,description]'"
            )

        name, handler, phase, *desc_parts = fields
        result[name.strip()] = {
            "handler": handler.strip(),
            "phase": phase.strip(),
            "description": desc_parts[0].strip() if desc_parts else "",
        }
    return result
class HashFormatHandler:
    """This class creates an object capable of reading files stored in Hash Format (see
    https://github.com/opencitations/ramose#Hashformat-configuration-file). A Hash Format
    file (.hf) is a specification file that includes information structured using the following
    syntax:

    ```
    #<field_name_1> <field_value_1>
    #<field_name_2> <field_value_2>
    #<field_name_3> <field_value_3>
    [...]
    #<field_name_n> <field_value_n>
    ```

    A line that does not start a new field is appended to the value of the
    most recently started field, so values may span several lines."""

    def read(self, file_path) -> list[dict[str, str]]:
        """This method takes in input a path of a file containing a document specified in
        Hash Format, and returns its representation as list of dictionaries.

        The name of the first field found in the file acts as the separator
        key: every later occurrence of that field name starts a new
        dictionary. Lines found before the first field are ignored. If a field
        name is repeated inside one object, the last value replaces the
        earlier one. Trailing whitespace is stripped from every value.

        ``file_path`` is anything accepted by ``pathlib.Path``. The file is
        decoded as UTF-8 regardless of the platform locale, and a leading
        byte-order mark is tolerated."""
        result: list[dict[str, str]] = []

        # Each value is collected as a list of line fragments and joined once
        # the object is complete, so long multi-line values are not rebuilt by
        # repeated string concatenation.
        cur_fields: dict[str, list[str]] = {}
        first_field_name = None
        cur_field_name = None

        # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, which
        # would otherwise hide the "#" of the first field from the pattern.
        # newline=None keeps universal-newline translation.
        with Path(file_path).open(encoding="utf-8-sig", newline=None) as f:
            for line in f:
                # DOTALL lets the value group keep the trailing newline, so
                # continuation lines can simply be appended after it.
                cur_matching = search(r"^#([^\s]+)\s(.+)$", line, DOTALL)

                if cur_matching is None:
                    # Continuation line: extend the field being read, if any.
                    if cur_fields and cur_field_name is not None:
                        cur_fields[cur_field_name].append(line)
                    continue

                # Both groups use "+", so name and content are never empty.
                cur_field_name, cur_field_content = cur_matching.groups()

                # Identify the separator key
                if first_field_name is None:
                    first_field_name = cur_field_name

                # The separator key closes the object read so far (if any)
                # and opens a new one.
                if cur_field_name == first_field_name and cur_fields:
                    result.append(self._join_fields(cur_fields))
                    cur_fields = {}

                # Add the new key to the object
                cur_fields[cur_field_name] = [cur_field_content]

        if cur_fields:
            result.append(self._join_fields(cur_fields))

        return result

    @staticmethod
    def _join_fields(fields: dict[str, list[str]]) -> dict[str, str]:
        """Join the collected fragments of each field and strip trailing whitespace."""
        return {name: "".join(parts).rstrip() for name, parts in fields.items()}