Coverage for ramose / datatype.py: 93%
68 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-07-01 13:49 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-07-01 13:49 +0000
1# SPDX-FileCopyrightText: 2018-2021 Silvio Peroni <silvio.peroni@unibo.it>
2# SPDX-FileCopyrightText: 2020-2021 Marilena Daquino <marilena.daquino2@unibo.it>
3# SPDX-FileCopyrightText: 2022 Davide Brembilla
4# SPDX-FileCopyrightText: 2024 Ivan Heibi <ivan.heibi2@unibo.it>
5# SPDX-FileCopyrightText: 2025 Sergei Slinkin
6# SPDX-FileCopyrightText: 2026 Arcangelo Massari <arcangelo.massari@unibo.it>
7#
8# SPDX-License-Identifier: ISC
10from __future__ import annotations
12from calendar import monthrange
13from datetime import datetime, timedelta, timezone
14from re import compile as re_compile
15from sys import maxsize
16from typing import TYPE_CHECKING, NamedTuple
18if TYPE_CHECKING:
19 from collections.abc import Callable
# Lengths used by _parse_datetime to recognise partial dates that
# datetime.fromisoformat rejects: "YYYY" (4 characters) and "YYYY-M" / "YYYY-MM"
# (6 or 7 characters).
_YEAR_ONLY_LENGTH = 4
_YEAR_MONTH_MIN_LENGTH = 6
_YEAR_MONTH_MAX_LENGTH = 7

# ISO 8601 duration format: PnYnMnDTnHnMnS
# Python's stdlib has no parser for this format, so we handle it manually.
# Each component is optional, but at least one must be present. The T separator marks
# the transition from date to time components and is only valid when at least one time
# component follows it, so "P", "PT" and "P1YT" are rejected.
# A leading "-" denotes a negative duration, as allowed by XSD duration
# (https://www.w3.org/TR/xmlschema11-2/#duration).
# Examples: "P1Y", "P2M3D", "PT4H5M6S", "P1Y2M3DT4H5M6.5S", "-P1D"
_DURATION_PATTERN = re_compile(
    r"(?P<sign>-)?P"
    # Lookahead: a number must follow "P" directly or right after the "T" separator,
    # otherwise the duration has no component at all.
    r"(?=\d|T\d)"
    r"(?:(?P<years>\d+)Y)?"
    r"(?:(?P<months>\d+)M)?"
    r"(?:(?P<days>\d+)D)?"
    # Lookahead: "T" may only appear when a time component actually follows it.
    r"(?:T(?=\d)"
    r"(?:(?P<hours>\d+)H)?"
    r"(?:(?P<minutes>\d+)M)?"
    r"(?:(?P<seconds>\d+(?:\.\d+)?)S)?"
    r")?",
)
class _ISODuration(NamedTuple):
    """ISO 8601 duration split into calendar parts and a fixed-length part.

    A month lasts 28 to 31 days and a year 365 or 366, so neither can be expressed
    as a fixed number of days. Like isodate does, years and months are kept as plain
    integers and are only resolved through calendar arithmetic once they are added
    to a concrete reference date.
    """

    # Number of whole calendar years.
    years: int
    # Number of whole calendar months.
    months: int
    # The part of the duration that is held as a timedelta.
    remainder: timedelta
def _parse_datetime(date_str: str) -> datetime:
    """Turn an ISO 8601 date or datetime string into a timezone-aware datetime.

    Two partial forms that fromisoformat refuses are handled up front: a bare year
    ("2015") and a year-month pair ("2015-06"); both resolve to the first day of the
    period at midnight UTC. A trailing "Z" is rewritten to "+00:00" because
    fromisoformat on Python 3.10 does not understand it. Values parsed without an
    offset are assumed to be UTC; values with an explicit offset keep it.

    Raises ValueError when the string cannot be interpreted.
    """
    text = date_str.strip()
    length = len(text)

    # "YYYY": first of January of that year.
    if length == _YEAR_ONLY_LENGTH and text.isdigit():
        return datetime(int(text), 1, 1, tzinfo=timezone.utc)

    # "YYYY-M" or "YYYY-MM": first day of that month.
    if length in (_YEAR_MONTH_MIN_LENGTH, _YEAR_MONTH_MAX_LENGTH) and text[4] == "-":
        year_text, month_text = text.split("-")
        return datetime(int(year_text), int(month_text), 1, tzinfo=timezone.utc)

    if text.endswith("Z"):
        text = text[:-1] + "+00:00"

    result = datetime.fromisoformat(text)
    if result.tzinfo is not None:
        return result
    return result.replace(tzinfo=timezone.utc)
def _parse_duration(duration_str: str) -> _ISODuration:
    """Convert an ISO 8601 duration string into an _ISODuration.

    Components missing from the string count as zero. A leading "-" negates every
    component: years, months and the timedelta remainder alike.

    Raises ValueError when the whole string does not match the duration pattern.
    """
    matched = _DURATION_PATTERN.fullmatch(duration_str)
    if matched is None:
        msg = f"Invalid ISO 8601 duration: {duration_str}"
        raise ValueError(msg)

    def component(name: str) -> str:
        # An optional group that did not take part in the match is None.
        return matched.group(name) or "0"

    factor = -1 if matched.group("sign") else 1
    fixed_part = timedelta(
        days=int(component("days")),
        hours=int(component("hours")),
        minutes=int(component("minutes")),
        # Seconds are the only component that may carry a fractional part.
        seconds=float(component("seconds")),
    )
    return _ISODuration(
        years=factor * int(component("years")),
        months=factor * int(component("months")),
        remainder=factor * fixed_part,
    )
def _add_duration(base: datetime, duration: _ISODuration) -> datetime:
    """Shift a datetime by an ISO 8601 duration using calendar arithmetic.

    The year and month parts move the calendar fields directly. When the resulting
    month is shorter than the starting day allows, the day is clamped to the last
    day of that month (e.g. Jan 31 + 1 month = Feb 28). The timedelta remainder is
    added afterwards, on top of the shifted date.
    """
    # Work with a zero-based month index so divmod yields the year carry and the
    # month in one step; floor division keeps this correct for negative shifts.
    month_index = (base.month - 1) + 12 * duration.years + duration.months
    year_offset, zero_based_month = divmod(month_index, 12)
    target_year = base.year + year_offset
    target_month = zero_based_month + 1

    _, last_day = monthrange(target_year, target_month)
    moved = base.replace(
        year=target_year,
        month=target_month,
        day=min(base.day, last_day),
    )
    return moved + duration.remainder
class DataType:
    def __init__(self) -> None:
        """Collect the data types that can be used in a RAMOSE configuration file.

        Each supported type name is mapped to the static method that converts a
        string into the corresponding Python value."""
        self.func: dict[str, Callable[[str | None], str | int | float | datetime]] = {
            type_name: getattr(DataType, type_name) for type_name in ("str", "int", "float", "duration", "datetime")
        }

    def get_func(self, name_str: str) -> Callable[[str | None], str | int | float | datetime]:
        """Return the conversion method registered for the data type named name_str.

        Raises KeyError when no data type with that name is registered."""
        return self.func[name_str]

    @staticmethod
    def duration(s: str | None) -> datetime:
        """Convert a duration string, in the XML Schema Recommendation format
        (https://www.w3.org/TR/xmlschema11-2/#duration), into a comparable value.

        The value returned is the datetime obtained by adding the duration to a fixed
        reference date (1983-01-15 UTC). When the input is None or empty, a high
        duration (i.e. 2000 years) is used instead."""
        if s is None or s == "":
            parsed = _parse_duration("P2000Y")
        else:
            parsed = _parse_duration(s)

        anchor = datetime(1983, 1, 15, tzinfo=timezone.utc)
        return _add_duration(anchor, parsed)

    @staticmethod
    def datetime(s: str | None) -> datetime:
        """Convert an ISO 8601 (https://en.wikipedia.org/wiki/ISO_8601) string into a
        datetime. When the input is None or empty, a low date value
        (i.e. 0001-01-01 UTC) is returned."""
        if s is None or s == "":
            return datetime(1, 1, 1, tzinfo=timezone.utc)
        return _parse_datetime(s)

    @staticmethod
    def str(s: str | None) -> str:
        """Return the input as a lowercase string. When the input is None, an empty
        string is returned."""
        if s is None:
            return ""
        return str(s).lower()

    @staticmethod
    def int(s: str | None) -> int:
        """Convert the input string into an integer. When the input is None or empty,
        a low integer value (-sys.maxsize) is returned."""
        if s is None or s == "":
            return -maxsize
        return int(s)

    @staticmethod
    def float(s: str | None) -> float:
        """Convert the input string into a float. When the input is None or empty,
        a low float value (-sys.maxsize as a float) is returned."""
        if s is None or s == "":
            return float(-maxsize)
        return float(s)