Source code for ctparse.types

from datetime import datetime
from typing import Any, Dict, Optional, Tuple, Type, TypeVar

import regex
from regex import Regex
import enum

# Self-type for fluent methods that hand back the instance they were called on.
T = TypeVar("T", bound="Artifact")


class Artifact:
    """Base class for everything produced while parsing.

    An artifact covers the character span ``[mstart, mend)`` of the input.
    Subclasses list in ``_attrs`` the attribute names that define their
    identity; equality, hashing and the ``_hasOnly`` / ``_hasAtLeast``
    helpers all iterate over that list.
    """

    def __init__(self) -> None:
        self._attrs = ["mstart", "mend"]
        self.mstart = 0
        self.mend = 0

    def update_span(self: T, *args: "Artifact") -> T:
        """Stretch this artifact's span over the given artifacts.

        The start is taken from the first argument and the end from the
        last one. Returns ``self`` so calls can be chained. Raises
        ``IndexError`` when called without arguments.
        """
        first, last = args[0], args[-1]
        self.mstart = first.mstart
        self.mend = last.mend
        return self

    def __len__(self) -> int:
        # Length of the covered span.
        return self.mend - self.mstart

    def __bool__(self) -> bool:
        # Without this, truthiness would fall back to __len__ and an
        # artifact with an empty span would evaluate as False.
        return True

    def __str__(self) -> str:
        return ""

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return "{}[{}-{}]{{{}}}".format(cls_name, self.mstart, self.mend, str(self))

    def nb_str(self) -> str:
        """Return a string representation without the bounds information."""
        cls_name = self.__class__.__name__
        return "{}[]{{{}}}".format(cls_name, str(self))

    def __eq__(self, other: Any) -> bool:
        # Exact type match is required: an instance of a subclass is never
        # equal to an instance of its parent class.
        if type(other) is not type(self):
            return False
        for name in self._attrs:
            if getattr(self, name) != getattr(other, name):
                return False
        return True

    def __hash__(self) -> int:
        identity = tuple(getattr(self, name) for name in self._attrs)
        return hash(identity)

    def _hasOnly(self, *args: str) -> bool:
        """Check that exactly the named attributes are set.

        Every attribute in ``_attrs`` that is named in ``args`` must be
        not ``None``; every other attribute in ``_attrs`` must be ``None``.
        """
        return all(
            (getattr(self, name) is not None) == (name in args)
            for name in self._attrs
        )

    def _hasAtLeast(self, *args: str) -> bool:
        """Check that all named attributes are set (i.e. not ``None``).

        Attributes that are not named in ``args`` are not inspected.
        """
        return not any(getattr(self, name) is None for name in args)
class RegexMatch(Artifact):
    """Artifact wrapping one named group of a regular expression match.

    The group looked up in the match object is ``"R<id>"``; its span becomes
    the artifact's span and its text is kept for the string representation.
    """

    def __init__(self, id: int, m: Regex) -> None:
        super().__init__()
        # The rule id is part of the identity, in addition to the span.
        self._attrs = ["mstart", "mend", "id"]
        self.id = id
        self.key = "R{}".format(id)
        self.match = m
        group_start, group_end = m.span(self.key)
        self.mstart = group_start
        self.mend = group_end
        self._text = m.group(self.key)

    def __str__(self) -> str:
        return "{}:{}".format(self.id, self._text)
def _pod_entry(start: int, end: int, shift: int = 1) -> Dict[str, Any]:
    """Build the nested specification for one part of day (POD).

    ``start``/``end`` are the base hours of the POD. The "early" modifier
    moves both hours by ``-shift`` and "late" by ``+shift``. Every deeper
    modifier level ("early"/"late"/"very" below those) carries a zero offset.
    """

    def _refinement() -> Dict[str, Any]:
        # Second-level "early"/"late": zero offset, optionally "very".
        return {"offset": (0, 0), "very": {"offset": (0, 0)}}

    def _modifier(delta: int) -> Dict[str, Any]:
        return {
            "early": _refinement(),
            "late": _refinement(),
            "very": {"offset": (0, 0)},
            "offset": (delta, delta),
        }

    return {
        "offset": (start, end),
        "early": _modifier(-shift),
        "late": _modifier(shift),
    }


# Tree of part-of-day specifications. Each node has an "offset" pair that is
# added to its parent's hours; all other keys are modifier words leading to a
# child node.
_pod_hours = {
    "earlymorning": _pod_entry(4, 7),
    "morning": _pod_entry(6, 9),
    "forenoon": _pod_entry(9, 12),
    "noon": _pod_entry(11, 13),
    "afternoon": _pod_entry(12, 17),
    "evening": _pod_entry(17, 20),
    "lateevening": _pod_entry(18, 21),
    "night": _pod_entry(19, 22),
    "first": _pod_entry(0, 0, shift=0),
    "last": _pod_entry(23, 23, shift=0),
}


def _mk_pod_hours() -> Dict[str, Tuple[int, int]]:
    """Flatten ``_pod_hours`` into a ``name -> (start hour, end hour)`` table.

    The name of a nested node is its modifier word prepended to the name of
    its parent (e.g. "early" below "morning" becomes "earlymorning"), and its
    hours are the parent's hours plus the node's own "offset".

    NOTE(review): some generated names coincide with top-level names. The
    top-level "earlymorning" entry (4, 7) is overwritten by "early" applied
    to "morning" (5, 8) because "morning" is processed later; "lateevening"
    is produced twice with the same value (18, 21). Behaviour is kept as is -
    confirm whether the overwrite is intended.
    """
    table: Dict[str, Tuple[int, int]] = {}

    def _walk(name: str, spec: Dict[str, Any], base: Tuple[int, int]) -> None:
        shift = spec["offset"]
        hours = (base[0] + shift[0], base[1] + shift[1])
        table[name] = hours
        for modifier, sub_spec in spec.items():
            if modifier != "offset":
                _walk(modifier + name, sub_spec, hours)

    for pod_name, pod_spec in _pod_hours.items():
        if pod_name != "offset":
            _walk(pod_name, pod_spec, (0, 0))
    return table


# Flat lookup table: POD name (with optional modifier prefixes) -> hour pair.
pod_hours = _mk_pod_hours()

# Matches the text produced by Time.__str__; "X" stands for an unset field.
_TIME_REGEX = regex.compile(
    r"(\d{4}|X)-(\d{2}|X)-(\d{2}|X) (\d{2}|X):(\d{2}|X) \((\d|X)\/(\w+)\)"
)
class Time(Artifact):
    """A possibly partial point in time.

    Any of year, month, day, hour, minute, day of week (``DOW``) and part of
    day (``POD``) may be ``None``, meaning "not specified". The ``is*``
    properties test that exactly a given set of fields is set, the ``has*``
    properties that at least a given set of fields is set.
    """

    def __init__(
        self,
        year: Optional[int] = None,
        month: Optional[int] = None,
        day: Optional[int] = None,
        hour: Optional[int] = None,
        minute: Optional[int] = None,
        DOW: Optional[int] = None,
        POD: Optional[str] = None,
    ) -> None:
        super().__init__()
        # The span (mstart/mend) is deliberately not part of the identity.
        self._attrs = ["year", "month", "day", "hour", "minute", "DOW", "POD"]
        # Might add some validation here, did not to avoid the overhead
        self.year, self.month, self.day = year, month, day
        self.hour, self.minute = hour, minute
        self.DOW = DOW
        self.POD = POD

    # -------------------------------------------------------------------------
    # Make sure to not accidentally test bool(x) as False when x==0, but you
    # meant x==None
    # -------------------------------------------------------------------------
    @property
    def isDOY(self) -> bool:
        """isDayOfYear <=> a dd.mm but not year"""
        return self._hasOnly("month", "day")

    @property
    def isDOM(self) -> bool:
        """isDayOfMonth <=> a dd but no month"""
        return self._hasOnly("day")

    @property
    def isDOW(self) -> bool:
        """isDayOfWeek <=> DOW is the 0=Monday index; fragile test, as the
        DOW could be accompanied by e.g. a full date etc.; in practice,
        however, the production rules do not do that.
        """
        return self._hasOnly("DOW")

    @property
    def isMonth(self) -> bool:
        """only has a month"""
        return self._hasOnly("month")

    @property
    def isPOD(self) -> bool:
        """isPartOfDay <=> morning, etc.; fragile, tests only that there is a
        POD and neither a full date nor a full time
        """
        return self._hasOnly("POD")

    @property
    def isHour(self) -> bool:
        """only has an hour"""
        return self._hasOnly("hour")

    @property
    def isTOD(self) -> bool:
        """isTimeOfDay - only a time, not date"""
        if self._hasOnly("hour"):
            return True
        return self._hasOnly("hour", "minute")

    @property
    def isDate(self) -> bool:
        """isDate - only a date, not time"""
        return self._hasOnly("year", "month", "day")

    @property
    def isDateTime(self) -> bool:
        """a date and a time"""
        if self._hasOnly("year", "month", "day", "hour"):
            return True
        return self._hasOnly("year", "month", "day", "hour", "minute")

    @property
    def isYear(self) -> bool:
        """just a year"""
        return self._hasOnly("year")

    @property
    def hasDate(self) -> bool:
        """at least a date"""
        return self._hasAtLeast("year", "month", "day")

    @property
    def hasDOY(self) -> bool:
        """at least a day of year"""
        return self._hasAtLeast("month", "day")

    @property
    def hasDOW(self) -> bool:
        """at least a day of week"""
        return self._hasAtLeast("DOW")

    @property
    def hasTime(self) -> bool:
        """at least a time to the hour"""
        return self._hasAtLeast("hour")

    @property
    def hasPOD(self) -> bool:
        """at least a part of day"""
        return self._hasAtLeast("POD")

    def __str__(self) -> str:
        def _render(value: Any, spec: str) -> str:
            # Unset fields are written as "X".
            return spec.format(value) if value is not None else "X"

        return "{}-{}-{} {}:{} ({}/{})".format(
            _render(self.year, "{:04d}"),
            _render(self.month, "{:02d}"),
            _render(self.day, "{:02d}"),
            _render(self.hour, "{:02d}"),
            _render(self.minute, "{:02d}"),
            _render(self.DOW, "{:d}"),
            _render(self.POD, "{}"),
        )

    @classmethod
    def from_str(cls: Type["Time"], text: str) -> "Time":
        """Parse the textual form produced by ``__str__``.

        Raises ``ValueError`` if ``text`` does not match the expected format.
        """
        match = _TIME_REGEX.match(text)
        if not match:
            raise ValueError("Invalid format")

        def _int_or_none(token: str) -> Optional[int]:
            return int(token) if token != "X" else None

        year, month, day, hour, minute, dow, pod = match.groups()
        return cls(
            year=_int_or_none(year),
            month=_int_or_none(month),
            day=_int_or_none(day),
            hour=_int_or_none(hour),
            minute=_int_or_none(minute),
            DOW=_int_or_none(dow),
            POD=pod if pod != "X" else None,
        )

    @property
    def start(self) -> "Time":
        """Earliest time covered: a missing hour comes from the POD's start
        hour if a POD is set, otherwise hour and minute default to 0. The
        result carries no DOW or POD.
        """
        from_pod = self.hour is None and self.hasPOD
        hour = pod_hours[self.POD][0] if from_pod else (self.hour or 0)  # type: ignore
        return Time(
            year=self.year,
            month=self.month,
            day=self.day,
            hour=hour,
            minute=self.minute or 0,
        )

    @property
    def end(self) -> "Time":
        """Latest time covered: a missing hour comes from the POD's end hour
        if a POD is set, otherwise it defaults to 23; a missing minute
        defaults to 59. The result carries no DOW or POD.
        """
        if self.hour is not None:
            hour = self.hour
        elif self.hasPOD:
            hour = pod_hours[self.POD][1]  # type: ignore
        else:
            hour = 23
        minute = 59 if self.minute is None else self.minute
        return Time(
            year=self.year, month=self.month, day=self.day, hour=hour, minute=minute
        )

    @property
    def dt(self) -> datetime:
        """Convert to a ``datetime``; raises ``ValueError`` unless year,
        month and day are all set.
        """
        # Use the start time, in case we have a POD specification
        begin = self.start
        if begin.year is None or begin.month is None or begin.day is None:
            raise ValueError(
                "cannot convert underspecified Time into datetime"
                ", missing at least one of year, month or day"
            )
        return datetime(
            begin.year, begin.month, begin.day, begin.hour or 0, begin.minute or 0
        )
class Interval(Artifact):
    """A time span between two ``Time`` bounds, either of which may be
    ``None`` (open on that side).
    """

    def __init__(
        self, t_from: Optional[Time] = None, t_to: Optional[Time] = None
    ) -> None:
        super().__init__()
        # Identity is defined by the two bounds, not by the matched span.
        self._attrs = ["t_from", "t_to"]
        self.t_from = t_from
        self.t_to = t_to

    @property
    def isTimeInterval(self) -> bool:
        """True if both bounds are set and both are times of day."""
        return (
            self.t_from is not None
            and self.t_to is not None
            and self.t_from.isTOD
            and self.t_to.isTOD
        )

    @property
    def isDateInterval(self) -> bool:
        """True if both bounds are set and both are pure dates."""
        return (
            self.t_from is not None
            and self.t_to is not None
            and self.t_from.isDate
            and self.t_to.isDate
        )

    def __str__(self) -> str:
        lower, upper = str(self.t_from), str(self.t_to)
        return "{} - {}".format(lower, upper)

    @classmethod
    def from_str(cls: Type["Interval"], text: str) -> "Interval":
        """Parse the textual form produced by ``__str__``.

        A bound written as ``None`` becomes ``None``. Raises ``ValueError``
        if ``text`` does not consist of exactly two " - " separated parts or
        a part is not a valid ``Time`` string.
        """
        parts = text.split(" - ")
        if len(parts) != 2:
            raise ValueError("Invalid format")
        lower, upper = (
            None if part == "None" else Time.from_str(part) for part in parts
        )
        return cls(t_from=lower, t_to=upper)

    @property
    def start(self) -> Optional[Time]:
        """Start of the lower bound, or ``None`` if there is no lower bound."""
        return None if self.t_from is None else self.t_from.start

    @property
    def end(self) -> Optional[Time]:
        """End of the upper bound, or ``None`` if there is no upper bound."""
        return None if self.t_to is None else self.t_to.end
@enum.unique
class DurationUnit(enum.Enum):
    """Units a ``Duration`` can be expressed in.

    The member value is the plural unit name as it appears in the textual
    form of a duration (e.g. ``"3 days"``).
    """

    # Declaration order is the iteration order of the enum; keep it stable.
    MINUTES = "minutes"
    HOURS = "hours"
    DAYS = "days"
    NIGHTS = "nights"
    WEEKS = "weeks"
    MONTHS = "months"
class Duration(Artifact):
    """A length of time expressed as an integer count of a ``DurationUnit``."""

    def __init__(self, value: int, unit: DurationUnit):
        """Create a Duration using value and unit.

        ``unit`` is a ``DurationUnit`` member: MINUTES, HOURS, DAYS, NIGHTS,
        WEEKS or MONTHS.
        """
        super().__init__()
        # Identity is the amount and unit (same convention as Time and
        # Interval). Without this, equality and hashing fell back to the
        # span only and any two durations with the same span compared equal.
        self._attrs = ["value", "unit"]
        self.value = value
        self.unit = unit

    def __str__(self) -> str:
        return "{} {}".format(self.value, self.unit.value)

    # (unit of left operand, unit of right operand) ->
    #     (unit of the sum, factor for left value, factor for right value)
    # The sum is expressed in the finer of the two units; NIGHTS are counted
    # as DAYS. A month is approximated as 365 / 12 days.
    _mapping = {
        (DurationUnit.MINUTES, DurationUnit.MINUTES): (DurationUnit.MINUTES, 1, 1),
        (DurationUnit.MINUTES, DurationUnit.HOURS): (DurationUnit.MINUTES, 1, 60),
        (DurationUnit.MINUTES, DurationUnit.DAYS): (DurationUnit.MINUTES, 1, 60 * 24),
        (DurationUnit.MINUTES, DurationUnit.NIGHTS): (
            DurationUnit.MINUTES,
            1,
            60 * 24,
        ),
        (DurationUnit.MINUTES, DurationUnit.WEEKS): (
            DurationUnit.MINUTES,
            1,
            60 * 24 * 7,
        ),
        (DurationUnit.MINUTES, DurationUnit.MONTHS): (
            DurationUnit.MINUTES,
            1,
            (365 * 24 * 60) / 12,
        ),
        (DurationUnit.HOURS, DurationUnit.MINUTES): (DurationUnit.MINUTES, 60, 1),
        (DurationUnit.HOURS, DurationUnit.HOURS): (DurationUnit.HOURS, 1, 1),
        (DurationUnit.HOURS, DurationUnit.DAYS): (DurationUnit.HOURS, 1, 24),
        (DurationUnit.HOURS, DurationUnit.NIGHTS): (DurationUnit.HOURS, 1, 24),
        (DurationUnit.HOURS, DurationUnit.WEEKS): (DurationUnit.HOURS, 1, 24 * 7),
        (DurationUnit.HOURS, DurationUnit.MONTHS): (
            DurationUnit.HOURS,
            1,
            (365 * 24) / 12,
        ),
        (DurationUnit.DAYS, DurationUnit.MINUTES): (DurationUnit.MINUTES, 24 * 60, 1),
        (DurationUnit.DAYS, DurationUnit.HOURS): (DurationUnit.HOURS, 24, 1),
        (DurationUnit.DAYS, DurationUnit.DAYS): (DurationUnit.DAYS, 1, 1),
        (DurationUnit.DAYS, DurationUnit.NIGHTS): (DurationUnit.DAYS, 1, 1),
        (DurationUnit.DAYS, DurationUnit.WEEKS): (DurationUnit.DAYS, 1, 7),
        (DurationUnit.DAYS, DurationUnit.MONTHS): (DurationUnit.DAYS, 1, 365 / 12),
        (DurationUnit.NIGHTS, DurationUnit.MINUTES): (
            DurationUnit.MINUTES,
            24 * 60,
            1,
        ),
        (DurationUnit.NIGHTS, DurationUnit.HOURS): (DurationUnit.HOURS, 24, 1),
        (DurationUnit.NIGHTS, DurationUnit.DAYS): (DurationUnit.DAYS, 1, 1),
        (DurationUnit.NIGHTS, DurationUnit.NIGHTS): (DurationUnit.DAYS, 1, 1),
        (DurationUnit.NIGHTS, DurationUnit.WEEKS): (DurationUnit.DAYS, 1, 7),
        (DurationUnit.NIGHTS, DurationUnit.MONTHS): (DurationUnit.DAYS, 1, 365 / 12),
        (DurationUnit.WEEKS, DurationUnit.MINUTES): (
            DurationUnit.MINUTES,
            7 * 24 * 60,
            1,
        ),
        (DurationUnit.WEEKS, DurationUnit.HOURS): (DurationUnit.HOURS, 7 * 24, 1),
        (DurationUnit.WEEKS, DurationUnit.DAYS): (DurationUnit.DAYS, 7, 1),
        (DurationUnit.WEEKS, DurationUnit.NIGHTS): (DurationUnit.DAYS, 7, 1),
        # Same unit on both sides: keep the unit, so 1 week + 1 week is
        # 2 weeks (the previous (DAYS, 1, 1) entry produced "2 days").
        (DurationUnit.WEEKS, DurationUnit.WEEKS): (DurationUnit.WEEKS, 1, 1),
        # The sum is in DAYS, so both operands are converted to days (the
        # previous factors were weeks-per-month, not days).
        (DurationUnit.WEEKS, DurationUnit.MONTHS): (DurationUnit.DAYS, 7, 365 / 12),
        (DurationUnit.MONTHS, DurationUnit.MINUTES): (
            DurationUnit.MINUTES,
            (365 * 24 * 60) / 12,
            1,
        ),
        (DurationUnit.MONTHS, DurationUnit.HOURS): (
            DurationUnit.HOURS,
            (365 * 24) / 12,
            1,
        ),
        (DurationUnit.MONTHS, DurationUnit.DAYS): (DurationUnit.DAYS, 365 / 12, 1),
        (DurationUnit.MONTHS, DurationUnit.NIGHTS): (DurationUnit.DAYS, 365 / 12, 1),
        (DurationUnit.MONTHS, DurationUnit.WEEKS): (DurationUnit.DAYS, 365 / 12, 7),
        # Same unit on both sides: 1 month + 1 month is 2 months.
        (DurationUnit.MONTHS, DurationUnit.MONTHS): (DurationUnit.MONTHS, 1, 1),
    }

    def __add__(self, other: "Duration") -> "Duration":
        """Add two durations.

        The unit of the result and the conversion factors come from
        ``_mapping``; the converted sum is truncated to an integer.
        """
        if not isinstance(other, Duration):
            # Let Python raise the standard TypeError for unsupported operands.
            return NotImplemented
        unit, f1, f2 = Duration._mapping[(self.unit, other.unit)]
        return Duration(value=int(f1 * self.value + f2 * other.value), unit=unit)

    @classmethod
    def from_str(cls: Type["Duration"], text: str) -> "Duration":
        """Parse the textual form produced by ``__str__`` (``"<int> <unit>"``).

        Raises ``ValueError`` if ``text`` does not consist of exactly two
        whitespace-separated parts, the first is not an integer, or the
        second is not a ``DurationUnit`` value.
        """
        value, unit = text.split()
        return cls(int(value), DurationUnit(unit))