from datetime import datetime
from typing import Any, Dict, Optional, Tuple, Type, TypeVar
import regex
from regex import Regex
import enum
T = TypeVar("T", bound="Artifact")
[docs]class Artifact:
def __init__(self) -> None:
self.mstart = 0
self.mend = 0
self._attrs = ["mstart", "mend"]
[docs] def update_span(self: T, *args: "Artifact") -> T:
self.mstart = args[0].mstart
self.mend = args[-1].mend
return self
def __len__(self) -> int:
return self.mend - self.mstart
def __bool__(self) -> bool:
return True
def __str__(self) -> str:
return ""
def __repr__(self) -> str:
return "{}[{}-{}]{{{}}}".format(
self.__class__.__name__, self.mstart, self.mend, str(self)
)
[docs] def nb_str(self) -> str:
"""Return a string representation without the bounds information."""
return "{}[]{{{}}}".format(self.__class__.__name__, str(self))
def __eq__(self, other: Any) -> bool:
if type(other) != type(self):
return False
else:
return all(getattr(self, a) == getattr(other, a) for a in self._attrs)
def __hash__(self) -> int:
return hash(tuple(getattr(self, a) for a in self._attrs))
def _hasOnly(self, *args: str) -> bool:
"""check that all attributes set to True are set (i.e. not None) and
all set to False are not set (i.e. None)
"""
return all(
getattr(self, a) is not None if a in args else getattr(self, a) is None
for a in self._attrs
)
def _hasAtLeast(self, *args: str) -> bool:
"""check that all attributes set to True are set (i.e. not None) and
all set to False are not set (i.e. None)
"""
return all(getattr(self, a) is not None for a in args)
[docs]class RegexMatch(Artifact):
def __init__(self, id: int, m: Regex) -> None:
super().__init__()
self._attrs = ["mstart", "mend", "id"]
self.key = "R{}".format(id)
self.id = id
self.match = m
self.mstart = m.span(self.key)[0]
self.mend = m.span(self.key)[1]
self._text = m.group(self.key)
def __str__(self) -> str:
return "{}:{}".format(self.id, self._text)
_pod_hours = {
"earlymorning": {
"offset": (4, 7),
"early": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (-1, -1),
},
"late": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (1, 1),
},
},
"morning": {
"offset": (6, 9),
"early": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (-1, -1),
},
"late": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (1, 1),
},
},
"forenoon": {
"offset": (9, 12),
"early": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (-1, -1),
},
"late": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (1, 1),
},
},
"noon": {
"offset": (11, 13),
"early": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (-1, -1),
},
"late": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (1, 1),
},
},
"afternoon": {
"offset": (12, 17),
"early": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (-1, -1),
},
"late": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (1, 1),
},
},
"evening": {
"offset": (17, 20),
"early": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (-1, -1),
},
"late": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (1, 1),
},
},
"lateevening": {
"offset": (18, 21),
"early": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (-1, -1),
},
"late": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (1, 1),
},
},
"night": {
"offset": (19, 22),
"early": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (-1, -1),
},
"late": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (1, 1),
},
},
"first": {
"offset": (0, 0),
"early": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (0, 0),
},
"late": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (0, 0),
},
},
"last": {
"offset": (23, 23),
"early": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (0, 0),
},
"late": {
"early": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"late": {"offset": (0, 0), "very": {"offset": (0, 0)}},
"very": {"offset": (0, 0)},
"offset": (0, 0),
},
},
}
def _mk_pod_hours() -> Dict[str, Tuple[int, int]]:
def _add_ts(t1: Tuple[int, int], t2: Tuple[int, int]) -> Tuple[int, int]:
return (t1[0] + t2[0], t1[1] + t2[1])
def _mk(
pod: str, pod_data: Dict[str, Any], t: Tuple[int, int]
) -> Dict[str, Tuple[int, int]]:
r = {pod: _add_ts(t, pod_data["offset"])}
for k, v in pod_data.items():
if k == "offset":
continue
r.update(_mk(k + pod, v, r[pod]))
return r
res = {}
for k, v in _pod_hours.items():
if k == "offset":
continue
res.update(_mk(k, v, (0, 0)))
return res
pod_hours = _mk_pod_hours()
_TIME_REGEX = regex.compile(
r"(\d{4}|X)-(\d{2}|X)-(\d{2}|X) (\d{2}|X):(\d{2}|X) \((\d|X)\/(\w+)\)"
)
[docs]class Time(Artifact):
def __init__(
self,
year: Optional[int] = None,
month: Optional[int] = None,
day: Optional[int] = None,
hour: Optional[int] = None,
minute: Optional[int] = None,
DOW: Optional[int] = None,
POD: Optional[str] = None,
) -> None:
super().__init__()
self._attrs = ["year", "month", "day", "hour", "minute", "DOW", "POD"]
# Might add some validation here, did not to avoid the overhead
self.year = year
self.month = month
self.day = day
self.hour = hour
self.minute = minute
self.DOW = DOW
self.POD = POD
# -----------------------------------------------------------------------------
# Make sure to not accidentially test bool(x) as False when x==0, but you meant
# x==None
# -----------------------------------------------------------------------------
@property
def isDOY(self) -> bool:
"""isDayOfYear <=> a dd.mm but not year"""
return self._hasOnly("month", "day")
@property
def isDOM(self) -> bool:
"""isDayOfMonth <=> a dd but no month"""
return self._hasOnly("day")
@property
def isDOW(self) -> bool:
"""isDayOfWeek <=> DOW is the 0=Monday index; fragile test, as the DOW
could be accompanied by e.g. a full date etc.; in practice,
however, the production rules do not do that.
"""
return self._hasOnly("DOW")
@property
def isMonth(self) -> bool:
return self._hasOnly("month")
@property
def isPOD(self) -> bool:
"""isPartOfDay <=> morning, etc.; fragile, tests only that there is a
POD and neither a full date nor a full time
"""
return self._hasOnly("POD")
@property
def isHour(self) -> bool:
"""only has an hour"""
return self._hasOnly("hour")
@property
def isTOD(self) -> bool:
"""isTimeOfDay - only a time, not date"""
return self._hasOnly("hour") or self._hasOnly("hour", "minute")
@property
def isDate(self) -> bool:
"""isDate - only a date, not time"""
return self._hasOnly("year", "month", "day")
@property
def isDateTime(self) -> bool:
"""a date and a time"""
return self._hasOnly("year", "month", "day", "hour") or self._hasOnly(
"year", "month", "day", "hour", "minute"
)
@property
def isYear(self) -> bool:
"""just a year"""
return self._hasOnly("year")
@property
def hasDate(self) -> bool:
"""at least a date"""
return self._hasAtLeast("year", "month", "day")
@property
def hasDOY(self) -> bool:
"""at least a day of year"""
return self._hasAtLeast("month", "day")
@property
def hasDOW(self) -> bool:
"""at least a day of week"""
return self._hasAtLeast("DOW")
@property
def hasTime(self) -> bool:
"""at least a time to the hour"""
return self._hasAtLeast("hour")
@property
def hasPOD(self) -> bool:
"""at least a part of day"""
return self._hasAtLeast("POD")
def __str__(self) -> str:
return "{}-{}-{} {}:{} ({}/{})".format(
"{:04d}".format(self.year) if self.year is not None else "X",
"{:02d}".format(self.month) if self.month is not None else "X",
"{:02d}".format(self.day) if self.day is not None else "X",
"{:02d}".format(self.hour) if self.hour is not None else "X",
"{:02d}".format(self.minute) if self.minute is not None else "X",
"{:d}".format(self.DOW) if self.DOW is not None else "X",
"{}".format(self.POD) if self.POD is not None else "X",
)
[docs] @classmethod
def from_str(cls: Type["Time"], text: str) -> "Time":
match = _TIME_REGEX.match(text)
if not match:
raise ValueError("Invalid format")
else:
def parse_opt_int(x: str) -> Optional[int]:
return None if x == "X" else int(x)
pod = match.group(7)
return cls(
year=parse_opt_int(match.group(1)),
month=parse_opt_int(match.group(2)),
day=parse_opt_int(match.group(3)),
hour=parse_opt_int(match.group(4)),
minute=parse_opt_int(match.group(5)),
DOW=parse_opt_int(match.group(6)),
POD=None if pod == "X" else pod,
)
@property
def start(self) -> "Time":
if self.hour is None and self.hasPOD:
hour = pod_hours[self.POD][0] # type: ignore
else:
hour = self.hour or 0
return Time(
year=self.year,
month=self.month,
day=self.day,
hour=hour,
minute=self.minute or 0,
)
@property
def end(self) -> "Time":
if self.hour is None and self.hasPOD:
hour = pod_hours[self.POD][1] # type: ignore
else:
hour = self.hour if self.hour is not None else 23
return Time(
year=self.year,
month=self.month,
day=self.day,
hour=hour,
minute=self.minute if self.minute is not None else 59,
)
@property
def dt(self) -> datetime:
# Use the start time, in case we have a POD specification
t = self.start
if t.year is None or t.month is None or t.day is None:
raise ValueError(
"cannot convert underspecified Time into datetime"
", missing at least one of year, month or day"
)
return datetime(t.year, t.month, t.day, t.hour or 0, t.minute or 0)
[docs]class Interval(Artifact):
def __init__(
self, t_from: Optional[Time] = None, t_to: Optional[Time] = None
) -> None:
super().__init__()
self._attrs = ["t_from", "t_to"]
self.t_from = t_from
self.t_to = t_to
@property
def isTimeInterval(self) -> bool:
if self.t_from is None or self.t_to is None:
return False
else:
return self.t_from.isTOD and self.t_to.isTOD
@property
def isDateInterval(self) -> bool:
if self.t_from is None or self.t_to is None:
return False
return self.t_from.isDate and self.t_to.isDate
def __str__(self) -> str:
return "{} - {}".format(str(self.t_from), str(self.t_to))
[docs] @classmethod
def from_str(cls: Type["Interval"], text: str) -> "Interval":
bounds = text.split(" - ")
if len(bounds) != 2:
raise ValueError("Invalid format")
t_from = None if bounds[0] == "None" else Time.from_str(bounds[0])
t_to = None if bounds[1] == "None" else Time.from_str(bounds[1])
return cls(t_from=t_from, t_to=t_to)
@property
def start(self) -> Optional[Time]:
if self.t_from is not None:
return self.t_from.start
else:
return None
@property
def end(self) -> Optional[Time]:
if self.t_to is not None:
return self.t_to.end
else:
return None
[docs]@enum.unique
class DurationUnit(enum.Enum):
MINUTES = "minutes"
HOURS = "hours"
DAYS = "days"
NIGHTS = "nights"
WEEKS = "weeks"
MONTHS = "months"
[docs]class Duration(Artifact):
def __init__(self, value: int, unit: DurationUnit):
"""Create a Duration using value and unit.
Typical values for unit are:
minute, hour, day, night, week, month, year
"""
super().__init__()
self.value = value
self.unit = unit
def __str__(self) -> str:
return "{} {}".format(self.value, self.unit.value)
_mapping = {
(DurationUnit.MINUTES, DurationUnit.MINUTES): (DurationUnit.MINUTES, 1, 1),
(DurationUnit.MINUTES, DurationUnit.HOURS): (DurationUnit.MINUTES, 1, 60),
(DurationUnit.MINUTES, DurationUnit.DAYS): (DurationUnit.MINUTES, 1, 60 * 24),
(DurationUnit.MINUTES, DurationUnit.NIGHTS): (DurationUnit.MINUTES, 1, 60 * 24),
(DurationUnit.MINUTES, DurationUnit.WEEKS): (
DurationUnit.MINUTES,
1,
60 * 24 * 7,
),
(DurationUnit.MINUTES, DurationUnit.MONTHS): (
DurationUnit.MINUTES,
1,
(365 * 24 * 60) / 12,
),
(DurationUnit.HOURS, DurationUnit.MINUTES): (DurationUnit.MINUTES, 60, 1),
(DurationUnit.HOURS, DurationUnit.HOURS): (DurationUnit.HOURS, 1, 1),
(DurationUnit.HOURS, DurationUnit.DAYS): (DurationUnit.HOURS, 1, 24),
(DurationUnit.HOURS, DurationUnit.NIGHTS): (DurationUnit.HOURS, 1, 24),
(DurationUnit.HOURS, DurationUnit.WEEKS): (DurationUnit.HOURS, 1, 24 * 7),
(DurationUnit.HOURS, DurationUnit.MONTHS): (
DurationUnit.HOURS,
1,
(365 * 24) / 12,
),
(DurationUnit.DAYS, DurationUnit.MINUTES): (DurationUnit.MINUTES, 24 * 60, 1),
(DurationUnit.DAYS, DurationUnit.HOURS): (DurationUnit.HOURS, 24, 1),
(DurationUnit.DAYS, DurationUnit.DAYS): (DurationUnit.DAYS, 1, 1),
(DurationUnit.DAYS, DurationUnit.NIGHTS): (DurationUnit.DAYS, 1, 1),
(DurationUnit.DAYS, DurationUnit.WEEKS): (DurationUnit.DAYS, 1, 7),
(DurationUnit.DAYS, DurationUnit.MONTHS): (DurationUnit.DAYS, 1, 365 / 12),
(DurationUnit.NIGHTS, DurationUnit.MINUTES): (DurationUnit.MINUTES, 24 * 60, 1),
(DurationUnit.NIGHTS, DurationUnit.HOURS): (DurationUnit.HOURS, 24, 1),
(DurationUnit.NIGHTS, DurationUnit.DAYS): (DurationUnit.DAYS, 1, 1),
(DurationUnit.NIGHTS, DurationUnit.NIGHTS): (DurationUnit.DAYS, 1, 1),
(DurationUnit.NIGHTS, DurationUnit.WEEKS): (DurationUnit.DAYS, 1, 7),
(DurationUnit.NIGHTS, DurationUnit.MONTHS): (DurationUnit.DAYS, 1, 365 / 12),
(DurationUnit.WEEKS, DurationUnit.MINUTES): (
DurationUnit.MINUTES,
7 * 24 * 60,
1,
),
(DurationUnit.WEEKS, DurationUnit.HOURS): (DurationUnit.HOURS, 7 * 24, 1),
(DurationUnit.WEEKS, DurationUnit.DAYS): (DurationUnit.DAYS, 7, 1),
(DurationUnit.WEEKS, DurationUnit.NIGHTS): (DurationUnit.DAYS, 7, 1),
(DurationUnit.WEEKS, DurationUnit.WEEKS): (DurationUnit.DAYS, 1, 1),
(DurationUnit.WEEKS, DurationUnit.MONTHS): (DurationUnit.DAYS, 1, 52 / 12),
(DurationUnit.MONTHS, DurationUnit.MINUTES): (
DurationUnit.MINUTES,
(365 * 24 * 60) / 12,
1,
),
(DurationUnit.MONTHS, DurationUnit.HOURS): (
DurationUnit.HOURS,
(365 * 24) / 12,
1,
),
(DurationUnit.MONTHS, DurationUnit.DAYS): (DurationUnit.DAYS, 365 / 12, 1),
(DurationUnit.MONTHS, DurationUnit.NIGHTS): (DurationUnit.DAYS, 365 / 12, 1),
(DurationUnit.MONTHS, DurationUnit.WEEKS): (DurationUnit.DAYS, 52 / 12, 1),
(DurationUnit.MONTHS, DurationUnit.MONTHS): (DurationUnit.DAYS, 1, 1),
}
def __add__(self, other: "Duration") -> "Duration":
unit, f1, f2 = Duration._mapping[(self.unit, other.unit)]
return Duration(value=int(f1 * self.value + f2 * other.value), unit=unit)
[docs] @classmethod
def from_str(cls: Type["Duration"], text: str) -> "Duration":
value, unit = text.split()
return Duration(int(value), DurationUnit(unit))