use aho_corasick::{AhoCorasick, MatchKind};
use std::sync::LazyLock;
use crate::dictionaries::Locale;
/// Category attached to every entry of the locale pattern tables and carried by [`Token`].
///
/// The examples on each variant are literals taken from the pattern tables in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
/// Month name or abbreviation, e.g. "january", "jan".
Month,
/// Weekday name or abbreviation, e.g. "monday", "mon".
Weekday,
/// Casual day reference, e.g. "now", "today", "tomorrow", "yesterday".
CasualDate,
/// Casual time-of-day reference, e.g. "noon", "midnight", "morning".
CasualTime,
/// Unit of time, e.g. "second", "hours", "d".
TimeUnit,
/// Relative modifier, e.g. "this", "next", "last", "past", "previous".
RelativeModifier,
/// Past-direction marker, e.g. "ago", "earlier" (German "vor", Dutch "geleden").
Ago,
/// Future-direction marker, e.g. "later", "from now".
Later,
/// "within" (Italian "tra"/"fra", Dutch "over").
Within,
/// "at" (German "um", Italian "alle").
At,
/// "in" (Spanish "en", French "dans").
In,
/// "on" (German "am").
On,
/// "from".
From,
/// "before".
Before,
/// "after".
After,
}
/// A classified hit: the category of a matched pattern plus position information.
///
/// NOTE(review): the code that builds `Token` values is outside this view; the field notes
/// below that go beyond the declared types are assumptions to confirm.
#[derive(Debug, Clone)]
pub struct Token {
/// Category of the pattern that matched.
pub token_type: TokenType,
/// Presumably the offset in the input where the match begins — TODO confirm unit (bytes?).
pub start: usize,
/// Presumably the offset in the input where the match ends — TODO confirm whether exclusive.
pub end: usize,
/// Presumably the index of the matching entry in the locale's pattern table — TODO confirm.
pub pattern_id: usize,
}
/// One row of a locale pattern table: a literal and the category it maps to.
struct PatternEntry {
/// The literal text of the pattern (all table entries in this file are lowercase).
pattern: &'static str,
/// Category assigned to this pattern.
token_type: TokenType,
}
/// English pattern dictionary: every literal the English locale recognises, paired with
/// its [`TokenType`].
///
/// NOTE(review): entry order is kept exactly as it was; `Token::pattern_id` presumably
/// indexes into this table and match priority may depend on order — confirm before
/// reordering or inserting in the middle.
// One entry per line keeps the table scannable; rustfmt would expand each literal again.
#[rustfmt::skip]
static EN_PATTERNS: &[PatternEntry] = &[
    // Months: full names.
    PatternEntry { pattern: "january", token_type: TokenType::Month },
    PatternEntry { pattern: "february", token_type: TokenType::Month },
    PatternEntry { pattern: "march", token_type: TokenType::Month },
    PatternEntry { pattern: "april", token_type: TokenType::Month },
    PatternEntry { pattern: "may", token_type: TokenType::Month },
    PatternEntry { pattern: "june", token_type: TokenType::Month },
    PatternEntry { pattern: "july", token_type: TokenType::Month },
    PatternEntry { pattern: "august", token_type: TokenType::Month },
    PatternEntry { pattern: "september", token_type: TokenType::Month },
    PatternEntry { pattern: "october", token_type: TokenType::Month },
    PatternEntry { pattern: "november", token_type: TokenType::Month },
    PatternEntry { pattern: "december", token_type: TokenType::Month },
    // Months: abbreviations ("may" has none of its own).
    PatternEntry { pattern: "jan", token_type: TokenType::Month },
    PatternEntry { pattern: "feb", token_type: TokenType::Month },
    PatternEntry { pattern: "mar", token_type: TokenType::Month },
    PatternEntry { pattern: "apr", token_type: TokenType::Month },
    PatternEntry { pattern: "jun", token_type: TokenType::Month },
    PatternEntry { pattern: "jul", token_type: TokenType::Month },
    PatternEntry { pattern: "aug", token_type: TokenType::Month },
    PatternEntry { pattern: "sep", token_type: TokenType::Month },
    PatternEntry { pattern: "sept", token_type: TokenType::Month },
    PatternEntry { pattern: "oct", token_type: TokenType::Month },
    PatternEntry { pattern: "nov", token_type: TokenType::Month },
    PatternEntry { pattern: "dec", token_type: TokenType::Month },
    // Weekdays: full names.
    PatternEntry { pattern: "sunday", token_type: TokenType::Weekday },
    PatternEntry { pattern: "monday", token_type: TokenType::Weekday },
    PatternEntry { pattern: "tuesday", token_type: TokenType::Weekday },
    PatternEntry { pattern: "wednesday", token_type: TokenType::Weekday },
    PatternEntry { pattern: "thursday", token_type: TokenType::Weekday },
    PatternEntry { pattern: "friday", token_type: TokenType::Weekday },
    PatternEntry { pattern: "saturday", token_type: TokenType::Weekday },
    // Weekdays: abbreviations.
    PatternEntry { pattern: "sun", token_type: TokenType::Weekday },
    PatternEntry { pattern: "mon", token_type: TokenType::Weekday },
    PatternEntry { pattern: "tue", token_type: TokenType::Weekday },
    PatternEntry { pattern: "wed", token_type: TokenType::Weekday },
    PatternEntry { pattern: "thu", token_type: TokenType::Weekday },
    PatternEntry { pattern: "thur", token_type: TokenType::Weekday },
    PatternEntry { pattern: "thurs", token_type: TokenType::Weekday },
    PatternEntry { pattern: "fri", token_type: TokenType::Weekday },
    PatternEntry { pattern: "sat", token_type: TokenType::Weekday },
    // Casual day references.
    PatternEntry { pattern: "now", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "today", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "tonight", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "tomorrow", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "tmr", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "tmrw", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "yesterday", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "overmorrow", token_type: TokenType::CasualDate },
    // Casual times of day.
    PatternEntry { pattern: "noon", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "midday", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "midnight", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "morning", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "afternoon", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "evening", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "night", token_type: TokenType::CasualTime },
    // Time units: full words, singular then plural.
    PatternEntry { pattern: "second", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "seconds", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "minute", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "minutes", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "hour", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "hours", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "day", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "days", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "week", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "weeks", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "month", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "months", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "year", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "years", token_type: TokenType::TimeUnit },
    // Time units: short forms.
    PatternEntry { pattern: "sec", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "secs", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "min", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "mins", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "hr", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "hrs", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "mo", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "yr", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "yrs", token_type: TokenType::TimeUnit },
    // Time units: single-letter forms.
    PatternEntry { pattern: "h", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "m", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "s", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "d", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "w", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "y", token_type: TokenType::TimeUnit },
    // Relative modifiers.
    PatternEntry { pattern: "this", token_type: TokenType::RelativeModifier },
    PatternEntry { pattern: "next", token_type: TokenType::RelativeModifier },
    PatternEntry { pattern: "last", token_type: TokenType::RelativeModifier },
    PatternEntry { pattern: "past", token_type: TokenType::RelativeModifier },
    PatternEntry { pattern: "previous", token_type: TokenType::RelativeModifier },
    // Direction markers.
    PatternEntry { pattern: "ago", token_type: TokenType::Ago },
    PatternEntry { pattern: "before", token_type: TokenType::Before },
    PatternEntry { pattern: "earlier", token_type: TokenType::Ago },
    PatternEntry { pattern: "later", token_type: TokenType::Later },
    PatternEntry { pattern: "after", token_type: TokenType::After },
    PatternEntry { pattern: "from now", token_type: TokenType::Later },
    // Prepositions.
    PatternEntry { pattern: "within", token_type: TokenType::Within },
    PatternEntry { pattern: "in", token_type: TokenType::In },
    PatternEntry { pattern: "at", token_type: TokenType::At },
    PatternEntry { pattern: "on", token_type: TokenType::On },
    PatternEntry { pattern: "from", token_type: TokenType::From },
];
static DE_PATTERNS: &[PatternEntry] = &[
PatternEntry {
pattern: "januar",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "februar",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "mƤrz",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "maerz",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "april",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "mai",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "juni",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "juli",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "august",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "september",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "oktober",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "november",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "dezember",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "sonntag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "so",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "montag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "mo",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "dienstag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "di",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "mittwoch",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "mi",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "donnerstag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "do",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "freitag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "fr",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "samstag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "sa",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "jetzt",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "heute",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "morgen",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "gestern",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "übermorgen",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "uebermorgen",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "vorgestern",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "sekunde",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "sekunden",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "minute",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "minuten",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "min",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "stunde",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "stunden",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "tag",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "tage",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "tagen",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "woche",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "wochen",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "monat",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "monate",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "monaten",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "monats",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "jahr",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "jahre",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "jahren",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "jahres",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "vor",
token_type: TokenType::Ago,
},
PatternEntry {
pattern: "in",
token_type: TokenType::In,
},
PatternEntry {
pattern: "um",
token_type: TokenType::At,
},
PatternEntry {
pattern: "am",
token_type: TokenType::On,
},
];
/// Spanish pattern dictionary: every literal the Spanish locale recognises, paired with
/// its [`TokenType`].
///
/// Accented spellings are stored as real UTF-8 text next to their unaccented fallbacks
/// ("miércoles"/"miercoles", "mañana"/"manana", ...).
///
/// NOTE(review): entry order is kept exactly as it was; `Token::pattern_id` presumably
/// indexes into this table and match priority may depend on order — confirm before
/// reordering or inserting in the middle.
// One entry per line keeps the table scannable; rustfmt would expand each literal again.
#[rustfmt::skip]
static ES_PATTERNS: &[PatternEntry] = &[
    // Months: full names.
    PatternEntry { pattern: "enero", token_type: TokenType::Month },
    PatternEntry { pattern: "febrero", token_type: TokenType::Month },
    PatternEntry { pattern: "marzo", token_type: TokenType::Month },
    PatternEntry { pattern: "abril", token_type: TokenType::Month },
    PatternEntry { pattern: "mayo", token_type: TokenType::Month },
    PatternEntry { pattern: "junio", token_type: TokenType::Month },
    PatternEntry { pattern: "julio", token_type: TokenType::Month },
    PatternEntry { pattern: "agosto", token_type: TokenType::Month },
    PatternEntry { pattern: "septiembre", token_type: TokenType::Month },
    PatternEntry { pattern: "octubre", token_type: TokenType::Month },
    PatternEntry { pattern: "noviembre", token_type: TokenType::Month },
    PatternEntry { pattern: "diciembre", token_type: TokenType::Month },
    // Months: abbreviations ("ago" here is August, not the English direction word).
    PatternEntry { pattern: "ene", token_type: TokenType::Month },
    PatternEntry { pattern: "feb", token_type: TokenType::Month },
    PatternEntry { pattern: "abr", token_type: TokenType::Month },
    PatternEntry { pattern: "jun", token_type: TokenType::Month },
    PatternEntry { pattern: "jul", token_type: TokenType::Month },
    PatternEntry { pattern: "ago", token_type: TokenType::Month },
    PatternEntry { pattern: "sep", token_type: TokenType::Month },
    PatternEntry { pattern: "oct", token_type: TokenType::Month },
    PatternEntry { pattern: "nov", token_type: TokenType::Month },
    PatternEntry { pattern: "dic", token_type: TokenType::Month },
    // Weekdays.
    PatternEntry { pattern: "domingo", token_type: TokenType::Weekday },
    PatternEntry { pattern: "lunes", token_type: TokenType::Weekday },
    PatternEntry { pattern: "martes", token_type: TokenType::Weekday },
    PatternEntry { pattern: "miércoles", token_type: TokenType::Weekday },
    PatternEntry { pattern: "miercoles", token_type: TokenType::Weekday },
    PatternEntry { pattern: "jueves", token_type: TokenType::Weekday },
    PatternEntry { pattern: "viernes", token_type: TokenType::Weekday },
    PatternEntry { pattern: "sábado", token_type: TokenType::Weekday },
    PatternEntry { pattern: "sabado", token_type: TokenType::Weekday },
    // Casual day references.
    PatternEntry { pattern: "ahora", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "hoy", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "mañana", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "manana", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "ayer", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "anoche", token_type: TokenType::CasualDate },
    // Casual times of day.
    PatternEntry { pattern: "mediodía", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "mediodia", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "medianoche", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "tarde", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "noche", token_type: TokenType::CasualTime },
    // Time units.
    PatternEntry { pattern: "segundo", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "segundos", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "minuto", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "minutos", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "hora", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "horas", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "día", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "dia", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "días", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "dias", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "semana", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "semanas", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "mes", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "meses", token_type: TokenType::TimeUnit },
    // NOTE(review): the unaccented singular "ano" is absent while "anos" is present —
    // confirm this omission is intentional.
    PatternEntry { pattern: "año", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "anos", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "años", token_type: TokenType::TimeUnit },
    // Direction marker and prepositions.
    PatternEntry { pattern: "hace", token_type: TokenType::Ago },
    PatternEntry { pattern: "en", token_type: TokenType::In },
    PatternEntry { pattern: "dentro de", token_type: TokenType::Within },
];
static FR_PATTERNS: &[PatternEntry] = &[
PatternEntry {
pattern: "janvier",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "fƩvrier",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "fevrier",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "mars",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "avril",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "mai",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "juin",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "juillet",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "aoƻt",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "aout",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "septembre",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "octobre",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "novembre",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "dƩcembre",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "decembre",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "dimanche",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "lundi",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "mardi",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "mercredi",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "jeudi",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "vendredi",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "samedi",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "maintenant",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "aujourd'hui",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "aujourdhui",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "demain",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "hier",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "midi",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "minuit",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "matin",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "soir",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "seconde",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "secondes",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "minute",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "minutes",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "heure",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "heures",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "jour",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "jours",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "semaine",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "semaines",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "mois",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "an",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ans",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "annƩe",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "annee",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "il y a",
token_type: TokenType::Ago,
},
PatternEntry {
pattern: "dans",
token_type: TokenType::In,
},
PatternEntry {
pattern: "Ć ",
token_type: TokenType::At,
},
];
/// Italian pattern dictionary: every literal the Italian locale recognises, paired with
/// its [`TokenType`].
///
/// Accented weekday spellings are stored as real UTF-8 text next to their unaccented
/// fallbacks ("lunedì"/"lunedi", ...).
///
/// NOTE(review): entry order is kept exactly as it was; `Token::pattern_id` presumably
/// indexes into this table and match priority may depend on order — confirm before
/// reordering or inserting in the middle.
// One entry per line keeps the table scannable; rustfmt would expand each literal again.
#[rustfmt::skip]
static IT_PATTERNS: &[PatternEntry] = &[
    // Casual day references.
    PatternEntry { pattern: "adesso", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "ora", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "oggi", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "stanotte", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "stasera", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "stamattina", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "domani", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "ieri", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "dopodomani", token_type: TokenType::CasualDate },
    // Casual times of day.
    PatternEntry { pattern: "mezzogiorno", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "mezzanotte", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "mattina", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "mattino", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "pomeriggio", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "sera", token_type: TokenType::CasualTime },
    PatternEntry { pattern: "notte", token_type: TokenType::CasualTime },
    // Months.
    PatternEntry { pattern: "gennaio", token_type: TokenType::Month },
    PatternEntry { pattern: "febbraio", token_type: TokenType::Month },
    PatternEntry { pattern: "marzo", token_type: TokenType::Month },
    PatternEntry { pattern: "aprile", token_type: TokenType::Month },
    PatternEntry { pattern: "maggio", token_type: TokenType::Month },
    PatternEntry { pattern: "giugno", token_type: TokenType::Month },
    PatternEntry { pattern: "luglio", token_type: TokenType::Month },
    PatternEntry { pattern: "agosto", token_type: TokenType::Month },
    PatternEntry { pattern: "settembre", token_type: TokenType::Month },
    PatternEntry { pattern: "ottobre", token_type: TokenType::Month },
    PatternEntry { pattern: "novembre", token_type: TokenType::Month },
    PatternEntry { pattern: "dicembre", token_type: TokenType::Month },
    // Weekdays.
    PatternEntry { pattern: "domenica", token_type: TokenType::Weekday },
    PatternEntry { pattern: "lunedì", token_type: TokenType::Weekday },
    PatternEntry { pattern: "lunedi", token_type: TokenType::Weekday },
    PatternEntry { pattern: "martedì", token_type: TokenType::Weekday },
    PatternEntry { pattern: "martedi", token_type: TokenType::Weekday },
    PatternEntry { pattern: "mercoledì", token_type: TokenType::Weekday },
    PatternEntry { pattern: "mercoledi", token_type: TokenType::Weekday },
    PatternEntry { pattern: "giovedì", token_type: TokenType::Weekday },
    PatternEntry { pattern: "giovedi", token_type: TokenType::Weekday },
    PatternEntry { pattern: "venerdì", token_type: TokenType::Weekday },
    PatternEntry { pattern: "venerdi", token_type: TokenType::Weekday },
    PatternEntry { pattern: "sabato", token_type: TokenType::Weekday },
    // Time units.
    PatternEntry { pattern: "secondo", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "secondi", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "minuto", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "minuti", token_type: TokenType::TimeUnit },
    // NOTE(review): "ora" is also listed above as a CasualDate; depending on the matcher's
    // match kind only one of the two duplicates may ever be reported — confirm intent.
    PatternEntry { pattern: "ora", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "ore", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "giorno", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "giorni", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "settimana", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "settimane", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "mese", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "mesi", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "anno", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "anni", token_type: TokenType::TimeUnit },
    // Direction marker and prepositions.
    PatternEntry { pattern: "fa", token_type: TokenType::Ago },
    PatternEntry { pattern: "in", token_type: TokenType::In },
    PatternEntry { pattern: "tra", token_type: TokenType::Within },
    PatternEntry { pattern: "fra", token_type: TokenType::Within },
    PatternEntry { pattern: "alle", token_type: TokenType::At },
];
static JA_PATTERNS: &[PatternEntry] = &[
PatternEntry {
pattern: "ä»ę„",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ććć",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ę¬ę„",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ć»ććć¤",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ä»å¤",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ććć",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ä»ę©",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ććć°ć",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ä»å¤",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ćććć",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ęę„",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ććć",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ćć",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ęØę„",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ćć®ć",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ćććć¤",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ęå¾ę„",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ććć£ć¦",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "äøęØę„",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ććØćØć",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ä»ę",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ćć",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "åå",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "åå¾",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "ę£å",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "ę„ęę„",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ęęę„",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ē«ęę„",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ę°“ęę„",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ęØęę„",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "éęę„",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "åęę„",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ē§",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "å",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ęé",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ę„",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "é±é",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ę",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "幓",
token_type: TokenType::TimeUnit,
},
];
/// Dutch pattern dictionary: every literal the Dutch locale recognises, paired with
/// its [`TokenType`].
///
/// NOTE(review): entry order is kept exactly as it was; `Token::pattern_id` presumably
/// indexes into this table and match priority may depend on order — confirm before
/// reordering or inserting in the middle.
// One entry per line keeps the table scannable; rustfmt would expand each literal again.
#[rustfmt::skip]
static NL_PATTERNS: &[PatternEntry] = &[
    // Casual day references, including compound day + part-of-day words.
    PatternEntry { pattern: "nu", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "vandaag", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "vanavond", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "morgen", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "gisteren", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "overmorgen", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "eergisteren", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "vanochtend", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "vanmiddag", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "morgenochtend", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "morgenmiddag", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "morgenavond", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "gisterenochtend", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "gisterenmiddag", token_type: TokenType::CasualDate },
    PatternEntry { pattern: "gisterenavond", token_type: TokenType::CasualDate },
    // Months.
    PatternEntry { pattern: "januari", token_type: TokenType::Month },
    PatternEntry { pattern: "februari", token_type: TokenType::Month },
    PatternEntry { pattern: "maart", token_type: TokenType::Month },
    PatternEntry { pattern: "april", token_type: TokenType::Month },
    PatternEntry { pattern: "mei", token_type: TokenType::Month },
    PatternEntry { pattern: "juni", token_type: TokenType::Month },
    PatternEntry { pattern: "juli", token_type: TokenType::Month },
    PatternEntry { pattern: "augustus", token_type: TokenType::Month },
    PatternEntry { pattern: "september", token_type: TokenType::Month },
    PatternEntry { pattern: "oktober", token_type: TokenType::Month },
    PatternEntry { pattern: "november", token_type: TokenType::Month },
    PatternEntry { pattern: "december", token_type: TokenType::Month },
    // Weekdays.
    PatternEntry { pattern: "zondag", token_type: TokenType::Weekday },
    PatternEntry { pattern: "maandag", token_type: TokenType::Weekday },
    PatternEntry { pattern: "dinsdag", token_type: TokenType::Weekday },
    PatternEntry { pattern: "woensdag", token_type: TokenType::Weekday },
    PatternEntry { pattern: "donderdag", token_type: TokenType::Weekday },
    PatternEntry { pattern: "vrijdag", token_type: TokenType::Weekday },
    PatternEntry { pattern: "zaterdag", token_type: TokenType::Weekday },
    // Time units: singular then plural.
    PatternEntry { pattern: "seconde", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "seconden", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "minuut", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "minuten", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "uur", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "uren", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "dag", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "dagen", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "week", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "weken", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "maand", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "maanden", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "jaar", token_type: TokenType::TimeUnit },
    PatternEntry { pattern: "jaren", token_type: TokenType::TimeUnit },
    // Direction marker and preposition.
    PatternEntry { pattern: "geleden", token_type: TokenType::Ago },
    PatternEntry { pattern: "over", token_type: TokenType::Within },
];
static PT_PATTERNS: &[PatternEntry] = &[
PatternEntry {
pattern: "agora",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "hoje",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "amanhã",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "amanha",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ontem",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "anteontem",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "janeiro",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "fevereiro",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "marƧo",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "marco",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "abril",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "maio",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "junho",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "julho",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "agosto",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "setembro",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "outubro",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "novembro",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "dezembro",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "domingo",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "segunda-feira",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "segunda",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "terƧa-feira",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "terca-feira",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "terƧa",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "terca",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "quarta-feira",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "quarta",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "quinta-feira",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "quinta",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "sexta-feira",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "sexta",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "sƔbado",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "sabado",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "segundo",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "segundos",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "minuto",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "minutos",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "hora",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "horas",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "dia",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "dias",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "semana",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "semanas",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "mĆŖs",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "mes",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "meses",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ano",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "anos",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "atrƔs",
token_type: TokenType::Ago,
},
PatternEntry {
pattern: "atras",
token_type: TokenType::Ago,
},
PatternEntry {
pattern: "em",
token_type: TokenType::In,
},
];
// NOTE(review): the non-ASCII literals in this table look mis-decoded in the
// reviewed copy — confirm the file is stored as UTF-8 before editing them.
/// Keyword table used for `Locale::Ru`; compiled lazily into `RU_AUTOMATON`.
///
/// Each entry pairs one literal with the `TokenType` reported when it matches.
/// `LocaleAutomaton::scan` indexes this slice with the matched pattern id, so an
/// entry's position in the table is the `pattern_id` stored on the resulting
/// `Token`; reordering entries changes those ids.
static RU_PATTERNS: &[PatternEntry] = &[
PatternEntry {
pattern: "ŃŠµŠ¹ŃаŃ",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŃŠµŠ³Š¾Š“нŃ",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "завŃŃŠ°",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "Š²ŃŠµŃа",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŠæŠ¾ŃŠ»ŠµŠ·Š°Š²ŃŃŠ°",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŠæŠ¾ŃŠ»ŠµŠæŠ¾ŃлезавŃŃŠ°",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŠæŠ¾Š·Š°Š²ŃŠµŃа",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŠæŠ¾Š·Š°ŠæŠ¾Š·Š°Š²ŃŠµŃа",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŃŃŃŠ¾Š¼",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "Š²ŠµŃŠµŃом",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "ноŃŃŃ",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "полГенŃ",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "полноŃŃ",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "ŃŠ½Š²Š°ŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŠ½Š²Š°ŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŠµŠ²ŃалŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŠµŠ²ŃалŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "маŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "маŃŃŠ°",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š°ŠæŃŠµŠ»Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š°ŠæŃŠµŠ»Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "май",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "маŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŠøŃŠ½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŠøŃŠ½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŠøŃŠ»Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŠøŃŠ»Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "авгŃŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "авгŃŃŃŠ°",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŠµŠ½ŃŃŠ±ŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŠµŠ½ŃŃŠ±ŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "окŃŃŠ±ŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "окŃŃŠ±ŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š½Š¾ŃŠ±ŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š½Š¾ŃŠ±ŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ГекабŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ГекабŃŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š²Š¾ŃŠŗŃŠµŃŠµŠ½Ńе",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŠæŠ¾Š½ŠµŠ“ŠµŠ»ŃŠ½ŠøŠŗ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "Š²ŃŠ¾Ńник",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŃŠµŠ“а",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŃŠµŠ“Ń",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŠµŃŠ²ŠµŃŠ³",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŠæŃŃŠ½ŠøŃа",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŠæŃŃŠ½ŠøŃŃ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŃŠ±Š±Š¾Ńа",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŃŠ±Š±Š¾ŃŃ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŠµŠŗŃнГа",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠµŠŗŃнГŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠµŠŗŃнГ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "минŃŃŠ°",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "минŃŃŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "минŃŃŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "минŃŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠ°Ń",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠ°Ńа",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠ°Ńов",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ГенŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ГнŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "Гней",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "неГелŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "неГели",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "неГелŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "меŃŃŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "меŃŃŃŠ°",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "меŃŃŃŠµŠ²",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "гоГ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "гоГа",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "леŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "назаГ",
token_type: TokenType::Ago,
},
PatternEntry {
pattern: "ŃŠµŃез",
token_type: TokenType::Within,
},
PatternEntry {
pattern: "в ŃŠµŃение",
token_type: TokenType::Within,
},
PatternEntry {
pattern: "в ŃŠµŃении",
token_type: TokenType::Within,
},
];
// NOTE(review): the non-ASCII literals in this table look mis-decoded in the
// reviewed copy — confirm the file is stored as UTF-8 before editing them.
/// Keyword table used for `Locale::Sv`; compiled lazily into `SV_AUTOMATON`.
///
/// Each entry pairs one literal with the `TokenType` reported when it matches.
/// Several words are listed twice, once with a non-ASCII spelling and once with
/// a plain-ASCII spelling (presumably so unaccented input still matches).
/// `LocaleAutomaton::scan` indexes this slice with the matched pattern id, so an
/// entry's position in the table is the `pattern_id` stored on the resulting
/// `Token`; reordering entries changes those ids.
static SV_PATTERNS: &[PatternEntry] = &[
PatternEntry {
pattern: "nu",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "idag",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ikvƤll",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "i kvƤll",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "imorgon",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "igƄr",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "igar",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "fƶrrgƄr",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "forrgar",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "januari",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "februari",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "mars",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "april",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "maj",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "juni",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "juli",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "augusti",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "september",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "oktober",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "november",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "december",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "sƶndag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "sondag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "mƄndag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "mandag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "tisdag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "onsdag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "torsdag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "fredag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "lƶrdag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "lordag",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "sekund",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "sekunder",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "minut",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "minuter",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "timme",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "timmar",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "dag",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "dagar",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "vecka",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "veckor",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "mƄnad",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "mƄnader",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "Ƅr",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "sedan",
token_type: TokenType::Ago,
},
PatternEntry {
pattern: "om",
token_type: TokenType::Within,
},
];
// NOTE(review): the non-ASCII literals in this table look mis-decoded in the
// reviewed copy — confirm the file is stored as UTF-8 before editing them.
/// Keyword table used for `Locale::Uk`; compiled lazily into `UK_AUTOMATON`.
///
/// Each entry pairs one literal with the `TokenType` reported when it matches.
/// `LocaleAutomaton::scan` indexes this slice with the matched pattern id, so an
/// entry's position in the table is the `pattern_id` stored on the resulting
/// `Token`; reordering entries changes those ids.
static UK_PATTERNS: &[PatternEntry] = &[
PatternEntry {
pattern: "Š·Š°ŃŠ°Š·",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŃŃŠ¾Š³Š¾Š“нŃ",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "завŃŃŠ°",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "Š²ŃŠ¾Ńа",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŠæŃŃŠ»ŃзавŃŃŠ°",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŠæŃŃŠ»ŃŠæŃŃŠ»ŃзавŃŃŠ°",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŠæŠ¾Š·Š°Š²ŃŠ¾Ńа",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ŠæŠ¾Š·Š°ŠæŠ¾Š·Š°Š²ŃŠ¾Ńа",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "Š²ŃŠ°Š½ŃŃ",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "Š²Š²ŠµŃŠµŃŃ",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "вноŃŃ",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "Š¾ŠæŃŠ²Š“нŃ",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "Š¾ŠæŃŠ²Š½Š¾ŃŃ",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "ŃŃŃŠµŠ½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŃŃŠ½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š»ŃŃŠøŠ¹",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š»ŃŃŠ¾Š³Š¾",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š±ŠµŃŠµŠ·ŠµŠ½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š±ŠµŃŠµŠ·Š½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "квŃŃŠµŠ½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "квŃŃŠ½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŃŠ°Š²ŠµŠ½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŃŠ°Š²Š½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŠµŃвенŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŠµŃвнŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "липенŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "липнŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŠµŃпенŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "ŃŠµŃпнŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š²ŠµŃŠµŃенŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š²ŠµŃŠµŃнŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š¶Š¾Š²ŃŠµŠ½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š¶Š¾Š²ŃŠ½Ń",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "лиŃŃŠ¾ŠæŠ°Š“",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "лиŃŃŠ¾ŠæŠ°Š“а",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "гŃŃŠ“енŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "гŃŃŠ“нŃ",
token_type: TokenType::Month,
},
PatternEntry {
pattern: "Š½ŠµŠ“ŃŠ»Ń",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŠæŠ¾Š½ŠµŠ“ŃŠ»Š¾Šŗ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "Š²ŃŠ²ŃŠ¾ŃŠ¾Šŗ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŠµŃеГа",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŠµŃеГŃ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŠµŃвеŃ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "Šæ'ŃŃŠ½ŠøŃŃ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "Šæ'ŃŃŠ½ŠøŃŃ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŃŠ±Š¾Ńа",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŃŠ±Š¾ŃŃ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ŃŠµŠŗŃнГа",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠµŠŗŃнГи",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠµŠŗŃнГ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "Ń
вилина",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "Ń
вилини",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "Ń
вилин",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "гоГина",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "гоГини",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "гоГин",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ГенŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ГнŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "Š“Š½ŃŠ²",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠøŠ¶Š“енŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠøŠ¶Š½Ń",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠøŠ¶Š½Ńв",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "мŃŃŃŃŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "мŃŃŃŃŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "мŃŃŃŃŃŠ²",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŃŠŗ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠ¾ŠŗŃ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠ¾ŠŗŃв",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ŃŠ¾Š¼Ń",
token_type: TokenType::Ago,
},
PatternEntry {
pattern: "ŃŠµŃез",
token_type: TokenType::Within,
},
];
// NOTE(review): the non-ASCII literals in this table look mis-decoded in the
// reviewed copy — confirm the file is stored as UTF-8 before editing them.
/// Keyword table used for `Locale::Zh`; compiled lazily into `ZH_AUTOMATON`.
///
/// Each entry pairs one literal with the `TokenType` reported when it matches.
/// `LocaleAutomaton::scan` indexes this slice with the matched pattern id, so an
/// entry's position in the table is the `pattern_id` stored on the resulting
/// `Token`; reordering entries changes those ids.
static ZH_PATTERNS: &[PatternEntry] = &[
PatternEntry {
pattern: "ē°åØ",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ä»å¤©",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ä»ę",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ę天",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ęØå¤©",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "å天",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "å天",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ē¾åØ",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ä»ę„",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ęę„",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "ęØę„",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "å¾å¤©",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "č½ę„",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "čå®¶",
token_type: TokenType::CasualDate,
},
PatternEntry {
pattern: "äøå",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "äøå",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "ę©äø",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "ęäø",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "äøå",
token_type: TokenType::CasualTime,
},
PatternEntry {
pattern: "ęęę„",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ęęäø",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ęęäŗ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ęęäø",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ęęå",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ęęäŗ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ęęå
",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "åØę„",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "åØäø",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "åØäŗ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "åØäø",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "åØå",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "åØäŗ",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "åØå
",
token_type: TokenType::Weekday,
},
PatternEntry {
pattern: "ē§",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "å",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "åé",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "åé",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "å°ę¶",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "å°ę",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "天",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ę„",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "åØ",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "é±",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ęę",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "ę",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "幓",
token_type: TokenType::TimeUnit,
},
PatternEntry {
pattern: "å",
token_type: TokenType::Ago,
},
PatternEntry {
pattern: "å",
token_type: TokenType::Later,
},
PatternEntry {
pattern: "å¾",
token_type: TokenType::Later,
},
PatternEntry {
pattern: "å
",
token_type: TokenType::Within,
},
PatternEntry {
pattern: "å
§",
token_type: TokenType::Within,
},
];
static EN_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(EN_PATTERNS));
static DE_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(DE_PATTERNS));
static ES_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(ES_PATTERNS));
static FR_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(FR_PATTERNS));
static IT_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(IT_PATTERNS));
static JA_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(JA_PATTERNS));
static NL_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(NL_PATTERNS));
static PT_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(PT_PATTERNS));
static RU_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(RU_PATTERNS));
static SV_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(SV_PATTERNS));
static UK_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(UK_PATTERNS));
static ZH_AUTOMATON: LazyLock<LocaleAutomaton> =
LazyLock::new(|| LocaleAutomaton::new(ZH_PATTERNS));
struct LocaleAutomaton {
ac: AhoCorasick,
patterns: &'static [PatternEntry],
}
impl LocaleAutomaton {
fn new(patterns: &'static [PatternEntry]) -> Self {
let pattern_strs: Vec<&str> = patterns.iter().map(|p| p.pattern).collect();
let ac = AhoCorasick::builder()
.match_kind(MatchKind::LeftmostLongest)
.build(&pattern_strs)
.expect("Failed to build Aho-Corasick automaton");
Self { ac, patterns }
}
fn scan(&self, text: &str) -> Vec<Token> {
let mut tokens = Vec::new();
for mat in self.ac.find_iter(text) {
let pattern_id = mat.pattern().as_usize();
let entry = &self.patterns[pattern_id];
let start = mat.start();
let end = mat.end();
let pattern_len = end - start;
let valid_start = if start == 0 {
true
} else {
let prev_char = text.as_bytes()[start - 1];
if pattern_len == 1 && entry.token_type == TokenType::TimeUnit {
!prev_char.is_ascii_alphabetic()
} else {
!prev_char.is_ascii_alphanumeric()
}
};
let valid_end = end == text.len() || !text.as_bytes()[end].is_ascii_alphanumeric();
if valid_start && valid_end {
tokens.push(Token {
token_type: entry.token_type,
start,
end,
pattern_id,
});
}
}
tokens
}
}
/// Stateless entry point for scanning text with the per-locale automatons.
pub struct TokenScanner;

impl TokenScanner {
    /// Scans `text` with the automaton belonging to `locale` and returns the
    /// tokens it accepts.
    pub fn scan_locale(text: &str, locale: Locale) -> Vec<Token> {
        match locale {
            Locale::En => EN_AUTOMATON.scan(text),
            Locale::De => DE_AUTOMATON.scan(text),
            Locale::Es => ES_AUTOMATON.scan(text),
            Locale::Fr => FR_AUTOMATON.scan(text),
            Locale::It => IT_AUTOMATON.scan(text),
            Locale::Ja => JA_AUTOMATON.scan(text),
            Locale::Nl => NL_AUTOMATON.scan(text),
            Locale::Pt => PT_AUTOMATON.scan(text),
            Locale::Ru => RU_AUTOMATON.scan(text),
            Locale::Sv => SV_AUTOMATON.scan(text),
            Locale::Uk => UK_AUTOMATON.scan(text),
            Locale::Zh => ZH_AUTOMATON.scan(text),
        }
    }

    /// Scans `text` with the English (`Locale::En`) automaton.
    pub fn scan(text: &str) -> Vec<Token> {
        Self::scan_locale(text, Locale::En)
    }

    /// Returns `true` if the English scan of `text` yields at least one token of
    /// `token_type`.
    pub fn contains_type(text: &str, token_type: TokenType) -> bool {
        Self::scan(text)
            .into_iter()
            .any(|token| token.token_type == token_type)
    }

    /// Returns `true` if `text` contains an ASCII digit or the English scan
    /// finds any token. The digit check runs first, so the scan is skipped
    /// whenever a digit is present.
    pub fn has_date_hint(text: &str) -> bool {
        text.bytes().any(|byte| byte.is_ascii_digit()) || !Self::scan(text).is_empty()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `tokens` holds at least one token of type `wanted`.
    fn has_type(tokens: &[Token], wanted: TokenType) -> bool {
        tokens.iter().any(|token| token.token_type == wanted)
    }

    #[test]
    fn test_scan_months() {
        let found = TokenScanner::scan("meeting in january");
        assert!(has_type(&found, TokenType::Month));
    }

    #[test]
    fn test_scan_weekdays() {
        let found = TokenScanner::scan("see you on monday");
        assert!(has_type(&found, TokenType::Weekday));
    }

    #[test]
    fn test_scan_casual() {
        let found = TokenScanner::scan("let's meet tomorrow");
        assert!(has_type(&found, TokenType::CasualDate));
    }

    #[test]
    fn test_word_boundary() {
        // "may" embedded in "maybe" must not be reported as a month.
        let found = TokenScanner::scan("maybe we can");
        assert!(!has_type(&found, TokenType::Month));
    }

    #[test]
    fn test_german_locale() {
        let found = TokenScanner::scan_locale("treffen wir uns morgen", Locale::De);
        assert!(has_type(&found, TokenType::CasualDate));
    }

    #[test]
    fn test_spanish_locale() {
        let found = TokenScanner::scan_locale("nos vemos maƱana", Locale::Es);
        assert!(has_type(&found, TokenType::CasualDate));
    }
}