Skip to main content

whichtime_sys/
scanner.rs

1//! Option A: Aho-Corasick based multi-pattern scanner with multi-locale support
2//!
3//! This scans the input text once and identifies all potential date-related
4//! tokens (months, weekdays, time units, casual words) in a single pass.
5//!
6//! Benefits:
7//! - Single pass over input text
8//! - O(n + m + z) complexity (n=text, m=patterns, z=matches)
9//! - SIMD-optimized by the aho-corasick crate
10
11use aho_corasick::{AhoCorasick, MatchKind};
12use std::sync::LazyLock;
13
14use crate::dictionaries::Locale;
15
/// Categories of token the scanner can emit.
///
/// Every entry in the per-locale pattern tables is tagged with exactly one of
/// these categories. `Hash` is derived alongside `Eq` so a category can be used
/// directly as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenType {
    // Date-related
    /// Month name or abbreviation (e.g. "january", "jan").
    Month,
    /// Weekday name or abbreviation (e.g. "monday", "mon").
    Weekday,
    /// Casual date word (e.g. "today", "tomorrow", "yesterday").
    CasualDate,
    /// Casual time-of-day word (e.g. "noon", "midnight", "evening").
    CasualTime,

    // Time units
    /// Unit of time, spelled out or abbreviated (e.g. "hours", "hr", "h").
    TimeUnit,

    // Modifiers
    /// Relative modifier (e.g. "this", "next", "last").
    RelativeModifier,

    // Keywords
    /// Backward-looking keyword (e.g. "ago", "earlier").
    Ago,
    /// Forward-looking keyword (e.g. "later", "from now").
    Later,
    /// The "within" keyword.
    Within,
    /// The "at" keyword.
    At,
    /// The "in" keyword.
    In,
    /// The "on" keyword.
    On,
    /// The "from" keyword.
    From,
    /// The "before" keyword.
    Before,
    /// The "after" keyword.
    After,
}
42
43/// A token found by the scanner pre-pass.
44#[derive(Debug, Clone)]
45pub struct Token {
46    /// Semantic category of the matched token.
47    pub token_type: TokenType,
48    /// Start byte index in the scanned text.
49    pub start: usize,
50    /// End byte index in the scanned text.
51    pub end: usize,
52    /// Index of the pattern within the locale automaton.
53    pub pattern_id: usize,
54}
55
56/// Pattern entry for the Aho-Corasick automaton
57struct PatternEntry {
58    pattern: &'static str,
59    token_type: TokenType,
60}
61
62// ============================================================================
63// English patterns
64// ============================================================================
65
66static EN_PATTERNS: &[PatternEntry] = &[
67    // Months (full)
68    PatternEntry {
69        pattern: "january",
70        token_type: TokenType::Month,
71    },
72    PatternEntry {
73        pattern: "february",
74        token_type: TokenType::Month,
75    },
76    PatternEntry {
77        pattern: "march",
78        token_type: TokenType::Month,
79    },
80    PatternEntry {
81        pattern: "april",
82        token_type: TokenType::Month,
83    },
84    PatternEntry {
85        pattern: "may",
86        token_type: TokenType::Month,
87    },
88    PatternEntry {
89        pattern: "june",
90        token_type: TokenType::Month,
91    },
92    PatternEntry {
93        pattern: "july",
94        token_type: TokenType::Month,
95    },
96    PatternEntry {
97        pattern: "august",
98        token_type: TokenType::Month,
99    },
100    PatternEntry {
101        pattern: "september",
102        token_type: TokenType::Month,
103    },
104    PatternEntry {
105        pattern: "october",
106        token_type: TokenType::Month,
107    },
108    PatternEntry {
109        pattern: "november",
110        token_type: TokenType::Month,
111    },
112    PatternEntry {
113        pattern: "december",
114        token_type: TokenType::Month,
115    },
116    // Months (abbreviated)
117    PatternEntry {
118        pattern: "jan",
119        token_type: TokenType::Month,
120    },
121    PatternEntry {
122        pattern: "feb",
123        token_type: TokenType::Month,
124    },
125    PatternEntry {
126        pattern: "mar",
127        token_type: TokenType::Month,
128    },
129    PatternEntry {
130        pattern: "apr",
131        token_type: TokenType::Month,
132    },
133    PatternEntry {
134        pattern: "jun",
135        token_type: TokenType::Month,
136    },
137    PatternEntry {
138        pattern: "jul",
139        token_type: TokenType::Month,
140    },
141    PatternEntry {
142        pattern: "aug",
143        token_type: TokenType::Month,
144    },
145    PatternEntry {
146        pattern: "sep",
147        token_type: TokenType::Month,
148    },
149    PatternEntry {
150        pattern: "sept",
151        token_type: TokenType::Month,
152    },
153    PatternEntry {
154        pattern: "oct",
155        token_type: TokenType::Month,
156    },
157    PatternEntry {
158        pattern: "nov",
159        token_type: TokenType::Month,
160    },
161    PatternEntry {
162        pattern: "dec",
163        token_type: TokenType::Month,
164    },
165    // Weekdays (full)
166    PatternEntry {
167        pattern: "sunday",
168        token_type: TokenType::Weekday,
169    },
170    PatternEntry {
171        pattern: "monday",
172        token_type: TokenType::Weekday,
173    },
174    PatternEntry {
175        pattern: "tuesday",
176        token_type: TokenType::Weekday,
177    },
178    PatternEntry {
179        pattern: "wednesday",
180        token_type: TokenType::Weekday,
181    },
182    PatternEntry {
183        pattern: "thursday",
184        token_type: TokenType::Weekday,
185    },
186    PatternEntry {
187        pattern: "friday",
188        token_type: TokenType::Weekday,
189    },
190    PatternEntry {
191        pattern: "saturday",
192        token_type: TokenType::Weekday,
193    },
194    // Weekdays (abbreviated)
195    PatternEntry {
196        pattern: "sun",
197        token_type: TokenType::Weekday,
198    },
199    PatternEntry {
200        pattern: "mon",
201        token_type: TokenType::Weekday,
202    },
203    PatternEntry {
204        pattern: "tue",
205        token_type: TokenType::Weekday,
206    },
207    PatternEntry {
208        pattern: "wed",
209        token_type: TokenType::Weekday,
210    },
211    PatternEntry {
212        pattern: "thu",
213        token_type: TokenType::Weekday,
214    },
215    PatternEntry {
216        pattern: "thur",
217        token_type: TokenType::Weekday,
218    },
219    PatternEntry {
220        pattern: "thurs",
221        token_type: TokenType::Weekday,
222    },
223    PatternEntry {
224        pattern: "fri",
225        token_type: TokenType::Weekday,
226    },
227    PatternEntry {
228        pattern: "sat",
229        token_type: TokenType::Weekday,
230    },
231    // Casual date
232    PatternEntry {
233        pattern: "now",
234        token_type: TokenType::CasualDate,
235    },
236    PatternEntry {
237        pattern: "today",
238        token_type: TokenType::CasualDate,
239    },
240    PatternEntry {
241        pattern: "tonight",
242        token_type: TokenType::CasualDate,
243    },
244    PatternEntry {
245        pattern: "tomorrow",
246        token_type: TokenType::CasualDate,
247    },
248    PatternEntry {
249        pattern: "tmr",
250        token_type: TokenType::CasualDate,
251    },
252    PatternEntry {
253        pattern: "tmrw",
254        token_type: TokenType::CasualDate,
255    },
256    PatternEntry {
257        pattern: "yesterday",
258        token_type: TokenType::CasualDate,
259    },
260    PatternEntry {
261        pattern: "overmorrow",
262        token_type: TokenType::CasualDate,
263    },
264    // Casual time
265    PatternEntry {
266        pattern: "noon",
267        token_type: TokenType::CasualTime,
268    },
269    PatternEntry {
270        pattern: "midday",
271        token_type: TokenType::CasualTime,
272    },
273    PatternEntry {
274        pattern: "midnight",
275        token_type: TokenType::CasualTime,
276    },
277    PatternEntry {
278        pattern: "morning",
279        token_type: TokenType::CasualTime,
280    },
281    PatternEntry {
282        pattern: "afternoon",
283        token_type: TokenType::CasualTime,
284    },
285    PatternEntry {
286        pattern: "evening",
287        token_type: TokenType::CasualTime,
288    },
289    PatternEntry {
290        pattern: "night",
291        token_type: TokenType::CasualTime,
292    },
293    // Time units (full)
294    PatternEntry {
295        pattern: "second",
296        token_type: TokenType::TimeUnit,
297    },
298    PatternEntry {
299        pattern: "seconds",
300        token_type: TokenType::TimeUnit,
301    },
302    PatternEntry {
303        pattern: "minute",
304        token_type: TokenType::TimeUnit,
305    },
306    PatternEntry {
307        pattern: "minutes",
308        token_type: TokenType::TimeUnit,
309    },
310    PatternEntry {
311        pattern: "hour",
312        token_type: TokenType::TimeUnit,
313    },
314    PatternEntry {
315        pattern: "hours",
316        token_type: TokenType::TimeUnit,
317    },
318    PatternEntry {
319        pattern: "day",
320        token_type: TokenType::TimeUnit,
321    },
322    PatternEntry {
323        pattern: "days",
324        token_type: TokenType::TimeUnit,
325    },
326    PatternEntry {
327        pattern: "week",
328        token_type: TokenType::TimeUnit,
329    },
330    PatternEntry {
331        pattern: "weeks",
332        token_type: TokenType::TimeUnit,
333    },
334    PatternEntry {
335        pattern: "month",
336        token_type: TokenType::TimeUnit,
337    },
338    PatternEntry {
339        pattern: "months",
340        token_type: TokenType::TimeUnit,
341    },
342    PatternEntry {
343        pattern: "year",
344        token_type: TokenType::TimeUnit,
345    },
346    PatternEntry {
347        pattern: "years",
348        token_type: TokenType::TimeUnit,
349    },
350    // Time units (abbreviated)
351    PatternEntry {
352        pattern: "sec",
353        token_type: TokenType::TimeUnit,
354    },
355    PatternEntry {
356        pattern: "secs",
357        token_type: TokenType::TimeUnit,
358    },
359    PatternEntry {
360        pattern: "min",
361        token_type: TokenType::TimeUnit,
362    },
363    PatternEntry {
364        pattern: "mins",
365        token_type: TokenType::TimeUnit,
366    },
367    PatternEntry {
368        pattern: "hr",
369        token_type: TokenType::TimeUnit,
370    },
371    PatternEntry {
372        pattern: "hrs",
373        token_type: TokenType::TimeUnit,
374    },
375    PatternEntry {
376        pattern: "mo",
377        token_type: TokenType::TimeUnit,
378    },
379    PatternEntry {
380        pattern: "yr",
381        token_type: TokenType::TimeUnit,
382    },
383    PatternEntry {
384        pattern: "yrs",
385        token_type: TokenType::TimeUnit,
386    },
387    // Time units (single letter - for patterns like "1h", "5m")
388    PatternEntry {
389        pattern: "h",
390        token_type: TokenType::TimeUnit,
391    },
392    PatternEntry {
393        pattern: "m",
394        token_type: TokenType::TimeUnit,
395    },
396    PatternEntry {
397        pattern: "s",
398        token_type: TokenType::TimeUnit,
399    },
400    PatternEntry {
401        pattern: "d",
402        token_type: TokenType::TimeUnit,
403    },
404    PatternEntry {
405        pattern: "w",
406        token_type: TokenType::TimeUnit,
407    },
408    PatternEntry {
409        pattern: "y",
410        token_type: TokenType::TimeUnit,
411    },
412    // Relative modifiers
413    PatternEntry {
414        pattern: "this",
415        token_type: TokenType::RelativeModifier,
416    },
417    PatternEntry {
418        pattern: "next",
419        token_type: TokenType::RelativeModifier,
420    },
421    PatternEntry {
422        pattern: "last",
423        token_type: TokenType::RelativeModifier,
424    },
425    PatternEntry {
426        pattern: "past",
427        token_type: TokenType::RelativeModifier,
428    },
429    PatternEntry {
430        pattern: "previous",
431        token_type: TokenType::RelativeModifier,
432    },
433    // Keywords
434    PatternEntry {
435        pattern: "ago",
436        token_type: TokenType::Ago,
437    },
438    PatternEntry {
439        pattern: "before",
440        token_type: TokenType::Before,
441    },
442    PatternEntry {
443        pattern: "earlier",
444        token_type: TokenType::Ago,
445    },
446    PatternEntry {
447        pattern: "later",
448        token_type: TokenType::Later,
449    },
450    PatternEntry {
451        pattern: "after",
452        token_type: TokenType::After,
453    },
454    PatternEntry {
455        pattern: "from now",
456        token_type: TokenType::Later,
457    },
458    PatternEntry {
459        pattern: "within",
460        token_type: TokenType::Within,
461    },
462    PatternEntry {
463        pattern: "in",
464        token_type: TokenType::In,
465    },
466    PatternEntry {
467        pattern: "at",
468        token_type: TokenType::At,
469    },
470    PatternEntry {
471        pattern: "on",
472        token_type: TokenType::On,
473    },
474    PatternEntry {
475        pattern: "from",
476        token_type: TokenType::From,
477    },
478];
479
480// ============================================================================
481// German patterns
482// ============================================================================
483
484static DE_PATTERNS: &[PatternEntry] = &[
485    // Months
486    PatternEntry {
487        pattern: "januar",
488        token_type: TokenType::Month,
489    },
490    PatternEntry {
491        pattern: "februar",
492        token_type: TokenType::Month,
493    },
494    PatternEntry {
495        pattern: "märz",
496        token_type: TokenType::Month,
497    },
498    PatternEntry {
499        pattern: "maerz",
500        token_type: TokenType::Month,
501    },
502    PatternEntry {
503        pattern: "april",
504        token_type: TokenType::Month,
505    },
506    PatternEntry {
507        pattern: "mai",
508        token_type: TokenType::Month,
509    },
510    PatternEntry {
511        pattern: "juni",
512        token_type: TokenType::Month,
513    },
514    PatternEntry {
515        pattern: "juli",
516        token_type: TokenType::Month,
517    },
518    PatternEntry {
519        pattern: "august",
520        token_type: TokenType::Month,
521    },
522    PatternEntry {
523        pattern: "september",
524        token_type: TokenType::Month,
525    },
526    PatternEntry {
527        pattern: "oktober",
528        token_type: TokenType::Month,
529    },
530    PatternEntry {
531        pattern: "november",
532        token_type: TokenType::Month,
533    },
534    PatternEntry {
535        pattern: "dezember",
536        token_type: TokenType::Month,
537    },
538    // Weekdays
539    PatternEntry {
540        pattern: "sonntag",
541        token_type: TokenType::Weekday,
542    },
543    PatternEntry {
544        pattern: "so",
545        token_type: TokenType::Weekday,
546    },
547    PatternEntry {
548        pattern: "montag",
549        token_type: TokenType::Weekday,
550    },
551    PatternEntry {
552        pattern: "mo",
553        token_type: TokenType::Weekday,
554    },
555    PatternEntry {
556        pattern: "dienstag",
557        token_type: TokenType::Weekday,
558    },
559    PatternEntry {
560        pattern: "di",
561        token_type: TokenType::Weekday,
562    },
563    PatternEntry {
564        pattern: "mittwoch",
565        token_type: TokenType::Weekday,
566    },
567    PatternEntry {
568        pattern: "mi",
569        token_type: TokenType::Weekday,
570    },
571    PatternEntry {
572        pattern: "donnerstag",
573        token_type: TokenType::Weekday,
574    },
575    PatternEntry {
576        pattern: "do",
577        token_type: TokenType::Weekday,
578    },
579    PatternEntry {
580        pattern: "freitag",
581        token_type: TokenType::Weekday,
582    },
583    PatternEntry {
584        pattern: "fr",
585        token_type: TokenType::Weekday,
586    },
587    PatternEntry {
588        pattern: "samstag",
589        token_type: TokenType::Weekday,
590    },
591    PatternEntry {
592        pattern: "sa",
593        token_type: TokenType::Weekday,
594    },
595    // Casual date
596    PatternEntry {
597        pattern: "jetzt",
598        token_type: TokenType::CasualDate,
599    },
600    PatternEntry {
601        pattern: "heute",
602        token_type: TokenType::CasualDate,
603    },
604    PatternEntry {
605        pattern: "morgen",
606        token_type: TokenType::CasualDate,
607    },
608    PatternEntry {
609        pattern: "gestern",
610        token_type: TokenType::CasualDate,
611    },
612    PatternEntry {
613        pattern: "übermorgen",
614        token_type: TokenType::CasualDate,
615    },
616    PatternEntry {
617        pattern: "uebermorgen",
618        token_type: TokenType::CasualDate,
619    },
620    PatternEntry {
621        pattern: "vorgestern",
622        token_type: TokenType::CasualDate,
623    },
624    // Time units
625    PatternEntry {
626        pattern: "sekunde",
627        token_type: TokenType::TimeUnit,
628    },
629    PatternEntry {
630        pattern: "sekunden",
631        token_type: TokenType::TimeUnit,
632    },
633    PatternEntry {
634        pattern: "minute",
635        token_type: TokenType::TimeUnit,
636    },
637    PatternEntry {
638        pattern: "minuten",
639        token_type: TokenType::TimeUnit,
640    },
641    PatternEntry {
642        pattern: "min",
643        token_type: TokenType::TimeUnit,
644    },
645    PatternEntry {
646        pattern: "stunde",
647        token_type: TokenType::TimeUnit,
648    },
649    PatternEntry {
650        pattern: "stunden",
651        token_type: TokenType::TimeUnit,
652    },
653    PatternEntry {
654        pattern: "tag",
655        token_type: TokenType::TimeUnit,
656    },
657    PatternEntry {
658        pattern: "tage",
659        token_type: TokenType::TimeUnit,
660    },
661    PatternEntry {
662        pattern: "tagen",
663        token_type: TokenType::TimeUnit,
664    },
665    PatternEntry {
666        pattern: "woche",
667        token_type: TokenType::TimeUnit,
668    },
669    PatternEntry {
670        pattern: "wochen",
671        token_type: TokenType::TimeUnit,
672    },
673    PatternEntry {
674        pattern: "monat",
675        token_type: TokenType::TimeUnit,
676    },
677    PatternEntry {
678        pattern: "monate",
679        token_type: TokenType::TimeUnit,
680    },
681    PatternEntry {
682        pattern: "monaten",
683        token_type: TokenType::TimeUnit,
684    },
685    PatternEntry {
686        pattern: "monats",
687        token_type: TokenType::TimeUnit,
688    },
689    PatternEntry {
690        pattern: "jahr",
691        token_type: TokenType::TimeUnit,
692    },
693    PatternEntry {
694        pattern: "jahre",
695        token_type: TokenType::TimeUnit,
696    },
697    PatternEntry {
698        pattern: "jahren",
699        token_type: TokenType::TimeUnit,
700    },
701    PatternEntry {
702        pattern: "jahres",
703        token_type: TokenType::TimeUnit,
704    },
705    // Keywords
706    PatternEntry {
707        pattern: "vor",
708        token_type: TokenType::Ago,
709    },
710    PatternEntry {
711        pattern: "in",
712        token_type: TokenType::In,
713    },
714    PatternEntry {
715        pattern: "um",
716        token_type: TokenType::At,
717    },
718    PatternEntry {
719        pattern: "am",
720        token_type: TokenType::On,
721    },
722];
723
724// ============================================================================
725// Spanish patterns
726// ============================================================================
727
728static ES_PATTERNS: &[PatternEntry] = &[
729    // Months
730    PatternEntry {
731        pattern: "enero",
732        token_type: TokenType::Month,
733    },
734    PatternEntry {
735        pattern: "febrero",
736        token_type: TokenType::Month,
737    },
738    PatternEntry {
739        pattern: "marzo",
740        token_type: TokenType::Month,
741    },
742    PatternEntry {
743        pattern: "abril",
744        token_type: TokenType::Month,
745    },
746    PatternEntry {
747        pattern: "mayo",
748        token_type: TokenType::Month,
749    },
750    PatternEntry {
751        pattern: "junio",
752        token_type: TokenType::Month,
753    },
754    PatternEntry {
755        pattern: "julio",
756        token_type: TokenType::Month,
757    },
758    PatternEntry {
759        pattern: "agosto",
760        token_type: TokenType::Month,
761    },
762    PatternEntry {
763        pattern: "septiembre",
764        token_type: TokenType::Month,
765    },
766    PatternEntry {
767        pattern: "octubre",
768        token_type: TokenType::Month,
769    },
770    PatternEntry {
771        pattern: "noviembre",
772        token_type: TokenType::Month,
773    },
774    PatternEntry {
775        pattern: "diciembre",
776        token_type: TokenType::Month,
777    },
778    // Abbreviated months
779    PatternEntry {
780        pattern: "ene",
781        token_type: TokenType::Month,
782    },
783    PatternEntry {
784        pattern: "feb",
785        token_type: TokenType::Month,
786    },
787    PatternEntry {
788        pattern: "abr",
789        token_type: TokenType::Month,
790    },
791    PatternEntry {
792        pattern: "jun",
793        token_type: TokenType::Month,
794    },
795    PatternEntry {
796        pattern: "jul",
797        token_type: TokenType::Month,
798    },
799    PatternEntry {
800        pattern: "ago",
801        token_type: TokenType::Month,
802    },
803    PatternEntry {
804        pattern: "sep",
805        token_type: TokenType::Month,
806    },
807    PatternEntry {
808        pattern: "oct",
809        token_type: TokenType::Month,
810    },
811    PatternEntry {
812        pattern: "nov",
813        token_type: TokenType::Month,
814    },
815    PatternEntry {
816        pattern: "dic",
817        token_type: TokenType::Month,
818    },
819    // Weekdays
820    PatternEntry {
821        pattern: "domingo",
822        token_type: TokenType::Weekday,
823    },
824    PatternEntry {
825        pattern: "lunes",
826        token_type: TokenType::Weekday,
827    },
828    PatternEntry {
829        pattern: "martes",
830        token_type: TokenType::Weekday,
831    },
832    PatternEntry {
833        pattern: "miércoles",
834        token_type: TokenType::Weekday,
835    },
836    PatternEntry {
837        pattern: "miercoles",
838        token_type: TokenType::Weekday,
839    },
840    PatternEntry {
841        pattern: "jueves",
842        token_type: TokenType::Weekday,
843    },
844    PatternEntry {
845        pattern: "viernes",
846        token_type: TokenType::Weekday,
847    },
848    PatternEntry {
849        pattern: "sábado",
850        token_type: TokenType::Weekday,
851    },
852    PatternEntry {
853        pattern: "sabado",
854        token_type: TokenType::Weekday,
855    },
856    // Casual date
857    PatternEntry {
858        pattern: "ahora",
859        token_type: TokenType::CasualDate,
860    },
861    PatternEntry {
862        pattern: "hoy",
863        token_type: TokenType::CasualDate,
864    },
865    PatternEntry {
866        pattern: "mañana",
867        token_type: TokenType::CasualDate,
868    },
869    PatternEntry {
870        pattern: "manana",
871        token_type: TokenType::CasualDate,
872    },
873    PatternEntry {
874        pattern: "ayer",
875        token_type: TokenType::CasualDate,
876    },
877    PatternEntry {
878        pattern: "anoche",
879        token_type: TokenType::CasualDate,
880    },
881    // Casual time
882    PatternEntry {
883        pattern: "mediodía",
884        token_type: TokenType::CasualTime,
885    },
886    PatternEntry {
887        pattern: "mediodia",
888        token_type: TokenType::CasualTime,
889    },
890    PatternEntry {
891        pattern: "medianoche",
892        token_type: TokenType::CasualTime,
893    },
894    PatternEntry {
895        pattern: "tarde",
896        token_type: TokenType::CasualTime,
897    },
898    PatternEntry {
899        pattern: "noche",
900        token_type: TokenType::CasualTime,
901    },
902    // Time units
903    PatternEntry {
904        pattern: "segundo",
905        token_type: TokenType::TimeUnit,
906    },
907    PatternEntry {
908        pattern: "segundos",
909        token_type: TokenType::TimeUnit,
910    },
911    PatternEntry {
912        pattern: "minuto",
913        token_type: TokenType::TimeUnit,
914    },
915    PatternEntry {
916        pattern: "minutos",
917        token_type: TokenType::TimeUnit,
918    },
919    PatternEntry {
920        pattern: "hora",
921        token_type: TokenType::TimeUnit,
922    },
923    PatternEntry {
924        pattern: "horas",
925        token_type: TokenType::TimeUnit,
926    },
927    PatternEntry {
928        pattern: "día",
929        token_type: TokenType::TimeUnit,
930    },
931    PatternEntry {
932        pattern: "dia",
933        token_type: TokenType::TimeUnit,
934    },
935    PatternEntry {
936        pattern: "días",
937        token_type: TokenType::TimeUnit,
938    },
939    PatternEntry {
940        pattern: "dias",
941        token_type: TokenType::TimeUnit,
942    },
943    PatternEntry {
944        pattern: "semana",
945        token_type: TokenType::TimeUnit,
946    },
947    PatternEntry {
948        pattern: "semanas",
949        token_type: TokenType::TimeUnit,
950    },
951    PatternEntry {
952        pattern: "mes",
953        token_type: TokenType::TimeUnit,
954    },
955    PatternEntry {
956        pattern: "meses",
957        token_type: TokenType::TimeUnit,
958    },
959    PatternEntry {
960        pattern: "año",
961        token_type: TokenType::TimeUnit,
962    },
963    PatternEntry {
964        pattern: "anos",
965        token_type: TokenType::TimeUnit,
966    },
967    PatternEntry {
968        pattern: "años",
969        token_type: TokenType::TimeUnit,
970    },
971    // Keywords
972    PatternEntry {
973        pattern: "hace",
974        token_type: TokenType::Ago,
975    },
976    PatternEntry {
977        pattern: "en",
978        token_type: TokenType::In,
979    },
980    PatternEntry {
981        pattern: "dentro de",
982        token_type: TokenType::Within,
983    },
984];
985
986// ============================================================================
987// French patterns
988// ============================================================================
989
990static FR_PATTERNS: &[PatternEntry] = &[
991    // Months
992    PatternEntry {
993        pattern: "janvier",
994        token_type: TokenType::Month,
995    },
996    PatternEntry {
997        pattern: "février",
998        token_type: TokenType::Month,
999    },
1000    PatternEntry {
1001        pattern: "fevrier",
1002        token_type: TokenType::Month,
1003    },
1004    PatternEntry {
1005        pattern: "mars",
1006        token_type: TokenType::Month,
1007    },
1008    PatternEntry {
1009        pattern: "avril",
1010        token_type: TokenType::Month,
1011    },
1012    PatternEntry {
1013        pattern: "mai",
1014        token_type: TokenType::Month,
1015    },
1016    PatternEntry {
1017        pattern: "juin",
1018        token_type: TokenType::Month,
1019    },
1020    PatternEntry {
1021        pattern: "juillet",
1022        token_type: TokenType::Month,
1023    },
1024    PatternEntry {
1025        pattern: "août",
1026        token_type: TokenType::Month,
1027    },
1028    PatternEntry {
1029        pattern: "aout",
1030        token_type: TokenType::Month,
1031    },
1032    PatternEntry {
1033        pattern: "septembre",
1034        token_type: TokenType::Month,
1035    },
1036    PatternEntry {
1037        pattern: "octobre",
1038        token_type: TokenType::Month,
1039    },
1040    PatternEntry {
1041        pattern: "novembre",
1042        token_type: TokenType::Month,
1043    },
1044    PatternEntry {
1045        pattern: "décembre",
1046        token_type: TokenType::Month,
1047    },
1048    PatternEntry {
1049        pattern: "decembre",
1050        token_type: TokenType::Month,
1051    },
1052    // Weekdays
1053    PatternEntry {
1054        pattern: "dimanche",
1055        token_type: TokenType::Weekday,
1056    },
1057    PatternEntry {
1058        pattern: "lundi",
1059        token_type: TokenType::Weekday,
1060    },
1061    PatternEntry {
1062        pattern: "mardi",
1063        token_type: TokenType::Weekday,
1064    },
1065    PatternEntry {
1066        pattern: "mercredi",
1067        token_type: TokenType::Weekday,
1068    },
1069    PatternEntry {
1070        pattern: "jeudi",
1071        token_type: TokenType::Weekday,
1072    },
1073    PatternEntry {
1074        pattern: "vendredi",
1075        token_type: TokenType::Weekday,
1076    },
1077    PatternEntry {
1078        pattern: "samedi",
1079        token_type: TokenType::Weekday,
1080    },
1081    // Casual date
1082    PatternEntry {
1083        pattern: "maintenant",
1084        token_type: TokenType::CasualDate,
1085    },
1086    PatternEntry {
1087        pattern: "aujourd'hui",
1088        token_type: TokenType::CasualDate,
1089    },
1090    PatternEntry {
1091        pattern: "aujourdhui",
1092        token_type: TokenType::CasualDate,
1093    },
1094    PatternEntry {
1095        pattern: "demain",
1096        token_type: TokenType::CasualDate,
1097    },
1098    PatternEntry {
1099        pattern: "hier",
1100        token_type: TokenType::CasualDate,
1101    },
1102    // Casual time
1103    PatternEntry {
1104        pattern: "midi",
1105        token_type: TokenType::CasualTime,
1106    },
1107    PatternEntry {
1108        pattern: "minuit",
1109        token_type: TokenType::CasualTime,
1110    },
1111    PatternEntry {
1112        pattern: "matin",
1113        token_type: TokenType::CasualTime,
1114    },
1115    PatternEntry {
1116        pattern: "soir",
1117        token_type: TokenType::CasualTime,
1118    },
1119    // Time units
1120    PatternEntry {
1121        pattern: "seconde",
1122        token_type: TokenType::TimeUnit,
1123    },
1124    PatternEntry {
1125        pattern: "secondes",
1126        token_type: TokenType::TimeUnit,
1127    },
1128    PatternEntry {
1129        pattern: "minute",
1130        token_type: TokenType::TimeUnit,
1131    },
1132    PatternEntry {
1133        pattern: "minutes",
1134        token_type: TokenType::TimeUnit,
1135    },
1136    PatternEntry {
1137        pattern: "heure",
1138        token_type: TokenType::TimeUnit,
1139    },
1140    PatternEntry {
1141        pattern: "heures",
1142        token_type: TokenType::TimeUnit,
1143    },
1144    PatternEntry {
1145        pattern: "jour",
1146        token_type: TokenType::TimeUnit,
1147    },
1148    PatternEntry {
1149        pattern: "jours",
1150        token_type: TokenType::TimeUnit,
1151    },
1152    PatternEntry {
1153        pattern: "semaine",
1154        token_type: TokenType::TimeUnit,
1155    },
1156    PatternEntry {
1157        pattern: "semaines",
1158        token_type: TokenType::TimeUnit,
1159    },
1160    PatternEntry {
1161        pattern: "mois",
1162        token_type: TokenType::TimeUnit,
1163    },
1164    PatternEntry {
1165        pattern: "an",
1166        token_type: TokenType::TimeUnit,
1167    },
1168    PatternEntry {
1169        pattern: "ans",
1170        token_type: TokenType::TimeUnit,
1171    },
1172    PatternEntry {
1173        pattern: "année",
1174        token_type: TokenType::TimeUnit,
1175    },
1176    PatternEntry {
1177        pattern: "annee",
1178        token_type: TokenType::TimeUnit,
1179    },
1180    // Keywords
1181    PatternEntry {
1182        pattern: "il y a",
1183        token_type: TokenType::Ago,
1184    },
1185    PatternEntry {
1186        pattern: "dans",
1187        token_type: TokenType::In,
1188    },
1189    PatternEntry {
1190        pattern: "à",
1191        token_type: TokenType::At,
1192    },
1193];
1194
1195// ============================================================================
1196// Italian patterns
1197// ============================================================================
1198
1199static IT_PATTERNS: &[PatternEntry] = &[
1200    // Casual date
1201    PatternEntry {
1202        pattern: "adesso",
1203        token_type: TokenType::CasualDate,
1204    },
1205    PatternEntry {
1206        pattern: "ora",
1207        token_type: TokenType::CasualDate,
1208    },
1209    PatternEntry {
1210        pattern: "oggi",
1211        token_type: TokenType::CasualDate,
1212    },
1213    PatternEntry {
1214        pattern: "stanotte",
1215        token_type: TokenType::CasualDate,
1216    },
1217    PatternEntry {
1218        pattern: "stasera",
1219        token_type: TokenType::CasualDate,
1220    },
1221    PatternEntry {
1222        pattern: "stamattina",
1223        token_type: TokenType::CasualDate,
1224    },
1225    PatternEntry {
1226        pattern: "domani",
1227        token_type: TokenType::CasualDate,
1228    },
1229    PatternEntry {
1230        pattern: "ieri",
1231        token_type: TokenType::CasualDate,
1232    },
1233    PatternEntry {
1234        pattern: "dopodomani",
1235        token_type: TokenType::CasualDate,
1236    },
1237    // Casual time
1238    PatternEntry {
1239        pattern: "mezzogiorno",
1240        token_type: TokenType::CasualTime,
1241    },
1242    PatternEntry {
1243        pattern: "mezzanotte",
1244        token_type: TokenType::CasualTime,
1245    },
1246    PatternEntry {
1247        pattern: "mattina",
1248        token_type: TokenType::CasualTime,
1249    },
1250    PatternEntry {
1251        pattern: "mattino",
1252        token_type: TokenType::CasualTime,
1253    },
1254    PatternEntry {
1255        pattern: "pomeriggio",
1256        token_type: TokenType::CasualTime,
1257    },
1258    PatternEntry {
1259        pattern: "sera",
1260        token_type: TokenType::CasualTime,
1261    },
1262    PatternEntry {
1263        pattern: "notte",
1264        token_type: TokenType::CasualTime,
1265    },
1266    // Months
1267    PatternEntry {
1268        pattern: "gennaio",
1269        token_type: TokenType::Month,
1270    },
1271    PatternEntry {
1272        pattern: "febbraio",
1273        token_type: TokenType::Month,
1274    },
1275    PatternEntry {
1276        pattern: "marzo",
1277        token_type: TokenType::Month,
1278    },
1279    PatternEntry {
1280        pattern: "aprile",
1281        token_type: TokenType::Month,
1282    },
1283    PatternEntry {
1284        pattern: "maggio",
1285        token_type: TokenType::Month,
1286    },
1287    PatternEntry {
1288        pattern: "giugno",
1289        token_type: TokenType::Month,
1290    },
1291    PatternEntry {
1292        pattern: "luglio",
1293        token_type: TokenType::Month,
1294    },
1295    PatternEntry {
1296        pattern: "agosto",
1297        token_type: TokenType::Month,
1298    },
1299    PatternEntry {
1300        pattern: "settembre",
1301        token_type: TokenType::Month,
1302    },
1303    PatternEntry {
1304        pattern: "ottobre",
1305        token_type: TokenType::Month,
1306    },
1307    PatternEntry {
1308        pattern: "novembre",
1309        token_type: TokenType::Month,
1310    },
1311    PatternEntry {
1312        pattern: "dicembre",
1313        token_type: TokenType::Month,
1314    },
1315    // Weekdays
1316    PatternEntry {
1317        pattern: "domenica",
1318        token_type: TokenType::Weekday,
1319    },
1320    PatternEntry {
1321        pattern: "lunedì",
1322        token_type: TokenType::Weekday,
1323    },
1324    PatternEntry {
1325        pattern: "lunedi",
1326        token_type: TokenType::Weekday,
1327    },
1328    PatternEntry {
1329        pattern: "martedì",
1330        token_type: TokenType::Weekday,
1331    },
1332    PatternEntry {
1333        pattern: "martedi",
1334        token_type: TokenType::Weekday,
1335    },
1336    PatternEntry {
1337        pattern: "mercoledì",
1338        token_type: TokenType::Weekday,
1339    },
1340    PatternEntry {
1341        pattern: "mercoledi",
1342        token_type: TokenType::Weekday,
1343    },
1344    PatternEntry {
1345        pattern: "giovedì",
1346        token_type: TokenType::Weekday,
1347    },
1348    PatternEntry {
1349        pattern: "giovedi",
1350        token_type: TokenType::Weekday,
1351    },
1352    PatternEntry {
1353        pattern: "venerdì",
1354        token_type: TokenType::Weekday,
1355    },
1356    PatternEntry {
1357        pattern: "venerdi",
1358        token_type: TokenType::Weekday,
1359    },
1360    PatternEntry {
1361        pattern: "sabato",
1362        token_type: TokenType::Weekday,
1363    },
1364    // Time units
1365    PatternEntry {
1366        pattern: "secondo",
1367        token_type: TokenType::TimeUnit,
1368    },
1369    PatternEntry {
1370        pattern: "secondi",
1371        token_type: TokenType::TimeUnit,
1372    },
1373    PatternEntry {
1374        pattern: "minuto",
1375        token_type: TokenType::TimeUnit,
1376    },
1377    PatternEntry {
1378        pattern: "minuti",
1379        token_type: TokenType::TimeUnit,
1380    },
1381    PatternEntry {
1382        pattern: "ora",
1383        token_type: TokenType::TimeUnit,
1384    },
1385    PatternEntry {
1386        pattern: "ore",
1387        token_type: TokenType::TimeUnit,
1388    },
1389    PatternEntry {
1390        pattern: "giorno",
1391        token_type: TokenType::TimeUnit,
1392    },
1393    PatternEntry {
1394        pattern: "giorni",
1395        token_type: TokenType::TimeUnit,
1396    },
1397    PatternEntry {
1398        pattern: "settimana",
1399        token_type: TokenType::TimeUnit,
1400    },
1401    PatternEntry {
1402        pattern: "settimane",
1403        token_type: TokenType::TimeUnit,
1404    },
1405    PatternEntry {
1406        pattern: "mese",
1407        token_type: TokenType::TimeUnit,
1408    },
1409    PatternEntry {
1410        pattern: "mesi",
1411        token_type: TokenType::TimeUnit,
1412    },
1413    PatternEntry {
1414        pattern: "anno",
1415        token_type: TokenType::TimeUnit,
1416    },
1417    PatternEntry {
1418        pattern: "anni",
1419        token_type: TokenType::TimeUnit,
1420    },
1421    // Keywords
1422    PatternEntry {
1423        pattern: "fa",
1424        token_type: TokenType::Ago,
1425    },
1426    PatternEntry {
1427        pattern: "in",
1428        token_type: TokenType::In,
1429    },
1430    PatternEntry {
1431        pattern: "tra",
1432        token_type: TokenType::Within,
1433    },
1434    PatternEntry {
1435        pattern: "fra",
1436        token_type: TokenType::Within,
1437    },
1438    PatternEntry {
1439        pattern: "alle",
1440        token_type: TokenType::At,
1441    },
1442];
1443
// ============================================================================
// Japanese patterns
// ============================================================================
1447
1448static JA_PATTERNS: &[PatternEntry] = &[
1449    // Casual date
1450    PatternEntry {
1451        pattern: "今日",
1452        token_type: TokenType::CasualDate,
1453    },
1454    PatternEntry {
1455        pattern: "きょう",
1456        token_type: TokenType::CasualDate,
1457    },
1458    PatternEntry {
1459        pattern: "本日",
1460        token_type: TokenType::CasualDate,
1461    },
1462    PatternEntry {
1463        pattern: "ほんじつ",
1464        token_type: TokenType::CasualDate,
1465    },
1466    PatternEntry {
1467        pattern: "今夜",
1468        token_type: TokenType::CasualDate,
1469    },
1470    PatternEntry {
1471        pattern: "こんや",
1472        token_type: TokenType::CasualDate,
1473    },
1474    PatternEntry {
1475        pattern: "今晩",
1476        token_type: TokenType::CasualDate,
1477    },
1478    PatternEntry {
1479        pattern: "こんばん",
1480        token_type: TokenType::CasualDate,
1481    },
1482    PatternEntry {
1483        pattern: "今夕",
1484        token_type: TokenType::CasualDate,
1485    },
1486    PatternEntry {
1487        pattern: "こんゆう",
1488        token_type: TokenType::CasualDate,
1489    },
1490    PatternEntry {
1491        pattern: "明日",
1492        token_type: TokenType::CasualDate,
1493    },
1494    PatternEntry {
1495        pattern: "あした",
1496        token_type: TokenType::CasualDate,
1497    },
1498    PatternEntry {
1499        pattern: "あす",
1500        token_type: TokenType::CasualDate,
1501    },
1502    PatternEntry {
1503        pattern: "昨日",
1504        token_type: TokenType::CasualDate,
1505    },
1506    PatternEntry {
1507        pattern: "きのう",
1508        token_type: TokenType::CasualDate,
1509    },
1510    PatternEntry {
1511        pattern: "さくじつ",
1512        token_type: TokenType::CasualDate,
1513    },
1514    PatternEntry {
1515        pattern: "明後日",
1516        token_type: TokenType::CasualDate,
1517    },
1518    PatternEntry {
1519        pattern: "あさって",
1520        token_type: TokenType::CasualDate,
1521    },
1522    PatternEntry {
1523        pattern: "一昨日",
1524        token_type: TokenType::CasualDate,
1525    },
1526    PatternEntry {
1527        pattern: "おととい",
1528        token_type: TokenType::CasualDate,
1529    },
1530    PatternEntry {
1531        pattern: "今朝",
1532        token_type: TokenType::CasualDate,
1533    },
1534    PatternEntry {
1535        pattern: "けさ",
1536        token_type: TokenType::CasualDate,
1537    },
1538    // Casual time
1539    PatternEntry {
1540        pattern: "午前",
1541        token_type: TokenType::CasualTime,
1542    },
1543    PatternEntry {
1544        pattern: "午後",
1545        token_type: TokenType::CasualTime,
1546    },
1547    PatternEntry {
1548        pattern: "正午",
1549        token_type: TokenType::CasualTime,
1550    },
1551    // Weekdays
1552    PatternEntry {
1553        pattern: "日曜日",
1554        token_type: TokenType::Weekday,
1555    },
1556    PatternEntry {
1557        pattern: "月曜日",
1558        token_type: TokenType::Weekday,
1559    },
1560    PatternEntry {
1561        pattern: "火曜日",
1562        token_type: TokenType::Weekday,
1563    },
1564    PatternEntry {
1565        pattern: "水曜日",
1566        token_type: TokenType::Weekday,
1567    },
1568    PatternEntry {
1569        pattern: "木曜日",
1570        token_type: TokenType::Weekday,
1571    },
1572    PatternEntry {
1573        pattern: "金曜日",
1574        token_type: TokenType::Weekday,
1575    },
1576    PatternEntry {
1577        pattern: "土曜日",
1578        token_type: TokenType::Weekday,
1579    },
1580    // Time units
1581    PatternEntry {
1582        pattern: "秒",
1583        token_type: TokenType::TimeUnit,
1584    },
1585    PatternEntry {
1586        pattern: "分",
1587        token_type: TokenType::TimeUnit,
1588    },
1589    PatternEntry {
1590        pattern: "時間",
1591        token_type: TokenType::TimeUnit,
1592    },
1593    PatternEntry {
1594        pattern: "日",
1595        token_type: TokenType::TimeUnit,
1596    },
1597    PatternEntry {
1598        pattern: "週間",
1599        token_type: TokenType::TimeUnit,
1600    },
1601    PatternEntry {
1602        pattern: "月",
1603        token_type: TokenType::TimeUnit,
1604    },
1605    PatternEntry {
1606        pattern: "年",
1607        token_type: TokenType::TimeUnit,
1608    },
1609];
1610
// ============================================================================
// Dutch patterns
// ============================================================================
1614
1615static NL_PATTERNS: &[PatternEntry] = &[
1616    // Casual date
1617    PatternEntry {
1618        pattern: "nu",
1619        token_type: TokenType::CasualDate,
1620    },
1621    PatternEntry {
1622        pattern: "vandaag",
1623        token_type: TokenType::CasualDate,
1624    },
1625    PatternEntry {
1626        pattern: "vanavond",
1627        token_type: TokenType::CasualDate,
1628    },
1629    PatternEntry {
1630        pattern: "morgen",
1631        token_type: TokenType::CasualDate,
1632    },
1633    PatternEntry {
1634        pattern: "gisteren",
1635        token_type: TokenType::CasualDate,
1636    },
1637    PatternEntry {
1638        pattern: "overmorgen",
1639        token_type: TokenType::CasualDate,
1640    },
1641    PatternEntry {
1642        pattern: "eergisteren",
1643        token_type: TokenType::CasualDate,
1644    },
1645    // Compound casual dates
1646    PatternEntry {
1647        pattern: "vanochtend",
1648        token_type: TokenType::CasualDate,
1649    },
1650    PatternEntry {
1651        pattern: "vanmiddag",
1652        token_type: TokenType::CasualDate,
1653    },
1654    PatternEntry {
1655        pattern: "morgenochtend",
1656        token_type: TokenType::CasualDate,
1657    },
1658    PatternEntry {
1659        pattern: "morgenmiddag",
1660        token_type: TokenType::CasualDate,
1661    },
1662    PatternEntry {
1663        pattern: "morgenavond",
1664        token_type: TokenType::CasualDate,
1665    },
1666    PatternEntry {
1667        pattern: "gisterenochtend",
1668        token_type: TokenType::CasualDate,
1669    },
1670    PatternEntry {
1671        pattern: "gisterenmiddag",
1672        token_type: TokenType::CasualDate,
1673    },
1674    PatternEntry {
1675        pattern: "gisterenavond",
1676        token_type: TokenType::CasualDate,
1677    },
1678    // Months
1679    PatternEntry {
1680        pattern: "januari",
1681        token_type: TokenType::Month,
1682    },
1683    PatternEntry {
1684        pattern: "februari",
1685        token_type: TokenType::Month,
1686    },
1687    PatternEntry {
1688        pattern: "maart",
1689        token_type: TokenType::Month,
1690    },
1691    PatternEntry {
1692        pattern: "april",
1693        token_type: TokenType::Month,
1694    },
1695    PatternEntry {
1696        pattern: "mei",
1697        token_type: TokenType::Month,
1698    },
1699    PatternEntry {
1700        pattern: "juni",
1701        token_type: TokenType::Month,
1702    },
1703    PatternEntry {
1704        pattern: "juli",
1705        token_type: TokenType::Month,
1706    },
1707    PatternEntry {
1708        pattern: "augustus",
1709        token_type: TokenType::Month,
1710    },
1711    PatternEntry {
1712        pattern: "september",
1713        token_type: TokenType::Month,
1714    },
1715    PatternEntry {
1716        pattern: "oktober",
1717        token_type: TokenType::Month,
1718    },
1719    PatternEntry {
1720        pattern: "november",
1721        token_type: TokenType::Month,
1722    },
1723    PatternEntry {
1724        pattern: "december",
1725        token_type: TokenType::Month,
1726    },
1727    // Weekdays
1728    PatternEntry {
1729        pattern: "zondag",
1730        token_type: TokenType::Weekday,
1731    },
1732    PatternEntry {
1733        pattern: "maandag",
1734        token_type: TokenType::Weekday,
1735    },
1736    PatternEntry {
1737        pattern: "dinsdag",
1738        token_type: TokenType::Weekday,
1739    },
1740    PatternEntry {
1741        pattern: "woensdag",
1742        token_type: TokenType::Weekday,
1743    },
1744    PatternEntry {
1745        pattern: "donderdag",
1746        token_type: TokenType::Weekday,
1747    },
1748    PatternEntry {
1749        pattern: "vrijdag",
1750        token_type: TokenType::Weekday,
1751    },
1752    PatternEntry {
1753        pattern: "zaterdag",
1754        token_type: TokenType::Weekday,
1755    },
1756    // Time units
1757    PatternEntry {
1758        pattern: "seconde",
1759        token_type: TokenType::TimeUnit,
1760    },
1761    PatternEntry {
1762        pattern: "seconden",
1763        token_type: TokenType::TimeUnit,
1764    },
1765    PatternEntry {
1766        pattern: "minuut",
1767        token_type: TokenType::TimeUnit,
1768    },
1769    PatternEntry {
1770        pattern: "minuten",
1771        token_type: TokenType::TimeUnit,
1772    },
1773    PatternEntry {
1774        pattern: "uur",
1775        token_type: TokenType::TimeUnit,
1776    },
1777    PatternEntry {
1778        pattern: "uren",
1779        token_type: TokenType::TimeUnit,
1780    },
1781    PatternEntry {
1782        pattern: "dag",
1783        token_type: TokenType::TimeUnit,
1784    },
1785    PatternEntry {
1786        pattern: "dagen",
1787        token_type: TokenType::TimeUnit,
1788    },
1789    PatternEntry {
1790        pattern: "week",
1791        token_type: TokenType::TimeUnit,
1792    },
1793    PatternEntry {
1794        pattern: "weken",
1795        token_type: TokenType::TimeUnit,
1796    },
1797    PatternEntry {
1798        pattern: "maand",
1799        token_type: TokenType::TimeUnit,
1800    },
1801    PatternEntry {
1802        pattern: "maanden",
1803        token_type: TokenType::TimeUnit,
1804    },
1805    PatternEntry {
1806        pattern: "jaar",
1807        token_type: TokenType::TimeUnit,
1808    },
1809    PatternEntry {
1810        pattern: "jaren",
1811        token_type: TokenType::TimeUnit,
1812    },
1813    // Keywords
1814    PatternEntry {
1815        pattern: "geleden",
1816        token_type: TokenType::Ago,
1817    },
1818    PatternEntry {
1819        pattern: "over",
1820        token_type: TokenType::Within,
1821    },
1822];
1823
// ============================================================================
// Portuguese patterns
// ============================================================================
1827
1828static PT_PATTERNS: &[PatternEntry] = &[
1829    // Casual date
1830    PatternEntry {
1831        pattern: "agora",
1832        token_type: TokenType::CasualDate,
1833    },
1834    PatternEntry {
1835        pattern: "hoje",
1836        token_type: TokenType::CasualDate,
1837    },
1838    PatternEntry {
1839        pattern: "amanhã",
1840        token_type: TokenType::CasualDate,
1841    },
1842    PatternEntry {
1843        pattern: "amanha",
1844        token_type: TokenType::CasualDate,
1845    },
1846    PatternEntry {
1847        pattern: "ontem",
1848        token_type: TokenType::CasualDate,
1849    },
1850    PatternEntry {
1851        pattern: "anteontem",
1852        token_type: TokenType::CasualDate,
1853    },
1854    // Months
1855    PatternEntry {
1856        pattern: "janeiro",
1857        token_type: TokenType::Month,
1858    },
1859    PatternEntry {
1860        pattern: "fevereiro",
1861        token_type: TokenType::Month,
1862    },
1863    PatternEntry {
1864        pattern: "março",
1865        token_type: TokenType::Month,
1866    },
1867    PatternEntry {
1868        pattern: "marco",
1869        token_type: TokenType::Month,
1870    },
1871    PatternEntry {
1872        pattern: "abril",
1873        token_type: TokenType::Month,
1874    },
1875    PatternEntry {
1876        pattern: "maio",
1877        token_type: TokenType::Month,
1878    },
1879    PatternEntry {
1880        pattern: "junho",
1881        token_type: TokenType::Month,
1882    },
1883    PatternEntry {
1884        pattern: "julho",
1885        token_type: TokenType::Month,
1886    },
1887    PatternEntry {
1888        pattern: "agosto",
1889        token_type: TokenType::Month,
1890    },
1891    PatternEntry {
1892        pattern: "setembro",
1893        token_type: TokenType::Month,
1894    },
1895    PatternEntry {
1896        pattern: "outubro",
1897        token_type: TokenType::Month,
1898    },
1899    PatternEntry {
1900        pattern: "novembro",
1901        token_type: TokenType::Month,
1902    },
1903    PatternEntry {
1904        pattern: "dezembro",
1905        token_type: TokenType::Month,
1906    },
1907    // Weekdays
1908    PatternEntry {
1909        pattern: "domingo",
1910        token_type: TokenType::Weekday,
1911    },
1912    PatternEntry {
1913        pattern: "segunda-feira",
1914        token_type: TokenType::Weekday,
1915    },
1916    PatternEntry {
1917        pattern: "segunda",
1918        token_type: TokenType::Weekday,
1919    },
1920    PatternEntry {
1921        pattern: "terça-feira",
1922        token_type: TokenType::Weekday,
1923    },
1924    PatternEntry {
1925        pattern: "terca-feira",
1926        token_type: TokenType::Weekday,
1927    },
1928    PatternEntry {
1929        pattern: "terça",
1930        token_type: TokenType::Weekday,
1931    },
1932    PatternEntry {
1933        pattern: "terca",
1934        token_type: TokenType::Weekday,
1935    },
1936    PatternEntry {
1937        pattern: "quarta-feira",
1938        token_type: TokenType::Weekday,
1939    },
1940    PatternEntry {
1941        pattern: "quarta",
1942        token_type: TokenType::Weekday,
1943    },
1944    PatternEntry {
1945        pattern: "quinta-feira",
1946        token_type: TokenType::Weekday,
1947    },
1948    PatternEntry {
1949        pattern: "quinta",
1950        token_type: TokenType::Weekday,
1951    },
1952    PatternEntry {
1953        pattern: "sexta-feira",
1954        token_type: TokenType::Weekday,
1955    },
1956    PatternEntry {
1957        pattern: "sexta",
1958        token_type: TokenType::Weekday,
1959    },
1960    PatternEntry {
1961        pattern: "sábado",
1962        token_type: TokenType::Weekday,
1963    },
1964    PatternEntry {
1965        pattern: "sabado",
1966        token_type: TokenType::Weekday,
1967    },
1968    // Time units
1969    PatternEntry {
1970        pattern: "segundo",
1971        token_type: TokenType::TimeUnit,
1972    },
1973    PatternEntry {
1974        pattern: "segundos",
1975        token_type: TokenType::TimeUnit,
1976    },
1977    PatternEntry {
1978        pattern: "minuto",
1979        token_type: TokenType::TimeUnit,
1980    },
1981    PatternEntry {
1982        pattern: "minutos",
1983        token_type: TokenType::TimeUnit,
1984    },
1985    PatternEntry {
1986        pattern: "hora",
1987        token_type: TokenType::TimeUnit,
1988    },
1989    PatternEntry {
1990        pattern: "horas",
1991        token_type: TokenType::TimeUnit,
1992    },
1993    PatternEntry {
1994        pattern: "dia",
1995        token_type: TokenType::TimeUnit,
1996    },
1997    PatternEntry {
1998        pattern: "dias",
1999        token_type: TokenType::TimeUnit,
2000    },
2001    PatternEntry {
2002        pattern: "semana",
2003        token_type: TokenType::TimeUnit,
2004    },
2005    PatternEntry {
2006        pattern: "semanas",
2007        token_type: TokenType::TimeUnit,
2008    },
2009    PatternEntry {
2010        pattern: "mês",
2011        token_type: TokenType::TimeUnit,
2012    },
2013    PatternEntry {
2014        pattern: "mes",
2015        token_type: TokenType::TimeUnit,
2016    },
2017    PatternEntry {
2018        pattern: "meses",
2019        token_type: TokenType::TimeUnit,
2020    },
2021    PatternEntry {
2022        pattern: "ano",
2023        token_type: TokenType::TimeUnit,
2024    },
2025    PatternEntry {
2026        pattern: "anos",
2027        token_type: TokenType::TimeUnit,
2028    },
2029    // Keywords
2030    PatternEntry {
2031        pattern: "atrás",
2032        token_type: TokenType::Ago,
2033    },
2034    PatternEntry {
2035        pattern: "atras",
2036        token_type: TokenType::Ago,
2037    },
2038    PatternEntry {
2039        pattern: "em",
2040        token_type: TokenType::In,
2041    },
2042];
2043
// ============================================================================
// Russian patterns
// ============================================================================
2047
2048static RU_PATTERNS: &[PatternEntry] = &[
2049    // Casual date
2050    PatternEntry {
2051        pattern: "сейчас",
2052        token_type: TokenType::CasualDate,
2053    },
2054    PatternEntry {
2055        pattern: "сегодня",
2056        token_type: TokenType::CasualDate,
2057    },
2058    PatternEntry {
2059        pattern: "завтра",
2060        token_type: TokenType::CasualDate,
2061    },
2062    PatternEntry {
2063        pattern: "вчера",
2064        token_type: TokenType::CasualDate,
2065    },
2066    PatternEntry {
2067        pattern: "послезавтра",
2068        token_type: TokenType::CasualDate,
2069    },
2070    PatternEntry {
2071        pattern: "послепослезавтра",
2072        token_type: TokenType::CasualDate,
2073    },
2074    PatternEntry {
2075        pattern: "позавчера",
2076        token_type: TokenType::CasualDate,
2077    },
2078    PatternEntry {
2079        pattern: "позапозавчера",
2080        token_type: TokenType::CasualDate,
2081    },
2082    // Casual time
2083    PatternEntry {
2084        pattern: "утром",
2085        token_type: TokenType::CasualTime,
2086    },
2087    PatternEntry {
2088        pattern: "вечером",
2089        token_type: TokenType::CasualTime,
2090    },
2091    PatternEntry {
2092        pattern: "ночью",
2093        token_type: TokenType::CasualTime,
2094    },
2095    PatternEntry {
2096        pattern: "полдень",
2097        token_type: TokenType::CasualTime,
2098    },
2099    PatternEntry {
2100        pattern: "полночь",
2101        token_type: TokenType::CasualTime,
2102    },
2103    // Months
2104    PatternEntry {
2105        pattern: "январь",
2106        token_type: TokenType::Month,
2107    },
2108    PatternEntry {
2109        pattern: "января",
2110        token_type: TokenType::Month,
2111    },
2112    PatternEntry {
2113        pattern: "февраль",
2114        token_type: TokenType::Month,
2115    },
2116    PatternEntry {
2117        pattern: "февраля",
2118        token_type: TokenType::Month,
2119    },
2120    PatternEntry {
2121        pattern: "март",
2122        token_type: TokenType::Month,
2123    },
2124    PatternEntry {
2125        pattern: "марта",
2126        token_type: TokenType::Month,
2127    },
2128    PatternEntry {
2129        pattern: "апрель",
2130        token_type: TokenType::Month,
2131    },
2132    PatternEntry {
2133        pattern: "апреля",
2134        token_type: TokenType::Month,
2135    },
2136    PatternEntry {
2137        pattern: "май",
2138        token_type: TokenType::Month,
2139    },
2140    PatternEntry {
2141        pattern: "мая",
2142        token_type: TokenType::Month,
2143    },
2144    PatternEntry {
2145        pattern: "июнь",
2146        token_type: TokenType::Month,
2147    },
2148    PatternEntry {
2149        pattern: "июня",
2150        token_type: TokenType::Month,
2151    },
2152    PatternEntry {
2153        pattern: "июль",
2154        token_type: TokenType::Month,
2155    },
2156    PatternEntry {
2157        pattern: "июля",
2158        token_type: TokenType::Month,
2159    },
2160    PatternEntry {
2161        pattern: "август",
2162        token_type: TokenType::Month,
2163    },
2164    PatternEntry {
2165        pattern: "августа",
2166        token_type: TokenType::Month,
2167    },
2168    PatternEntry {
2169        pattern: "сентябрь",
2170        token_type: TokenType::Month,
2171    },
2172    PatternEntry {
2173        pattern: "сентября",
2174        token_type: TokenType::Month,
2175    },
2176    PatternEntry {
2177        pattern: "октябрь",
2178        token_type: TokenType::Month,
2179    },
2180    PatternEntry {
2181        pattern: "октября",
2182        token_type: TokenType::Month,
2183    },
2184    PatternEntry {
2185        pattern: "ноябрь",
2186        token_type: TokenType::Month,
2187    },
2188    PatternEntry {
2189        pattern: "ноября",
2190        token_type: TokenType::Month,
2191    },
2192    PatternEntry {
2193        pattern: "декабрь",
2194        token_type: TokenType::Month,
2195    },
2196    PatternEntry {
2197        pattern: "декабря",
2198        token_type: TokenType::Month,
2199    },
2200    // Weekdays
2201    PatternEntry {
2202        pattern: "воскресенье",
2203        token_type: TokenType::Weekday,
2204    },
2205    PatternEntry {
2206        pattern: "понедельник",
2207        token_type: TokenType::Weekday,
2208    },
2209    PatternEntry {
2210        pattern: "вторник",
2211        token_type: TokenType::Weekday,
2212    },
2213    PatternEntry {
2214        pattern: "среда",
2215        token_type: TokenType::Weekday,
2216    },
2217    PatternEntry {
2218        pattern: "среду",
2219        token_type: TokenType::Weekday,
2220    },
2221    PatternEntry {
2222        pattern: "четверг",
2223        token_type: TokenType::Weekday,
2224    },
2225    PatternEntry {
2226        pattern: "пятница",
2227        token_type: TokenType::Weekday,
2228    },
2229    PatternEntry {
2230        pattern: "пятницу",
2231        token_type: TokenType::Weekday,
2232    },
2233    PatternEntry {
2234        pattern: "суббота",
2235        token_type: TokenType::Weekday,
2236    },
2237    PatternEntry {
2238        pattern: "субботу",
2239        token_type: TokenType::Weekday,
2240    },
2241    // Time units
2242    PatternEntry {
2243        pattern: "секунда",
2244        token_type: TokenType::TimeUnit,
2245    },
2246    PatternEntry {
2247        pattern: "секунды",
2248        token_type: TokenType::TimeUnit,
2249    },
2250    PatternEntry {
2251        pattern: "секунд",
2252        token_type: TokenType::TimeUnit,
2253    },
2254    PatternEntry {
2255        pattern: "минута",
2256        token_type: TokenType::TimeUnit,
2257    },
2258    PatternEntry {
2259        pattern: "минуты",
2260        token_type: TokenType::TimeUnit,
2261    },
2262    PatternEntry {
2263        pattern: "минуту",
2264        token_type: TokenType::TimeUnit,
2265    },
2266    PatternEntry {
2267        pattern: "минут",
2268        token_type: TokenType::TimeUnit,
2269    },
2270    PatternEntry {
2271        pattern: "час",
2272        token_type: TokenType::TimeUnit,
2273    },
2274    PatternEntry {
2275        pattern: "часа",
2276        token_type: TokenType::TimeUnit,
2277    },
2278    PatternEntry {
2279        pattern: "часов",
2280        token_type: TokenType::TimeUnit,
2281    },
2282    PatternEntry {
2283        pattern: "день",
2284        token_type: TokenType::TimeUnit,
2285    },
2286    PatternEntry {
2287        pattern: "дня",
2288        token_type: TokenType::TimeUnit,
2289    },
2290    PatternEntry {
2291        pattern: "дней",
2292        token_type: TokenType::TimeUnit,
2293    },
2294    PatternEntry {
2295        pattern: "неделя",
2296        token_type: TokenType::TimeUnit,
2297    },
2298    PatternEntry {
2299        pattern: "недели",
2300        token_type: TokenType::TimeUnit,
2301    },
2302    PatternEntry {
2303        pattern: "недель",
2304        token_type: TokenType::TimeUnit,
2305    },
2306    PatternEntry {
2307        pattern: "месяц",
2308        token_type: TokenType::TimeUnit,
2309    },
2310    PatternEntry {
2311        pattern: "месяца",
2312        token_type: TokenType::TimeUnit,
2313    },
2314    PatternEntry {
2315        pattern: "месяцев",
2316        token_type: TokenType::TimeUnit,
2317    },
2318    PatternEntry {
2319        pattern: "год",
2320        token_type: TokenType::TimeUnit,
2321    },
2322    PatternEntry {
2323        pattern: "года",
2324        token_type: TokenType::TimeUnit,
2325    },
2326    PatternEntry {
2327        pattern: "лет",
2328        token_type: TokenType::TimeUnit,
2329    },
2330    // Keywords
2331    PatternEntry {
2332        pattern: "назад",
2333        token_type: TokenType::Ago,
2334    },
2335    PatternEntry {
2336        pattern: "через",
2337        token_type: TokenType::Within,
2338    },
2339    PatternEntry {
2340        pattern: "в течение",
2341        token_type: TokenType::Within,
2342    },
2343    PatternEntry {
2344        pattern: "в течении",
2345        token_type: TokenType::Within,
2346    },
2347];
2348
// ============================================================================
// Swedish patterns
// ============================================================================
2352
2353static SV_PATTERNS: &[PatternEntry] = &[
2354    // Casual date
2355    PatternEntry {
2356        pattern: "nu",
2357        token_type: TokenType::CasualDate,
2358    },
2359    PatternEntry {
2360        pattern: "idag",
2361        token_type: TokenType::CasualDate,
2362    },
2363    PatternEntry {
2364        pattern: "ikväll",
2365        token_type: TokenType::CasualDate,
2366    },
2367    PatternEntry {
2368        pattern: "i kväll",
2369        token_type: TokenType::CasualDate,
2370    },
2371    PatternEntry {
2372        pattern: "imorgon",
2373        token_type: TokenType::CasualDate,
2374    },
2375    PatternEntry {
2376        pattern: "igår",
2377        token_type: TokenType::CasualDate,
2378    },
2379    PatternEntry {
2380        pattern: "igar",
2381        token_type: TokenType::CasualDate,
2382    },
2383    PatternEntry {
2384        pattern: "förrgår",
2385        token_type: TokenType::CasualDate,
2386    },
2387    PatternEntry {
2388        pattern: "forrgar",
2389        token_type: TokenType::CasualDate,
2390    },
2391    // Months
2392    PatternEntry {
2393        pattern: "januari",
2394        token_type: TokenType::Month,
2395    },
2396    PatternEntry {
2397        pattern: "februari",
2398        token_type: TokenType::Month,
2399    },
2400    PatternEntry {
2401        pattern: "mars",
2402        token_type: TokenType::Month,
2403    },
2404    PatternEntry {
2405        pattern: "april",
2406        token_type: TokenType::Month,
2407    },
2408    PatternEntry {
2409        pattern: "maj",
2410        token_type: TokenType::Month,
2411    },
2412    PatternEntry {
2413        pattern: "juni",
2414        token_type: TokenType::Month,
2415    },
2416    PatternEntry {
2417        pattern: "juli",
2418        token_type: TokenType::Month,
2419    },
2420    PatternEntry {
2421        pattern: "augusti",
2422        token_type: TokenType::Month,
2423    },
2424    PatternEntry {
2425        pattern: "september",
2426        token_type: TokenType::Month,
2427    },
2428    PatternEntry {
2429        pattern: "oktober",
2430        token_type: TokenType::Month,
2431    },
2432    PatternEntry {
2433        pattern: "november",
2434        token_type: TokenType::Month,
2435    },
2436    PatternEntry {
2437        pattern: "december",
2438        token_type: TokenType::Month,
2439    },
2440    // Weekdays
2441    PatternEntry {
2442        pattern: "söndag",
2443        token_type: TokenType::Weekday,
2444    },
2445    PatternEntry {
2446        pattern: "sondag",
2447        token_type: TokenType::Weekday,
2448    },
2449    PatternEntry {
2450        pattern: "måndag",
2451        token_type: TokenType::Weekday,
2452    },
2453    PatternEntry {
2454        pattern: "mandag",
2455        token_type: TokenType::Weekday,
2456    },
2457    PatternEntry {
2458        pattern: "tisdag",
2459        token_type: TokenType::Weekday,
2460    },
2461    PatternEntry {
2462        pattern: "onsdag",
2463        token_type: TokenType::Weekday,
2464    },
2465    PatternEntry {
2466        pattern: "torsdag",
2467        token_type: TokenType::Weekday,
2468    },
2469    PatternEntry {
2470        pattern: "fredag",
2471        token_type: TokenType::Weekday,
2472    },
2473    PatternEntry {
2474        pattern: "lördag",
2475        token_type: TokenType::Weekday,
2476    },
2477    PatternEntry {
2478        pattern: "lordag",
2479        token_type: TokenType::Weekday,
2480    },
2481    // Time units
2482    PatternEntry {
2483        pattern: "sekund",
2484        token_type: TokenType::TimeUnit,
2485    },
2486    PatternEntry {
2487        pattern: "sekunder",
2488        token_type: TokenType::TimeUnit,
2489    },
2490    PatternEntry {
2491        pattern: "minut",
2492        token_type: TokenType::TimeUnit,
2493    },
2494    PatternEntry {
2495        pattern: "minuter",
2496        token_type: TokenType::TimeUnit,
2497    },
2498    PatternEntry {
2499        pattern: "timme",
2500        token_type: TokenType::TimeUnit,
2501    },
2502    PatternEntry {
2503        pattern: "timmar",
2504        token_type: TokenType::TimeUnit,
2505    },
2506    PatternEntry {
2507        pattern: "dag",
2508        token_type: TokenType::TimeUnit,
2509    },
2510    PatternEntry {
2511        pattern: "dagar",
2512        token_type: TokenType::TimeUnit,
2513    },
2514    PatternEntry {
2515        pattern: "vecka",
2516        token_type: TokenType::TimeUnit,
2517    },
2518    PatternEntry {
2519        pattern: "veckor",
2520        token_type: TokenType::TimeUnit,
2521    },
2522    PatternEntry {
2523        pattern: "månad",
2524        token_type: TokenType::TimeUnit,
2525    },
2526    PatternEntry {
2527        pattern: "månader",
2528        token_type: TokenType::TimeUnit,
2529    },
2530    PatternEntry {
2531        pattern: "år",
2532        token_type: TokenType::TimeUnit,
2533    },
2534    // Keywords
2535    PatternEntry {
2536        pattern: "sedan",
2537        token_type: TokenType::Ago,
2538    },
2539    PatternEntry {
2540        pattern: "om",
2541        token_type: TokenType::Within,
2542    },
2543];
2544
2545// ============================================================================
2546// Ukrainian patterns
2547// ============================================================================
2548
2549static UK_PATTERNS: &[PatternEntry] = &[
2550    // Casual date
2551    PatternEntry {
2552        pattern: "зараз",
2553        token_type: TokenType::CasualDate,
2554    },
2555    PatternEntry {
2556        pattern: "сьогодні",
2557        token_type: TokenType::CasualDate,
2558    },
2559    PatternEntry {
2560        pattern: "завтра",
2561        token_type: TokenType::CasualDate,
2562    },
2563    PatternEntry {
2564        pattern: "вчора",
2565        token_type: TokenType::CasualDate,
2566    },
2567    PatternEntry {
2568        pattern: "післязавтра",
2569        token_type: TokenType::CasualDate,
2570    },
2571    PatternEntry {
2572        pattern: "післяпіслязавтра",
2573        token_type: TokenType::CasualDate,
2574    },
2575    PatternEntry {
2576        pattern: "позавчора",
2577        token_type: TokenType::CasualDate,
2578    },
2579    PatternEntry {
2580        pattern: "позапозавчора",
2581        token_type: TokenType::CasualDate,
2582    },
2583    // Casual time
2584    PatternEntry {
2585        pattern: "вранці",
2586        token_type: TokenType::CasualTime,
2587    },
2588    PatternEntry {
2589        pattern: "ввечері",
2590        token_type: TokenType::CasualTime,
2591    },
2592    PatternEntry {
2593        pattern: "вночі",
2594        token_type: TokenType::CasualTime,
2595    },
2596    PatternEntry {
2597        pattern: "опівдні",
2598        token_type: TokenType::CasualTime,
2599    },
2600    PatternEntry {
2601        pattern: "опівночі",
2602        token_type: TokenType::CasualTime,
2603    },
2604    // Months
2605    PatternEntry {
2606        pattern: "січень",
2607        token_type: TokenType::Month,
2608    },
2609    PatternEntry {
2610        pattern: "січня",
2611        token_type: TokenType::Month,
2612    },
2613    PatternEntry {
2614        pattern: "лютий",
2615        token_type: TokenType::Month,
2616    },
2617    PatternEntry {
2618        pattern: "лютого",
2619        token_type: TokenType::Month,
2620    },
2621    PatternEntry {
2622        pattern: "березень",
2623        token_type: TokenType::Month,
2624    },
2625    PatternEntry {
2626        pattern: "березня",
2627        token_type: TokenType::Month,
2628    },
2629    PatternEntry {
2630        pattern: "квітень",
2631        token_type: TokenType::Month,
2632    },
2633    PatternEntry {
2634        pattern: "квітня",
2635        token_type: TokenType::Month,
2636    },
2637    PatternEntry {
2638        pattern: "травень",
2639        token_type: TokenType::Month,
2640    },
2641    PatternEntry {
2642        pattern: "травня",
2643        token_type: TokenType::Month,
2644    },
2645    PatternEntry {
2646        pattern: "червень",
2647        token_type: TokenType::Month,
2648    },
2649    PatternEntry {
2650        pattern: "червня",
2651        token_type: TokenType::Month,
2652    },
2653    PatternEntry {
2654        pattern: "липень",
2655        token_type: TokenType::Month,
2656    },
2657    PatternEntry {
2658        pattern: "липня",
2659        token_type: TokenType::Month,
2660    },
2661    PatternEntry {
2662        pattern: "серпень",
2663        token_type: TokenType::Month,
2664    },
2665    PatternEntry {
2666        pattern: "серпня",
2667        token_type: TokenType::Month,
2668    },
2669    PatternEntry {
2670        pattern: "вересень",
2671        token_type: TokenType::Month,
2672    },
2673    PatternEntry {
2674        pattern: "вересня",
2675        token_type: TokenType::Month,
2676    },
2677    PatternEntry {
2678        pattern: "жовтень",
2679        token_type: TokenType::Month,
2680    },
2681    PatternEntry {
2682        pattern: "жовтня",
2683        token_type: TokenType::Month,
2684    },
2685    PatternEntry {
2686        pattern: "листопад",
2687        token_type: TokenType::Month,
2688    },
2689    PatternEntry {
2690        pattern: "листопада",
2691        token_type: TokenType::Month,
2692    },
2693    PatternEntry {
2694        pattern: "грудень",
2695        token_type: TokenType::Month,
2696    },
2697    PatternEntry {
2698        pattern: "грудня",
2699        token_type: TokenType::Month,
2700    },
2701    // Weekdays
2702    PatternEntry {
2703        pattern: "неділя",
2704        token_type: TokenType::Weekday,
2705    },
2706    PatternEntry {
2707        pattern: "понеділок",
2708        token_type: TokenType::Weekday,
2709    },
2710    PatternEntry {
2711        pattern: "вівторок",
2712        token_type: TokenType::Weekday,
2713    },
2714    PatternEntry {
2715        pattern: "середа",
2716        token_type: TokenType::Weekday,
2717    },
2718    PatternEntry {
2719        pattern: "середу",
2720        token_type: TokenType::Weekday,
2721    },
2722    PatternEntry {
2723        pattern: "четвер",
2724        token_type: TokenType::Weekday,
2725    },
2726    PatternEntry {
2727        pattern: "п'ятниця",
2728        token_type: TokenType::Weekday,
2729    },
2730    PatternEntry {
2731        pattern: "п'ятницю",
2732        token_type: TokenType::Weekday,
2733    },
2734    PatternEntry {
2735        pattern: "субота",
2736        token_type: TokenType::Weekday,
2737    },
2738    PatternEntry {
2739        pattern: "суботу",
2740        token_type: TokenType::Weekday,
2741    },
2742    // Time units
2743    PatternEntry {
2744        pattern: "секунда",
2745        token_type: TokenType::TimeUnit,
2746    },
2747    PatternEntry {
2748        pattern: "секунди",
2749        token_type: TokenType::TimeUnit,
2750    },
2751    PatternEntry {
2752        pattern: "секунд",
2753        token_type: TokenType::TimeUnit,
2754    },
2755    PatternEntry {
2756        pattern: "хвилина",
2757        token_type: TokenType::TimeUnit,
2758    },
2759    PatternEntry {
2760        pattern: "хвилини",
2761        token_type: TokenType::TimeUnit,
2762    },
2763    PatternEntry {
2764        pattern: "хвилин",
2765        token_type: TokenType::TimeUnit,
2766    },
2767    PatternEntry {
2768        pattern: "година",
2769        token_type: TokenType::TimeUnit,
2770    },
2771    PatternEntry {
2772        pattern: "години",
2773        token_type: TokenType::TimeUnit,
2774    },
2775    PatternEntry {
2776        pattern: "годин",
2777        token_type: TokenType::TimeUnit,
2778    },
2779    PatternEntry {
2780        pattern: "день",
2781        token_type: TokenType::TimeUnit,
2782    },
2783    PatternEntry {
2784        pattern: "дня",
2785        token_type: TokenType::TimeUnit,
2786    },
2787    PatternEntry {
2788        pattern: "днів",
2789        token_type: TokenType::TimeUnit,
2790    },
2791    PatternEntry {
2792        pattern: "тиждень",
2793        token_type: TokenType::TimeUnit,
2794    },
2795    PatternEntry {
2796        pattern: "тижня",
2797        token_type: TokenType::TimeUnit,
2798    },
2799    PatternEntry {
2800        pattern: "тижнів",
2801        token_type: TokenType::TimeUnit,
2802    },
2803    PatternEntry {
2804        pattern: "місяць",
2805        token_type: TokenType::TimeUnit,
2806    },
2807    PatternEntry {
2808        pattern: "місяця",
2809        token_type: TokenType::TimeUnit,
2810    },
2811    PatternEntry {
2812        pattern: "місяців",
2813        token_type: TokenType::TimeUnit,
2814    },
2815    PatternEntry {
2816        pattern: "рік",
2817        token_type: TokenType::TimeUnit,
2818    },
2819    PatternEntry {
2820        pattern: "року",
2821        token_type: TokenType::TimeUnit,
2822    },
2823    PatternEntry {
2824        pattern: "років",
2825        token_type: TokenType::TimeUnit,
2826    },
2827    // Keywords
2828    PatternEntry {
2829        pattern: "тому",
2830        token_type: TokenType::Ago,
2831    },
2832    PatternEntry {
2833        pattern: "через",
2834        token_type: TokenType::Within,
2835    },
2836];
2837
2838// ============================================================================
2839// Chinese patterns
2840// ============================================================================
2841
2842static ZH_PATTERNS: &[PatternEntry] = &[
2843    // Casual date - Simplified
2844    PatternEntry {
2845        pattern: "现在",
2846        token_type: TokenType::CasualDate,
2847    },
2848    PatternEntry {
2849        pattern: "今天",
2850        token_type: TokenType::CasualDate,
2851    },
2852    PatternEntry {
2853        pattern: "今晚",
2854        token_type: TokenType::CasualDate,
2855    },
2856    PatternEntry {
2857        pattern: "明天",
2858        token_type: TokenType::CasualDate,
2859    },
2860    PatternEntry {
2861        pattern: "昨天",
2862        token_type: TokenType::CasualDate,
2863    },
2864    PatternEntry {
2865        pattern: "后天",
2866        token_type: TokenType::CasualDate,
2867    },
2868    PatternEntry {
2869        pattern: "前天",
2870        token_type: TokenType::CasualDate,
2871    },
2872    // Casual date - Traditional
2873    PatternEntry {
2874        pattern: "現在",
2875        token_type: TokenType::CasualDate,
2876    },
2877    PatternEntry {
2878        pattern: "今日",
2879        token_type: TokenType::CasualDate,
2880    },
2881    PatternEntry {
2882        pattern: "明日",
2883        token_type: TokenType::CasualDate,
2884    },
2885    PatternEntry {
2886        pattern: "昨日",
2887        token_type: TokenType::CasualDate,
2888    },
2889    PatternEntry {
2890        pattern: "後天",
2891        token_type: TokenType::CasualDate,
2892    },
2893    PatternEntry {
2894        pattern: "聽日",
2895        token_type: TokenType::CasualDate,
2896    },
2897    PatternEntry {
2898        pattern: "而家",
2899        token_type: TokenType::CasualDate,
2900    },
2901    // Casual time
2902    PatternEntry {
2903        pattern: "上午",
2904        token_type: TokenType::CasualTime,
2905    },
2906    PatternEntry {
2907        pattern: "下午",
2908        token_type: TokenType::CasualTime,
2909    },
2910    PatternEntry {
2911        pattern: "早上",
2912        token_type: TokenType::CasualTime,
2913    },
2914    PatternEntry {
2915        pattern: "晚上",
2916        token_type: TokenType::CasualTime,
2917    },
2918    PatternEntry {
2919        pattern: "中午",
2920        token_type: TokenType::CasualTime,
2921    },
2922    // Weekdays
2923    PatternEntry {
2924        pattern: "星期日",
2925        token_type: TokenType::Weekday,
2926    },
2927    PatternEntry {
2928        pattern: "星期一",
2929        token_type: TokenType::Weekday,
2930    },
2931    PatternEntry {
2932        pattern: "星期二",
2933        token_type: TokenType::Weekday,
2934    },
2935    PatternEntry {
2936        pattern: "星期三",
2937        token_type: TokenType::Weekday,
2938    },
2939    PatternEntry {
2940        pattern: "星期四",
2941        token_type: TokenType::Weekday,
2942    },
2943    PatternEntry {
2944        pattern: "星期五",
2945        token_type: TokenType::Weekday,
2946    },
2947    PatternEntry {
2948        pattern: "星期六",
2949        token_type: TokenType::Weekday,
2950    },
2951    PatternEntry {
2952        pattern: "周日",
2953        token_type: TokenType::Weekday,
2954    },
2955    PatternEntry {
2956        pattern: "周一",
2957        token_type: TokenType::Weekday,
2958    },
2959    PatternEntry {
2960        pattern: "周二",
2961        token_type: TokenType::Weekday,
2962    },
2963    PatternEntry {
2964        pattern: "周三",
2965        token_type: TokenType::Weekday,
2966    },
2967    PatternEntry {
2968        pattern: "周四",
2969        token_type: TokenType::Weekday,
2970    },
2971    PatternEntry {
2972        pattern: "周五",
2973        token_type: TokenType::Weekday,
2974    },
2975    PatternEntry {
2976        pattern: "周六",
2977        token_type: TokenType::Weekday,
2978    },
2979    // Time units
2980    PatternEntry {
2981        pattern: "秒",
2982        token_type: TokenType::TimeUnit,
2983    },
2984    PatternEntry {
2985        pattern: "分",
2986        token_type: TokenType::TimeUnit,
2987    },
2988    PatternEntry {
2989        pattern: "分钟",
2990        token_type: TokenType::TimeUnit,
2991    },
2992    PatternEntry {
2993        pattern: "分鐘",
2994        token_type: TokenType::TimeUnit,
2995    },
2996    PatternEntry {
2997        pattern: "小时",
2998        token_type: TokenType::TimeUnit,
2999    },
3000    PatternEntry {
3001        pattern: "小時",
3002        token_type: TokenType::TimeUnit,
3003    },
3004    PatternEntry {
3005        pattern: "天",
3006        token_type: TokenType::TimeUnit,
3007    },
3008    PatternEntry {
3009        pattern: "日",
3010        token_type: TokenType::TimeUnit,
3011    },
3012    PatternEntry {
3013        pattern: "周",
3014        token_type: TokenType::TimeUnit,
3015    },
3016    PatternEntry {
3017        pattern: "週",
3018        token_type: TokenType::TimeUnit,
3019    },
3020    PatternEntry {
3021        pattern: "星期",
3022        token_type: TokenType::TimeUnit,
3023    },
3024    PatternEntry {
3025        pattern: "月",
3026        token_type: TokenType::TimeUnit,
3027    },
3028    PatternEntry {
3029        pattern: "年",
3030        token_type: TokenType::TimeUnit,
3031    },
3032    // Keywords
3033    PatternEntry {
3034        pattern: "前",
3035        token_type: TokenType::Ago,
3036    },
3037    PatternEntry {
3038        pattern: "后",
3039        token_type: TokenType::Later,
3040    },
3041    PatternEntry {
3042        pattern: "後",
3043        token_type: TokenType::Later,
3044    },
3045    PatternEntry {
3046        pattern: "内",
3047        token_type: TokenType::Within,
3048    },
3049    PatternEntry {
3050        pattern: "內",
3051        token_type: TokenType::Within,
3052    },
3053];
3054
3055// Lazily initialized automaton per locale
3056static EN_AUTOMATON: LazyLock<LocaleAutomaton> =
3057    LazyLock::new(|| LocaleAutomaton::new(EN_PATTERNS));
3058static DE_AUTOMATON: LazyLock<LocaleAutomaton> =
3059    LazyLock::new(|| LocaleAutomaton::new(DE_PATTERNS));
3060static ES_AUTOMATON: LazyLock<LocaleAutomaton> =
3061    LazyLock::new(|| LocaleAutomaton::new(ES_PATTERNS));
3062static FR_AUTOMATON: LazyLock<LocaleAutomaton> =
3063    LazyLock::new(|| LocaleAutomaton::new(FR_PATTERNS));
3064static IT_AUTOMATON: LazyLock<LocaleAutomaton> =
3065    LazyLock::new(|| LocaleAutomaton::new(IT_PATTERNS));
3066static JA_AUTOMATON: LazyLock<LocaleAutomaton> =
3067    LazyLock::new(|| LocaleAutomaton::new(JA_PATTERNS));
3068static NL_AUTOMATON: LazyLock<LocaleAutomaton> =
3069    LazyLock::new(|| LocaleAutomaton::new(NL_PATTERNS));
3070static PT_AUTOMATON: LazyLock<LocaleAutomaton> =
3071    LazyLock::new(|| LocaleAutomaton::new(PT_PATTERNS));
3072static RU_AUTOMATON: LazyLock<LocaleAutomaton> =
3073    LazyLock::new(|| LocaleAutomaton::new(RU_PATTERNS));
3074static SV_AUTOMATON: LazyLock<LocaleAutomaton> =
3075    LazyLock::new(|| LocaleAutomaton::new(SV_PATTERNS));
3076static UK_AUTOMATON: LazyLock<LocaleAutomaton> =
3077    LazyLock::new(|| LocaleAutomaton::new(UK_PATTERNS));
3078static ZH_AUTOMATON: LazyLock<LocaleAutomaton> =
3079    LazyLock::new(|| LocaleAutomaton::new(ZH_PATTERNS));
3080
3081/// Locale-specific automaton
3082struct LocaleAutomaton {
3083    ac: AhoCorasick,
3084    patterns: &'static [PatternEntry],
3085}
3086
3087impl LocaleAutomaton {
3088    fn new(patterns: &'static [PatternEntry]) -> Self {
3089        let pattern_strs: Vec<&str> = patterns.iter().map(|p| p.pattern).collect();
3090        let ac = AhoCorasick::builder()
3091            .match_kind(MatchKind::LeftmostLongest)
3092            .build(&pattern_strs)
3093            .expect("Failed to build Aho-Corasick automaton");
3094        Self { ac, patterns }
3095    }
3096
3097    fn scan(&self, text: &str) -> Vec<Token> {
3098        let mut tokens = Vec::new();
3099
3100        for mat in self.ac.find_iter(text) {
3101            let pattern_id = mat.pattern().as_usize();
3102            let entry = &self.patterns[pattern_id];
3103
3104            let start = mat.start();
3105            let end = mat.end();
3106            let pattern_len = end - start;
3107
3108            // Word boundary check with special handling for single-letter time units
3109            // Allow digits before single-letter time unit abbreviations (e.g., "1h", "5m")
3110            let valid_start = if start == 0 {
3111                true
3112            } else {
3113                let prev_char = text.as_bytes()[start - 1];
3114                if pattern_len == 1 && entry.token_type == TokenType::TimeUnit {
3115                    // Single-letter time units can follow digits
3116                    !prev_char.is_ascii_alphabetic()
3117                } else {
3118                    !prev_char.is_ascii_alphanumeric()
3119                }
3120            };
3121
3122            let valid_end = end == text.len() || !text.as_bytes()[end].is_ascii_alphanumeric();
3123
3124            if valid_start && valid_end {
3125                tokens.push(Token {
3126                    token_type: entry.token_type,
3127                    start,
3128                    end,
3129                    pattern_id,
3130                });
3131            }
3132        }
3133
3134        tokens
3135    }
3136}
3137
3138/// Locale-aware token scanner used by parser prefilters.
3139pub struct TokenScanner;
3140
3141impl TokenScanner {
3142    /// Scan text using the automaton for the provided locale.
3143    pub fn scan_locale(text: &str, locale: Locale) -> Vec<Token> {
3144        let automaton = match locale {
3145            Locale::En => &*EN_AUTOMATON,
3146            Locale::De => &*DE_AUTOMATON,
3147            Locale::Es => &*ES_AUTOMATON,
3148            Locale::Fr => &*FR_AUTOMATON,
3149            Locale::It => &*IT_AUTOMATON,
3150            Locale::Ja => &*JA_AUTOMATON,
3151            Locale::Nl => &*NL_AUTOMATON,
3152            Locale::Pt => &*PT_AUTOMATON,
3153            Locale::Ru => &*RU_AUTOMATON,
3154            Locale::Sv => &*SV_AUTOMATON,
3155            Locale::Uk => &*UK_AUTOMATON,
3156            Locale::Zh => &*ZH_AUTOMATON,
3157        };
3158        automaton.scan(text)
3159    }
3160
3161    /// Scan text using the default English automaton.
3162    pub fn scan(text: &str) -> Vec<Token> {
3163        Self::scan_locale(text, Locale::En)
3164    }
3165
3166    /// Return `true` if the scanned text contains a token of the given type.
3167    pub fn contains_type(text: &str, token_type: TokenType) -> bool {
3168        Self::scan(text).iter().any(|t| t.token_type == token_type)
3169    }
3170
3171    /// Return `true` if the text appears to contain date-related hints.
3172    ///
3173    /// This fast heuristic treats any digits or any scanned token as a hint.
3174    pub fn has_date_hint(text: &str) -> bool {
3175        // Quick check for digits (dates often have numbers)
3176        if text.bytes().any(|b| b.is_ascii_digit()) {
3177            return true;
3178        }
3179
3180        // Check for any token
3181        !Self::scan(text).is_empty()
3182    }
3183}
3184
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when any token in `tokens` has the `wanted` type.
    fn has_type(tokens: &[Token], wanted: TokenType) -> bool {
        tokens.iter().any(|token| token.token_type == wanted)
    }

    #[test]
    fn test_scan_months() {
        let found = TokenScanner::scan("meeting in january");
        assert!(has_type(&found, TokenType::Month));
    }

    #[test]
    fn test_scan_weekdays() {
        let found = TokenScanner::scan("see you on monday");
        assert!(has_type(&found, TokenType::Weekday));
    }

    #[test]
    fn test_scan_casual() {
        let found = TokenScanner::scan("let's meet tomorrow");
        assert!(has_type(&found, TokenType::CasualDate));
    }

    #[test]
    fn test_word_boundary() {
        // The "may" embedded in "maybe" must not be reported as a month.
        let found = TokenScanner::scan("maybe we can");
        assert!(!has_type(&found, TokenType::Month));
    }

    #[test]
    fn test_german_locale() {
        let found = TokenScanner::scan_locale("treffen wir uns morgen", Locale::De);
        assert!(has_type(&found, TokenType::CasualDate));
    }

    #[test]
    fn test_spanish_locale() {
        let found = TokenScanner::scan_locale("nos vemos mañana", Locale::Es);
        assert!(has_type(&found, TokenType::CasualDate));
    }
}