Skip to main content

whichtime_sys/
lib.rs

//! Lower-level parsing engine for the `whichtime` project.
//!
//! `whichtime-sys` exposes the lower-level Rust parser engine that powers the
//! higher-level [`whichtime`] crate and the project's FFI bindings. The main
//! entry point is [`WhichTime`], which coordinates a locale-specific parser
//! pipeline:
//!
//! 1. Pre-scan the input for date-related tokens.
//! 2. Run matching parsers for the active locale.
//! 3. Post-process matches with refiners.
//! 4. Return [`ParsedResult`] values or the first parsed date.
//!
//! Supported locales: `en`, `de`, `es`, `fr`, `it`, `ja`, `nl`, `pt`, `ru`,
//! `sv`, `uk`, and `zh`.
15
16pub mod components;
17pub mod context;
18pub mod dictionaries;
19pub mod error;
20pub mod parsers;
21pub mod refiners;
22pub mod results;
23pub mod scanner;
24pub mod types;
25
26// Re-export main types
27pub use components::{Component, ComponentFlags, FastComponents};
28pub use context::ParsingContext;
29pub use dictionaries::{CasualDateType, CasualTimeType, Locale, RelativeModifier};
30pub use error::{Error, Result};
31pub use results::{ParsedResult, ReferenceWithTimezone};
32pub use types::{Meridiem, Weekday};
33
34use parsers::Parser;
35use refiners::Refiner;
36
37/// Locale-specific parser configuration.
38///
39/// This bundles the parser and refiner pipeline that will be used by
40/// [`WhichTime`]. Most callers should use [`WhichTime::new`] or
41/// [`WhichTime::with_locale`] instead of constructing this directly.
42pub struct Configuration {
43    /// Parsers that attempt to extract date and time mentions from text.
44    pub parsers: Vec<Box<dyn Parser>>,
45    /// Refiners that merge, filter, or adjust parser output.
46    pub refiners: Vec<Box<dyn Refiner>>,
47    /// Locale that the configuration is intended for.
48    pub locale: Locale,
49}
50
51/// The main natural-language date parser.
52///
53/// A `WhichTime` instance owns a locale-specific set of parsers and refiners
54/// and can be reused across multiple parse calls.
55pub struct WhichTime {
56    parsers: Vec<Box<dyn Parser>>,
57    refiners: Vec<Box<dyn Refiner>>,
58    locale: Locale,
59}
60
61impl WhichTime {
62    /// Create a parser with the default English configuration.
63    pub fn new() -> Self {
64        Self::with_locale(Locale::En)
65    }
66
67    /// Create a parser configured for a specific locale.
68    pub fn with_locale(locale: Locale) -> Self {
69        let config = create_configuration_for_locale(locale);
70        Self {
71            parsers: config.parsers,
72            refiners: config.refiners,
73            locale: config.locale,
74        }
75    }
76
77    /// Create a parser from a custom [`Configuration`].
78    pub fn with_configuration(config: Configuration) -> Self {
79        Self {
80            parsers: config.parsers,
81            refiners: config.refiners,
82            locale: config.locale,
83        }
84    }
85
86    /// Return the parser locale.
87    pub fn locale(&self) -> Locale {
88        self.locale
89    }
90
91    /// Parse text and return every detected date/time match.
92    ///
93    /// If `reference` is `None`, the current local time is used as the
94    /// reference instant for relative expressions such as "tomorrow" or
95    /// "next week".
96    pub fn parse(
97        &self,
98        text: &str,
99        reference: Option<chrono::DateTime<chrono::Local>>,
100    ) -> Result<Vec<ParsedResult>> {
101        let reference = reference.unwrap_or_else(chrono::Local::now);
102        let ref_tz = ReferenceWithTimezone::new(reference, None);
103        let context = ParsingContext::with_locale(text, &ref_tz, self.locale);
104
105        let mut results = Vec::new();
106
107        for parser in &self.parsers {
108            if !parser.should_apply(&context) {
109                continue;
110            }
111            let parsed = parser.parse(&context)?;
112            results.extend(parsed);
113        }
114
115        results.sort_by_key(|r| r.index);
116
117        for refiner in &self.refiners {
118            results = refiner.refine(&context, results);
119            results.sort_by_key(|r| r.index);
120        }
121
122        Ok(results)
123    }
124
125    /// Parse text and return the first resolved date, if any.
126    ///
127    /// This is a convenience wrapper around [`WhichTime::parse`] for callers
128    /// that only care about the earliest match.
129    pub fn parse_date(
130        &self,
131        text: &str,
132        reference: Option<chrono::DateTime<chrono::Local>>,
133    ) -> Result<Option<chrono::DateTime<chrono::Local>>> {
134        let results = self.parse(text, reference)?;
135        if let Some(result) = results.first() {
136            let ref_dt = reference.unwrap_or_else(chrono::Local::now);
137            let ref_tz = ReferenceWithTimezone::new(ref_dt, None);
138            Ok(result.start.to_datetime(&ref_tz))
139        } else {
140            Ok(None)
141        }
142    }
143}
144
145impl Default for WhichTime {
146    fn default() -> Self {
147        Self::new()
148    }
149}
150
151impl Clone for WhichTime {
152    fn clone(&self) -> Self {
153        Self::with_locale(self.locale)
154    }
155}
156
157/// Build the default parser configuration for a locale.
158///
159/// This is primarily useful when composing a custom [`WhichTime`] instance or
160/// when inspecting the parser/refiner pipeline used by a given locale.
161pub fn create_configuration_for_locale(locale: Locale) -> Configuration {
162    use parsers::common::{
163        MultiLocaleCasualTimeParser, MultiLocaleTimeUnitAgoParser, MultiLocaleTimeUnitWithinParser,
164        MultiLocaleWeekdayParser,
165    };
166    use parsers::de::{
167        DECasualDateParser, DECasualTimeParser, DETimeExpressionParser, DETimeUnitRelativeParser,
168    };
169    use parsers::en::{ISOFormatParser, SlashDateParser, TimeExpressionParser};
170
171    match locale {
172        Locale::En => parsers::en::create_casual_configuration(),
173        Locale::De => {
174            use parsers::de::{DEMonthNameParser, DEWeekdayParser};
175            Configuration {
176                parsers: vec![
177                    Box::new(ISOFormatParser),
178                    Box::new(SlashDateParser::new(true)), // European date format (DD/MM/YYYY)
179                    Box::new(DEMonthNameParser::new()),
180                    Box::new(DEWeekdayParser::new()),
181                    Box::new(DETimeExpressionParser::new()),
182                    Box::new(DECasualDateParser::new()),
183                    Box::new(DECasualTimeParser::new()),
184                    Box::new(DETimeUnitRelativeParser::new()),
185                    Box::new(MultiLocaleTimeUnitAgoParser::new(locale)),
186                    Box::new(MultiLocaleTimeUnitWithinParser::new(locale)),
187                ],
188                refiners: vec![
189                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
190                    Box::new(crate::refiners::MergeDateRangeRefiner),
191                    Box::new(crate::refiners::OverlapRemovalRefiner),
192                    Box::new(crate::refiners::MergeDateTimeRefiner),
193                    Box::new(crate::refiners::ForwardDateRefiner),
194                ],
195                locale,
196            }
197        }
198        Locale::Es => {
199            use parsers::es::{
200                ESCasualDateParser, ESCasualTimeParser, ESMonthNameParser, ESTimeExpressionParser,
201            };
202            Configuration {
203                parsers: vec![
204                    Box::new(ISOFormatParser),
205                    Box::new(SlashDateParser::new(true)), // DD/MM/YYYY format
206                    Box::new(ESMonthNameParser::new()),
207                    Box::new(MultiLocaleWeekdayParser::new(locale)),
208                    Box::new(ESTimeExpressionParser::new()),
209                    Box::new(ESCasualDateParser::new()),
210                    Box::new(ESCasualTimeParser::new()),
211                    Box::new(MultiLocaleTimeUnitAgoParser::new(locale)),
212                    Box::new(MultiLocaleTimeUnitWithinParser::new(locale)),
213                ],
214                refiners: vec![
215                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
216                    Box::new(crate::refiners::OverlapRemovalRefiner),
217                    Box::new(crate::refiners::MergeDateTimeRefiner),
218                    Box::new(crate::refiners::ForwardDateRefiner),
219                ],
220                locale,
221            }
222        }
223        Locale::Fr => {
224            use parsers::fr::{
225                FRCasualDateParser, FRCasualTimeParser, FRMonthNameParser, FRSlashDateParser,
226                FRTimeExpressionParser, FRTimeUnitRelativeParser, FRWeekdayParser,
227            };
228            Configuration {
229                parsers: vec![
230                    Box::new(ISOFormatParser),
231                    Box::new(FRSlashDateParser::new()), // DD/MM/YYYY format with optional year
232                    Box::new(FRMonthNameParser::new()),
233                    Box::new(FRWeekdayParser::new()),
234                    Box::new(FRTimeExpressionParser::new()),
235                    Box::new(FRCasualDateParser::new()),
236                    Box::new(FRCasualTimeParser::new()),
237                    Box::new(FRTimeUnitRelativeParser::new()),
238                    Box::new(MultiLocaleTimeUnitAgoParser::new(locale)),
239                    Box::new(MultiLocaleTimeUnitWithinParser::new(locale)),
240                ],
241                refiners: vec![
242                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
243                    Box::new(crate::refiners::OverlapRemovalRefiner),
244                    Box::new(crate::refiners::MergeDateTimeRefiner),
245                    Box::new(crate::refiners::ForwardDateRefiner),
246                ],
247                locale,
248            }
249        }
250        Locale::It => {
251            use parsers::it::{ITCasualDateParser, ITMonthNameParser, ITYearMonthDayParser};
252            Configuration {
253                parsers: vec![
254                    Box::new(ISOFormatParser),
255                    Box::new(SlashDateParser::new(true)), // DD/MM/YYYY format
256                    Box::new(ITYearMonthDayParser::new()), // YYYY.MM.DD format
257                    Box::new(ITMonthNameParser::new()),
258                    Box::new(MultiLocaleWeekdayParser::new(locale)),
259                    Box::new(TimeExpressionParser),
260                    Box::new(ITCasualDateParser::new()),
261                    Box::new(MultiLocaleCasualTimeParser::new(locale)),
262                    Box::new(MultiLocaleTimeUnitAgoParser::new(locale)),
263                    Box::new(MultiLocaleTimeUnitWithinParser::new(locale)),
264                ],
265                refiners: vec![
266                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
267                    Box::new(crate::refiners::OverlapRemovalRefiner),
268                    Box::new(crate::refiners::MergeDateTimeRefiner),
269                    Box::new(crate::refiners::ForwardDateRefiner),
270                ],
271                locale,
272            }
273        }
274        Locale::Ja => {
275            use parsers::ja::{
276                JACasualDateParser, JACasualTimeParser, JASlashDateParser, JAStandardDateParser,
277                JATimeExpressionParser, JAWeekdayParser,
278            };
279            Configuration {
280                parsers: vec![
281                    Box::new(ISOFormatParser),
282                    Box::new(JASlashDateParser::new()),
283                    Box::new(JAStandardDateParser::new()),
284                    Box::new(JAWeekdayParser::new()),
285                    Box::new(JATimeExpressionParser::new()),
286                    Box::new(JACasualDateParser::new()),
287                    Box::new(JACasualTimeParser::new()),
288                    Box::new(MultiLocaleTimeUnitAgoParser::new(locale)),
289                    Box::new(MultiLocaleTimeUnitWithinParser::new(locale)),
290                ],
291                refiners: vec![
292                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
293                    Box::new(crate::refiners::OverlapRemovalRefiner),
294                    Box::new(crate::refiners::MergeDateTimeRefiner),
295                    Box::new(crate::refiners::ForwardDateRefiner),
296                ],
297                locale,
298            }
299        }
300        Locale::Nl => {
301            use parsers::nl::{
302                NLCasualDateParser, NLCasualTimeParser, NLMonthNameParser, NLWeekdayParser,
303            };
304            Configuration {
305                parsers: vec![
306                    Box::new(ISOFormatParser),
307                    Box::new(SlashDateParser::new(true)), // DD/MM/YYYY format
308                    Box::new(NLMonthNameParser::new()),
309                    Box::new(NLWeekdayParser::new()),
310                    Box::new(TimeExpressionParser),
311                    Box::new(NLCasualDateParser::new()),
312                    Box::new(NLCasualTimeParser::new()),
313                    Box::new(MultiLocaleTimeUnitAgoParser::new(locale)),
314                    Box::new(MultiLocaleTimeUnitWithinParser::new(locale)),
315                ],
316                refiners: vec![
317                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
318                    Box::new(crate::refiners::MergeDateRangeRefiner),
319                    Box::new(crate::refiners::OverlapRemovalRefiner),
320                    Box::new(crate::refiners::MergeDateTimeRefiner),
321                    Box::new(crate::refiners::ForwardDateRefiner),
322                ],
323                locale,
324            }
325        }
326        Locale::Pt => {
327            use parsers::pt::{
328                PTCasualDateParser, PTCasualTimeParser, PTMonthNameParser, PTTimeExpressionParser,
329                PTWeekdayParser,
330            };
331            Configuration {
332                parsers: vec![
333                    Box::new(ISOFormatParser),
334                    Box::new(SlashDateParser::new(true)), // DD/MM/YYYY format
335                    Box::new(PTMonthNameParser::new()),
336                    Box::new(PTWeekdayParser::new()),
337                    Box::new(PTTimeExpressionParser::new()),
338                    Box::new(PTCasualDateParser::new()),
339                    Box::new(PTCasualTimeParser::new()),
340                    Box::new(MultiLocaleTimeUnitAgoParser::new(locale)),
341                    Box::new(MultiLocaleTimeUnitWithinParser::new(locale)),
342                ],
343                refiners: vec![
344                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
345                    Box::new(crate::refiners::MergeDateRangeRefiner),
346                    Box::new(crate::refiners::OverlapRemovalRefiner),
347                    Box::new(crate::refiners::MergeDateTimeRefiner),
348                    Box::new(crate::refiners::ForwardDateRefiner),
349                ],
350                locale,
351            }
352        }
353        Locale::Ru => {
354            use parsers::ru::{
355                RUCasualDateParser, RUCasualTimeParser, RUMonthNameParser,
356                RUTimeUnitRelativeParser, RUWeekdayParser,
357            };
358            Configuration {
359                parsers: vec![
360                    Box::new(ISOFormatParser),
361                    Box::new(SlashDateParser::new(true)), // DD.MM.YYYY
362                    Box::new(RUMonthNameParser::new()),
363                    Box::new(RUWeekdayParser::new()),
364                    Box::new(TimeExpressionParser),
365                    Box::new(RUCasualDateParser::new()),
366                    Box::new(RUCasualTimeParser::new()),
367                    Box::new(RUTimeUnitRelativeParser::new()),
368                    Box::new(MultiLocaleTimeUnitWithinParser::new(locale)),
369                ],
370                refiners: vec![
371                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
372                    Box::new(crate::refiners::MergeDateRangeRefiner),
373                    Box::new(crate::refiners::OverlapRemovalRefiner),
374                    Box::new(crate::refiners::MergeDateTimeRefiner),
375                    Box::new(crate::refiners::ForwardDateRefiner),
376                ],
377                locale,
378            }
379        }
380        Locale::Sv => {
381            use parsers::sv::{
382                SVCasualDateParser, SVMonthNameParser, SVTimeUnitRelativeParser, SVWeekdayParser,
383            };
384            Configuration {
385                parsers: vec![
386                    Box::new(ISOFormatParser),
387                    Box::new(SlashDateParser::new(true)), // DD/MM/YYYY or DD.MM.YYYY
388                    Box::new(SVMonthNameParser::new()),
389                    Box::new(SVWeekdayParser::new()),
390                    Box::new(TimeExpressionParser),
391                    Box::new(SVCasualDateParser::new()),
392                    Box::new(SVTimeUnitRelativeParser::new()),
393                    Box::new(MultiLocaleTimeUnitAgoParser::new(locale)),
394                    Box::new(MultiLocaleTimeUnitWithinParser::new(locale)),
395                ],
396                refiners: vec![
397                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
398                    Box::new(crate::refiners::MergeDateRangeRefiner),
399                    Box::new(crate::refiners::OverlapRemovalRefiner),
400                    Box::new(crate::refiners::MergeDateTimeRefiner),
401                    Box::new(crate::refiners::ForwardDateRefiner),
402                ],
403                locale,
404            }
405        }
406        Locale::Uk => {
407            use parsers::uk::{UKCasualDateParser, UKMonthNameParser, UKWeekdayParser};
408            Configuration {
409                parsers: vec![
410                    Box::new(ISOFormatParser),
411                    Box::new(UKMonthNameParser::new()),
412                    Box::new(UKWeekdayParser::new()),
413                    Box::new(TimeExpressionParser),
414                    Box::new(UKCasualDateParser::new()),
415                    Box::new(MultiLocaleTimeUnitAgoParser::new(locale)),
416                    Box::new(MultiLocaleTimeUnitWithinParser::new(locale)),
417                ],
418                refiners: vec![
419                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
420                    Box::new(crate::refiners::MergeDateRangeRefiner),
421                    Box::new(crate::refiners::OverlapRemovalRefiner),
422                    Box::new(crate::refiners::MergeDateTimeRefiner),
423                    Box::new(crate::refiners::ForwardDateRefiner),
424                ],
425                locale,
426            }
427        }
428        Locale::Zh => {
429            use parsers::zh::{
430                ZHCasualDateParser, ZHStandardDateParser, ZHTimeExpressionParser,
431                ZHTimeUnitWithinParser, ZHWeekdayParser,
432            };
433            Configuration {
434                parsers: vec![
435                    Box::new(ISOFormatParser),
436                    Box::new(ZHStandardDateParser::new()),
437                    Box::new(ZHWeekdayParser::new()),
438                    Box::new(ZHTimeExpressionParser::new()),
439                    Box::new(ZHCasualDateParser::new()),
440                    Box::new(ZHTimeUnitWithinParser::new()),
441                ],
442                refiners: vec![
443                    Box::new(crate::refiners::MergeWeekdayDateRefiner),
444                    Box::new(crate::refiners::OverlapRemovalRefiner),
445                    Box::new(crate::refiners::MergeDateTimeRefiner),
446                    Box::new(crate::refiners::ForwardDateRefiner),
447                ],
448                locale,
449            }
450        }
451    }
452}