Skip to main content

partial_date/
models.rs

1//! Core types for the partial-date library.
2//!
3//! This module contains all the structs and enums that describe inputs,
4//! configuration, and extraction results.
5
6// ---------------------------------------------------------------------------
7// Result types
8// ---------------------------------------------------------------------------
9
/// Result of trying to extract one date component (day, month, or year).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Extracted<T> {
    /// The component was present in the input itself.
    Found(T),
    /// The component was absent and no default was configured.
    NotFound,
    /// The component was absent from the input, so a default was applied.
    Defaulted(T),
}

impl<T> Extracted<T> {
    /// `true` only when the value came directly from the input.
    pub fn is_found(&self) -> bool {
        matches!(self, Self::Found(..))
    }

    /// `true` only when nothing was found and no default was applied.
    pub fn is_not_found(&self) -> bool {
        matches!(self, Self::NotFound)
    }

    /// `true` only when the value is a default rather than an input value.
    pub fn is_defaulted(&self) -> bool {
        matches!(self, Self::Defaulted(..))
    }

    /// Borrows the carried value, whether it was found or defaulted.
    ///
    /// Yields `None` for [`Extracted::NotFound`].
    pub fn value(&self) -> Option<&T> {
        if let Self::Found(inner) | Self::Defaulted(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}
46
47/// A fully-resolved (possibly partial) date returned by the extractor.
48#[derive(Debug)]
49pub struct PartialDate {
50    pub day: Day,
51    pub month: Month,
52    pub year: Year,
53}
54
55/// Extracted day component (1–31).
56#[derive(Debug)]
57pub struct Day {
58    pub value: Extracted<u8>,
59}
60
61/// Extracted month component (1–12).
62#[derive(Debug)]
63pub struct Month {
64    pub number: Extracted<u8>,
65    pub name: Extracted<MonthName>,
66}
67
68/// Extracted year component.
69///
70/// Uses `i32` to accommodate the full range required by the spec (0–3000) and
71/// to leave room for historical (negative / BC) years if needed in future.
72#[derive(Debug)]
73pub struct Year {
74    pub value: Extracted<i32>,
75}
76
77// ---------------------------------------------------------------------------
78// Configuration types
79// ---------------------------------------------------------------------------
80
/// States whether a date component is expected to appear in the input.
///
/// This guides disambiguation when one token could be read as more than one
/// component (e.g. `12/06` could be DD/MM or MM/DD).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum IsExpected {
    /// The component is definitely expected.
    Yes,
    /// The component is definitely not expected.
    No,
    /// There is no strong expectation either way. This is the default.
    #[default]
    Maybe,
}
95
96/// Configuration for day extraction.
97#[derive(Debug, Clone)]
98pub struct DayConfig {
99    /// Minimum valid day value (inclusive). Default: `1`.
100    pub min: u8,
101    /// Maximum valid day value (inclusive). Default: `31`.
102    pub max: u8,
103    /// Whether a day component is expected in the input.
104    pub expected: IsExpected,
105    /// Default day value to use when the day is not found, if any.
106    pub default: Option<u8>,
107}
108
// TODO: Consider moving these candidate checks onto the `Day`, `Month` and
// `Year` structs rather than the configs. The only issue might be the min and
// max, but the configs could be attached to the structs to help with that.
// If that makes the structs too messy to return in `PartialDate`, introduce
// an intermediate type (e.g. `DayCandidate`) that is mapped to `Day` via
// `From`/`Into` when the final `PartialDate` is determined. That way the
// config is never exposed in the value returned to the user of the library.
117impl DayConfig {
118    /// Return `Some(value as u8)` when `value` is a plausible day for this
119    /// config, or `None` when it is not.
120    ///
121    /// A value is a plausible day when:
122    /// - `digit_count` is not 4 (four-digit numbers cannot be days).
123    /// - The value is within the universal day range 1–31.
124    /// - The value falls within the caller-configured `min`/`max` bounds.
125    pub fn try_as_day_candidate(&self, value: i16, digit_count: u8) -> Option<u8> {
126        if digit_count == 4 {
127            return None;
128        }
129        let as_u8 = u8::try_from(value).ok()?;
130        if (1..=31).contains(&value) && (self.min..=self.max).contains(&as_u8) {
131            Some(as_u8)
132        } else {
133            None
134        }
135    }
136}
137
138impl Default for DayConfig {
139    fn default() -> Self {
140        DayConfig {
141            min: 1,
142            max: 31,
143            expected: IsExpected::Maybe,
144            default: None,
145        }
146    }
147}
148
149impl DayConfig {
150    /// Set the valid day range.
151    ///
152    /// # Panics
153    ///
154    /// Panics if `min > max`. Use [`DayConfig::try_with_range`] when the
155    /// values come from dynamic input and you need to handle the error.
156    ///
157    /// ```
158    /// use partial_date::models::DayConfig;
159    ///
160    /// let config = DayConfig::default().with_range(1, 28);
161    /// ```
162    pub fn with_range(self, min: u8, max: u8) -> Self {
163        assert!(
164            min <= max,
165            "DayConfig::with_range min ({min}) must not exceed max ({max})"
166        );
167        DayConfig { min, max, ..self }
168    }
169
170    /// Set the valid day range, returning `Err` if `min > max`.
171    ///
172    /// Use this when the range values come from dynamic input. For
173    /// known-valid static values, prefer [`DayConfig::with_range`].
174    pub fn try_with_range(self, min: u8, max: u8) -> Result<Self, ConfigRangeError> {
175        if min > max {
176            return Err(ConfigRangeError::MinExceedsMax {
177                min: min as i32,
178                max: max as i32,
179            });
180        }
181        Ok(DayConfig { min, max, ..self })
182    }
183
184    /// Set whether a day component is expected in the input.
185    ///
186    /// ```
187    /// use partial_date::models::{DayConfig, IsExpected};
188    ///
189    /// let config = DayConfig::default().with_expected(IsExpected::Yes);
190    /// ```
191    pub fn with_expected(self, expected: IsExpected) -> Self {
192        DayConfig { expected, ..self }
193    }
194
195    /// Set the default day value to use when no day is found in the input.
196    ///
197    /// ```
198    /// use partial_date::models::DayConfig;
199    ///
200    /// let config = DayConfig::default().with_default(1);
201    /// ```
202    pub fn with_default(self, default: u8) -> Self {
203        DayConfig {
204            default: Some(default),
205            ..self
206        }
207    }
208}
209
/// Error returned by [`DayConfig::try_with_range`], [`MonthConfig::try_with_range`],
/// and [`YearConfig::try_with_range`] when the provided range is invalid.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// propagated with `?` into `Box<dyn Error>` and similar error containers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ConfigRangeError {
    /// The minimum value exceeds the maximum value.
    MinExceedsMax { min: i32, max: i32 },
}

impl std::fmt::Display for ConfigRangeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::MinExceedsMax { min, max } => {
                write!(f, "range minimum ({min}) must not exceed maximum ({max})")
            }
        }
    }
}

impl std::error::Error for ConfigRangeError {}
217
218/// Configuration for month extraction.
219#[derive(Debug, Clone)]
220pub struct MonthConfig {
221    /// Minimum valid month value (inclusive). Default: `1`.
222    pub min: u8,
223    /// Maximum valid month value (inclusive). Default: `12`.
224    pub max: u8,
225    /// Whether a month component is expected in the input.
226    pub expected: IsExpected,
227    /// Default month value to use when the month is not found, if any.
228    pub default: Option<u8>,
229}
230
231impl MonthConfig {
232    /// Return `Some(value as u8)` when `value` is a plausible month for this
233    /// config, or `None` when it is not.
234    ///
235    /// A value is a plausible month when:
236    /// - `digit_count` is not 4 (four-digit numbers cannot be months).
237    /// - The value is within the universal month range 1–12.
238    /// - The value falls within the caller-configured `min`/`max` bounds.
239    pub fn try_as_month_candidate(&self, value: i16, digit_count: u8) -> Option<u8> {
240        if digit_count == 4 {
241            return None;
242        }
243        let as_u8 = u8::try_from(value).ok()?;
244        if (1..=12).contains(&value) && (self.min..=self.max).contains(&as_u8) {
245            Some(as_u8)
246        } else {
247            None
248        }
249    }
250}
251
252impl Default for MonthConfig {
253    fn default() -> Self {
254        MonthConfig {
255            min: 1,
256            max: 12,
257            expected: IsExpected::Maybe,
258            default: None,
259        }
260    }
261}
262
263impl MonthConfig {
264    /// Set the valid month range.
265    ///
266    /// # Panics
267    ///
268    /// Panics if `min > max`. Use [`MonthConfig::try_with_range`] when the
269    /// values come from dynamic input and you need to handle the error.
270    ///
271    /// ```
272    /// use partial_date::models::MonthConfig;
273    ///
274    /// let config = MonthConfig::default().with_range(1, 6);
275    /// ```
276    pub fn with_range(self, min: u8, max: u8) -> Self {
277        assert!(
278            min <= max,
279            "MonthConfig::with_range min ({min}) must not exceed max ({max})"
280        );
281        MonthConfig { min, max, ..self }
282    }
283
284    /// Set the valid month range, returning `Err` if `min > max`.
285    ///
286    /// Use this when the range values come from dynamic input. For
287    /// known-valid static values, prefer [`MonthConfig::with_range`].
288    pub fn try_with_range(self, min: u8, max: u8) -> Result<Self, ConfigRangeError> {
289        if min > max {
290            return Err(ConfigRangeError::MinExceedsMax {
291                min: min as i32,
292                max: max as i32,
293            });
294        }
295        Ok(MonthConfig { min, max, ..self })
296    }
297
298    /// Set whether a month component is expected in the input.
299    ///
300    /// ```
301    /// use partial_date::models::{IsExpected, MonthConfig};
302    ///
303    /// let config = MonthConfig::default().with_expected(IsExpected::Yes);
304    /// ```
305    pub fn with_expected(self, expected: IsExpected) -> Self {
306        MonthConfig { expected, ..self }
307    }
308
309    /// Set the default month value to use when no month is found in the input.
310    ///
311    /// ```
312    /// use partial_date::models::MonthConfig;
313    ///
314    /// let config = MonthConfig::default().with_default(1);
315    /// ```
316    pub fn with_default(self, default: u8) -> Self {
317        MonthConfig {
318            default: Some(default),
319            ..self
320        }
321    }
322}
323
/// The pivot point for a [`TwoDigitYearExpansion::SlidingWindow`].
///
/// A valid pivot is in the range `1–99`. A pivot of `p` means two-digit
/// values `0..=(p - 1)` map to the upper (more recent) part of the window,
/// and values `p..=99` map to the lower (earlier) part.
///
/// # Construction
///
/// Use [`SlidingWindowPivot::new`] for known-valid static values (panics on
/// invalid input) or [`SlidingWindowPivot::try_new`] when the value comes
/// from dynamic input and you need to handle the error:
///
/// ```
/// use partial_date::models::SlidingWindowPivot;
///
/// // Known-valid — panics if the value is invalid.
/// let pivot = SlidingWindowPivot::new(50);
///
/// // Dynamic input — returns Result.
/// let pivot = SlidingWindowPivot::try_new(50).unwrap();
///
/// // Via TryFrom.
/// let pivot: SlidingWindowPivot = 50_u8.try_into().unwrap();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SlidingWindowPivot(u8);

impl SlidingWindowPivot {
    /// Create a new `SlidingWindowPivot`.
    ///
    /// # Panics
    ///
    /// Panics if `pivot` is `0` or greater than `99`. Use
    /// [`SlidingWindowPivot::try_new`] when the value comes from dynamic input
    /// and you need to handle the error instead of panicking.
    pub fn new(pivot: u8) -> Self {
        assert!(
            (1..=99).contains(&pivot),
            "SlidingWindowPivot must be in the range 1–99, got {pivot}"
        );
        SlidingWindowPivot(pivot)
    }

    /// Create a new `SlidingWindowPivot`, returning `Err` if `pivot` is `0`
    /// or greater than `99`.
    ///
    /// Use this when the pivot value comes from dynamic input (e.g. user
    /// configuration, a config file). For known-valid static values, prefer
    /// [`SlidingWindowPivot::new`].
    ///
    /// # Errors
    ///
    /// Returns [`SlidingWindowPivotError::InvalidPivot`] carrying the rejected
    /// value when `pivot` is outside `1..=99`.
    pub fn try_new(pivot: u8) -> Result<Self, SlidingWindowPivotError> {
        if (1..=99).contains(&pivot) {
            Ok(SlidingWindowPivot(pivot))
        } else {
            Err(SlidingWindowPivotError::InvalidPivot(pivot))
        }
    }
}

impl TryFrom<u8> for SlidingWindowPivot {
    type Error = SlidingWindowPivotError;

    /// Validating conversion; equivalent to [`SlidingWindowPivot::try_new`].
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        SlidingWindowPivot::try_new(value)
    }
}

impl From<SlidingWindowPivot> for u8 {
    /// Unwraps the validated pivot value (always in `1..=99`).
    fn from(pivot: SlidingWindowPivot) -> u8 {
        pivot.0
    }
}

/// Errors returned by [`SlidingWindowPivot::try_new`] when validation fails.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// propagated with `?` into `Box<dyn Error>` and similar error containers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SlidingWindowPivotError {
    /// The pivot value must be in the range `1–99`.
    InvalidPivot(u8),
}

impl std::fmt::Display for SlidingWindowPivotError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidPivot(pivot) => {
                write!(f, "sliding-window pivot must be in the range 1–99, got {pivot}")
            }
        }
    }
}

impl std::error::Error for SlidingWindowPivotError {}
401
/// A year that falls exactly on a century boundary (divisible by 100).
///
/// Used with [`TwoDigitYearExpansion::Always`] to express that all two-digit
/// values should map into a specific century. For example, `Century::new(1800)`
/// means `00 → 1800`, `34 → 1834`, `99 → 1899`.
///
/// # Construction
///
/// Use [`Century::new`] for known-valid static values (panics on invalid
/// input) or [`Century::try_new`] when the value comes from dynamic input
/// and you need to handle the error:
///
/// ```
/// use partial_date::models::Century;
///
/// // Known-valid — panics if the value is not a century boundary.
/// let century = Century::new(1800);
///
/// // Dynamic input — returns Result.
/// let century = Century::try_new(2000).unwrap();
///
/// // Via TryFrom.
/// let century: Century = 2000_i32.try_into().unwrap();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Century(i32);

impl Century {
    /// Create a new `Century`.
    ///
    /// # Panics
    ///
    /// Panics if `year` is not divisible by `100`. Use [`Century::try_new`]
    /// when the value comes from dynamic input and you need to handle the
    /// error instead of panicking.
    pub fn new(year: i32) -> Self {
        assert!(
            year % 100 == 0,
            "Century must be divisible by 100, got {year}"
        );
        Century(year)
    }

    /// Create a new `Century`, returning `Err` if `year` is not divisible by
    /// `100`.
    ///
    /// Use this when the year value comes from dynamic input (e.g. user
    /// configuration, a config file). For known-valid static values, prefer
    /// [`Century::new`].
    ///
    /// # Errors
    ///
    /// Returns [`CenturyError::NotACenturyBoundary`] carrying the rejected
    /// value when `year % 100 != 0`.
    pub fn try_new(year: i32) -> Result<Self, CenturyError> {
        if year % 100 == 0 {
            Ok(Century(year))
        } else {
            Err(CenturyError::NotACenturyBoundary(year))
        }
    }
}

impl TryFrom<i32> for Century {
    type Error = CenturyError;

    /// Validating conversion; equivalent to [`Century::try_new`].
    fn try_from(value: i32) -> Result<Self, Self::Error> {
        Century::try_new(value)
    }
}

impl From<Century> for i32 {
    /// Unwraps the validated century-boundary year.
    fn from(century: Century) -> i32 {
        century.0
    }
}

/// Errors returned by [`Century::try_new`] when validation fails.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// propagated with `?` into `Box<dyn Error>` and similar error containers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CenturyError {
    /// The year value must be divisible by `100` (e.g. `1800`, `2000`).
    NotACenturyBoundary(i32),
}

impl std::fmt::Display for CenturyError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::NotACenturyBoundary(year) => {
                write!(f, "century must be divisible by 100, got {year}")
            }
        }
    }
}

impl std::error::Error for CenturyError {}
479
480/// Strategy for expanding two-digit years into four-digit years.
481///
482/// # Choosing a strategy
483///
484/// - Use [`SlidingWindow`] when two-digit years could span two adjacent
485///   centuries and you want to bias towards a particular era.
486/// - Use [`Always`] when all two-digit years belong to the same century
487///   without ambiguity (e.g. children's birthdays are all in the 2000s).
488/// - Use [`Literal`] when you want the two-digit value kept as-is (e.g.
489///   historical records where the year is genuinely in the range 0–99).
490///
491/// [`SlidingWindow`]: TwoDigitYearExpansion::SlidingWindow
492/// [`Always`]: TwoDigitYearExpansion::Always
493/// [`Literal`]: TwoDigitYearExpansion::Literal
494#[derive(Debug, Clone, Copy, PartialEq, Eq)]
495pub enum TwoDigitYearExpansion {
496    /// Splits the 100 possible two-digit values across two adjacent centuries.
497    ///
498    /// `earliest_year` is the smallest year the window can ever produce — it
499    /// is the year that two-digit value `pivot` maps to.  Values
500    /// `pivot..=99` map to `earliest_year..=(earliest_year + (99 - pivot))`,
501    /// and values `0..(pivot)` map to
502    /// `(earliest_year + (100 - pivot))..(earliest_year + 99)`.
503    ///
504    /// # Example
505    ///
506    /// ```
507    /// use partial_date::models::{SlidingWindowPivot, TwoDigitYearExpansion};
508    ///
509    /// // 00–49 → 2000–2049, 50–99 → 1950–1999 (the default).
510    /// let expansion = TwoDigitYearExpansion::SlidingWindow {
511    ///     earliest_year: 1950,
512    ///     pivot: SlidingWindowPivot::new(50),
513    /// };
514    ///
515    /// // Industrial Revolution era: 00–49 → 1800–1849, 50–99 → 1750–1799.
516    /// let expansion = TwoDigitYearExpansion::SlidingWindow {
517    ///     earliest_year: 1750,
518    ///     pivot: SlidingWindowPivot::new(50),
519    /// };
520    /// ```
521    SlidingWindow {
522        /// The smallest year this window can produce (the year `pivot` maps
523        /// to).  Must be chosen so that the full window
524        /// `[earliest_year, earliest_year + 99]` covers the values you
525        /// intend to accept.  Use [`YearConfig::min`] and [`YearConfig::max`]
526        /// to reject any expanded years that fall outside your valid range.
527        earliest_year: i32,
528        /// The two-digit value at which the window wraps from the lower
529        /// (earlier) century to the upper (more recent) century.
530        pivot: SlidingWindowPivot,
531    },
532    /// Maps all two-digit values into a single century.
533    ///
534    /// `00` maps to the century start, `99` maps to `century + 99`.
535    ///
536    /// # Example
537    ///
538    /// ```
539    /// use partial_date::models::{Century, TwoDigitYearExpansion};
540    ///
541    /// // All two-digit years are in the 2000s: 00 → 2000, 34 → 2034.
542    /// let expansion = TwoDigitYearExpansion::Always(Century::new(2000));
543    ///
544    /// // All two-digit years are in the 1800s: 00 → 1800, 34 → 1834.
545    /// let expansion = TwoDigitYearExpansion::Always(Century::new(1800));
546    /// ```
547    Always(Century),
548    /// Return the two-digit value literally (e.g. `24` stays as `24`).
549    ///
550    /// Useful when processing historical records where the year genuinely
551    /// falls in the range `0–99`, or when you want to apply your own
552    /// post-processing.
553    Literal,
554}
555
556impl Default for TwoDigitYearExpansion {
557    /// The standard modern sliding window: `00–49 → 2000–2049`, `50–99 → 1950–1999`.
558    fn default() -> Self {
559        TwoDigitYearExpansion::SlidingWindow {
560            earliest_year: 1950,
561            pivot: SlidingWindowPivot(50),
562        }
563    }
564}
565
566/// Configuration for year extraction.
567#[derive(Debug, Clone)]
568pub struct YearConfig {
569    /// Minimum valid year value (inclusive). Default: `0`.
570    pub min: i32,
571    /// Maximum valid year value (inclusive). Default: `3000`.
572    pub max: i32,
573    /// Whether a year component is expected in the input.
574    pub expected: IsExpected,
575    /// Default year value to use when the year is not found, if any.
576    pub default: Option<i32>,
577    /// Strategy for expanding two-digit years. Default: [`TwoDigitYearExpansion::SlidingWindow`].
578    pub two_digit_expansion: TwoDigitYearExpansion,
579    /// When `true`, a single-digit token (`1`–`9`) is treated as a two-digit
580    /// year by prepending a zero — `5` becomes `05` — and then expanded
581    /// according to [`YearConfig::two_digit_expansion`].
582    ///
583    /// This option only applies when the other date components (day and month)
584    /// have already been filled by unambiguous tokens, so the interpreter can
585    /// confirm that the single digit is genuinely intended as a year.
586    ///
587    /// Default: `false`.  Enable when processing inputs like `"1 January 5"`
588    /// where `5` means year AD 5 (literal) or year 2005 (sliding window).
589    pub single_digit_year_expansion: bool,
590}
591
592impl YearConfig {
593    /// Return the expanded year value when `value` (with `digit_count` original
594    /// digits) is a plausible year for this config, or `None` when it is not.
595    ///
596    /// - 4-digit values are used as-is.
597    /// - 3-digit values (100–999) are treated as literal years.
598    /// - 2-digit values are expanded according to [`TwoDigitYearExpansion`].
599    /// - 1-digit values are accepted only when
600    ///   [`YearConfig::single_digit_year_expansion`] is `true`, in which case
601    ///   `value` is treated as `0value` (e.g. `5` → `05`) and then expanded
602    ///   using the same two-digit expansion strategy.
603    /// - All other digit counts return `None`.
604    ///
605    /// The expanded value must also fall within the configured `min`/`max`
606    /// bounds.
607    pub fn try_as_year_candidate(&self, value: i16, digit_count: u8) -> Option<i32> {
608        // Normalise single-digit values to their two-digit equivalent when the
609        // option is enabled, then fall through to the two-digit expansion path.
610        let (effective_value, effective_digit_count) =
611            if digit_count == 1 && self.single_digit_year_expansion {
612                // Prepend a zero: "5" → "05".  The digit count is now 2.
613                (value, 2u8)
614            } else {
615                (value, digit_count)
616            };
617
618        let expanded = match effective_digit_count {
619            4 => effective_value as i32,
620            // 3-digit values (100–999) are treated as literal years: year 100,
621            // year 999, etc.  This covers word-number inputs like "nine hundred
622            // ninety-nine" which replace to the 3-digit numeral 999.
623            3 => effective_value as i32,
624            2 => {
625                let raw = effective_value as i32;
626                match &self.two_digit_expansion {
627                    TwoDigitYearExpansion::Literal => raw,
628                    TwoDigitYearExpansion::Always(century) => i32::from(*century) + raw,
629                    TwoDigitYearExpansion::SlidingWindow {
630                        earliest_year,
631                        pivot,
632                    } => {
633                        let pivot = u8::from(*pivot) as i32;
634                        if raw < pivot {
635                            // Upper (more recent) half: 0..(pivot-1)
636                            earliest_year + (100 - pivot) + raw
637                        } else {
638                            // Lower (earlier) half: pivot..99
639                            earliest_year + (raw - pivot)
640                        }
641                    }
642                }
643            }
644            _ => return None,
645        };
646        if expanded >= self.min && expanded <= self.max {
647            Some(expanded)
648        } else {
649            None
650        }
651    }
652}
653
654impl Default for YearConfig {
655    fn default() -> Self {
656        YearConfig {
657            min: 0,
658            max: 3000,
659            expected: IsExpected::Maybe,
660            default: None,
661            two_digit_expansion: TwoDigitYearExpansion::default(),
662            single_digit_year_expansion: false,
663        }
664    }
665}
666
667impl YearConfig {
668    /// Set the valid year range.
669    ///
670    /// # Panics
671    ///
672    /// Panics if `min > max`. Use [`YearConfig::try_with_range`] when the
673    /// values come from dynamic input and you need to handle the error.
674    ///
675    /// ```
676    /// use partial_date::models::YearConfig;
677    ///
678    /// let config = YearConfig::default().with_range(1760, 1840);
679    /// ```
680    pub fn with_range(self, min: i32, max: i32) -> Self {
681        assert!(
682            min <= max,
683            "YearConfig::with_range min ({min}) must not exceed max ({max})"
684        );
685        YearConfig { min, max, ..self }
686    }
687
688    /// Set the valid year range, returning `Err` if `min > max`.
689    ///
690    /// Use this when the range values come from dynamic input. For
691    /// known-valid static values, prefer [`YearConfig::with_range`].
692    pub fn try_with_range(self, min: i32, max: i32) -> Result<Self, ConfigRangeError> {
693        if min > max {
694            return Err(ConfigRangeError::MinExceedsMax { min, max });
695        }
696        Ok(YearConfig { min, max, ..self })
697    }
698
699    /// Set whether a year component is expected in the input.
700    ///
701    /// ```
702    /// use partial_date::models::{IsExpected, YearConfig};
703    ///
704    /// let config = YearConfig::default().with_expected(IsExpected::Yes);
705    /// ```
706    pub fn with_expected(self, expected: IsExpected) -> Self {
707        YearConfig { expected, ..self }
708    }
709
710    /// Set the default year value to use when no year is found in the input.
711    ///
712    /// ```
713    /// use partial_date::models::YearConfig;
714    ///
715    /// let config = YearConfig::default().with_default(2025);
716    /// ```
717    pub fn with_default(self, default: i32) -> Self {
718        YearConfig {
719            default: Some(default),
720            ..self
721        }
722    }
723
724    /// Set the two-digit year expansion strategy.
725    ///
726    /// ```
727    /// use partial_date::models::{
728    ///     Century, SlidingWindowPivot, TwoDigitYearExpansion, YearConfig,
729    /// };
730    ///
731    /// // All two-digit years map to the 2000s.
732    /// let config = YearConfig::default()
733    ///     .with_two_digit_expansion(TwoDigitYearExpansion::Always(Century::new(2000)));
734    ///
735    /// // Industrial Revolution era window.
736    /// let config = YearConfig::default()
737    ///     .with_range(1760, 1840)
738    ///     .with_two_digit_expansion(TwoDigitYearExpansion::SlidingWindow {
739    ///         earliest_year: 1750,
740    ///         pivot: SlidingWindowPivot::new(50),
741    ///     });
742    /// ```
743    pub fn with_two_digit_expansion(self, two_digit_expansion: TwoDigitYearExpansion) -> Self {
744        YearConfig {
745            two_digit_expansion,
746            ..self
747        }
748    }
749
750    /// Set whether single-digit tokens are expanded as two-digit years.
751    ///
752    /// When `true`, a single-digit token (e.g. `5`) is treated as `05` and
753    /// then expanded according to the configured
754    /// [`YearConfig::two_digit_expansion`] strategy.
755    ///
756    /// ```
757    /// use partial_date::models::YearConfig;
758    ///
759    /// let config = YearConfig::default().with_single_digit_expansion(true);
760    /// ```
761    pub fn with_single_digit_expansion(self, enabled: bool) -> Self {
762        YearConfig {
763            single_digit_year_expansion: enabled,
764            ..self
765        }
766    }
767}
768
/// One of the three parts of a date: day, month, or year.
///
/// [`ComponentOrder`] uses this to describe the positional ordering of
/// components in structured (numeric) date input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateComponent {
    /// The day-of-month value (1–31).
    Day,
    /// The month value (1–12).
    Month,
    /// The year value.
    Year,
}
782
783/// The expected ordering of date components in positional (numeric) input.
784///
785/// For example, `01/06/24` is ambiguous — a `ComponentOrder` of
786/// `[Day, Month, Year]` interprets it as 1 June 2024, while
787/// `[Month, Day, Year]` gives 6 January 2024.
788///
789/// For unambiguous inputs (e.g. `31/06/24`) the correct interpretation
790/// can always be determined regardless of this setting.
791///
792/// All three components must be present and each must appear exactly once.
793/// Construct with [`ComponentOrder::new`] to enforce this invariant, or use
794/// [`ComponentOrder::default`] for the standard Day/Month/Year order.
795#[derive(Debug, Clone, Copy, PartialEq, Eq)]
796pub struct ComponentOrder {
797    /// The component expected in the first position.
798    pub first: DateComponent,
799    /// The component expected in the second position.
800    pub second: DateComponent,
801    /// The component expected in the third position.
802    pub third: DateComponent,
803}
804
805/// Errors returned by [`ComponentOrder::new`] when validation fails.
806#[derive(Debug, Clone, PartialEq, Eq)]
807pub enum ComponentOrderError {
808    /// The same component appears more than once in the order.
809    DuplicateComponent(DateComponent),
810}
811
812impl ComponentOrder {
813    /// Create a new `ComponentOrder`, returning `Err` if any component is
814    /// duplicated (which also implies another is missing).
815    pub fn new(
816        first: DateComponent,
817        second: DateComponent,
818        third: DateComponent,
819    ) -> Result<Self, ComponentOrderError> {
820        if first == second {
821            return Err(ComponentOrderError::DuplicateComponent(first));
822        }
823        if first == third {
824            return Err(ComponentOrderError::DuplicateComponent(first));
825        }
826        if second == third {
827            return Err(ComponentOrderError::DuplicateComponent(second));
828        }
829        Ok(ComponentOrder {
830            first,
831            second,
832            third,
833        })
834    }
835}
836
837impl Default for ComponentOrder {
838    /// The default order is Day → Month → Year (e.g. `DD/MM/YYYY`).
839    fn default() -> Self {
840        ComponentOrder {
841            first: DateComponent::Day,
842            second: DateComponent::Month,
843            third: DateComponent::Year,
844        }
845    }
846}
847
848/// Top-level configuration for the extractor.
849///
850/// The extractor always tries all standard separators (`/`, `-`, `.`, `,`,
851/// `\`, and whitespace) automatically — no separator needs to be specified.
852/// Use [`Config::no_separator`] to enable parsing of fully concatenated
853/// date strings (e.g. `"25122024"`), and [`Config::extra_separators`] to
854/// add custom separator strings (e.g. `"||"`, `" - "`).
855///
856/// Construct via [`Config::default()`] and override only the fields you need,
857/// or build a fully custom config by setting each field explicitly.
858#[derive(Debug, Clone)]
859pub struct Config {
860    /// Configuration for day extraction.
861    pub day: DayConfig,
862    /// Configuration for month extraction.
863    pub month: MonthConfig,
864    /// Configuration for year extraction.
865    pub year: YearConfig,
866    /// The expected ordering of date components for positional (numeric) inputs.
867    /// Default: Day → Month → Year. See [`ComponentOrder`].
868    pub component_order: ComponentOrder,
869    /// When `true`, the extractor also attempts to parse fully concatenated
870    /// date strings with no separator (e.g. `"25122024"`). Default: `false`.
871    pub no_separator: bool,
872    /// Additional custom separator strings to try alongside the standard set.
873    /// Default: empty. Example: `vec!["||".to_string(), " - ".to_string()]`.
874    pub extra_separators: Vec<String>,
875    /// When `true`, the tokeniser substitutes the letter `O` (upper or lower
876    /// case) for the digit `0` inside tokens that consist entirely of digits
877    /// and the letter O — for example `"2O24"` is treated as `"2024"`.
878    ///
879    /// This handles OCR and keyboard-entry errors where the letter O is typed
880    /// in place of zero. The substitution is applied only to tokens that would
881    /// otherwise be entirely numeric-with-O; it is never applied when the O
882    /// appears as part of a longer alphabetic run (e.g. `"7october"` — the
883    /// `"october"` portion is left as-is and classified as a month name).
884    ///
885    /// Default: `true`.
886    pub letter_o_substitution: bool,
887    //TODO: Add a Config section for fuzzy matching options, include letter o substitution in that config
888}
889
890impl Default for Config {
891    fn default() -> Self {
892        Config {
893            day: DayConfig::default(),
894            month: MonthConfig::default(),
895            year: YearConfig::default(),
896            component_order: ComponentOrder::default(),
897            no_separator: false,
898            extra_separators: Vec::new(),
899            letter_o_substitution: true,
900        }
901    }
902}
903
904impl Config {
905    /// Set the day extraction configuration.
906    ///
907    /// ```
908    /// use partial_date::models::{Config, DayConfig, IsExpected};
909    ///
910    /// let config = Config::default()
911    ///     .with_day(DayConfig::default().with_range(1, 28).with_expected(IsExpected::Yes));
912    /// ```
913    pub fn with_day(self, day: DayConfig) -> Self {
914        Config { day, ..self }
915    }
916
917    /// Set the month extraction configuration.
918    ///
919    /// ```
920    /// use partial_date::models::{Config, IsExpected, MonthConfig};
921    ///
922    /// let config = Config::default()
923    ///     .with_month(MonthConfig::default().with_expected(IsExpected::Yes));
924    /// ```
925    pub fn with_month(self, month: MonthConfig) -> Self {
926        Config { month, ..self }
927    }
928
929    /// Set the year extraction configuration.
930    ///
931    /// ```
932    /// use partial_date::models::{Config, IsExpected, YearConfig};
933    ///
934    /// let config = Config::default()
935    ///     .with_year(YearConfig::default().with_range(1760, 1840).with_expected(IsExpected::Yes));
936    /// ```
937    pub fn with_year(self, year: YearConfig) -> Self {
938        Config { year, ..self }
939    }
940
941    /// Set the expected ordering of date components for positional inputs.
942    ///
943    /// ```
944    /// use partial_date::models::{ComponentOrder, Config, DateComponent};
945    ///
946    /// let config = Config::default().with_component_order(
947    ///     ComponentOrder::new(
948    ///         DateComponent::Month,
949    ///         DateComponent::Day,
950    ///         DateComponent::Year,
951    ///     )
952    ///     .unwrap(),
953    /// );
954    /// ```
955    pub fn with_component_order(self, component_order: ComponentOrder) -> Self {
956        Config {
957            component_order,
958            ..self
959        }
960    }
961
962    /// Enable or disable no-separator parsing (e.g. `"25122024"`).
963    ///
964    /// ```
965    /// use partial_date::models::Config;
966    ///
967    /// let config = Config::default().with_no_separator(true);
968    /// ```
969    pub fn with_no_separator(self, no_separator: bool) -> Self {
970        Config {
971            no_separator,
972            ..self
973        }
974    }
975
976    /// Set additional custom separator strings to try alongside the standard
977    /// set (`/`, `-`, `.`, `,`, `\`, and whitespace).
978    ///
979    /// ```
980    /// use partial_date::models::Config;
981    ///
982    /// let config = Config::default()
983    ///     .with_extra_separators(vec!["||".to_string(), " - ".to_string()]);
984    /// ```
985    pub fn with_extra_separators(self, extra_separators: Vec<String>) -> Self {
986        Config {
987            extra_separators,
988            ..self
989        }
990    }
991
992    /// Enable or disable substitution of the letter `O` for the digit `0`.
993    ///
994    /// ```
995    /// use partial_date::models::Config;
996    ///
997    /// let config = Config::default().with_letter_o_substitution(false);
998    /// ```
999    pub fn with_letter_o_substitution(self, letter_o_substitution: bool) -> Self {
1000        Config {
1001            letter_o_substitution,
1002            ..self
1003        }
1004    }
1005}
1006
1007// ---------------------------------------------------------------------------
1008// Input type
1009// ---------------------------------------------------------------------------
1010
1011/// Input to the partial date extractor.
1012#[derive(Debug, Clone)]
1013pub struct Input {
1014    /// The raw text from which a date should be extracted.
1015    pub utterance: String,
1016    /// Per-call config override. Falls back to the library default when `None`.
1017    pub config: Option<Config>,
1018}
1019
/// A calendar month identified by name, as recognised in natural-language
/// input.
///
/// ## Conversions
///
/// A `MonthName` can be built from a string or from a number:
///
/// ```
/// use partial_date::models::{MonthName, MonthNameError};
/// use std::convert::TryFrom;
///
/// // From a name string (full, abbreviated, or unambiguous prefix)
/// assert_eq!(MonthName::try_from("October"), Ok(MonthName::October));
/// assert_eq!(MonthName::try_from("oct"),     Ok(MonthName::October));
/// assert_eq!(MonthName::try_from("Octo"),    Ok(MonthName::October));
///
/// // From a numeric string
/// assert_eq!(MonthName::try_from("10"), Ok(MonthName::October));
///
/// // From a u8
/// assert_eq!(MonthName::try_from(10_u8), Ok(MonthName::October));
///
/// // Errors
/// assert_eq!(MonthName::try_from(0_u8),  Err(MonthNameError::NumberOutOfRange(0)));
/// assert_eq!(MonthName::try_from(13_u8), Err(MonthNameError::NumberOutOfRange(13)));
/// assert_eq!(MonthName::try_from("Xyz"), Err(MonthNameError::UnrecognisedName));
/// assert_eq!(MonthName::try_from("5x"),  Err(MonthNameError::NotAMonth));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MonthName {
    January,
    February,
    March,
    April,
    May,
    June,
    July,
    August,
    September,
    October,
    November,
    December,
}
1062
1063impl MonthName {
1064    /// Return the calendar number of this month (1 = January … 12 = December).
1065    ///
1066    /// ```
1067    /// use partial_date::models::MonthName;
1068    /// assert_eq!(MonthName::January.number(), 1);
1069    /// assert_eq!(MonthName::December.number(), 12);
1070    /// ```
1071    pub fn number(self) -> u8 {
1072        match self {
1073            MonthName::January => 1,
1074            MonthName::February => 2,
1075            MonthName::March => 3,
1076            MonthName::April => 4,
1077            MonthName::May => 5,
1078            MonthName::June => 6,
1079            MonthName::July => 7,
1080            MonthName::August => 8,
1081            MonthName::September => 9,
1082            MonthName::October => 10,
1083            MonthName::November => 11,
1084            MonthName::December => 12,
1085        }
1086    }
1087}
1088
/// Reasons a conversion into [`MonthName`] can fail.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MonthNameError {
    /// The input was purely alphabetic but matched no known month name,
    /// abbreviation, or unambiguous prefix.
    UnrecognisedName,
    /// The input was a number that does not lie in 1–12; the payload is the
    /// rejected value.
    NumberOutOfRange(u8),
    /// The input was neither purely alphabetic nor purely numeric
    /// (for example `"5x"` or `"jan2"`).
    NotAMonth,
}
1101
1102/// Convert a month number (`1` = January … `12` = December) into a
1103/// [`MonthName`].
1104///
1105/// Returns [`MonthNameError::NumberOutOfRange`] for any value outside 1–12.
1106impl TryFrom<u8> for MonthName {
1107    type Error = MonthNameError;
1108
1109    fn try_from(n: u8) -> Result<Self, Self::Error> {
1110        match n {
1111            1 => Ok(MonthName::January),
1112            2 => Ok(MonthName::February),
1113            3 => Ok(MonthName::March),
1114            4 => Ok(MonthName::April),
1115            5 => Ok(MonthName::May),
1116            6 => Ok(MonthName::June),
1117            7 => Ok(MonthName::July),
1118            8 => Ok(MonthName::August),
1119            9 => Ok(MonthName::September),
1120            10 => Ok(MonthName::October),
1121            11 => Ok(MonthName::November),
1122            12 => Ok(MonthName::December),
1123            _ => Err(MonthNameError::NumberOutOfRange(n)),
1124        }
1125    }
1126}
1127
1128/// Convert a string into a [`MonthName`].
1129///
1130/// Three strategies are tried in order:
1131///
1132/// 1. **Alphabetic match** — if every character is ASCII alphabetic (after
1133///    stripping a trailing `.`), the lowercased string is compared against all
1134///    full names, standard 3-letter abbreviations, and unambiguous longer
1135///    prefixes.
1136///
1137/// 2. **Fuzzy match** — if no exact or prefix match was found, the
1138///    Levenshtein ratio is computed against every full month name.  The
1139///    closest match is accepted when its ratio is ≥ 0.6 and it is
1140///    unambiguously the best candidate (no tie).  Returns
1141///    [`MonthNameError::UnrecognisedName`] when no candidate passes.
1142///
1143/// 3. **Numeric match** — if every character is an ASCII digit, the value is
1144///    parsed as a `u8` and forwarded to [`TryFrom<u8>`].  Returns
1145///    [`MonthNameError::NumberOutOfRange`] when the number is outside 1–12.
1146///
1147/// If the string is neither purely alphabetic nor purely numeric (e.g.
1148/// `"jan2"` or `"5x"`), [`MonthNameError::NotAMonth`] is returned.
1149impl TryFrom<&str> for MonthName {
1150    type Error = MonthNameError;
1151
1152    fn try_from(s: &str) -> Result<Self, Self::Error> {
1153        // Strip a trailing dot before classification (handles "Jan.", "Feb.").
1154        let s = s.strip_suffix('.').unwrap_or(s);
1155
1156        if s.is_empty() {
1157            return Err(MonthNameError::NotAMonth);
1158        }
1159
1160        if s.chars().all(|c| c.is_ascii_alphabetic()) {
1161            // --- Alphabetic path ---
1162            let lower = s.to_ascii_lowercase();
1163            match_month_name_str(lower.as_str())
1164        } else if s.chars().all(|c| c.is_ascii_digit()) {
1165            // --- Numeric path ---
1166            // A leading-zero number like "06" parses to 6, which is valid.
1167            // Values > 255 would overflow u8::MAX; treat them as out-of-range.
1168            let n: u8 = s.parse().map_err(|_| MonthNameError::NumberOutOfRange(0))?;
1169            MonthName::try_from(n)
1170        } else {
1171            Err(MonthNameError::NotAMonth)
1172        }
1173    }
1174}
1175
1176/// All twelve full month names paired with their [`MonthName`] variant,
1177/// used for both prefix and fuzzy matching.
1178const FULL_MONTH_NAMES: &[(&str, MonthName)] = &[
1179    ("january", MonthName::January),
1180    ("february", MonthName::February),
1181    ("march", MonthName::March),
1182    ("april", MonthName::April),
1183    ("may", MonthName::May),
1184    ("june", MonthName::June),
1185    ("july", MonthName::July),
1186    ("august", MonthName::August),
1187    ("september", MonthName::September),
1188    ("october", MonthName::October),
1189    ("november", MonthName::November),
1190    ("december", MonthName::December),
1191];
1192
/// Lowest Levenshtein ratio at which a fuzzy month-name match is accepted.
///
/// As a rule of thumb, 0.6 tolerates roughly 2 edits in a 5-character word
/// or 1 edit in a 3-character word (the exact figures depend on how the
/// ratio is defined). Empirically this admits all known real-world
/// misspellings while still rejecting clearly unrelated words such as
/// `"Foo"` or `"Friday"`.
const FUZZY_MATCH_THRESHOLD: f32 = 0.6;
1200
1201/// Match an already-lowercased, purely-alphabetic string against all known
1202/// month names, abbreviations, and unambiguous prefixes, falling back to
1203/// fuzzy (Levenshtein ratio) matching when no exact or prefix match is found.
1204fn match_month_name_str(lower: &str) -> Result<MonthName, MonthNameError> {
1205    // --- 1. Exact match: full names and standard 3-letter abbreviations ---
1206    let exact = match lower {
1207        "january" | "jan" => Some(MonthName::January),
1208        "february" | "feb" => Some(MonthName::February),
1209        "march" | "mar" => Some(MonthName::March),
1210        "april" | "apr" => Some(MonthName::April),
1211        "may" => Some(MonthName::May),
1212        "june" | "jun" => Some(MonthName::June),
1213        "july" | "jul" => Some(MonthName::July),
1214        "august" | "aug" => Some(MonthName::August),
1215        "september" | "sep" => Some(MonthName::September),
1216        "october" | "oct" => Some(MonthName::October),
1217        "november" | "nov" => Some(MonthName::November),
1218        "december" | "dec" => Some(MonthName::December),
1219        _ => None,
1220    };
1221
1222    if let Some(month) = exact {
1223        return Ok(month);
1224    }
1225
1226    // --- 2. Unambiguous prefix match (≥ 4 characters) ---
1227    if lower.len() >= 4 {
1228        let mut found: Option<MonthName> = None;
1229        for (full_name, variant) in FULL_MONTH_NAMES {
1230            if full_name.starts_with(lower) {
1231                if found.is_some() {
1232                    // More than one month starts with this prefix — ambiguous;
1233                    // fall through to fuzzy matching below.
1234                    found = None;
1235                    break;
1236                }
1237                found = Some(*variant);
1238            }
1239        }
1240        if let Some(month) = found {
1241            return Ok(month);
1242        }
1243    }
1244
1245    // --- 3. Fuzzy match via Levenshtein ratio ---
1246    fuzzy_match_month(lower)
1247}
1248
1249/// Find the best-matching month name for `lower` using Levenshtein ratio.
1250///
1251/// Returns the matched [`MonthName`] if exactly one candidate scores above
1252/// [`FUZZY_MATCH_THRESHOLD`] and no other candidate ties it.  Returns
1253/// [`MonthNameError::UnrecognisedName`] otherwise.
1254fn fuzzy_match_month(lower: &str) -> Result<MonthName, MonthNameError> {
1255    use crate::levenshtein::levenshtein_ratio;
1256
1257    let mut best_ratio: f32 = 0.0;
1258    let mut best_month: Option<MonthName> = None;
1259    let mut is_tied = false;
1260
1261    for (full_name, variant) in FULL_MONTH_NAMES {
1262        let ratio = levenshtein_ratio(lower, full_name);
1263        if ratio > best_ratio {
1264            best_ratio = ratio;
1265            best_month = Some(*variant);
1266            is_tied = false;
1267        } else if (ratio - best_ratio).abs() < f32::EPSILON {
1268            // Two candidates have the same ratio — ambiguous.
1269            is_tied = true;
1270        }
1271    }
1272
1273    if best_ratio >= FUZZY_MATCH_THRESHOLD && !is_tied {
1274        best_month.ok_or(MonthNameError::UnrecognisedName)
1275    } else {
1276        Err(MonthNameError::UnrecognisedName)
1277    }
1278}
1279
1280// ---------------------------------------------------------------------------
1281// Tokenisation types
1282// ---------------------------------------------------------------------------
1283
1284/// A single meaningful chunk produced by [`crate::extract::tokenise`].
1285///
1286/// The tokeniser strips separator characters and noise words, leaving only
1287/// tokens that *could* contribute to a date component. At most three tokens
1288/// are returned (one per date component: day, month, year).
1289///
1290/// Each variant stores the already-parsed value rather than the raw source
1291/// text, so consumers can use the token directly without re-parsing.
1292#[derive(Debug, Clone, PartialEq, Eq)]
1293pub enum Token {
1294    /// A parsed integer together with the number of digits in the original
1295    /// source string.
1296    ///
1297    /// The digit count is required for year disambiguation: `"24"` (2 digits)
1298    /// must be expanded via [`TwoDigitYearExpansion`], while `"2024"` (4
1299    /// digits) is used as-is.  Three-digit and five-digit numbers are never
1300    /// valid date components.
1301    ///
1302    /// Uses `i16` for the value because the full year range required by the
1303    /// spec (0–3000) fits within `i16::MAX` (32,767), and day/month values
1304    /// are far smaller.
1305    Numeric(i16, u8),
1306    /// The numeric day extracted from an ordinal like `"19th"` or `"1st"`,
1307    /// with the suffix already stripped.
1308    OrdinalDay(u8),
1309    /// A resolved [`MonthName`] variant, matched from a full name,
1310    /// abbreviation, unambiguous prefix, or fuzzy misspelling.
1311    MonthName(MonthName),
1312}