partial_date/models.rs
1//! Core types for the partial-date library.
2//!
3//! This module contains all the structs and enums that describe inputs,
4//! configuration, and extraction results.
5
6// ---------------------------------------------------------------------------
7// Result types
8// ---------------------------------------------------------------------------
9
/// Result of trying to extract one date component (day, month, or year).
///
/// `T` is the value type of the component being extracted.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Extracted<T> {
    /// The input itself supplied the value.
    Found(T),
    /// The input supplied no value and no default was configured.
    NotFound,
    /// The input supplied no value, so the configured default was used.
    Defaulted(T),
}
20
21impl<T> Extracted<T> {
22 /// Returns `true` if the value was found in the input.
23 pub fn is_found(&self) -> bool {
24 matches!(self, Extracted::Found(_))
25 }
26
27 /// Returns `true` if no value was found and no default applied.
28 pub fn is_not_found(&self) -> bool {
29 matches!(self, Extracted::NotFound)
30 }
31
32 /// Returns `true` if the value was defaulted.
33 pub fn is_defaulted(&self) -> bool {
34 matches!(self, Extracted::Defaulted(_))
35 }
36
37 /// Returns the inner value regardless of whether it was found or defaulted.
38 /// Returns `None` if `NotFound`.
39 pub fn value(&self) -> Option<&T> {
40 match self {
41 Extracted::Found(v) | Extracted::Defaulted(v) => Some(v),
42 Extracted::NotFound => None,
43 }
44 }
45}
46
47/// A fully-resolved (possibly partial) date returned by the extractor.
48#[derive(Debug)]
49pub struct PartialDate {
50 pub day: Day,
51 pub month: Month,
52 pub year: Year,
53}
54
55/// Extracted day component (1–31).
56#[derive(Debug)]
57pub struct Day {
58 pub value: Extracted<u8>,
59}
60
61/// Extracted month component (1–12).
62#[derive(Debug)]
63pub struct Month {
64 pub number: Extracted<u8>,
65 pub name: Extracted<MonthName>,
66}
67
68/// Extracted year component.
69///
70/// Uses `i32` to accommodate the full range required by the spec (0–3000) and
71/// to leave room for historical (negative / BC) years if needed in future.
72#[derive(Debug)]
73pub struct Year {
74 pub value: Extracted<i32>,
75}
76
77// ---------------------------------------------------------------------------
78// Configuration types
79// ---------------------------------------------------------------------------
80
/// How strongly a date component is expected to appear in the input.
///
/// This hint helps disambiguation when one token could belong to more than
/// one component (e.g. `12/06` could be DD/MM or MM/DD).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum IsExpected {
    /// The component should definitely be present.
    Yes,
    /// The component should definitely be absent.
    No,
    /// No strong expectation in either direction. This is the default.
    #[default]
    Maybe,
}
95
96/// Configuration for day extraction.
97#[derive(Debug, Clone)]
98pub struct DayConfig {
99 /// Minimum valid day value (inclusive). Default: `1`.
100 pub min: u8,
101 /// Maximum valid day value (inclusive). Default: `31`.
102 pub max: u8,
103 /// Whether a day component is expected in the input.
104 pub expected: IsExpected,
105 /// Default day value to use when the day is not found, if any.
106 pub default: Option<u8>,
107}
108
// TODO: Should these methods live on the `Day`, `Month` and `Year` structs
// rather than on the configs? The only obstacle is `min`/`max`, which could be
// handled by attaching the configs to the structs. That might make the structs
// too messy to return inside `PartialDate`; in that case, introduce an
// intermediate struct (e.g. `DayCandidate`) that is mapped to `Day` via
// `From`/`Into` when the final `PartialDate` is assembled, so the config is
// never exposed in the value returned to users of the library.
117impl DayConfig {
118 /// Return `Some(value as u8)` when `value` is a plausible day for this
119 /// config, or `None` when it is not.
120 ///
121 /// A value is a plausible day when:
122 /// - `digit_count` is not 4 (four-digit numbers cannot be days).
123 /// - The value is within the universal day range 1–31.
124 /// - The value falls within the caller-configured `min`/`max` bounds.
125 pub fn try_as_day_candidate(&self, value: i16, digit_count: u8) -> Option<u8> {
126 if digit_count == 4 {
127 return None;
128 }
129 let as_u8 = u8::try_from(value).ok()?;
130 if (1..=31).contains(&value) && (self.min..=self.max).contains(&as_u8) {
131 Some(as_u8)
132 } else {
133 None
134 }
135 }
136}
137
138impl Default for DayConfig {
139 fn default() -> Self {
140 DayConfig {
141 min: 1,
142 max: 31,
143 expected: IsExpected::Maybe,
144 default: None,
145 }
146 }
147}
148
149impl DayConfig {
150 /// Set the valid day range.
151 ///
152 /// # Panics
153 ///
154 /// Panics if `min > max`. Use [`DayConfig::try_with_range`] when the
155 /// values come from dynamic input and you need to handle the error.
156 ///
157 /// ```
158 /// use partial_date::models::DayConfig;
159 ///
160 /// let config = DayConfig::default().with_range(1, 28);
161 /// ```
162 pub fn with_range(self, min: u8, max: u8) -> Self {
163 assert!(
164 min <= max,
165 "DayConfig::with_range min ({min}) must not exceed max ({max})"
166 );
167 DayConfig { min, max, ..self }
168 }
169
170 /// Set the valid day range, returning `Err` if `min > max`.
171 ///
172 /// Use this when the range values come from dynamic input. For
173 /// known-valid static values, prefer [`DayConfig::with_range`].
174 pub fn try_with_range(self, min: u8, max: u8) -> Result<Self, ConfigRangeError> {
175 if min > max {
176 return Err(ConfigRangeError::MinExceedsMax {
177 min: min as i32,
178 max: max as i32,
179 });
180 }
181 Ok(DayConfig { min, max, ..self })
182 }
183
184 /// Set whether a day component is expected in the input.
185 ///
186 /// ```
187 /// use partial_date::models::{DayConfig, IsExpected};
188 ///
189 /// let config = DayConfig::default().with_expected(IsExpected::Yes);
190 /// ```
191 pub fn with_expected(self, expected: IsExpected) -> Self {
192 DayConfig { expected, ..self }
193 }
194
195 /// Set the default day value to use when no day is found in the input.
196 ///
197 /// ```
198 /// use partial_date::models::DayConfig;
199 ///
200 /// let config = DayConfig::default().with_default(1);
201 /// ```
202 pub fn with_default(self, default: u8) -> Self {
203 DayConfig {
204 default: Some(default),
205 ..self
206 }
207 }
208}
209
/// Error returned by [`DayConfig::try_with_range`], [`MonthConfig::try_with_range`],
/// and [`YearConfig::try_with_range`] when the requested range is invalid.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// printed and propagated with `?` into generic error types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ConfigRangeError {
    /// The minimum value is greater than the maximum value.
    MinExceedsMax { min: i32, max: i32 },
}

impl std::fmt::Display for ConfigRangeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ConfigRangeError::MinExceedsMax { min, max } => {
                write!(f, "invalid range: min ({min}) must not exceed max ({max})")
            }
        }
    }
}

impl std::error::Error for ConfigRangeError {}
217
218/// Configuration for month extraction.
219#[derive(Debug, Clone)]
220pub struct MonthConfig {
221 /// Minimum valid month value (inclusive). Default: `1`.
222 pub min: u8,
223 /// Maximum valid month value (inclusive). Default: `12`.
224 pub max: u8,
225 /// Whether a month component is expected in the input.
226 pub expected: IsExpected,
227 /// Default month value to use when the month is not found, if any.
228 pub default: Option<u8>,
229}
230
231impl MonthConfig {
232 /// Return `Some(value as u8)` when `value` is a plausible month for this
233 /// config, or `None` when it is not.
234 ///
235 /// A value is a plausible month when:
236 /// - `digit_count` is not 4 (four-digit numbers cannot be months).
237 /// - The value is within the universal month range 1–12.
238 /// - The value falls within the caller-configured `min`/`max` bounds.
239 pub fn try_as_month_candidate(&self, value: i16, digit_count: u8) -> Option<u8> {
240 if digit_count == 4 {
241 return None;
242 }
243 let as_u8 = u8::try_from(value).ok()?;
244 if (1..=12).contains(&value) && (self.min..=self.max).contains(&as_u8) {
245 Some(as_u8)
246 } else {
247 None
248 }
249 }
250}
251
252impl Default for MonthConfig {
253 fn default() -> Self {
254 MonthConfig {
255 min: 1,
256 max: 12,
257 expected: IsExpected::Maybe,
258 default: None,
259 }
260 }
261}
262
263impl MonthConfig {
264 /// Set the valid month range.
265 ///
266 /// # Panics
267 ///
268 /// Panics if `min > max`. Use [`MonthConfig::try_with_range`] when the
269 /// values come from dynamic input and you need to handle the error.
270 ///
271 /// ```
272 /// use partial_date::models::MonthConfig;
273 ///
274 /// let config = MonthConfig::default().with_range(1, 6);
275 /// ```
276 pub fn with_range(self, min: u8, max: u8) -> Self {
277 assert!(
278 min <= max,
279 "MonthConfig::with_range min ({min}) must not exceed max ({max})"
280 );
281 MonthConfig { min, max, ..self }
282 }
283
284 /// Set the valid month range, returning `Err` if `min > max`.
285 ///
286 /// Use this when the range values come from dynamic input. For
287 /// known-valid static values, prefer [`MonthConfig::with_range`].
288 pub fn try_with_range(self, min: u8, max: u8) -> Result<Self, ConfigRangeError> {
289 if min > max {
290 return Err(ConfigRangeError::MinExceedsMax {
291 min: min as i32,
292 max: max as i32,
293 });
294 }
295 Ok(MonthConfig { min, max, ..self })
296 }
297
298 /// Set whether a month component is expected in the input.
299 ///
300 /// ```
301 /// use partial_date::models::{IsExpected, MonthConfig};
302 ///
303 /// let config = MonthConfig::default().with_expected(IsExpected::Yes);
304 /// ```
305 pub fn with_expected(self, expected: IsExpected) -> Self {
306 MonthConfig { expected, ..self }
307 }
308
309 /// Set the default month value to use when no month is found in the input.
310 ///
311 /// ```
312 /// use partial_date::models::MonthConfig;
313 ///
314 /// let config = MonthConfig::default().with_default(1);
315 /// ```
316 pub fn with_default(self, default: u8) -> Self {
317 MonthConfig {
318 default: Some(default),
319 ..self
320 }
321 }
322}
323
/// The pivot of a [`TwoDigitYearExpansion::SlidingWindow`].
///
/// A valid pivot lies in `1..=99`. With a pivot of `p`, the two-digit values
/// `0..=(p - 1)` land in the upper (more recent) part of the window and the
/// values `p..=99` land in the lower (earlier) part.
///
/// # Construction
///
/// [`SlidingWindowPivot::new`] suits known-valid static values and panics on
/// invalid input. [`SlidingWindowPivot::try_new`] suits values from dynamic
/// input, where the error has to be handled:
///
/// ```
/// use partial_date::models::SlidingWindowPivot;
///
/// // Known-valid — panics if the value is invalid.
/// let pivot = SlidingWindowPivot::new(50);
///
/// // Dynamic input — returns Result.
/// let pivot = SlidingWindowPivot::try_new(50).unwrap();
///
/// // Via TryFrom.
/// let pivot: SlidingWindowPivot = 50_u8.try_into().unwrap();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SlidingWindowPivot(u8);
350
351impl SlidingWindowPivot {
352 /// Create a new `SlidingWindowPivot`.
353 ///
354 /// # Panics
355 ///
356 /// Panics if `pivot` is `0` or greater than `99`. Use
357 /// [`SlidingWindowPivot::try_new`] when the value comes from dynamic input
358 /// and you need to handle the error instead of panicking.
359 pub fn new(pivot: u8) -> Self {
360 assert!(
361 pivot > 0 && pivot <= 99,
362 "SlidingWindowPivot must be in the range 1–99, got {pivot}"
363 );
364 SlidingWindowPivot(pivot)
365 }
366
367 /// Create a new `SlidingWindowPivot`, returning `Err` if `pivot` is `0`
368 /// or greater than `99`.
369 ///
370 /// Use this when the pivot value comes from dynamic input (e.g. user
371 /// configuration, a config file). For known-valid static values, prefer
372 /// [`SlidingWindowPivot::new`].
373 pub fn try_new(pivot: u8) -> Result<Self, SlidingWindowPivotError> {
374 if pivot == 0 || pivot > 99 {
375 return Err(SlidingWindowPivotError::InvalidPivot(pivot));
376 }
377 Ok(SlidingWindowPivot(pivot))
378 }
379}
380
381impl TryFrom<u8> for SlidingWindowPivot {
382 type Error = SlidingWindowPivotError;
383
384 fn try_from(value: u8) -> Result<Self, Self::Error> {
385 SlidingWindowPivot::try_new(value)
386 }
387}
388
389impl From<SlidingWindowPivot> for u8 {
390 fn from(pivot: SlidingWindowPivot) -> u8 {
391 pivot.0
392 }
393}
394
/// Errors returned by [`SlidingWindowPivot::try_new`] when validation fails.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// printed and propagated with `?` into generic error types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SlidingWindowPivotError {
    /// The rejected pivot value; a valid pivot lies in the range `1–99`.
    InvalidPivot(u8),
}

impl std::fmt::Display for SlidingWindowPivotError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            SlidingWindowPivotError::InvalidPivot(pivot) => {
                write!(f, "sliding window pivot must be between 1 and 99, got {pivot}")
            }
        }
    }
}

impl std::error::Error for SlidingWindowPivotError {}
401
/// A year lying exactly on a century boundary, i.e. divisible by 100.
///
/// Paired with [`TwoDigitYearExpansion::Always`] to say that every two-digit
/// value belongs to one specific century. For example, `Century::new(1800)`
/// means `00 → 1800`, `34 → 1834`, `99 → 1899`.
///
/// # Construction
///
/// [`Century::new`] suits known-valid static values and panics on invalid
/// input. [`Century::try_new`] suits values from dynamic input, where the
/// error has to be handled:
///
/// ```
/// use partial_date::models::Century;
///
/// // Known-valid — panics if the value is not a century boundary.
/// let century = Century::new(1800);
///
/// // Dynamic input — returns Result.
/// let century = Century::try_new(2000).unwrap();
///
/// // Via TryFrom.
/// let century: Century = 2000_i32.try_into().unwrap();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Century(i32);
428
429impl Century {
430 /// Create a new `Century`.
431 ///
432 /// # Panics
433 ///
434 /// Panics if `year` is not divisible by `100`. Use [`Century::try_new`]
435 /// when the value comes from dynamic input and you need to handle the
436 /// error instead of panicking.
437 pub fn new(year: i32) -> Self {
438 assert!(
439 year % 100 == 0,
440 "Century must be divisible by 100, got {year}"
441 );
442 Century(year)
443 }
444
445 /// Create a new `Century`, returning `Err` if `year` is not divisible by
446 /// `100`.
447 ///
448 /// Use this when the year value comes from dynamic input (e.g. user
449 /// configuration, a config file). For known-valid static values, prefer
450 /// [`Century::new`].
451 pub fn try_new(year: i32) -> Result<Self, CenturyError> {
452 if year % 100 != 0 {
453 return Err(CenturyError::NotACenturyBoundary(year));
454 }
455 Ok(Century(year))
456 }
457}
458
459impl TryFrom<i32> for Century {
460 type Error = CenturyError;
461
462 fn try_from(value: i32) -> Result<Self, Self::Error> {
463 Century::try_new(value)
464 }
465}
466
467impl From<Century> for i32 {
468 fn from(century: Century) -> i32 {
469 century.0
470 }
471}
472
/// Errors returned by [`Century::try_new`] when validation fails.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// printed and propagated with `?` into generic error types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CenturyError {
    /// The rejected year; a valid value is divisible by `100` (e.g. `1800`,
    /// `2000`).
    NotACenturyBoundary(i32),
}

impl std::fmt::Display for CenturyError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            CenturyError::NotACenturyBoundary(year) => {
                write!(f, "century year must be divisible by 100, got {year}")
            }
        }
    }
}

impl std::error::Error for CenturyError {}
479
480/// Strategy for expanding two-digit years into four-digit years.
481///
482/// # Choosing a strategy
483///
484/// - Use [`SlidingWindow`] when two-digit years could span two adjacent
485/// centuries and you want to bias towards a particular era.
486/// - Use [`Always`] when all two-digit years belong to the same century
487/// without ambiguity (e.g. children's birthdays are all in the 2000s).
488/// - Use [`Literal`] when you want the two-digit value kept as-is (e.g.
489/// historical records where the year is genuinely in the range 0–99).
490///
491/// [`SlidingWindow`]: TwoDigitYearExpansion::SlidingWindow
492/// [`Always`]: TwoDigitYearExpansion::Always
493/// [`Literal`]: TwoDigitYearExpansion::Literal
494#[derive(Debug, Clone, Copy, PartialEq, Eq)]
495pub enum TwoDigitYearExpansion {
496 /// Splits the 100 possible two-digit values across two adjacent centuries.
497 ///
498 /// `earliest_year` is the smallest year the window can ever produce — it
499 /// is the year that two-digit value `pivot` maps to. Values
500 /// `pivot..=99` map to `earliest_year..=(earliest_year + (99 - pivot))`,
501 /// and values `0..(pivot)` map to
502 /// `(earliest_year + (100 - pivot))..(earliest_year + 99)`.
503 ///
504 /// # Example
505 ///
506 /// ```
507 /// use partial_date::models::{SlidingWindowPivot, TwoDigitYearExpansion};
508 ///
509 /// // 00–49 → 2000–2049, 50–99 → 1950–1999 (the default).
510 /// let expansion = TwoDigitYearExpansion::SlidingWindow {
511 /// earliest_year: 1950,
512 /// pivot: SlidingWindowPivot::new(50),
513 /// };
514 ///
515 /// // Industrial Revolution era: 00–49 → 1800–1849, 50–99 → 1750–1799.
516 /// let expansion = TwoDigitYearExpansion::SlidingWindow {
517 /// earliest_year: 1750,
518 /// pivot: SlidingWindowPivot::new(50),
519 /// };
520 /// ```
521 SlidingWindow {
522 /// The smallest year this window can produce (the year `pivot` maps
523 /// to). Must be chosen so that the full window
524 /// `[earliest_year, earliest_year + 99]` covers the values you
525 /// intend to accept. Use [`YearConfig::min`] and [`YearConfig::max`]
526 /// to reject any expanded years that fall outside your valid range.
527 earliest_year: i32,
528 /// The two-digit value at which the window wraps from the lower
529 /// (earlier) century to the upper (more recent) century.
530 pivot: SlidingWindowPivot,
531 },
532 /// Maps all two-digit values into a single century.
533 ///
534 /// `00` maps to the century start, `99` maps to `century + 99`.
535 ///
536 /// # Example
537 ///
538 /// ```
539 /// use partial_date::models::{Century, TwoDigitYearExpansion};
540 ///
541 /// // All two-digit years are in the 2000s: 00 → 2000, 34 → 2034.
542 /// let expansion = TwoDigitYearExpansion::Always(Century::new(2000));
543 ///
544 /// // All two-digit years are in the 1800s: 00 → 1800, 34 → 1834.
545 /// let expansion = TwoDigitYearExpansion::Always(Century::new(1800));
546 /// ```
547 Always(Century),
548 /// Return the two-digit value literally (e.g. `24` stays as `24`).
549 ///
550 /// Useful when processing historical records where the year genuinely
551 /// falls in the range `0–99`, or when you want to apply your own
552 /// post-processing.
553 Literal,
554}
555
556impl Default for TwoDigitYearExpansion {
557 /// The standard modern sliding window: `00–49 → 2000–2049`, `50–99 → 1950–1999`.
558 fn default() -> Self {
559 TwoDigitYearExpansion::SlidingWindow {
560 earliest_year: 1950,
561 pivot: SlidingWindowPivot(50),
562 }
563 }
564}
565
566/// Configuration for year extraction.
567#[derive(Debug, Clone)]
568pub struct YearConfig {
569 /// Minimum valid year value (inclusive). Default: `0`.
570 pub min: i32,
571 /// Maximum valid year value (inclusive). Default: `3000`.
572 pub max: i32,
573 /// Whether a year component is expected in the input.
574 pub expected: IsExpected,
575 /// Default year value to use when the year is not found, if any.
576 pub default: Option<i32>,
577 /// Strategy for expanding two-digit years. Default: [`TwoDigitYearExpansion::SlidingWindow`].
578 pub two_digit_expansion: TwoDigitYearExpansion,
579 /// When `true`, a single-digit token (`1`–`9`) is treated as a two-digit
580 /// year by prepending a zero — `5` becomes `05` — and then expanded
581 /// according to [`YearConfig::two_digit_expansion`].
582 ///
583 /// This option only applies when the other date components (day and month)
584 /// have already been filled by unambiguous tokens, so the interpreter can
585 /// confirm that the single digit is genuinely intended as a year.
586 ///
587 /// Default: `false`. Enable when processing inputs like `"1 January 5"`
588 /// where `5` means year AD 5 (literal) or year 2005 (sliding window).
589 pub single_digit_year_expansion: bool,
590}
591
592impl YearConfig {
593 /// Return the expanded year value when `value` (with `digit_count` original
594 /// digits) is a plausible year for this config, or `None` when it is not.
595 ///
596 /// - 4-digit values are used as-is.
597 /// - 3-digit values (100–999) are treated as literal years.
598 /// - 2-digit values are expanded according to [`TwoDigitYearExpansion`].
599 /// - 1-digit values are accepted only when
600 /// [`YearConfig::single_digit_year_expansion`] is `true`, in which case
601 /// `value` is treated as `0value` (e.g. `5` → `05`) and then expanded
602 /// using the same two-digit expansion strategy.
603 /// - All other digit counts return `None`.
604 ///
605 /// The expanded value must also fall within the configured `min`/`max`
606 /// bounds.
607 pub fn try_as_year_candidate(&self, value: i16, digit_count: u8) -> Option<i32> {
608 // Normalise single-digit values to their two-digit equivalent when the
609 // option is enabled, then fall through to the two-digit expansion path.
610 let (effective_value, effective_digit_count) =
611 if digit_count == 1 && self.single_digit_year_expansion {
612 // Prepend a zero: "5" → "05". The digit count is now 2.
613 (value, 2u8)
614 } else {
615 (value, digit_count)
616 };
617
618 let expanded = match effective_digit_count {
619 4 => effective_value as i32,
620 // 3-digit values (100–999) are treated as literal years: year 100,
621 // year 999, etc. This covers word-number inputs like "nine hundred
622 // ninety-nine" which replace to the 3-digit numeral 999.
623 3 => effective_value as i32,
624 2 => {
625 let raw = effective_value as i32;
626 match &self.two_digit_expansion {
627 TwoDigitYearExpansion::Literal => raw,
628 TwoDigitYearExpansion::Always(century) => i32::from(*century) + raw,
629 TwoDigitYearExpansion::SlidingWindow {
630 earliest_year,
631 pivot,
632 } => {
633 let pivot = u8::from(*pivot) as i32;
634 if raw < pivot {
635 // Upper (more recent) half: 0..(pivot-1)
636 earliest_year + (100 - pivot) + raw
637 } else {
638 // Lower (earlier) half: pivot..99
639 earliest_year + (raw - pivot)
640 }
641 }
642 }
643 }
644 _ => return None,
645 };
646 if expanded >= self.min && expanded <= self.max {
647 Some(expanded)
648 } else {
649 None
650 }
651 }
652}
653
654impl Default for YearConfig {
655 fn default() -> Self {
656 YearConfig {
657 min: 0,
658 max: 3000,
659 expected: IsExpected::Maybe,
660 default: None,
661 two_digit_expansion: TwoDigitYearExpansion::default(),
662 single_digit_year_expansion: false,
663 }
664 }
665}
666
667impl YearConfig {
668 /// Set the valid year range.
669 ///
670 /// # Panics
671 ///
672 /// Panics if `min > max`. Use [`YearConfig::try_with_range`] when the
673 /// values come from dynamic input and you need to handle the error.
674 ///
675 /// ```
676 /// use partial_date::models::YearConfig;
677 ///
678 /// let config = YearConfig::default().with_range(1760, 1840);
679 /// ```
680 pub fn with_range(self, min: i32, max: i32) -> Self {
681 assert!(
682 min <= max,
683 "YearConfig::with_range min ({min}) must not exceed max ({max})"
684 );
685 YearConfig { min, max, ..self }
686 }
687
688 /// Set the valid year range, returning `Err` if `min > max`.
689 ///
690 /// Use this when the range values come from dynamic input. For
691 /// known-valid static values, prefer [`YearConfig::with_range`].
692 pub fn try_with_range(self, min: i32, max: i32) -> Result<Self, ConfigRangeError> {
693 if min > max {
694 return Err(ConfigRangeError::MinExceedsMax { min, max });
695 }
696 Ok(YearConfig { min, max, ..self })
697 }
698
699 /// Set whether a year component is expected in the input.
700 ///
701 /// ```
702 /// use partial_date::models::{IsExpected, YearConfig};
703 ///
704 /// let config = YearConfig::default().with_expected(IsExpected::Yes);
705 /// ```
706 pub fn with_expected(self, expected: IsExpected) -> Self {
707 YearConfig { expected, ..self }
708 }
709
710 /// Set the default year value to use when no year is found in the input.
711 ///
712 /// ```
713 /// use partial_date::models::YearConfig;
714 ///
715 /// let config = YearConfig::default().with_default(2025);
716 /// ```
717 pub fn with_default(self, default: i32) -> Self {
718 YearConfig {
719 default: Some(default),
720 ..self
721 }
722 }
723
724 /// Set the two-digit year expansion strategy.
725 ///
726 /// ```
727 /// use partial_date::models::{
728 /// Century, SlidingWindowPivot, TwoDigitYearExpansion, YearConfig,
729 /// };
730 ///
731 /// // All two-digit years map to the 2000s.
732 /// let config = YearConfig::default()
733 /// .with_two_digit_expansion(TwoDigitYearExpansion::Always(Century::new(2000)));
734 ///
735 /// // Industrial Revolution era window.
736 /// let config = YearConfig::default()
737 /// .with_range(1760, 1840)
738 /// .with_two_digit_expansion(TwoDigitYearExpansion::SlidingWindow {
739 /// earliest_year: 1750,
740 /// pivot: SlidingWindowPivot::new(50),
741 /// });
742 /// ```
743 pub fn with_two_digit_expansion(self, two_digit_expansion: TwoDigitYearExpansion) -> Self {
744 YearConfig {
745 two_digit_expansion,
746 ..self
747 }
748 }
749
750 /// Set whether single-digit tokens are expanded as two-digit years.
751 ///
752 /// When `true`, a single-digit token (e.g. `5`) is treated as `05` and
753 /// then expanded according to the configured
754 /// [`YearConfig::two_digit_expansion`] strategy.
755 ///
756 /// ```
757 /// use partial_date::models::YearConfig;
758 ///
759 /// let config = YearConfig::default().with_single_digit_expansion(true);
760 /// ```
761 pub fn with_single_digit_expansion(self, enabled: bool) -> Self {
762 YearConfig {
763 single_digit_year_expansion: enabled,
764 ..self
765 }
766 }
767}
768
/// One of the three parts of a date: day, month, or year.
///
/// [`ComponentOrder`] uses it to describe the position of each part in
/// structured (numeric) date input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateComponent {
    /// The day of the month (1–31).
    Day,
    /// The month (1–12).
    Month,
    /// The year.
    Year,
}
782
783/// The expected ordering of date components in positional (numeric) input.
784///
785/// For example, `01/06/24` is ambiguous — a `ComponentOrder` of
786/// `[Day, Month, Year]` interprets it as 1 June 2024, while
787/// `[Month, Day, Year]` gives 6 January 2024.
788///
789/// For unambiguous inputs (e.g. `31/06/24`) the correct interpretation
790/// can always be determined regardless of this setting.
791///
792/// All three components must be present and each must appear exactly once.
793/// Construct with [`ComponentOrder::new`] to enforce this invariant, or use
794/// [`ComponentOrder::default`] for the standard Day/Month/Year order.
795#[derive(Debug, Clone, Copy, PartialEq, Eq)]
796pub struct ComponentOrder {
797 /// The component expected in the first position.
798 pub first: DateComponent,
799 /// The component expected in the second position.
800 pub second: DateComponent,
801 /// The component expected in the third position.
802 pub third: DateComponent,
803}
804
805/// Errors returned by [`ComponentOrder::new`] when validation fails.
806#[derive(Debug, Clone, PartialEq, Eq)]
807pub enum ComponentOrderError {
808 /// The same component appears more than once in the order.
809 DuplicateComponent(DateComponent),
810}
811
812impl ComponentOrder {
813 /// Create a new `ComponentOrder`, returning `Err` if any component is
814 /// duplicated (which also implies another is missing).
815 pub fn new(
816 first: DateComponent,
817 second: DateComponent,
818 third: DateComponent,
819 ) -> Result<Self, ComponentOrderError> {
820 if first == second {
821 return Err(ComponentOrderError::DuplicateComponent(first));
822 }
823 if first == third {
824 return Err(ComponentOrderError::DuplicateComponent(first));
825 }
826 if second == third {
827 return Err(ComponentOrderError::DuplicateComponent(second));
828 }
829 Ok(ComponentOrder {
830 first,
831 second,
832 third,
833 })
834 }
835}
836
837impl Default for ComponentOrder {
838 /// The default order is Day → Month → Year (e.g. `DD/MM/YYYY`).
839 fn default() -> Self {
840 ComponentOrder {
841 first: DateComponent::Day,
842 second: DateComponent::Month,
843 third: DateComponent::Year,
844 }
845 }
846}
847
848/// Top-level configuration for the extractor.
849///
850/// The extractor always tries all standard separators (`/`, `-`, `.`, `,`,
851/// `\`, and whitespace) automatically — no separator needs to be specified.
852/// Use [`Config::no_separator`] to enable parsing of fully concatenated
853/// date strings (e.g. `"25122024"`), and [`Config::extra_separators`] to
854/// add custom separator strings (e.g. `"||"`, `" - "`).
855///
856/// Construct via [`Config::default()`] and override only the fields you need,
857/// or build a fully custom config by setting each field explicitly.
858#[derive(Debug, Clone)]
859pub struct Config {
860 /// Configuration for day extraction.
861 pub day: DayConfig,
862 /// Configuration for month extraction.
863 pub month: MonthConfig,
864 /// Configuration for year extraction.
865 pub year: YearConfig,
866 /// The expected ordering of date components for positional (numeric) inputs.
867 /// Default: Day → Month → Year. See [`ComponentOrder`].
868 pub component_order: ComponentOrder,
869 /// When `true`, the extractor also attempts to parse fully concatenated
870 /// date strings with no separator (e.g. `"25122024"`). Default: `false`.
871 pub no_separator: bool,
872 /// Additional custom separator strings to try alongside the standard set.
873 /// Default: empty. Example: `vec!["||".to_string(), " - ".to_string()]`.
874 pub extra_separators: Vec<String>,
875 /// When `true`, the tokeniser substitutes the letter `O` (upper or lower
876 /// case) for the digit `0` inside tokens that consist entirely of digits
877 /// and the letter O — for example `"2O24"` is treated as `"2024"`.
878 ///
879 /// This handles OCR and keyboard-entry errors where the letter O is typed
880 /// in place of zero. The substitution is applied only to tokens that would
881 /// otherwise be entirely numeric-with-O; it is never applied when the O
882 /// appears as part of a longer alphabetic run (e.g. `"7october"` — the
883 /// `"october"` portion is left as-is and classified as a month name).
884 ///
885 /// Default: `true`.
886 pub letter_o_substitution: bool,
887 //TODO: Add a Config section for fuzzy matching options, include letter o substitution in that config
888}
889
890impl Default for Config {
891 fn default() -> Self {
892 Config {
893 day: DayConfig::default(),
894 month: MonthConfig::default(),
895 year: YearConfig::default(),
896 component_order: ComponentOrder::default(),
897 no_separator: false,
898 extra_separators: Vec::new(),
899 letter_o_substitution: true,
900 }
901 }
902}
903
904impl Config {
905 /// Set the day extraction configuration.
906 ///
907 /// ```
908 /// use partial_date::models::{Config, DayConfig, IsExpected};
909 ///
910 /// let config = Config::default()
911 /// .with_day(DayConfig::default().with_range(1, 28).with_expected(IsExpected::Yes));
912 /// ```
913 pub fn with_day(self, day: DayConfig) -> Self {
914 Config { day, ..self }
915 }
916
917 /// Set the month extraction configuration.
918 ///
919 /// ```
920 /// use partial_date::models::{Config, IsExpected, MonthConfig};
921 ///
922 /// let config = Config::default()
923 /// .with_month(MonthConfig::default().with_expected(IsExpected::Yes));
924 /// ```
925 pub fn with_month(self, month: MonthConfig) -> Self {
926 Config { month, ..self }
927 }
928
929 /// Set the year extraction configuration.
930 ///
931 /// ```
932 /// use partial_date::models::{Config, IsExpected, YearConfig};
933 ///
934 /// let config = Config::default()
935 /// .with_year(YearConfig::default().with_range(1760, 1840).with_expected(IsExpected::Yes));
936 /// ```
937 pub fn with_year(self, year: YearConfig) -> Self {
938 Config { year, ..self }
939 }
940
941 /// Set the expected ordering of date components for positional inputs.
942 ///
943 /// ```
944 /// use partial_date::models::{ComponentOrder, Config, DateComponent};
945 ///
946 /// let config = Config::default().with_component_order(
947 /// ComponentOrder::new(
948 /// DateComponent::Month,
949 /// DateComponent::Day,
950 /// DateComponent::Year,
951 /// )
952 /// .unwrap(),
953 /// );
954 /// ```
955 pub fn with_component_order(self, component_order: ComponentOrder) -> Self {
956 Config {
957 component_order,
958 ..self
959 }
960 }
961
962 /// Enable or disable no-separator parsing (e.g. `"25122024"`).
963 ///
964 /// ```
965 /// use partial_date::models::Config;
966 ///
967 /// let config = Config::default().with_no_separator(true);
968 /// ```
969 pub fn with_no_separator(self, no_separator: bool) -> Self {
970 Config {
971 no_separator,
972 ..self
973 }
974 }
975
976 /// Set additional custom separator strings to try alongside the standard
977 /// set (`/`, `-`, `.`, `,`, `\`, and whitespace).
978 ///
979 /// ```
980 /// use partial_date::models::Config;
981 ///
982 /// let config = Config::default()
983 /// .with_extra_separators(vec!["||".to_string(), " - ".to_string()]);
984 /// ```
985 pub fn with_extra_separators(self, extra_separators: Vec<String>) -> Self {
986 Config {
987 extra_separators,
988 ..self
989 }
990 }
991
992 /// Enable or disable substitution of the letter `O` for the digit `0`.
993 ///
994 /// ```
995 /// use partial_date::models::Config;
996 ///
997 /// let config = Config::default().with_letter_o_substitution(false);
998 /// ```
999 pub fn with_letter_o_substitution(self, letter_o_substitution: bool) -> Self {
1000 Config {
1001 letter_o_substitution,
1002 ..self
1003 }
1004 }
1005}
1006
1007// ---------------------------------------------------------------------------
1008// Input type
1009// ---------------------------------------------------------------------------
1010
/// Input to the partial date extractor.
///
/// Pairs the text to analyse with an optional per-call [`Config`].
#[derive(Debug, Clone)]
pub struct Input {
    /// The raw text from which a date should be extracted.
    pub utterance: String,
    /// Per-call config override. Falls back to the library default when `None`.
    pub config: Option<Config>,
}
1019
/// The name of a calendar month, as extracted from natural language input.
///
/// Variants are declared in calendar order, January first.
///
/// ## Conversions
///
/// `MonthName` can be constructed from either a string or a number:
///
/// ```
/// use partial_date::models::{MonthName, MonthNameError};
/// use std::convert::TryFrom;
///
/// // From a name string (full, abbreviated, or unambiguous prefix)
/// assert_eq!(MonthName::try_from("October"), Ok(MonthName::October));
/// assert_eq!(MonthName::try_from("oct"), Ok(MonthName::October));
/// assert_eq!(MonthName::try_from("Octo"), Ok(MonthName::October));
///
/// // From a numeric string
/// assert_eq!(MonthName::try_from("10"), Ok(MonthName::October));
///
/// // From a u8
/// assert_eq!(MonthName::try_from(10_u8), Ok(MonthName::October));
///
/// // Errors
/// assert_eq!(MonthName::try_from(0_u8), Err(MonthNameError::NumberOutOfRange(0)));
/// assert_eq!(MonthName::try_from(13_u8), Err(MonthNameError::NumberOutOfRange(13)));
/// assert_eq!(MonthName::try_from("Xyz"), Err(MonthNameError::UnrecognisedName));
/// assert_eq!(MonthName::try_from("5x"), Err(MonthNameError::NotAMonth));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MonthName {
    January,
    February,
    March,
    April,
    May,
    June,
    July,
    August,
    September,
    October,
    November,
    December,
}
1062
1063impl MonthName {
1064 /// Return the calendar number of this month (1 = January … 12 = December).
1065 ///
1066 /// ```
1067 /// use partial_date::models::MonthName;
1068 /// assert_eq!(MonthName::January.number(), 1);
1069 /// assert_eq!(MonthName::December.number(), 12);
1070 /// ```
1071 pub fn number(self) -> u8 {
1072 match self {
1073 MonthName::January => 1,
1074 MonthName::February => 2,
1075 MonthName::March => 3,
1076 MonthName::April => 4,
1077 MonthName::May => 5,
1078 MonthName::June => 6,
1079 MonthName::July => 7,
1080 MonthName::August => 8,
1081 MonthName::September => 9,
1082 MonthName::October => 10,
1083 MonthName::November => 11,
1084 MonthName::December => 12,
1085 }
1086 }
1087}
1088
/// Errors returned when a [`MonthName`] conversion fails.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MonthNameError {
    /// The input string was alphabetic but did not match any known month name,
    /// abbreviation, or unambiguous prefix.
    UnrecognisedName,
    /// The input was a valid integer but outside the range 1–12.
    ///
    /// The payload is the rejected number. A numeric string too large to fit
    /// in a `u8` is reported with a payload of `0`.
    NumberOutOfRange(u8),
    /// The input was neither a pure alphabetic string nor a pure integer
    /// (e.g. `"5x"` or `"jan2"`).
    NotAMonth,
}
1101
1102/// Convert a month number (`1` = January … `12` = December) into a
1103/// [`MonthName`].
1104///
1105/// Returns [`MonthNameError::NumberOutOfRange`] for any value outside 1–12.
1106impl TryFrom<u8> for MonthName {
1107 type Error = MonthNameError;
1108
1109 fn try_from(n: u8) -> Result<Self, Self::Error> {
1110 match n {
1111 1 => Ok(MonthName::January),
1112 2 => Ok(MonthName::February),
1113 3 => Ok(MonthName::March),
1114 4 => Ok(MonthName::April),
1115 5 => Ok(MonthName::May),
1116 6 => Ok(MonthName::June),
1117 7 => Ok(MonthName::July),
1118 8 => Ok(MonthName::August),
1119 9 => Ok(MonthName::September),
1120 10 => Ok(MonthName::October),
1121 11 => Ok(MonthName::November),
1122 12 => Ok(MonthName::December),
1123 _ => Err(MonthNameError::NumberOutOfRange(n)),
1124 }
1125 }
1126}
1127
1128/// Convert a string into a [`MonthName`].
1129///
1130/// Three strategies are tried in order:
1131///
1132/// 1. **Alphabetic match** — if every character is ASCII alphabetic (after
1133/// stripping a trailing `.`), the lowercased string is compared against all
1134/// full names, standard 3-letter abbreviations, and unambiguous longer
1135/// prefixes.
1136///
1137/// 2. **Fuzzy match** — if no exact or prefix match was found, the
1138/// Levenshtein ratio is computed against every full month name. The
1139/// closest match is accepted when its ratio is ≥ 0.6 and it is
1140/// unambiguously the best candidate (no tie). Returns
1141/// [`MonthNameError::UnrecognisedName`] when no candidate passes.
1142///
1143/// 3. **Numeric match** — if every character is an ASCII digit, the value is
1144/// parsed as a `u8` and forwarded to [`TryFrom<u8>`]. Returns
1145/// [`MonthNameError::NumberOutOfRange`] when the number is outside 1–12.
1146///
1147/// If the string is neither purely alphabetic nor purely numeric (e.g.
1148/// `"jan2"` or `"5x"`), [`MonthNameError::NotAMonth`] is returned.
1149impl TryFrom<&str> for MonthName {
1150 type Error = MonthNameError;
1151
1152 fn try_from(s: &str) -> Result<Self, Self::Error> {
1153 // Strip a trailing dot before classification (handles "Jan.", "Feb.").
1154 let s = s.strip_suffix('.').unwrap_or(s);
1155
1156 if s.is_empty() {
1157 return Err(MonthNameError::NotAMonth);
1158 }
1159
1160 if s.chars().all(|c| c.is_ascii_alphabetic()) {
1161 // --- Alphabetic path ---
1162 let lower = s.to_ascii_lowercase();
1163 match_month_name_str(lower.as_str())
1164 } else if s.chars().all(|c| c.is_ascii_digit()) {
1165 // --- Numeric path ---
1166 // A leading-zero number like "06" parses to 6, which is valid.
1167 // Values > 255 would overflow u8::MAX; treat them as out-of-range.
1168 let n: u8 = s.parse().map_err(|_| MonthNameError::NumberOutOfRange(0))?;
1169 MonthName::try_from(n)
1170 } else {
1171 Err(MonthNameError::NotAMonth)
1172 }
1173 }
1174}
1175
/// All twelve full month names paired with their [`MonthName`] variant,
/// used for both prefix and fuzzy matching.
///
/// Names are stored in lowercase and listed in calendar order, so callers
/// must lowercase their input before comparing against this table.
const FULL_MONTH_NAMES: &[(&str, MonthName)] = &[
    ("january", MonthName::January),
    ("february", MonthName::February),
    ("march", MonthName::March),
    ("april", MonthName::April),
    ("may", MonthName::May),
    ("june", MonthName::June),
    ("july", MonthName::July),
    ("august", MonthName::August),
    ("september", MonthName::September),
    ("october", MonthName::October),
    ("november", MonthName::November),
    ("december", MonthName::December),
];
1192
/// Minimum Levenshtein ratio required for a fuzzy match to be accepted.
///
/// The comparison is inclusive: a ratio exactly equal to this value passes.
///
/// A ratio of 0.6 means at most 2 edits in a 5-character word, or 1 edit in
/// a 3-character word. Empirically this passes all known real-world
/// misspellings while rejecting clearly unrelated words like `"Foo"` or
/// `"Friday"`.
///
/// NOTE(review): the edit-count figures above assume the ratio is computed as
/// `1 - distance / longer_length` — confirm against `crate::levenshtein`.
const FUZZY_MATCH_THRESHOLD: f32 = 0.6;
1200
1201/// Match an already-lowercased, purely-alphabetic string against all known
1202/// month names, abbreviations, and unambiguous prefixes, falling back to
1203/// fuzzy (Levenshtein ratio) matching when no exact or prefix match is found.
1204fn match_month_name_str(lower: &str) -> Result<MonthName, MonthNameError> {
1205 // --- 1. Exact match: full names and standard 3-letter abbreviations ---
1206 let exact = match lower {
1207 "january" | "jan" => Some(MonthName::January),
1208 "february" | "feb" => Some(MonthName::February),
1209 "march" | "mar" => Some(MonthName::March),
1210 "april" | "apr" => Some(MonthName::April),
1211 "may" => Some(MonthName::May),
1212 "june" | "jun" => Some(MonthName::June),
1213 "july" | "jul" => Some(MonthName::July),
1214 "august" | "aug" => Some(MonthName::August),
1215 "september" | "sep" => Some(MonthName::September),
1216 "october" | "oct" => Some(MonthName::October),
1217 "november" | "nov" => Some(MonthName::November),
1218 "december" | "dec" => Some(MonthName::December),
1219 _ => None,
1220 };
1221
1222 if let Some(month) = exact {
1223 return Ok(month);
1224 }
1225
1226 // --- 2. Unambiguous prefix match (≥ 4 characters) ---
1227 if lower.len() >= 4 {
1228 let mut found: Option<MonthName> = None;
1229 for (full_name, variant) in FULL_MONTH_NAMES {
1230 if full_name.starts_with(lower) {
1231 if found.is_some() {
1232 // More than one month starts with this prefix — ambiguous;
1233 // fall through to fuzzy matching below.
1234 found = None;
1235 break;
1236 }
1237 found = Some(*variant);
1238 }
1239 }
1240 if let Some(month) = found {
1241 return Ok(month);
1242 }
1243 }
1244
1245 // --- 3. Fuzzy match via Levenshtein ratio ---
1246 fuzzy_match_month(lower)
1247}
1248
1249/// Find the best-matching month name for `lower` using Levenshtein ratio.
1250///
1251/// Returns the matched [`MonthName`] if exactly one candidate scores above
1252/// [`FUZZY_MATCH_THRESHOLD`] and no other candidate ties it. Returns
1253/// [`MonthNameError::UnrecognisedName`] otherwise.
1254fn fuzzy_match_month(lower: &str) -> Result<MonthName, MonthNameError> {
1255 use crate::levenshtein::levenshtein_ratio;
1256
1257 let mut best_ratio: f32 = 0.0;
1258 let mut best_month: Option<MonthName> = None;
1259 let mut is_tied = false;
1260
1261 for (full_name, variant) in FULL_MONTH_NAMES {
1262 let ratio = levenshtein_ratio(lower, full_name);
1263 if ratio > best_ratio {
1264 best_ratio = ratio;
1265 best_month = Some(*variant);
1266 is_tied = false;
1267 } else if (ratio - best_ratio).abs() < f32::EPSILON {
1268 // Two candidates have the same ratio — ambiguous.
1269 is_tied = true;
1270 }
1271 }
1272
1273 if best_ratio >= FUZZY_MATCH_THRESHOLD && !is_tied {
1274 best_month.ok_or(MonthNameError::UnrecognisedName)
1275 } else {
1276 Err(MonthNameError::UnrecognisedName)
1277 }
1278}
1279
1280// ---------------------------------------------------------------------------
1281// Tokenisation types
1282// ---------------------------------------------------------------------------
1283
/// A single meaningful chunk produced by [`crate::extract::tokenise`].
///
/// The tokeniser strips separator characters and noise words, leaving only
/// tokens that *could* contribute to a date component. At most three tokens
/// are returned (one per date component: day, month, year).
///
/// Each variant stores the already-parsed value rather than the raw source
/// text, so consumers can use the token directly without re-parsing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A parsed integer together with the number of digits in the original
    /// source string.
    ///
    /// Field `.0` is the parsed value and field `.1` is the digit count.
    ///
    /// The digit count is required for year disambiguation: `"24"` (2 digits)
    /// must be expanded via [`TwoDigitYearExpansion`], while `"2024"` (4
    /// digits) is used as-is. Three-digit and five-digit numbers are never
    /// valid date components.
    ///
    /// Uses `i16` for the value because the full year range required by the
    /// spec (0–3000) fits within `i16::MAX` (32,767), and day/month values
    /// are far smaller.
    Numeric(i16, u8),
    /// The numeric day extracted from an ordinal like `"19th"` or `"1st"`,
    /// with the suffix already stripped.
    OrdinalDay(u8),
    /// A resolved [`MonthName`] variant, matched from a full name,
    /// abbreviation, unambiguous prefix, or fuzzy misspelling.
    MonthName(MonthName),
}