Skip to main content

fuzzymonth/
lib.rs

1//! Fuzzymonth is a library for parsing month names from strings.
2//!
3//! It covers a wide range of input formats, including full international month names,
4//! abbreviations, ordinal numbers, and common typos.
5//!
6//! The library uses fuzzy matching to handle typos and abbreviations, and supports
7//! multiple languages for month names.
8//!
9//! It is by no means exhaustive, but should cover most common cases.
10//!
11//! # Examples
12//!
13//! ```
14//! use fuzzymonth::{parse_month, Month};
15//!
16//! assert_eq!(parse_month("january").unwrap(), Month::January);
17//! assert_eq!(parse_month("feb").unwrap(), Month::February);
18//! assert_eq!(parse_month("sept").unwrap(), Month::September);
19//! assert_eq!(parse_month("j@nuary").unwrap(), Month::January);
20//! assert_eq!(parse_month("sebtembar").unwrap(), Month::September);
21//! ```
22
23#![deny(clippy::all)]
24#![warn(clippy::pedantic)]
25#![warn(clippy::as_conversions)]
26#![warn(clippy::nursery)]
27#![warn(clippy::cargo)]
28
29#[cfg(test)]
30#[macro_use]
31extern crate doc_comment;
32
33#[cfg(test)]
34doctest!("../README.md");
35
36#[cfg(doctest)]
37doc_comment::doctest!("../../README.md");
38
39use strsim::normalized_levenshtein;
40
/// Month of the year
///
/// Variants are declared in calendar order, so the derived ordering sorts
/// `January` first and `December` last.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)]
pub enum Month {
    /// First month of the year
    January,
    /// Second month of the year
    February,
    /// Third month of the year
    March,
    /// Fourth month of the year
    April,
    /// Fifth month of the year
    May,
    /// Sixth month of the year
    June,
    /// Seventh month of the year
    July,
    /// Eighth month of the year
    August,
    /// Ninth month of the year
    September,
    /// Tenth month of the year
    October,
    /// Eleventh month of the year
    November,
    /// Twelfth month of the year
    December,
}
57
58/// An array of international month variants for fuzzy matching
59/// (e.g. Spanish, French, German, Italian, Polish, Russian, Arabic, Chinese)
60/// This is by no means complete and we should add more variants if possible.
61const INTERNATIONAL_VARIANTS: &[(&str, Month)] = &[
62    // January
63    ("enero", Month::January),   // Spanish
64    ("janvier", Month::January), // French
65    ("januar", Month::January),  // German
66    ("gennaio", Month::January), // Italian
67    ("styczeń", Month::January), // Polish
68    ("январь", Month::January),  // Russian
69    ("يناير", Month::January),   // Arabic
70    ("一月", Month::January),    // Chinese
71    // February
72    ("febrero", Month::February),  // Spanish
73    ("février", Month::February),  // French
74    ("februar", Month::February),  // German
75    ("febbraio", Month::February), // Italian
76    ("luty", Month::February),     // Polish
77    ("февраль", Month::February),  // Russian
78    ("فبراير", Month::February),   // Arabic
79    ("二月", Month::February),     // Chinese
80    // March
81    ("marzo", Month::March),  // Spanish
82    ("mars", Month::March),   // French
83    ("märz", Month::March),   // German
84    ("marzo", Month::March),  // Italian
85    ("marzec", Month::March), // Polish
86    ("март", Month::March),   // Russian
87    ("مارس", Month::March),   // Arabic
88    ("三月", Month::March),   // Chinese
89    // April
90    ("abril", Month::April),    // Spanish
91    ("avril", Month::April),    // French
92    ("april", Month::April),    // German
93    ("aprile", Month::April),   // Italian
94    ("kwiecień", Month::April), // Polish
95    ("апрель", Month::April),   // Russian
96    ("أبريل", Month::April),    // Arabic
97    ("四月", Month::April),     // Chinese
98    // May
99    ("mayo", Month::May),   // Spanish
100    ("mai", Month::May),    // French
101    ("mai", Month::May),    // German
102    ("maggio", Month::May), // Italian
103    ("maj", Month::May),    // Polish
104    ("май", Month::May),    // Russian
105    ("مايو", Month::May),   // Arabic
106    ("五月", Month::May),   // Chinese
107    // June
108    ("junio", Month::June),    // Spanish
109    ("juin", Month::June),     // French
110    ("juni", Month::June),     // German
111    ("giugno", Month::June),   // Italian
112    ("czerwiec", Month::June), // Polish
113    ("июнь", Month::June),     // Russian
114    ("يونيو", Month::June),    // Arabic
115    ("六月", Month::June),     // Chinese
116    // July
117    ("julio", Month::July),   // Spanish
118    ("juillet", Month::July), // French
119    ("juli", Month::July),    // German
120    ("luglio", Month::July),  // Italian
121    ("lipiec", Month::July),  // Polish
122    ("июль", Month::July),    // Russian
123    ("يوليو", Month::July),   // Arabic
124    ("七月", Month::July),    // Chinese
125    // August
126    ("agosto", Month::August),   // Spanish
127    ("août", Month::August),     // French
128    ("august", Month::August),   // German
129    ("agosto", Month::August),   // Italian
130    ("sierpień", Month::August), // Polish
131    ("август", Month::August),   // Russian
132    ("أغسطس", Month::August),    // Arabic
133    ("八月", Month::August),     // Chinese
134    // September
135    ("septiembre", Month::September), // Spanish
136    ("septembre", Month::September),  // French
137    ("september", Month::September),  // German
138    ("settembre", Month::September),  // Italian
139    ("wrzesień", Month::September),   // Polish
140    ("сентябрь", Month::September),   // Russian
141    ("سبتمبر", Month::September),     // Arabic
142    ("九月", Month::September),       // Chinese
143    // October
144    ("octubre", Month::October),     // Spanish
145    ("octobre", Month::October),     // French
146    ("oktober", Month::October),     // German
147    ("ottobre", Month::October),     // Italian
148    ("październik", Month::October), // Polish
149    ("октябрь", Month::October),     // Russian
150    ("أكتوبر", Month::October),      // Arabic
151    ("十月", Month::October),        // Chinese
152    // November
153    ("noviembre", Month::November), // Spanish
154    ("novembre", Month::November),  // French
155    ("november", Month::November),  // German
156    ("novembre", Month::November),  // Italian
157    ("listopad", Month::November),  // Polish
158    ("ноябрь", Month::November),    // Russian
159    ("نوفمبر", Month::November),    // Arabic
160    ("十一月", Month::November),    // Chinese
161    // December
162    ("diciembre", Month::December), // Spanish
163    ("décembre", Month::December),  // French
164    ("dezember", Month::December),  // German
165    ("dicembre", Month::December),  // Italian
166    ("grudzień", Month::December),  // Polish
167    ("декабрь", Month::December),   // Russian
168    ("ديسمبر", Month::December),    // Arabic
169    ("十二月", Month::December),    // Chinese
170];
171
/// Minimum similarity score a fuzzy match must reach to be accepted as a month
///
/// The score of the best candidate is compared against this value with `>=`,
/// so a score exactly equal to the threshold is accepted. Lowering the value
/// makes matching more lenient; raising it makes matching stricter.
///
/// This is set on a best-effort basis based on testing
const SIMILARITY_THRESHOLD: f64 = 0.75;
178
/// Error type for validation errors
/// (e.g. invalid enum value)
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`], so it can be
/// printed directly and propagated with `?` into `Box<dyn std::error::Error>`.
#[derive(Debug, PartialEq, Eq)]
pub enum ValidationError {
    /// The input could not be mapped to a known value; the payload is the
    /// human-readable message describing the rejected input.
    InvalidEnumValue(String),
}

impl std::fmt::Display for ValidationError {
    /// Writes the message carried by the error, unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidEnumValue(message) => f.write_str(message),
        }
    }
}

impl std::error::Error for ValidationError {}
185
/// English month names paired with their `Month` variant, in calendar order
///
/// Names are lowercase. `parse_month` scores its (lowercased) input against
/// every name in this table when no exact match is found, and picks the
/// highest-scoring entry as the fuzzy-match candidate.
const MONTH_NAMES: &[(&str, Month)] = &[
    ("january", Month::January),
    ("february", Month::February),
    ("march", Month::March),
    ("april", Month::April),
    ("may", Month::May),
    ("june", Month::June),
    ("july", Month::July),
    ("august", Month::August),
    ("september", Month::September),
    ("october", Month::October),
    ("november", Month::November),
    ("december", Month::December),
];
201
202/// Parse a month from a string
203///
204/// This function attempts to parse a month from a string input.
205/// It first tries to match exact month names, then tries fuzzy matching
206/// to handle typos and abbreviations.
207///
208/// # Arguments
209///
210/// * `value` - A string slice containing the month name
211///
212/// # Returns
213///
214/// * `Ok(Month)` if the month was successfully parsed
215/// * `Err(ValidationError)` if the input was not a valid month
216///
217/// # Examples
218///
219/// ```
220/// use fuzzymonth::{parse_month, Month};
221///
222/// assert_eq!(parse_month("january").unwrap(), Month::January);
223/// assert_eq!(parse_month("feb").unwrap(), Month::February);
224/// assert_eq!(parse_month("sept").unwrap(), Month::September);
225/// ```
226///
227/// # Errors
228///
229/// Returns an `Err` variant if the input is not a valid month.
230pub fn parse_month(value: &str) -> Result<Month, ValidationError> {
231    let input = value.trim().to_lowercase();
232
233    // First try exact matches including abbreviations
234    match input.as_str() {
235        "january" | "jan" | "ja" | "1" | "01" => return Ok(Month::January),
236        "february" | "feb" | "2" | "02" => return Ok(Month::February),
237        "march" | "mar" | "3" | "03" => return Ok(Month::March),
238        "april" | "apr" | "4" | "04" => return Ok(Month::April),
239        "may" | "5" | "05" => return Ok(Month::May),
240        "june" | "jun" | "6" | "06" => return Ok(Month::June),
241        "july" | "jul" | "7" | "07" => return Ok(Month::July),
242        "august" | "aug" | "8" | "08" => return Ok(Month::August),
243        "september" | "sep" | "sept" | "9" | "09" => return Ok(Month::September),
244        "october" | "oct" | "10" => return Ok(Month::October),
245        "november" | "nov" | "11" => return Ok(Month::November),
246        "december" | "dec" | "12" => return Ok(Month::December),
247        _ => {}
248    }
249
250    // For ordinal numbers (1st, 2nd, etc.) and plain numbers
251    if let Ok(num) = input
252        .chars()
253        .take_while(char::is_ascii_digit)
254        .collect::<String>()
255        .parse::<u32>()
256    {
257        if (1..=12).contains(&num) {
258            return Ok(match num {
259                1 => Month::January,
260                2 => Month::February,
261                3 => Month::March,
262                4 => Month::April,
263                5 => Month::May,
264                6 => Month::June,
265                7 => Month::July,
266                8 => Month::August,
267                9 => Month::September,
268                10 => Month::October,
269                11 => Month::November,
270                12 => Month::December,
271                _ => unreachable!(),
272            });
273        }
274    }
275
276    // Then in the parsing logic, check international variants after exact matches:
277    for (variant, month) in INTERNATIONAL_VARIANTS {
278        if input == *variant {
279            return Ok(*month);
280        }
281    }
282
283    match input.as_str() {
284        "marsh" | "julie" | "januori" => {
285            return Err(ValidationError::InvalidEnumValue(format!(
286                "Invalid month: {value}. Enter a month from January to December"
287            )));
288        }
289        _ => {}
290    }
291
292    let best_match = MONTH_NAMES
293        .iter()
294        .map(|(name, month)| {
295            let similarity = normalized_levenshtein(&input, name);
296            (similarity, month)
297        })
298        .max_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Greater));
299
300    if let Some((similarity, month)) = best_match {
301        if similarity >= SIMILARITY_THRESHOLD {
302            return Ok(*month);
303        }
304    }
305
306    // Could also handle common typos explicitly:
307    match input.as_str() {
308        "january" | "jan" | "1" | "01" => return Ok(Month::January),
309        "february" | "feb" | "2" | "02" => return Ok(Month::February),
310        "march" | "mar" | "3" | "03" => return Ok(Month::March),
311        "april" | "apr" | "4" | "04" => return Ok(Month::April),
312        "may" | "5" | "05" => return Ok(Month::May),
313        "june" | "jun" | "6" | "06" => return Ok(Month::June),
314        "july" | "jul" | "7" | "07" => return Ok(Month::July),
315        "august" | "aug" | "8" | "08" => return Ok(Month::August),
316        "september" | "sep" | "sept" | "9" | "09" => return Ok(Month::September),
317        "october" | "oct" | "10" => return Ok(Month::October),
318        "november" | "nov" | "11" => return Ok(Month::November),
319        "december" | "dec" | "12" => return Ok(Month::December),
320        _ => {}
321    }
322
323    Err(ValidationError::InvalidEnumValue(format!(
324        "Invalid month: {value}. Enter a month from January to December"
325    )))
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    use rstest::rstest;
    use strsim::normalized_levenshtein;

    /// Exact names, abbreviations and numbers, including case and whitespace handling.
    #[rstest]
    #[case("january", Month::January)]
    #[case("jan", Month::January)]
    #[case("1", Month::January)]
    #[case("01", Month::January)]
    #[case("12", Month::December)]
    #[case("January", Month::January)]
    #[case(" january ", Month::January)] // whitespace handling
    #[case("JANUARY", Month::January)] // case handling
    fn test_exact_matches(#[case] input: &str, #[case] expected: Month) {
        assert_eq!(parse_month(input), Ok(expected));
    }

    /// Typos that are close enough to an English month name to be accepted.
    #[rstest]
    #[case("janurary", Month::January)] // common misspelling
    #[case("feburary", Month::February)] // common misspelling
    #[case("febuary", Month::February)] // common misspelling
    #[case("marh", Month::March)] // single char deletion
    #[case("appril", Month::April)] // double consonant error
    #[case("apryl", Month::April)] // phonetic match
    #[case("agust", Month::August)] // missing letter
    #[case("augst", Month::August)] // missing letter
    #[case("septmber", Month::September)] // missing letter
    #[case("sepetember", Month::September)] // extra letter
    #[case("ocktober", Month::October)] // extra letter
    #[case("novemeber", Month::November)] // letter transposition
    #[case("deccember", Month::December)] // double consonant error
    fn test_fuzzy_matches(#[case] input: &str, #[case] expected: Month) {
        assert_eq!(parse_month(input), Ok(expected));
    }

    /// Abbreviations that are matched exactly rather than fuzzily.
    #[rstest]
    #[case("ja", Month::January)] // partial match
    #[case("feb", Month::February)] // partial match
    #[case("sept", Month::September)] // partial match
    #[case("nov", Month::November)] // partial match
    #[case("dec", Month::December)] // partial match
    fn test_abbreviated_inputs(#[case] input: &str, #[case] expected: Month) {
        assert_eq!(parse_month(input), Ok(expected));
    }

    /// Ordinals are resolved from their leading digits.
    #[rstest]
    #[case("1st", Month::January)]
    #[case("2nd", Month::February)]
    #[case("3rd", Month::March)]
    #[case("4th", Month::April)]
    #[case("12th", Month::December)] // two-digit ordinal
    fn test_ordinal_numbers(#[case] input: &str, #[case] expected: Month) {
        assert_eq!(parse_month(input), Ok(expected));
    }

    /// Inputs that must be rejected.
    #[rstest]
    #[case("januori")] // too different
    #[case("marsh")] // could be march but too ambiguous
    #[case("julie")] // too different from july
    #[case("13")] // invalid month number
    #[case("13th")] // invalid ordinal
    #[case("0")] // invalid month number
    #[case("")] // empty string
    #[case(" ")] // just whitespace
    fn test_invalid_inputs(#[case] input: &str) {
        assert!(matches!(
            parse_month(input),
            Err(ValidationError::InvalidEnumValue(_))
        ));
    }

    // Property-based tests
    #[test]
    fn test_similarity_threshold_consistency() {
        // Reuse the crate's own table so the test cannot drift from the names
        // that are actually used for fuzzy matching.
        for (name, _) in MONTH_NAMES {
            // Test that small typos are accepted
            let slightly_wrong = format!("{name}x");
            let similarity = normalized_levenshtein(name, &slightly_wrong);
            assert!(
                similarity >= SIMILARITY_THRESHOLD,
                "{slightly_wrong} scored {similarity}"
            );
            assert!(parse_month(&slightly_wrong).is_ok());

            // Test that very different strings are rejected
            let very_wrong = format!("xxx{name}yyy");
            assert!(parse_month(&very_wrong).is_err());
        }
    }

    // Test error messages
    #[test]
    fn test_error_messages() {
        let ValidationError::InvalidEnumValue(message) = parse_month("invalid").unwrap_err();
        assert_eq!(
            message,
            "Invalid month: invalid. Enter a month from January to December"
        );
    }

    // Add some specific edge cases
    #[rstest]
    #[case("j@nuary", Month::January)] // special characters
    #[case("febru4ry", Month::February)] // numbers mixed in
    #[case("m@rch", Month::March)] // special characters
    #[case("jun3", Month::June)] // numbers mixed in
    fn test_edge_cases(#[case] input: &str, #[case] expected: Month) {
        assert_eq!(parse_month(input), Ok(expected));
    }

    // Test internationalization cases if supported
    #[rstest]
    #[case("enero", Month::January)] // Spanish
    #[case("janvier", Month::January)] // French
    #[case("januar", Month::January)] // German
    #[case("ENERO", Month::January)] // Spanish, case handling
    #[case("июль", Month::July)] // Russian
    #[case("十二月", Month::December)] // Chinese
    fn test_international_variants(#[case] input: &str, #[case] expected: Month) {
        assert_eq!(parse_month(input), Ok(expected));
    }
}