fuzzy_datetime/
lib.rs

1use chrono::{NaiveDate, NaiveDateTime, ParseError};
2use simple_string_patterns::{CharGroupMatch, CharType, SimplContainsType, ToSegments};
3
4mod date_order;
5mod guess;
6mod validators;
7mod converters;
8mod detect;
9mod from_fuzzy_iso_string;
10
11pub use date_order::{DateOrder, DateOptions};
12pub use detect::{detect_date_format_from_list, detect_date_format_from_generic_list};
13pub use from_fuzzy_iso_string::*;
14use guess::surmise_date_order_and_splitter;
15use validators::segment_is_subseconds;
16use converters::{fuzzy_to_formatted_time_parts, to_formatted_date_string};
17
18/// If the second argument is None, the function will attempt to guess the date order
19/// Otherwise, it will use the provided date order and splitter
20pub fn fuzzy_to_datetime(dt: &str, date_opts: Option<DateOptions>, time_separator: Option<char>) -> Result<NaiveDateTime, ParseError> {
21  let formatted_str = fuzzy_to_datetime_string(dt, date_opts, time_separator).unwrap_or_default();
22  NaiveDateTime::parse_from_str(&formatted_str, "%Y-%m-%dT%H:%M:%S%.3fZ")
23}
24
25/// convert a date-time-like string to a valid ISO 8601-compatible date-time string
26/// for direct output or further processing via chrono
27/// Assume all input dates conforms to the ISO 8601 order, even if incomplete. All guessing is short-circuited
28/// This is compatible with original function in julian_day_converter
29pub fn iso_fuzzy_string_to_datetime(dt: &str) -> Result<NaiveDateTime, ParseError> {
30  fuzzy_to_datetime(dt, Some(DateOptions::default()), Some(':'))
31}
32
33/// If the second argument is None, the function will attempt to guess the date order
34/// Otherwise, it will use the provided date order and splitter
35pub fn fuzzy_to_date(dt: &str, date_opts: Option<DateOptions>) -> Result<NaiveDate, ParseError> {
36  let date_str = fuzzy_to_date_string(dt, date_opts).unwrap_or_default();
37  NaiveDate::parse_from_str(&date_str, "%Y-%m-%d")
38}
39
40/// Convert a ISO YMD date-like string to a NaiveDate
41/// It assumes Y-M-D order and a hyphen as the splitter, but can accommodate missing month or day components
42pub fn iso_fuzzy_to_date(dt: &str) -> Result<NaiveDate, ParseError> {
43  fuzzy_to_date(dt, Some(DateOptions::default()))
44}
45
46/// convert a date-time-like string to a valid ISO 8601-compatible date string
47/// for direct output or further processing via chrono
48/// If date_opts is None, the function will attempt to guess the date order with bias towards YMD and DMY in case of ambiguity
49/// For best performance, provide the date order and splitter
50pub fn fuzzy_to_date_string(dt: &str, date_opts: Option<DateOptions>) -> Option<String> {
51  if let Some((date_str, _t_str, _ms_tz)) = fuzzy_to_date_string_with_time(dt, date_opts) {
52    if !date_str.is_empty() {
53      return Some(date_str)
54    }
55  }
56  None
57}
58
59/// convert a date-like assuming the source string follows the Y-M-D pattern
60pub fn iso_fuzzy_to_date_string(dt: &str) -> Option<String> {
61	fuzzy_to_date_string(dt, Some(DateOptions::default()))
62}
63
64/// convert a date-time-like assuming the source string follows the Y-M-D H:m:s pattern
65pub fn iso_fuzzy_to_datetime_string(dt: &str) -> Option<String> {
66	fuzzy_to_datetime_string_opts(dt, 'T', Some(DateOptions::default()), Some(':'), true)
67}
68
69
70/// convert a date-time-like string to a valid ISO 8601-compatible string
71pub fn fuzzy_to_date_string_with_time(dt: &str, date_opts: Option<DateOptions>) -> Option<(String, String, String)> {
72	
73  let (dt_str, mtz) = dt.to_start_end(".");
74  let has_mtz = segment_is_subseconds(&mtz);
75  let milli_tz = if has_mtz {
76    mtz
77  } else {
78    "".to_string()
79  };
80  let dt_base = if has_mtz {
81    dt_str
82  } else {
83    dt.to_string()
84  };
85	let clean_dt = dt_base.replace("T", " ").trim().to_string();
86	let mut dt_parts = clean_dt.split_whitespace();
87	let date_part = dt_parts.next().unwrap_or("0000-01-01");
88  let date_options = if let Some(dt_opts) = date_opts {
89    dt_opts
90  } else {
91    surmise_date_order_and_splitter(date_part)
92  };
93	let time_part = dt_parts.next().unwrap_or("00:00:00");
94	if date_part.contains_type(CharType::Alpha) {
95			return None;
96	}
97
98	if let Some(formatted_date) = to_formatted_date_string(date_part, date_options.order(), date_options.splitter()) {
99    Some((formatted_date, time_part.to_string(), milli_tz))
100  } else {
101    None
102  }
103}
104
105
106/// convert a date-time-like string to a valid ISO 8601-compatible string
107pub fn fuzzy_to_datetime_string(dt: &str, date_opts: Option<DateOptions>, time_separator: Option<char>) -> Option<String> {
108	fuzzy_to_datetime_string_opts(dt, 'T', date_opts, time_separator, true)
109}
110
111/// convert a date-time-like string to a valid ISO 8601-compatible string
112/// dt: the date-time string
113/// separator: the separator between the date and time parts
114/// add_z: whether to add 'Z' timezone indicator
115pub fn fuzzy_to_datetime_string_opts(dt: &str, separator: char, date_opts: Option<DateOptions>, time_separator: Option<char>, add_z: bool) -> Option<String> {
116  if let Some((formatted_date, time_part, ms_tz)) = fuzzy_to_date_string_with_time(dt, date_opts) {
117    // exclude the the whole date-time string if the time part is non-empty without digits
118    if !time_part.is_empty() && !time_part.has_digits() {
119      return None;
120    }
121    let (formatted_time, tz_suffix) = fuzzy_to_formatted_time_parts(&time_part, &ms_tz, time_separator, add_z).unwrap_or_default();
122    let formatted_str = format!("{}{}{}{}", formatted_date, separator, formatted_time, tz_suffix);
123    if !formatted_str.is_empty() {
124      return Some(formatted_str);
125    }
126	}
127  None
128}
129
130// Check if a string is likely to be a date string with an optional time component
131pub fn is_datetime_like(text: &str) -> bool {
132  fuzzy_to_datetime_string(text, None, None).is_some()
133}
134
#[cfg(test)]
mod tests {
  use super::*;
  use guess::surmise_date_order;

  /// Guessing converters on invalid, partial and complete inputs.
  #[test]
  fn test_fuzzy_dates() {
    // letters in the date part: parsing fails and no string is produced
    let with_letters = "2001-apple";
    assert!(fuzzy_to_datetime(with_letters, None, None).is_err());
    assert!(fuzzy_to_datetime_string(with_letters, None, None).is_none());

    // a date-time without seconds still parses
    let without_seconds = "1876-08-29 17:15";
    assert!(fuzzy_to_datetime(without_seconds, None, None).is_ok());

    // correct date-time with an unpadded month
    let unpadded_datetime = "2023-8-29 19:34:39";
    assert_eq!(
      fuzzy_to_datetime_string(unpadded_datetime, None, None).as_deref(),
      Some("2023-08-29T19:34:39.000Z")
    );

    // correct date-only string
    let date_only = "2023-9-10";
    assert_eq!(fuzzy_to_date_string(date_only, None).as_deref(), Some("2023-09-10"));

    // time-only strings should not be valid
    let time_only = "10:10:10";
    assert!(fuzzy_to_datetime_string(time_only, None, None).is_none());

    // date-time with milliseconds and timezone comes back unchanged
    let full_iso = "2023-08-29T19:34:39.678Z";
    assert_eq!(
      fuzzy_to_datetime_string(full_iso, None, None).as_deref(),
      Some(full_iso)
    );
  }

  /// Acceptance and rejection cases for `is_datetime_like`.
  #[test]
  fn test_is_datetime_like() {
    let accepted = ["2023-10-10T10:10:10", "2023-10-10 10:10:10", "2023-10-10"];
    for sample in accepted {
      assert!(is_datetime_like(sample));
    }

    let rejected = ["10:10:10", "invalid-date", "2023-10-10Tinvalid"];
    for sample in rejected {
      assert!(!is_datetime_like(sample));
    }
  }

  /// Date order guessing with a known splitter.
  #[test]
  fn test_surmise_date_order() {
    // The last case is ambiguous: year-last dates default to DMY (sorry Americans).
    // This can be overridden by specifying the date order, or by parsing a set of
    // dates to see if any have numbers greater than 12 in the second position
    // and no numbers over 12 in the first position.
    let cases = [
      ("1876-08-29", '-', DateOrder::YMD),
      ("28/02/1998", '/', DateOrder::DMY),
      ("02/28/1998", '/', DateOrder::MDY),
      ("08/07/1998", '/', DateOrder::DMY),
    ];
    for (sample, splitter, expected_order) in cases {
      assert_eq!(surmise_date_order(sample, Some(splitter)), expected_order);
    }
  }

  /// Date order and splitter guessing for delimited dates.
  #[test]
  fn test_surmise_date_order_and_splitter() {
    let cases = [
      ("1876-08-29", DateOrder::YMD, '-'),
      ("28/02/1998", DateOrder::DMY, '/'),
    ];
    for (sample, expected_order, expected_splitter) in cases {
      let guessed = surmise_date_order_and_splitter(sample);
      assert_eq!(guessed.order(), expected_order);
      assert_eq!(guessed.splitter(), Some(expected_splitter));
    }
  }

  /// Date order guessing for digit-only dates without any splitter.
  #[test]
  fn test_surmise_date_order_and_splitter_plain() {
    let day_first = surmise_date_order_and_splitter("28021998");
    assert_eq!(day_first.order(), DateOrder::DMY);
    assert_eq!(day_first.splitter(), None);

    let month_first = surmise_date_order_and_splitter("02281998");
    assert_eq!(month_first.order(), DateOrder::MDY);

    let year_first = surmise_date_order_and_splitter("18761208");
    assert_eq!(year_first.order(), DateOrder::YMD);
  }

  /// The trailing millisecond / timezone segment is split off at the last full stop.
  #[test]
  fn test_millisecond_splitter() {
    let dotted_time = "2023-08-29T19.34.39.678Z";
    let (base, tail) = dotted_time.to_start_end(".");
    assert_eq!(base, "2023-08-29T19.34.39");
    assert_eq!(tail, "678Z");

    assert!(segment_is_subseconds("678Z"));
  }

  /// Format detection across whole lists of dates.
  #[test]
  fn test_detect_date_format_from_list() {
    // American dates are usually MDY with slashes
    let usa_dates = vec!["07/08/1998", "09/10/2021", "12/15/2022", "11/09/1999"];
    let usa_format = detect_date_format_from_list(&usa_dates);
    assert_eq!(usa_format.order(), DateOrder::MDY);
    assert_eq!(usa_format.splitter(), Some('/'));

    // Many other countries use DMY with slashes
    let dmy_dates = vec!["08/07/1998", "10/09/2021", "15/12/2022", "09/11/1999"];
    let dmy_format = detect_date_format_from_list(&dmy_dates);
    assert_eq!(dmy_format.order(), DateOrder::DMY);
    assert_eq!(dmy_format.splitter(), Some('/'));

    // Dates in Germany and many other European countries are DMY with full stops
    let german_dates = vec!["8.7.1998", "10.9.2021", "15.12.2022", "9.11.1999"];
    let german_format = detect_date_format_from_list(&german_dates);
    assert_eq!(german_format.order(), DateOrder::DMY);
    assert_eq!(german_format.splitter(), Some('.'));

    // French dates are also DMY, but often with hyphens
    let french_dates = vec!["08-07-1998", "10-09-2021", "15-12-2022", "09-11-1999"];
    let french_format = detect_date_format_from_list(&french_dates);
    assert_eq!(french_format.order(), DateOrder::DMY);
    assert_eq!(french_format.splitter(), Some('-'));

    // ISO dates are YMD with hyphens
    let iso_dates = vec!["1998-07-08", "2021-09-10", "2022-12-15", "1999-11-09"];
    let iso_format = detect_date_format_from_list(&iso_dates);
    assert_eq!(iso_format.order(), DateOrder::YMD);
    assert_eq!(iso_format.splitter(), Some('-'));

    // Arbitrary rows work too, given a closure that extracts the date string
    struct SpecialDay {
      #[allow(dead_code)]
      name: String,
      date: String,
    }

    let rows: Vec<SpecialDay> = [
      ("Independence Day", "07/04/1776"),
      ("Christmas Day", "12/25/2021"),
      ("New Year's Day", "01/01/2022"),
    ]
    .iter()
    .map(|(name, date)| SpecialDay {
      name: name.to_string(),
      date: date.to_string(),
    })
    .collect();

    let special_format = detect_date_format_from_generic_list(&rows, |row| Some(row.date.clone()));
    assert_eq!(special_format.order(), DateOrder::MDY);
  }

  /// Date strings converted with explicitly supplied options.
  #[test]
  fn test_fuzzy_to_date_string() {
    // complete date with an unpadded month
    let full_date = fuzzy_to_date_string("1993-8-29", Some(DateOptions::default()));
    assert_eq!(full_date.as_deref(), Some("1993-08-29"));

    // missing day falls back to the first of the month
    let year_month = fuzzy_to_date_string("1993-8", Some(DateOptions::default()));
    assert_eq!(year_month.as_deref(), Some("1993-08-01"));

    // day-first date with slashes
    let day_first = fuzzy_to_date_string("29/08/1993", Some(DateOptions::dmy('/')));
    assert_eq!(day_first.as_deref(), Some("1993-08-29"));
  }

  /// Trait-based conversion straight to a `NaiveDateTime`.
  #[test]
  fn test_fuzzy_datetime_to_naive_datetime() {
    let incomplete = "1876-9-25 15:45"; // incomplete without zero-padding
    let parsed = NaiveDateTime::from_fuzzy_iso_string(incomplete).unwrap();
    assert_eq!(parsed.to_string(), "1876-09-25 15:45:00");
  }
}