diligent_date_parser/
lib.rs

//! A library for parsing dates whose format is not known in advance.
//! It diligently tries each known pattern in turn and returns
//! the first successful match.
4//!
5//! # Examples
6//!
7//! ```rust
8//! use diligent_date_parser::parse_date;
9//! use diligent_date_parser::chrono::prelude::*;
10//! use diligent_date_parser::chrono::offset::FixedOffset;
11//!
//! assert_eq!(
//!     parse_date("Mon, 2 Jan 2006 15:04:05 MST"),
//!     Some(
//!         FixedOffset::west_opt(7 * 3600)
//!             .unwrap()
//!             .with_ymd_and_hms(2006, 1, 2, 15, 4, 5)
//!             .unwrap()
//!     ),
//! );
//! assert_eq!(
//!     parse_date("Apr 21 2016"),
//!     Some(Utc.with_ymd_and_hms(2016, 4, 21, 0, 0, 0).unwrap().into()),
//! );
//! assert_eq!(
//!     parse_date("Sun Dec 24 13:19:25 +0200 2017"),
//!     Some(Utc.with_ymd_and_hms(2017, 12, 24, 11, 19, 25).unwrap().into()),
//! );
24//! assert_eq!(
25//!     parse_date("Yesterday"),
26//!     None,
27//! );
28//! ```
29
30pub use chrono;
31use chrono::prelude::*;
32pub use chrono::{offset::FixedOffset, DateTime};
33use std::convert::AsRef;
34
/// Returns the first `len` bytes of `string` as a sub-slice.
///
/// Yields `None` when `string` is shorter than `len` bytes, or when byte
/// offset `len` falls in the middle of a multi-byte UTF-8 character.
fn cut(string: &str, len: usize) -> Option<&str> {
    // `str::get` performs both the bounds check and the char-boundary check.
    string.get(..len)
}
42
/// Returns a newly allocated `String` holding `string` followed by `suffix`.
///
/// `suffix` may be any string slice; it does not have to be `'static`.
fn suffix(string: &str, suffix: &str) -> String {
    // Size the buffer up front so the two appends never reallocate.
    let mut joined = String::with_capacity(string.len() + suffix.len());
    joined.push_str(string);
    joined.push_str(suffix);
    joined
}
46
47fn rfc3339<T: AsRef<str>>(string: T) -> Option<DateTime<FixedOffset>> {
48    DateTime::parse_from_rfc3339(string.as_ref()).ok()
49}
50
51fn rfc2822<T: AsRef<str>>(string: T) -> Option<DateTime<FixedOffset>> {
52    DateTime::parse_from_rfc2822(string.as_ref()).ok()
53}
54
55fn utc_datetime(string: &str, format: &str) -> Option<DateTime<FixedOffset>> {
56    NaiveDateTime::parse_from_str(string, format)
57        .map(|d| Utc.from_utc_datetime(&d))
58        .ok()
59        .map(|d: DateTime<Utc>| d.into())
60}
61
62fn utc_date(string: &str, format: &str) -> Option<DateTime<FixedOffset>> {
63    let date = NaiveDate::parse_from_str(string, format).ok()?;
64    let time = NaiveTime::from_hms_opt(0, 0, 0)?;
65    let datetime = NaiveDateTime::new(date, time);
66    Some(Utc.from_utc_datetime(&datetime).into())
67}
68
69/// Parses a string using multiple formats
70///
71/// # Example
72///
73/// ```rust
74/// # use diligent_date_parser::parse_date;
75/// # use diligent_date_parser::chrono::prelude::*;
76/// # use diligent_date_parser::chrono::offset::FixedOffset;
77/// let datetime = parse_date("Mon, 2 Jan 2006 15:04:05 MST");
78/// let expected = FixedOffset::west(7 * 3600).ymd(2006, 1, 2).and_hms(15, 4, 5);
79/// assert_eq!(datetime, Some(expected));
80/// ```
81pub fn parse_date(string: &str) -> Option<DateTime<FixedOffset>> {
82    let trimmed = string.trim();
83    None.or_else(|| rfc3339(trimmed))
84        .or_else(|| DateTime::parse_from_str(trimmed, "%Y-%m-%d %H:%M:%S%.3f %z").ok())
85        .or_else(|| utc_datetime(trimmed, "%Y-%m-%d %H:%M:%S%.3f"))
86        .or_else(|| cut(trimmed, 20).and_then(rfc3339))
87        .or_else(|| cut(trimmed, 19).map(|s| suffix(s, "Z")).and_then(rfc3339))
88        .or_else(|| {
89            cut(trimmed, 16)
90                .map(|s| suffix(s, ":00Z"))
91                .and_then(rfc3339)
92        })
93        .or_else(|| {
94            cut(trimmed, 13)
95                .map(|s| suffix(s, ":00:00Z"))
96                .and_then(rfc3339)
97        })
98        .or_else(|| {
99            cut(trimmed, 10)
100                .map(|s| suffix(s, "T00:00:00Z"))
101                .and_then(rfc3339)
102        })
103        .or_else(|| rfc2822(trimmed))
104        .or_else(|| rfc2822(suffix(trimmed, " +0000")))
105        .or_else(|| rfc2822(suffix(trimmed, ":00 +0000")))
106        .or_else(|| rfc2822(suffix(trimmed, ":00:00 +0000")))
107        .or_else(|| rfc2822(suffix(trimmed, " 00:00:00 +0000")))
108        .or_else(|| DateTime::parse_from_str(trimmed, "%a %b %d %H:%M:%S %z %Y").ok()) // twitter's format
109        .or_else(|| utc_date(trimmed, "%b %d %Y"))
110        .or_else(|| utc_date(trimmed, "%b %e %Y"))
111        .or_else(|| utc_date(trimmed, "%B %d %Y"))
112        .or_else(|| utc_date(trimmed, "%B %e %Y"))
113        .or_else(|| utc_date(trimmed, "%b %d, %Y"))
114        .or_else(|| utc_date(trimmed, "%b %e, %Y"))
115        .or_else(|| utc_date(trimmed, "%B %d, %Y"))
116        .or_else(|| utc_date(trimmed, "%B %e, %Y"))
117        .or_else(|| utc_date(trimmed, "%m/%d/%Y"))
118        .or_else(|| utc_date(trimmed, "%d.%m.%Y"))
119}
120
#[cfg(test)]
mod test {
    use super::*;
    use chrono::Duration;

    /// Builds the expected value for a moment given in UTC.
    fn utc(year: i32, mon: u32, day: u32, hour: u32, min: u32, sec: u32) -> DateTime<FixedOffset> {
        Utc.with_ymd_and_hms(year, mon, day, hour, min, sec)
            .unwrap()
            .into()
    }

    /// Asserts that every `(input, expected)` pair parses to `expected`,
    /// naming the offending input when an assertion fails.
    fn assert_all_parse(cases: &[(&str, DateTime<FixedOffset>)]) {
        for (input, expected) in cases {
            assert_eq!(
                parse_date(input).as_ref(),
                Some(expected),
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_parse_date() {
        // `T`-separated timestamps, complete and truncated.
        assert_all_parse(&[
            ("2011-11-17T08:00:00-08:00", utc(2011, 11, 17, 16, 0, 0)),
            (
                "2011-11-17T08:00:00-08:00",
                FixedOffset::west_opt(8 * 3600)
                    .unwrap()
                    .with_ymd_and_hms(2011, 11, 17, 8, 0, 0)
                    .unwrap(),
            ),
            ("2011-11-23T18:12:20Z", utc(2011, 11, 23, 18, 12, 20)),
            ("2011-12-10T14:32:42+00:00", utc(2011, 12, 10, 14, 32, 42)),
            (
                "2010-02-17T00:00:00ZT00:00:00-08:00",
                utc(2010, 2, 17, 0, 0, 0),
            ),
            ("2010-12-21T19:57:37+00:00", utc(2010, 12, 21, 19, 57, 37)),
            ("2012-02-14T17:58:00-08:00", utc(2012, 2, 15, 1, 58, 0)),
            ("2012-02-15T12:24:00+02:00", utc(2012, 2, 15, 10, 24, 0)),
            (
                "2013-03-20T10:46:37.600732+02:00",
                utc(2013, 3, 20, 8, 46, 37) + Duration::microseconds(600732),
            ),
            (
                "2013-03-20T14:00:00.000000+02:00",
                utc(2013, 3, 20, 12, 0, 0),
            ),
            (
                "2013-10-21T18:23:10.394069+03:00",
                utc(2013, 10, 21, 15, 23, 10) + Duration::microseconds(394069),
            ),
            ("2014-01-08T01:18:21", utc(2014, 1, 8, 1, 18, 21)),
            ("2014-01-07T20:45", utc(2014, 1, 7, 20, 45, 0)),
            ("2014-01-08T13", utc(2014, 1, 8, 13, 0, 0)),
            ("2014-01-11", utc(2014, 1, 11, 0, 0, 0)),
        ]);

        // Space-separated date and time, with surrounding whitespace.
        assert_all_parse(&[
            ("2014-01-11 01:18:21 +0000", utc(2014, 1, 11, 1, 18, 21)),
            (
                "2014-01-11 01:18:21 +0100",
                FixedOffset::east_opt(3600)
                    .unwrap()
                    .with_ymd_and_hms(2014, 1, 11, 1, 18, 21)
                    .unwrap(),
            ),
            (" 2014-01-11 01:18:21 ", utc(2014, 1, 11, 1, 18, 21)),
            (
                " 2014-01-11 01:18:21.125 ",
                utc(2014, 1, 11, 1, 18, 21) + Duration::milliseconds(125),
            ),
        ]);

        // Mail-style dates with numeric and named offsets.
        assert_all_parse(&[
            ("Fri, 12 Feb 2016 14:08:24 +0000", utc(2016, 2, 12, 14, 8, 24)),
            ("Fri, 13 Aug 2010 00:49:00 +0700", utc(2010, 8, 12, 17, 49, 0)),
            ("Fri, 13 Jul 2012 07:13:31 -0600", utc(2012, 7, 13, 13, 13, 31)),
            ("Fri, 14 Dec 2012 04:00:00 -0800", utc(2012, 12, 14, 12, 0, 0)),
            ("Fri, 14 Jun 2013 05:00:00 -0700", utc(2013, 6, 14, 12, 0, 0)),
            ("Fri, 14 Nov 2014 17:16:12 PST", utc(2014, 11, 15, 1, 16, 12)),
            ("Fri, 14 Oct 2011 04:01:47 +0000", utc(2011, 10, 14, 4, 1, 47)),
            ("Fri, 15 Apr 2016 00:00:00 +0200", utc(2016, 4, 14, 22, 0, 0)),
            ("Fri, 15 Apr 2016 23:02:22 GMT", utc(2016, 4, 15, 23, 2, 22)),
            ("Fri, 15 Mar 2013 07:27:18 +0000", utc(2013, 3, 15, 7, 27, 18)),
            ("Fri, 16 May 2014 02:13:00 PDT", utc(2014, 5, 16, 9, 13, 0)),
            ("Tue, 3 Jul 2012 23:02:36 +0400", utc(2012, 7, 3, 19, 2, 36)),
            ("Tue,  3  Jul 2012 23:02:36 +0400", utc(2012, 7, 3, 19, 2, 36)),
        ]);

        // Mail-style dates with the offset or parts of the time left out.
        assert_all_parse(&[
            ("Tue, 3 Jul 2012 23:02:36", utc(2012, 7, 3, 23, 2, 36)),
            ("Tue, 3 Jul 2012 23:02", utc(2012, 7, 3, 23, 2, 0)),
            ("Tue, 3 Jul 2012 23", utc(2012, 7, 3, 23, 0, 0)),
            ("Tue, 3 Jul 2012", utc(2012, 7, 3, 0, 0, 0)),
            ("3 Jul 2012 23:02:36", utc(2012, 7, 3, 23, 2, 36)),
        ]);

        // Day-first dates without a time of day.
        assert_all_parse(&[
            ("14 Apr 2016", utc(2016, 4, 14, 0, 0, 0)),
            ("21 Apr 2016", utc(2016, 4, 21, 0, 0, 0)),
            ("28 Apr 2016", utc(2016, 4, 28, 0, 0, 0)),
            (" 7 Apr 2016", utc(2016, 4, 7, 0, 0, 0)),
        ]);

        // Month-first dates without a time of day.
        assert_all_parse(&[
            ("Apr 21 2016", utc(2016, 4, 21, 0, 0, 0)),
            (" Apr  1, 2016", utc(2016, 4, 1, 0, 0, 0)),
            ("  April 01, 2016", utc(2016, 4, 1, 0, 0, 0)),
        ]);

        // twitter
        assert_all_parse(&[
            ("Sun Dec 24 13:19:25 +0000 2017", utc(2017, 12, 24, 13, 19, 25)),
            ("Sun Dec 24 13:19:25 -0000 2017", utc(2017, 12, 24, 13, 19, 25)),
            ("Sun Dec 24 13:19:25 +0200 2017", utc(2017, 12, 24, 11, 19, 25)),
            ("Sun Dec 24 13:19:25 -0200 2017", utc(2017, 12, 24, 15, 19, 25)),
        ]);
    }

    #[test]
    fn test_parse_date_rejects_unrecognized_input() {
        assert_eq!(parse_date("Yesterday"), None);
    }
}