gix_date/
parse.rs

1use std::str::FromStr;
2
3use smallvec::SmallVec;
4
5use crate::Time;
6
7#[derive(thiserror::Error, Debug, Clone)]
8#[allow(missing_docs)]
9pub enum Error {
10    #[error("Could not convert a duration into a date")]
11    RelativeTimeConversion,
12    #[error("Date string can not be parsed")]
13    InvalidDateString { input: String },
14    #[error("The heat-death of the universe happens before this date")]
15    InvalidDate(#[from] std::num::TryFromIntError),
16    #[error("Current time is missing but required to handle relative dates.")]
17    MissingCurrentTime,
18}
19
20/// A container for just enough bytes to hold the largest-possible [`time`](Time) instance.
21/// It's used in conjunction with
22#[derive(Default, Clone)]
23pub struct TimeBuf {
24    buf: SmallVec<[u8; Time::MAX.size()]>,
25}
26
27impl TimeBuf {
28    /// Represent this instance as standard string, serialized in a format compatible with
29    /// signature fields in Git commits, also known as anything parseable as [raw format](function::parse_header()).
30    pub fn as_str(&self) -> &str {
31        // SAFETY: We know that serialized times are pure ASCII, a subset of UTF-8.
32        //         `buf` and `len` are written only by time-serialization code.
33        let time_bytes = self.buf.as_slice();
34        #[allow(unsafe_code)]
35        unsafe {
36            std::str::from_utf8_unchecked(time_bytes)
37        }
38    }
39
40    /// Clear the previous content.
41    fn clear(&mut self) {
42        self.buf.clear();
43    }
44}
45
46impl std::io::Write for TimeBuf {
47    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
48        self.buf.write(buf)
49    }
50
51    fn flush(&mut self) -> std::io::Result<()> {
52        self.buf.flush()
53    }
54}
55
56impl Time {
57    /// Serialize this instance into `buf`, exactly as it would appear in the header of a Git commit,
58    /// and return `buf` as `&str` for easy consumption.
59    pub fn to_str<'a>(&self, buf: &'a mut TimeBuf) -> &'a str {
60        buf.clear();
61        self.write_to(buf)
62            .expect("write to memory of just the right size cannot fail");
63        buf.as_str()
64    }
65}
66
67impl FromStr for Time {
68    type Err = Error;
69
70    fn from_str(s: &str) -> Result<Self, Self::Err> {
71        crate::parse_header(s).ok_or_else(|| Error::InvalidDateString { input: s.into() })
72    }
73}
74
75pub(crate) mod function {
76    use std::{str::FromStr, time::SystemTime};
77
78    use jiff::{civil::Date, fmt::rfc2822, tz::TimeZone, Zoned};
79
80    use crate::{
81        parse::{relative, Error},
82        time::format::{DEFAULT, GITOXIDE, ISO8601, ISO8601_STRICT, SHORT},
83        OffsetInSeconds, SecondsSinceUnixEpoch, Time,
84    };
85
86    /// Parse `input` as any time that Git can parse when inputting a date.
87    ///
88    /// ## Examples
89    ///
90    /// ### 1. SHORT Format
91    ///
92    /// *   `2018-12-24`
93    /// *   `1970-01-01`
94    /// *   `1950-12-31`
95    /// *   `2024-12-31`
96    ///
97    /// ### 2. RFC2822 Format
98    ///
99    /// *   `Thu, 18 Aug 2022 12:45:06 +0800`
100    /// *   `Mon Oct 27 10:30:00 2023 -0800`
101    ///
102    /// ### 3. GIT_RFC2822 Format
103    ///
104    /// *   `Thu, 8 Aug 2022 12:45:06 +0800`
105    /// *   `Mon Oct 27 10:30:00 2023 -0800` (Note the single-digit day)
106    ///
107    /// ### 4. ISO8601 Format
108    ///
109    /// *   `2022-08-17 22:04:58 +0200`
110    /// *   `1970-01-01 00:00:00 -0500`
111    ///
112    /// ### 5. ISO8601_STRICT Format
113    ///
114    /// *   `2022-08-17T21:43:13+08:00`
115    ///
116    /// ### 6. UNIX Timestamp (Seconds Since Epoch)
117    ///
118    /// *   `123456789`
119    /// *   `0` (January 1, 1970 UTC)
120    /// *   `-1000`
121    /// *   `1700000000`
122    ///
123    /// ### 7. Commit Header Format
124    ///
125    /// *   `1745582210 +0200`
126    /// *   `1660874655 +0800`
127    /// *   `-1660874655 +0800`
128    ///
129    /// See also the [`parse_header()`].
130    ///
131    /// ### 8. GITOXIDE Format
132    ///
133    /// *   `Thu Sep 04 2022 10:45:06 -0400`
134    /// *   `Mon Oct 27 2023 10:30:00 +0000`
135    ///
136    /// ### 9. DEFAULT Format
137    ///
138    /// *   `Thu Sep 4 10:45:06 2022 -0400`
139    /// *   `Mon Oct 27 10:30:00 2023 +0000`
140    ///
141    /// ### 10. Relative Dates (e.g., "2 minutes ago", "1 hour from now")
142    ///
143    /// These dates are parsed *relative to a `now` timestamp*. The examples depend entirely on the value of `now`.
144    /// If `now` is October 27, 2023 at 10:00:00 UTC:
145    ///     *   `2 minutes ago` (October 27, 2023 at 09:58:00 UTC)
146    ///     *   `3 hours ago` (October 27, 2023 at 07:00:00 UTC)
147    pub fn parse(input: &str, now: Option<SystemTime>) -> Result<Time, Error> {
148        Ok(if let Ok(val) = Date::strptime(SHORT.0, input) {
149            let val = val
150                .to_zoned(TimeZone::UTC)
151                .map_err(|_| Error::InvalidDateString { input: input.into() })?;
152            Time::new(val.timestamp().as_second(), val.offset().seconds())
153        } else if let Ok(val) = rfc2822_relaxed(input) {
154            Time::new(val.timestamp().as_second(), val.offset().seconds())
155        } else if let Ok(val) = strptime_relaxed(ISO8601.0, input) {
156            Time::new(val.timestamp().as_second(), val.offset().seconds())
157        } else if let Ok(val) = strptime_relaxed(ISO8601_STRICT.0, input) {
158            Time::new(val.timestamp().as_second(), val.offset().seconds())
159        } else if let Ok(val) = strptime_relaxed(GITOXIDE.0, input) {
160            Time::new(val.timestamp().as_second(), val.offset().seconds())
161        } else if let Ok(val) = strptime_relaxed(DEFAULT.0, input) {
162            Time::new(val.timestamp().as_second(), val.offset().seconds())
163        } else if let Ok(val) = SecondsSinceUnixEpoch::from_str(input) {
164            // Format::Unix
165            Time::new(val, 0)
166        } else if let Some(val) = relative::parse(input, now).transpose()? {
167            Time::new(val.timestamp().as_second(), val.offset().seconds())
168        } else if let Some(val) = parse_raw(input) {
169            // Format::Raw
170            val
171        } else {
172            return Err(Error::InvalidDateString { input: input.into() });
173        })
174    }
175
176    /// Unlike [`parse()`] which handles all kinds of input, this function only parses the commit-header format
177    /// like `1745582210 +0200`.
178    ///
179    /// Note that failure to parse the time zone isn't fatal, instead it will default to `0`. To know if
180    /// the time is wonky, serialize the return value to see if it matches the `input.`
181    pub fn parse_header(input: &str) -> Option<Time> {
182        pub enum Sign {
183            Plus,
184            Minus,
185        }
186        fn parse_offset(offset: &str) -> Option<OffsetInSeconds> {
187            if (offset.len() != 5) && (offset.len() != 7) {
188                return None;
189            }
190            let sign = match offset.get(..1)? {
191                "-" => Some(Sign::Minus),
192                "+" => Some(Sign::Plus),
193                _ => None,
194            }?;
195            if offset.as_bytes().get(1).is_some_and(|b| !b.is_ascii_digit()) {
196                return None;
197            }
198            let hours: i32 = offset.get(1..3)?.parse().ok()?;
199            let minutes: i32 = offset.get(3..5)?.parse().ok()?;
200            let offset_seconds: i32 = if offset.len() == 7 {
201                offset.get(5..7)?.parse().ok()?
202            } else {
203                0
204            };
205            let mut offset_in_seconds = hours * 3600 + minutes * 60 + offset_seconds;
206            if matches!(sign, Sign::Minus) {
207                offset_in_seconds *= -1;
208            }
209            Some(offset_in_seconds)
210        }
211
212        if input.contains(':') {
213            return None;
214        }
215        let mut split = input.split_whitespace();
216        let seconds = split.next()?;
217        let seconds = match seconds.parse::<SecondsSinceUnixEpoch>() {
218            Ok(s) => s,
219            Err(_err) => {
220                // Inefficient, but it's not the common case.
221                let first_digits: String = seconds.chars().take_while(char::is_ascii_digit).collect();
222                first_digits.parse().ok()?
223            }
224        };
225        let offset = match split.next() {
226            None => 0,
227            Some(offset) => {
228                if split.next().is_some() {
229                    0
230                } else {
231                    parse_offset(offset).unwrap_or_default()
232                }
233            }
234        };
235        let time = Time { seconds, offset };
236        Some(time)
237    }
238
239    /// Strictly parse the raw commit header format like `1745582210 +0200`.
240    ///
241    /// Some strict rules include:
242    ///
243    /// - The timezone offset must be present.
244    /// - The timezone offset must have a sign; either `+` or `-`.
245    /// - The timezone offset hours must be less than or equal to 14.
246    /// - The timezone offset minutes must be exactly 0, 15, 30, or 45.
247    /// - The timezone offset seconds may be present, but 0 is the only valid value.
248    /// - Only whitespace may suffix the timezone offset.
249    ///
250    /// But this function isn't perfectly strict insofar as it allows arbitrary
251    /// whitespace before and after the seconds and offset components.
252    ///
253    /// The goal is to only accept inputs that _unambiguously_ look like
254    /// git's raw date format.
255    fn parse_raw(input: &str) -> Option<Time> {
256        let mut split = input.split_whitespace();
257        let seconds = split.next()?.parse::<SecondsSinceUnixEpoch>().ok()?;
258        let offset_str = split.next()?;
259        if split.next().is_some() {
260            return None;
261        }
262        let offset_len = offset_str.len();
263        if offset_len != 5 && offset_len != 7 {
264            return None;
265        }
266        let sign: i32 = match offset_str.get(..1)? {
267            "-" => Some(-1),
268            "+" => Some(1),
269            _ => None,
270        }?;
271        let hours: u8 = offset_str.get(1..3)?.parse().ok()?;
272        let minutes: u8 = offset_str.get(3..5)?.parse().ok()?;
273        let offset_seconds: u8 = if offset_len == 7 {
274            offset_str.get(5..7)?.parse().ok()?
275        } else {
276            0
277        };
278        if hours > 14 || (minutes != 0 && minutes != 15 && minutes != 30 && minutes != 45) || offset_seconds != 0 {
279            return None;
280        }
281        let offset: i32 = sign * ((hours as i32) * 3600 + (minutes as i32) * 60);
282        Time { seconds, offset }.into()
283    }
284
285    /// This is just like `Zoned::strptime`, but it allows parsing datetimes
286    /// whose weekdays are inconsistent with the date. While the day-of-week
287    /// still must be parsed, it is otherwise ignored. This seems to be
288    /// consistent with how `git` behaves.
289    fn strptime_relaxed(fmt: &str, input: &str) -> Result<Zoned, jiff::Error> {
290        let mut tm = jiff::fmt::strtime::parse(fmt, input)?;
291        tm.set_weekday(None);
292        tm.to_zoned()
293    }
294
295    /// This is just like strptime_relaxed, except for RFC 2822 parsing.
296    /// Namely, it permits the weekday to be inconsistent with the date.
297    fn rfc2822_relaxed(input: &str) -> Result<Zoned, jiff::Error> {
298        static P: rfc2822::DateTimeParser = rfc2822::DateTimeParser::new().relaxed_weekday(true);
299        P.parse_zoned(input)
300    }
301
302    #[cfg(test)]
303    mod tests {
304        use super::*;
305
306        #[test]
307        fn parse_raw_valid() {
308            // These examples show how it's more loose than it has to be,
309            // merely as a side effect of the implementation.
310            for (valid, expected_seconds, expected_offset) in [
311                ("12345 +0000", 12345, 0),
312                ("-1234567 +0000", -1234567, 0),
313                ("+1234567 -000000", 1234567, 0),
314                ("   +0    -000000    ", 0, 0),
315                ("\t-0\t-0000\t", 0, 0),
316                ("\n-0\r\n-0000\n", 0, 0),
317            ] {
318                assert_eq!(
319                    parse_raw(valid),
320                    Some(Time {
321                        seconds: expected_seconds,
322                        offset: expected_offset
323                    }),
324                    "should succeed: '{valid}'"
325                );
326            }
327        }
328
329        #[test]
330        fn parse_raw_invalid() {
331            for (bad_date_str, message) in [
332                ("123456 !0600", "invalid sign - must be + or -"),
333                ("123456 0600", "missing offset sign"),
334                ("123456 +060", "positive offset too short"),
335                ("123456 -060", "negative offset too short"),
336                ("123456 +06000", "not enough offset seconds"),
337                ("123456 --060", "duplicate offset sign with correct offset length"),
338                ("123456 -+060", "multiple offset signs with correct offset length"),
339                ("123456 --0600", "multiple offset signs, but incorrect offset length"),
340                ("123456 +-06000", "multiple offset signs with correct offset length"),
341                ("123456 +-0600", "multiple offset signs with incorrect offset length"),
342                ("123456 +-060", "multiple offset signs with correct offset length"),
343                ("123456 +10030", "invalid offset length with one 'second' field"),
344                ("123456 06000", "invalid offset length, missing sign"),
345                ("123456 +0600 extra", "extra field past offset"),
346                ("123456 +0600 2005", "extra field past offset that looks like year"),
347                ("123456+0600", "missing space between unix timestamp and offset"),
348                (
349                    "123456 + 600",
350                    "extra spaces between sign and offset (which also is too short)",
351                ),
352                ("123456 -1500", "negative offset hours out of bounds"),
353                ("123456 +1500", "positive offset hours out of bounds"),
354                ("123456 +6600", "positive offset hours out of bounds"),
355                ("123456 +0660", "invalid offset minutes"),
356                ("123456 +060010", "positive offset seconds is allowed but only if zero"),
357                ("123456 -060010", "negative offset seconds is allowed but only if zero"),
358                ("123456 +0075", "positive offset minutes invalid"),
359                ("++123456 +0000", "duplicate timestamp sign"),
360                ("--123456 +0000", "duplicate timestamp sign"),
361                ("1234567 -+1+1+0", "unsigned offset parsing rejects '+'"),
362            ] {
363                assert!(
364                    parse_raw(bad_date_str).is_none(),
365                    "should fail: '{bad_date_str}': {message}"
366                );
367            }
368        }
369    }
370}
371
372mod relative {
373    use std::{str::FromStr, time::SystemTime};
374
375    use jiff::{tz::TimeZone, Span, Timestamp, Zoned};
376
377    use crate::parse::Error;
378
379    fn parse_inner(input: &str) -> Option<Result<Span, Error>> {
380        let mut split = input.split_whitespace();
381        let units = i64::from_str(split.next()?).ok()?;
382        let period = split.next()?;
383        if split.next()? != "ago" {
384            return None;
385        }
386        span(period, units)
387    }
388
389    pub(crate) fn parse(input: &str, now: Option<SystemTime>) -> Option<Result<Zoned, Error>> {
390        parse_inner(input).map(|result| {
391            let span = result?;
392            // This was an error case in a previous version of this code, where
393            // it would fail when converting from a negative signed integer
394            // to an unsigned integer. This preserves that failure case even
395            // though the code below handles it okay.
396            if span.is_negative() {
397                return Err(Error::RelativeTimeConversion);
398            }
399            now.ok_or(Error::MissingCurrentTime).and_then(|now| {
400                let ts = Timestamp::try_from(now).map_err(|_| Error::RelativeTimeConversion)?;
401                // N.B. This matches the behavior of this code when it was
402                // written with `time`, but we might consider using the system
403                // time zone here. If we did, then it would implement "1 day
404                // ago" correctly, even when it crosses DST transitions. Since
405                // we're in the UTC time zone here, which has no DST, 1 day is
406                // in practice always 24 hours. ---AG
407                let zdt = ts.to_zoned(TimeZone::UTC);
408                zdt.checked_sub(span).map_err(|_| Error::RelativeTimeConversion)
409            })
410        })
411    }
412
413    fn span(period: &str, units: i64) -> Option<Result<Span, Error>> {
414        let period = period.strip_suffix('s').unwrap_or(period);
415        let result = match period {
416            "second" => Span::new().try_seconds(units),
417            "minute" => Span::new().try_minutes(units),
418            "hour" => Span::new().try_hours(units),
419            "day" => Span::new().try_days(units),
420            "week" => Span::new().try_weeks(units),
421            "month" => Span::new().try_months(units),
422            "year" => Span::new().try_years(units),
423            // Ignore values you don't know, assume seconds then (so does git)
424            _anything => Span::new().try_seconds(units),
425        };
426        Some(result.map_err(|_| Error::RelativeTimeConversion))
427    }
428
429    #[cfg(test)]
430    mod tests {
431        use super::*;
432
433        #[test]
434        fn two_weeks_ago() {
435            let actual = parse_inner("2 weeks ago").unwrap().unwrap();
436            assert_eq!(actual.fieldwise(), Span::new().weeks(2));
437        }
438    }
439}