Skip to main content

grit_lib/git_date/
parse.rs

1//! Git-compatible date parsing (`parse_date_basic`, `parse_date`) — ported from Git `date.c`.
2
3use super::tm::{
4    get_time_sec, init_tm_unknown, is_date_known, match_string, maybeiso8601, nodate,
5    parse_timestamp_prefix, skip_alpha, tm_to_time_t, TIMESTAMP_MAX,
6};
7use libc::{time_t, tm};
8use std::mem::MaybeUninit;
9
/// One row of the timezone-abbreviation table scanned by `match_alpha`.
struct TzName {
    /// Abbreviation as it appears in a date string.
    name: &'static str,
    /// Base offset from UTC, in whole hours.
    offset_hours: i32,
    /// `1` for a daylight-saving abbreviation, `0` otherwise; `match_alpha` adds it to
    /// `offset_hours` before converting to minutes.
    dst: i32,
}

/// Builds a table row; exists only so the table below stays at one line per zone.
const fn tz_entry(name: &'static str, offset_hours: i32, dst: i32) -> TzName {
    TzName {
        name,
        offset_hours,
        dst,
    }
}

/// Known timezone abbreviations, in lookup order (the first matching row wins).
const TIMEZONE_NAMES: &[TzName] = &[
    tz_entry("IDLW", -12, 0),
    tz_entry("NT", -11, 0),
    tz_entry("CAT", -10, 0),
    tz_entry("HST", -10, 0),
    tz_entry("HDT", -10, 1),
    tz_entry("YST", -9, 0),
    tz_entry("YDT", -9, 1),
    tz_entry("PST", -8, 0),
    tz_entry("PDT", -8, 1),
    tz_entry("MST", -7, 0),
    tz_entry("MDT", -7, 1),
    tz_entry("CST", -6, 0),
    tz_entry("CDT", -6, 1),
    tz_entry("EST", -5, 0),
    tz_entry("EDT", -5, 1),
    tz_entry("AST", -3, 0),
    tz_entry("ADT", -3, 1),
    tz_entry("WAT", -1, 0),
    tz_entry("GMT", 0, 0),
    tz_entry("UTC", 0, 0),
    tz_entry("Z", 0, 0),
    tz_entry("WET", 0, 0),
    tz_entry("BST", 0, 1),
    tz_entry("CET", 1, 0),
    tz_entry("MET", 1, 0),
    tz_entry("MEWT", 1, 0),
    tz_entry("MEST", 1, 1),
    tz_entry("CEST", 1, 1),
    tz_entry("MESZ", 1, 1),
    tz_entry("FWT", 1, 0),
    tz_entry("FST", 1, 1),
    tz_entry("EET", 2, 0),
    tz_entry("EEST", 2, 1),
    tz_entry("WAST", 7, 0),
    tz_entry("WADT", 7, 1),
    tz_entry("CCT", 8, 0),
    tz_entry("JST", 9, 0),
    tz_entry("EAST", 10, 0),
    tz_entry("EADT", 10, 1),
    tz_entry("GST", 10, 0),
    tz_entry("NZT", 12, 0),
    tz_entry("NZST", 12, 0),
    tz_entry("NZDT", 12, 1),
    tz_entry("IDLE", 12, 0),
];
238
/// English month names in calendar order.
///
/// `match_alpha` stores the index of the matching entry directly in `tm_mon`, so the
/// position of each name is significant (`"January"` is `0`).
pub(crate) const MONTH_NAMES: [&str; 12] = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
];
253
/// English weekday names, Sunday first, spelled in the plural.
///
/// `match_alpha` stores the index of the matching entry directly in `tm_wday`, so the
/// position of each name is significant (`"Sundays"` is `0`). The trailing `s` is part of
/// the table data; do not "correct" it.
pub(crate) const WEEKDAY_NAMES: [&str; 7] = [
    "Sundays",
    "Mondays",
    "Tuesdays",
    "Wednesdays",
    "Thursdays",
    "Fridays",
    "Saturdays",
];
263
/// Format a parsed instant like Git's `date_string`: `"<date> <sign><hh><mm>"`.
///
/// `date` is printed verbatim. `offset` is a signed number of minutes; its magnitude is
/// split into hours and minutes, each zero-padded to at least two digits, and prefixed
/// with `+` (zero or positive) or `-` (negative).
pub fn date_string(date: u64, offset: i32) -> String {
    let sign = if offset < 0 { '-' } else { '+' };
    // `unsigned_abs` instead of negation: `-i32::MIN` does not fit in an `i32` and would
    // panic in debug builds (and wrap back to a negative value in release builds).
    let magnitude = offset.unsigned_abs();
    format!("{} {}{:02}{:02}", date, sign, magnitude / 60, magnitude % 60)
}
274
275/// Git `parse_date` — returns canonical `date_string` output.
276pub fn parse_date(date: &str) -> Result<String, ()> {
277    let (ts, off) = parse_date_basic(date)?;
278    Ok(date_string(ts, off))
279}
280
281/// Git `parse_date_basic` — UTC seconds and timezone offset in **minutes** (signed).
282pub fn parse_date_basic(date: &str) -> Result<(u64, i32), ()> {
283    let bytes = date.as_bytes();
284    let mut tm = init_tm_unknown();
285    let mut offset: i32 = -1;
286    let mut tm_gmt = 0i32;
287    let mut i = 0usize;
288
289    if bytes.first() == Some(&b'@') {
290        if let Some((ts, off)) = match_object_header_date(&bytes[1..]) {
291            return Ok((ts, off));
292        }
293    }
294
295    while i < bytes.len() {
296        let c = bytes[i];
297        if c == 0 || c == b'\n' {
298            break;
299        }
300        let mut m = 0usize;
301        if c.is_ascii_alphabetic() {
302            m = match_alpha(&bytes[i..], &mut tm, &mut offset);
303        } else if c.is_ascii_digit() {
304            m = match_digit(&bytes[i..], &mut tm, &mut offset, &mut tm_gmt);
305        } else if (c == b'-' || c == b'+') && bytes.get(i + 1).is_some_and(|x| x.is_ascii_digit()) {
306            m = match_tz(&bytes[i..], &mut offset);
307        }
308        if m == 0 {
309            m = 1;
310        }
311        i += m;
312    }
313
314    let tts = tm_to_time_t(&tm);
315    if tts < 0 {
316        return Err(());
317    }
318    let mut ts = tts as u64;
319
320    if offset == -1 {
321        tm.tm_isdst = -1;
322        let temp_time = unsafe { libc::mktime(&mut tm) };
323        let tt = ts as i128;
324        let tloc = temp_time as i128;
325        offset = if tt > tloc {
326            ((tt - tloc) / 60) as i32
327        } else {
328            -(((tloc - tt) / 60) as i32)
329        };
330    }
331
332    if tm_gmt == 0 {
333        if offset > 0 && (offset as i64) * 60 > ts as i64 {
334            return Err(());
335        }
336        if offset < 0 && (-(offset as i128)) * 60 > (TIMESTAMP_MAX as i128 - ts as i128) {
337            return Err(());
338        }
339        // Git: *timestamp -= *offset * 60 (signed; negative offset adds to the instant).
340        let ts128 = ts as i128;
341        let adj = (offset as i128) * 60;
342        let new_ts = ts128 - adj;
343        if new_ts < 0 {
344            return Err(());
345        }
346        ts = new_ts as u64;
347    }
348
349    Ok((ts, offset))
350}
351
352fn match_object_header_date(date: &[u8]) -> Option<(u64, i32)> {
353    if date.is_empty() || !date[0].is_ascii_digit() {
354        return None;
355    }
356    let (stamp, mut rest) = parse_timestamp_prefix(date);
357    if rest >= date.len() || date[rest] != b' ' {
358        return None;
359    }
360    if stamp == u64::MAX {
361        return None;
362    }
363    rest += 1;
364    if rest >= date.len() || (date[rest] != b'+' && date[rest] != b'-') {
365        return None;
366    }
367    let sign = date[rest];
368    rest += 1;
369    if rest + 4 > date.len() {
370        return None;
371    }
372    let tz_digits = std::str::from_utf8(&date[rest..rest + 4]).ok()?;
373    let ofs_raw: i32 = tz_digits.parse().ok()?;
374    let mut ofs = (ofs_raw / 100) * 60 + (ofs_raw % 100);
375    if sign == b'-' {
376        ofs = -ofs;
377    }
378    let end = rest + 4;
379    if end < date.len() && date[end] != b'\n' && date[end] != 0 {
380        return None;
381    }
382    Some((stamp, ofs))
383}
384
385/// Git `match_tz` — writes offset in minutes; returns bytes consumed.
386fn match_tz(date: &[u8], offp: &mut i32) -> usize {
387    if date.is_empty() || (date[0] != b'+' && date[0] != b'-') {
388        return 0;
389    }
390    let (hour_ul, n) = parse_timestamp_prefix(&date[1..]);
391    let mut end = 1 + n;
392    let mut min: i32 = 0;
393    let mut hour: i32 = hour_ul as i32;
394    if n == 4 {
395        min = hour % 100;
396        hour /= 100;
397    } else if n != 2 {
398        min = 99;
399    } else if end < date.len() && date[end] == b':' {
400        let (m2, n2) = parse_timestamp_prefix(&date[end + 1..]);
401        if n2 == 0 {
402            min = 99;
403        } else {
404            min = m2 as i32;
405            end += 1 + n2;
406            if end - 1 != 5 {
407                min = 99;
408            }
409        }
410    }
411    if min < 60 && hour < 24 {
412        let mut off = hour * 60 + min;
413        if date[0] == b'-' {
414            off = -off;
415        }
416        *offp = off;
417    }
418    end
419}
420
/// Git `strtol` for a leading signed decimal slice (`end+1` style).
///
/// Parses an optional `+`/`-` followed by ASCII digits at the start of `s` and returns
/// `(value, bytes_consumed)`, where `bytes_consumed` includes the sign. `(0, 0)` means
/// "no number here" (empty input, or no digit after the optional sign).
///
/// Like `strtol`, a value that does not fit saturates to `i64::MAX` / `i64::MIN` and the
/// whole digit run is still reported as consumed, so callers never mistake an over-long
/// number for a missing one and never resume parsing in the middle of its digits.
fn parse_long_prefix(s: &[u8]) -> (i64, usize) {
    let (negative, sign_len) = match s.first() {
        Some(b'-') => (true, 1),
        Some(b'+') => (false, 1),
        _ => (false, 0),
    };
    let digit_len = s[sign_len..]
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    if digit_len == 0 {
        return (0, 0);
    }
    let end = sign_len + digit_len;

    // The slice is non-empty and all ASCII digits, so the only way parsing can fail is
    // overflow of the magnitude.
    let magnitude = std::str::from_utf8(&s[sign_len..end])
        .ok()
        .and_then(|text| text.parse::<i64>().ok());
    let value = match (magnitude, negative) {
        (Some(v), false) => v,
        (Some(v), true) => -v,
        (None, false) => i64::MAX,
        (None, true) => i64::MIN,
    };
    (value, end)
}
447
448fn parse_uint_suffix(s: &[u8]) -> (u64, usize) {
449    parse_timestamp_prefix(s)
450}
451
452/// Git `set_date` — `0` ok, `1` reject try, `-1` error.
453fn set_date(year: i32, month: i32, day: i32, now_tm: Option<&tm>, now: i64, tm: &mut tm) -> i32 {
454    if !(month > 0 && month < 13 && day > 0 && day < 32) {
455        return -1;
456    }
457    if now_tm.is_none() {
458        tm.tm_mon = month - 1;
459        tm.tm_mday = day;
460        if year == -1 {
461            return 1;
462        }
463        if (1970..2100).contains(&year) {
464            tm.tm_year = year - 1900;
465        } else if (70..100).contains(&year) {
466            tm.tm_year = year;
467        } else if year < 38 {
468            tm.tm_year = year + 100;
469        } else {
470            return -1;
471        }
472        return 0;
473    }
474    let nt = now_tm.unwrap();
475    let mut check = *tm;
476    check.tm_mon = month - 1;
477    check.tm_mday = day;
478    if year == -1 {
479        check.tm_year = nt.tm_year;
480    } else if (1970..2100).contains(&year) {
481        check.tm_year = year - 1900;
482    } else if (70..100).contains(&year) {
483        check.tm_year = year;
484    } else if year < 38 {
485        check.tm_year = year + 100;
486    } else {
487        return -1;
488    }
489    let specified = tm_to_time_t(&check);
490    if specified >= 0 && now + 10 * 24 * 3600 < specified {
491        return -1;
492    }
493    tm.tm_mon = check.tm_mon;
494    tm.tm_mday = check.tm_mday;
495    if year != -1 {
496        tm.tm_year = check.tm_year;
497    }
498    0
499}
500
501fn set_time(hour: i64, minute: i64, second: i64, tm: &mut tm) -> i32 {
502    if (0..=24).contains(&hour) && (0..60).contains(&minute) && (0..=60).contains(&second) {
503        tm.tm_hour = hour as i32;
504        tm.tm_min = minute as i32;
505        tm.tm_sec = second as i32;
506        0
507    } else {
508        -1
509    }
510}
511
512/// Git `match_multi_number` — `sep_i` is index of separator in `date`; returns bytes consumed from `date` start.
513pub(crate) fn match_multi_number(
514    num: u64,
515    date: &[u8],
516    sep_i: usize,
517    tm: &mut tm,
518    now_in: i64,
519) -> usize {
520    let Some(&c) = date.get(sep_i) else {
521        return 0;
522    };
523    if !matches!(c, b':' | b'-' | b'/' | b'.') {
524        return 0;
525    }
526
527    let (num2, n2) = parse_long_prefix(&date[sep_i + 1..]);
528    if n2 == 0 {
529        return 0;
530    }
531    let mut pos = sep_i + 1 + n2;
532    let mut num3: i64 = -1;
533    if pos < date.len() && date[pos] == c && pos + 1 < date.len() && date[pos + 1].is_ascii_digit()
534    {
535        let (n3, rel) = parse_long_prefix(&date[pos + 1..]);
536        num3 = n3;
537        pos += 1 + rel;
538    }
539
540    match c {
541        b':' => {
542            let mut n3 = num3;
543            if n3 < 0 {
544                n3 = 0;
545            }
546            if set_time(num as i64, num2, n3, tm) == 0 {
547                if pos < date.len()
548                    && date[pos] == b'.'
549                    && pos + 1 < date.len()
550                    && date[pos + 1].is_ascii_digit()
551                    && is_date_known(tm)
552                {
553                    let (_, rel) = parse_long_prefix(&date[pos + 1..]);
554                    pos += 1 + rel;
555                }
556            } else {
557                return 0;
558            }
559        }
560        b'-' | b'/' | b'.' => {
561            let now = if now_in == 0 { get_time_sec() } else { now_in };
562            let mut now_tm_uninit = MaybeUninit::<tm>::uninit();
563            let refuse_future: Option<&tm> = unsafe {
564                let tt = now as time_t;
565                let p = libc::gmtime_r(&tt, now_tm_uninit.as_mut_ptr());
566                if p.is_null() {
567                    None
568                } else {
569                    Some(&*now_tm_uninit.as_ptr())
570                }
571            };
572
573            let y = num as i32;
574            let m = num2 as i32;
575            let d = if num3 < 0 { 0 } else { num3 as i32 };
576
577            if num > 70 {
578                if set_date(y, m, d, None, now, tm) == 0 {
579                    return pos;
580                }
581                if set_date(y, d, m, None, now, tm) == 0 {
582                    return pos;
583                }
584            }
585            if c != b'.' && set_date(d, y, m, refuse_future, now, tm) == 0 {
586                return pos;
587            }
588            if set_date(d, m, y, refuse_future, now, tm) == 0 {
589                return pos;
590            }
591            if c == b'.' && set_date(d, y, m, refuse_future, now, tm) == 0 {
592                return pos;
593            }
594            return 0;
595        }
596        _ => return 0,
597    }
598    pos
599}
600
601fn match_alpha(date: &[u8], tm: &mut tm, offset: &mut i32) -> usize {
602    for (i, name) in MONTH_NAMES.iter().enumerate() {
603        let m = match_string(date, name);
604        if m >= 3 {
605            tm.tm_mon = i as i32;
606            return m;
607        }
608    }
609
610    for (i, name) in WEEKDAY_NAMES.iter().enumerate() {
611        let m = match_string(date, name);
612        if m >= 3 {
613            tm.tm_wday = i as i32;
614            return m;
615        }
616    }
617
618    for tz in TIMEZONE_NAMES {
619        let m = match_string(date, tz.name);
620        if m >= 3 || m == tz.name.len() {
621            let off = tz.offset_hours + tz.dst;
622            if *offset == -1 {
623                *offset = 60 * off;
624            }
625            return m;
626        }
627    }
628
629    if match_string(date, "PM") == 2 {
630        tm.tm_hour = (tm.tm_hour % 12) + 12;
631        return 2;
632    }
633
634    if match_string(date, "AM") == 2 {
635        tm.tm_hour %= 12;
636        return 2;
637    }
638
639    if date.first() == Some(&b'T')
640        && date.get(1).is_some_and(|b| b.is_ascii_digit())
641        && tm.tm_hour == -1
642    {
643        tm.tm_min = 0;
644        tm.tm_sec = 0;
645        return 1;
646    }
647
648    skip_alpha(date)
649}
650
651fn match_digit(date: &[u8], tm: &mut tm, offset: &mut i32, tm_gmt: &mut i32) -> usize {
652    let (num, n) = parse_timestamp_prefix(date);
653    if n == 0 {
654        return 0;
655    }
656    let end = n;
657
658    if num >= 100_000_000 && nodate(tm) {
659        let tt = num as time_t;
660        let p = unsafe { libc::gmtime_r(&tt, tm) };
661        if !p.is_null() {
662            *tm_gmt = 1;
663            return end;
664        }
665    }
666
667    if let Some(&sep) = date.get(end) {
668        if matches!(sep, b':' | b'.' | b'/' | b'-')
669            && date.get(end + 1).is_some_and(|b| b.is_ascii_digit())
670        {
671            let m = match_multi_number(num, date, end, tm, 0);
672            if m != 0 {
673                return m;
674            }
675        }
676    }
677
678    let mut n_digits = 0usize;
679    loop {
680        n_digits += 1;
681        if n_digits >= date.len() || !date[n_digits].is_ascii_digit() {
682            break;
683        }
684    }
685
686    if n_digits == 8 || n_digits == 6 {
687        let num1 = (num / 10000) as i32;
688        let num2 = ((num % 10000) / 100) as i32;
689        let num3 = (num % 100) as i32;
690        if n_digits == 8 {
691            let _ = set_date(num1, num2, num3, None, get_time_sec(), tm);
692        } else if set_time(num1 as i64, num2 as i64, num3 as i64, tm) == 0
693            && date.get(end) == Some(&b'.')
694            && date.get(end + 1).is_some_and(|b| b.is_ascii_digit())
695        {
696            let (_, rel) = parse_uint_suffix(&date[end + 1..]);
697            return end + 1 + rel;
698        }
699        return end;
700    }
701
702    if maybeiso8601(tm) {
703        let mut num1 = num as u32;
704        let mut num2: u32 = 0;
705        if n_digits == 4 {
706            num1 = (num / 100) as u32;
707            num2 = (num % 100) as u32;
708        }
709        if (n_digits == 4 || n_digits == 2)
710            && !nodate(tm)
711            && set_time(num1 as i64, num2 as i64, 0, tm) == 0
712        {
713            return n_digits;
714        }
715        tm.tm_min = -1;
716        tm.tm_sec = -1;
717    }
718
719    if n_digits == 4 {
720        if num <= 1400 && *offset == -1 {
721            let minutes = (num % 100) as u32;
722            let hours = (num / 100) as u32;
723            *offset = (hours * 60 + minutes) as i32;
724        } else if num > 1900 && num < 2100 {
725            tm.tm_year = (num as i32) - 1900;
726        }
727        return n_digits;
728    }
729
730    if n_digits > 2 {
731        return n_digits;
732    }
733
734    if num > 0 && num < 32 && tm.tm_mday < 0 {
735        tm.tm_mday = num as i32;
736        return n_digits;
737    }
738
739    if n_digits == 2 && tm.tm_year < 0 {
740        if num < 10 && tm.tm_mday >= 0 {
741            tm.tm_year = (num as i32) + 100;
742            return n_digits;
743        }
744        if num >= 70 {
745            tm.tm_year = num as i32;
746            return n_digits;
747        }
748    }
749
750    if num > 0 && num < 13 && tm.tm_mon < 0 {
751        tm.tm_mon = (num as i32) - 1;
752    }
753
754    n_digits
755}