Skip to main content

grit_lib/git_date/
parse.rs

1//! Git-compatible date parsing (`parse_date_basic`, `parse_date`) — ported from Git `date.c`.
2
3use super::compat::{self, time_t, tm};
4use super::tm::{
5    get_time_sec, init_tm_unknown, is_date_known, match_string, maybeiso8601, nodate,
6    parse_timestamp_prefix, skip_alpha, tm_to_time_t, TIMESTAMP_MAX,
7};
8use std::mem::MaybeUninit;
9
/// One row of the timezone-abbreviation table.
struct TzName {
    /// Abbreviation as written in a date string (for example `"PST"`).
    name: &'static str,
    /// Base offset from UTC, in whole hours.
    offset_hours: i32,
    /// Extra hours added on top of `offset_hours` (0 or 1 in the table).
    dst: i32,
}
15
16const TIMEZONE_NAMES: &[TzName] = &[
17    TzName {
18        name: "IDLW",
19        offset_hours: -12,
20        dst: 0,
21    },
22    TzName {
23        name: "NT",
24        offset_hours: -11,
25        dst: 0,
26    },
27    TzName {
28        name: "CAT",
29        offset_hours: -10,
30        dst: 0,
31    },
32    TzName {
33        name: "HST",
34        offset_hours: -10,
35        dst: 0,
36    },
37    TzName {
38        name: "HDT",
39        offset_hours: -10,
40        dst: 1,
41    },
42    TzName {
43        name: "YST",
44        offset_hours: -9,
45        dst: 0,
46    },
47    TzName {
48        name: "YDT",
49        offset_hours: -9,
50        dst: 1,
51    },
52    TzName {
53        name: "PST",
54        offset_hours: -8,
55        dst: 0,
56    },
57    TzName {
58        name: "PDT",
59        offset_hours: -8,
60        dst: 1,
61    },
62    TzName {
63        name: "MST",
64        offset_hours: -7,
65        dst: 0,
66    },
67    TzName {
68        name: "MDT",
69        offset_hours: -7,
70        dst: 1,
71    },
72    TzName {
73        name: "CST",
74        offset_hours: -6,
75        dst: 0,
76    },
77    TzName {
78        name: "CDT",
79        offset_hours: -6,
80        dst: 1,
81    },
82    TzName {
83        name: "EST",
84        offset_hours: -5,
85        dst: 0,
86    },
87    TzName {
88        name: "EDT",
89        offset_hours: -5,
90        dst: 1,
91    },
92    TzName {
93        name: "AST",
94        offset_hours: -3,
95        dst: 0,
96    },
97    TzName {
98        name: "ADT",
99        offset_hours: -3,
100        dst: 1,
101    },
102    TzName {
103        name: "WAT",
104        offset_hours: -1,
105        dst: 0,
106    },
107    TzName {
108        name: "GMT",
109        offset_hours: 0,
110        dst: 0,
111    },
112    TzName {
113        name: "UTC",
114        offset_hours: 0,
115        dst: 0,
116    },
117    TzName {
118        name: "Z",
119        offset_hours: 0,
120        dst: 0,
121    },
122    TzName {
123        name: "WET",
124        offset_hours: 0,
125        dst: 0,
126    },
127    TzName {
128        name: "BST",
129        offset_hours: 0,
130        dst: 1,
131    },
132    TzName {
133        name: "CET",
134        offset_hours: 1,
135        dst: 0,
136    },
137    TzName {
138        name: "MET",
139        offset_hours: 1,
140        dst: 0,
141    },
142    TzName {
143        name: "MEWT",
144        offset_hours: 1,
145        dst: 0,
146    },
147    TzName {
148        name: "MEST",
149        offset_hours: 1,
150        dst: 1,
151    },
152    TzName {
153        name: "CEST",
154        offset_hours: 1,
155        dst: 1,
156    },
157    TzName {
158        name: "MESZ",
159        offset_hours: 1,
160        dst: 1,
161    },
162    TzName {
163        name: "FWT",
164        offset_hours: 1,
165        dst: 0,
166    },
167    TzName {
168        name: "FST",
169        offset_hours: 1,
170        dst: 1,
171    },
172    TzName {
173        name: "EET",
174        offset_hours: 2,
175        dst: 0,
176    },
177    TzName {
178        name: "EEST",
179        offset_hours: 2,
180        dst: 1,
181    },
182    TzName {
183        name: "WAST",
184        offset_hours: 7,
185        dst: 0,
186    },
187    TzName {
188        name: "WADT",
189        offset_hours: 7,
190        dst: 1,
191    },
192    TzName {
193        name: "CCT",
194        offset_hours: 8,
195        dst: 0,
196    },
197    TzName {
198        name: "JST",
199        offset_hours: 9,
200        dst: 0,
201    },
202    TzName {
203        name: "EAST",
204        offset_hours: 10,
205        dst: 0,
206    },
207    TzName {
208        name: "EADT",
209        offset_hours: 10,
210        dst: 1,
211    },
212    TzName {
213        name: "GST",
214        offset_hours: 10,
215        dst: 0,
216    },
217    TzName {
218        name: "NZT",
219        offset_hours: 12,
220        dst: 0,
221    },
222    TzName {
223        name: "NZST",
224        offset_hours: 12,
225        dst: 0,
226    },
227    TzName {
228        name: "NZDT",
229        offset_hours: 12,
230        dst: 1,
231    },
232    TzName {
233        name: "IDLE",
234        offset_hours: 12,
235        dst: 0,
236    },
237];
238
/// Full English month names, indexed by `tm_mon` (0 = January).
#[rustfmt::skip]
pub(crate) const MONTH_NAMES: [&str; 12] = [
    "January", "February", "March",
    "April", "May", "June",
    "July", "August", "September",
    "October", "November", "December",
];
253
/// Weekday names, indexed by `tm_wday` (0 = Sunday).
///
/// The entries are deliberately spelled in the plural.
/// NOTE(review): presumably so that prefix matching accepts both "Sunday" and
/// "Sundays" — confirm against `match_string`.
#[rustfmt::skip]
pub(crate) const WEEKDAY_NAMES: [&str; 7] = [
    "Sundays", "Mondays", "Tuesdays", "Wednesdays",
    "Thursdays", "Fridays", "Saturdays",
];
263
/// Format a parsed instant like Git's `date_string`.
///
/// `date` is printed as a plain decimal number, followed by a space and the
/// timezone as `<sign><hh><mm>`, where `offset` is a signed number of minutes
/// (for example `330` becomes `+0530` and `-480` becomes `-0800`).
pub fn date_string(date: u64, offset: i32) -> String {
    let sign = if offset < 0 { '-' } else { '+' };
    // `unsigned_abs` instead of `-offset`: negating `i32::MIN` overflows
    // (a panic in debug builds, a negative "magnitude" in release builds).
    let minutes = offset.unsigned_abs();
    format!("{} {}{:02}{:02}", date, sign, minutes / 60, minutes % 60)
}
274
275/// Git `parse_date` — returns canonical `date_string` output.
276pub fn parse_date(date: &str) -> Result<String, ()> {
277    let (ts, off) = parse_date_basic(date)?;
278    Ok(date_string(ts, off))
279}
280
281/// Git `parse_date_basic` — UTC seconds and timezone offset in **minutes** (signed).
282pub fn parse_date_basic(date: &str) -> Result<(u64, i32), ()> {
283    let bytes = date.as_bytes();
284    let mut tm = init_tm_unknown();
285    let mut offset: i32 = -1;
286    let mut tm_gmt = 0i32;
287    let mut i = 0usize;
288
289    if bytes.first() == Some(&b'@') {
290        if let Some((ts, off)) = match_object_header_date(&bytes[1..]) {
291            return Ok((ts, off));
292        }
293    }
294
295    while i < bytes.len() {
296        let c = bytes[i];
297        if c == 0 || c == b'\n' {
298            break;
299        }
300        let mut m = 0usize;
301        if c.is_ascii_alphabetic() {
302            m = match_alpha(&bytes[i..], &mut tm, &mut offset);
303        } else if c.is_ascii_digit() {
304            m = match_digit(&bytes[i..], &mut tm, &mut offset, &mut tm_gmt);
305        } else if (c == b'-' || c == b'+') && bytes.get(i + 1).is_some_and(|x| x.is_ascii_digit()) {
306            m = match_tz(&bytes[i..], &mut offset);
307        }
308        if m == 0 {
309            m = 1;
310        }
311        i += m;
312    }
313
314    let tts = tm_to_time_t(&tm);
315    if tts < 0 {
316        return Err(());
317    }
318    let mut ts = tts as u64;
319
320    if offset == -1 {
321        tm.tm_isdst = -1;
322        let temp_time = unsafe { compat::mktime(&mut tm) };
323        let tt = ts as i128;
324        let tloc = temp_time as i128;
325        offset = if tt > tloc {
326            ((tt - tloc) / 60) as i32
327        } else {
328            -(((tloc - tt) / 60) as i32)
329        };
330    }
331
332    if tm_gmt == 0 {
333        if offset > 0 && (offset as i64) * 60 > ts as i64 {
334            return Err(());
335        }
336        if offset < 0 && (-(offset as i128)) * 60 > (TIMESTAMP_MAX as i128 - ts as i128) {
337            return Err(());
338        }
339        // Git: *timestamp -= *offset * 60 (signed; negative offset adds to the instant).
340        let ts128 = ts as i128;
341        let adj = (offset as i128) * 60;
342        let new_ts = ts128 - adj;
343        if new_ts < 0 {
344            return Err(());
345        }
346        ts = new_ts as u64;
347    }
348
349    Ok((ts, offset))
350}
351
352fn match_object_header_date(date: &[u8]) -> Option<(u64, i32)> {
353    if date.is_empty() || !date[0].is_ascii_digit() {
354        return None;
355    }
356    let (stamp, mut rest) = parse_timestamp_prefix(date);
357    if rest >= date.len() || date[rest] != b' ' {
358        return None;
359    }
360    if stamp == u64::MAX {
361        return None;
362    }
363    rest += 1;
364    if rest >= date.len() || (date[rest] != b'+' && date[rest] != b'-') {
365        return None;
366    }
367    let sign = date[rest];
368    rest += 1;
369    if rest + 4 > date.len() {
370        return None;
371    }
372    let tz_digits = std::str::from_utf8(&date[rest..rest + 4]).ok()?;
373    let ofs_raw: i32 = tz_digits.parse().ok()?;
374    let mut ofs = (ofs_raw / 100) * 60 + (ofs_raw % 100);
375    if sign == b'-' {
376        ofs = -ofs;
377    }
378    let end = rest + 4;
379    if end < date.len() && date[end] != b'\n' && date[end] != 0 {
380        return None;
381    }
382    Some((stamp, ofs))
383}
384
385/// Git `match_tz` — writes offset in minutes; returns bytes consumed.
386fn match_tz(date: &[u8], offp: &mut i32) -> usize {
387    if date.is_empty() || (date[0] != b'+' && date[0] != b'-') {
388        return 0;
389    }
390    let (hour_ul, n) = parse_timestamp_prefix(&date[1..]);
391    let mut end = 1 + n;
392    let mut min: i32 = 0;
393    let mut hour: i32 = hour_ul as i32;
394    if n == 4 {
395        min = hour % 100;
396        hour /= 100;
397    } else if n != 2 {
398        min = 99;
399    } else if end < date.len() && date[end] == b':' {
400        let (m2, n2) = parse_timestamp_prefix(&date[end + 1..]);
401        if n2 == 0 {
402            min = 99;
403        } else {
404            min = m2 as i32;
405            end += 1 + n2;
406            if end - 1 != 5 {
407                min = 99;
408            }
409        }
410    }
411    if min < 60 && hour < 24 {
412        let mut off = hour * 60 + min;
413        if date[0] == b'-' {
414            off = -off;
415        }
416        *offp = off;
417    }
418    end
419}
420
/// Git `strtol` for a leading signed decimal slice (`end+1` style).
///
/// Reads an optional `+`/`-` followed by ASCII digits from the start of `s`
/// and returns `(value, bytes_consumed)`. When there is no digit to read the
/// result is `(0, 0)`.
///
/// Like `strtol`, a value that does not fit saturates to `i64::MAX` /
/// `i64::MIN` and the whole digit run is still reported as consumed. (The
/// previous behaviour of returning `(0, 0)` on overflow made callers treat an
/// absurdly long field as absent instead of as out of range.)
///
/// Unlike `strtol`, leading whitespace is not skipped.
fn parse_long_prefix(s: &[u8]) -> (i64, usize) {
    let (negative, sign_len) = match s.first() {
        Some(&b'-') => (true, 1usize),
        Some(&b'+') => (false, 1usize),
        _ => (false, 0usize),
    };

    let digit_count = s[sign_len..]
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    if digit_count == 0 {
        return (0, 0);
    }
    let end = sign_len + digit_count;

    // Accumulate as a negative number: the negative range is one larger than
    // the positive one, so `i64::MIN` itself parses without overflow.
    let mut acc: i64 = 0;
    let mut overflowed = false;
    for &b in &s[sign_len..end] {
        let digit = i64::from(b - b'0');
        match acc.checked_mul(10).and_then(|v| v.checked_sub(digit)) {
            Some(next) => acc = next,
            None => {
                overflowed = true;
                break;
            }
        }
    }

    let value = if overflowed {
        if negative {
            i64::MIN
        } else {
            i64::MAX
        }
    } else if negative {
        acc
    } else {
        // `-i64::MIN` does not exist; that is a positive overflow.
        acc.checked_neg().unwrap_or(i64::MAX)
    };
    (value, end)
}
447
448fn parse_uint_suffix(s: &[u8]) -> (u64, usize) {
449    parse_timestamp_prefix(s)
450}
451
452/// Git `set_date` — `0` ok, `1` reject try, `-1` error.
453fn set_date(year: i32, month: i32, day: i32, now_tm: Option<&tm>, now: i64, tm: &mut tm) -> i32 {
454    if !(month > 0 && month < 13 && day > 0 && day < 32) {
455        return -1;
456    }
457    let Some(nt) = now_tm else {
458        tm.tm_mon = month - 1;
459        tm.tm_mday = day;
460        if year == -1 {
461            return 1;
462        }
463        if (1970..2100).contains(&year) {
464            tm.tm_year = year - 1900;
465        } else if (70..100).contains(&year) {
466            tm.tm_year = year;
467        } else if year < 38 {
468            tm.tm_year = year + 100;
469        } else {
470            return -1;
471        }
472        return 0;
473    };
474    let mut check = *tm;
475    check.tm_mon = month - 1;
476    check.tm_mday = day;
477    if year == -1 {
478        check.tm_year = nt.tm_year;
479    } else if (1970..2100).contains(&year) {
480        check.tm_year = year - 1900;
481    } else if (70..100).contains(&year) {
482        check.tm_year = year;
483    } else if year < 38 {
484        check.tm_year = year + 100;
485    } else {
486        return -1;
487    }
488    let specified = tm_to_time_t(&check);
489    if specified >= 0 && now + 10 * 24 * 3600 < specified {
490        return -1;
491    }
492    tm.tm_mon = check.tm_mon;
493    tm.tm_mday = check.tm_mday;
494    if year != -1 {
495        tm.tm_year = check.tm_year;
496    }
497    0
498}
499
500fn set_time(hour: i64, minute: i64, second: i64, tm: &mut tm) -> i32 {
501    if (0..=24).contains(&hour) && (0..60).contains(&minute) && (0..=60).contains(&second) {
502        tm.tm_hour = hour as i32;
503        tm.tm_min = minute as i32;
504        tm.tm_sec = second as i32;
505        0
506    } else {
507        -1
508    }
509}
510
511/// Git `match_multi_number` — `sep_i` is index of separator in `date`; returns bytes consumed from `date` start.
512pub(crate) fn match_multi_number(
513    num: u64,
514    date: &[u8],
515    sep_i: usize,
516    tm: &mut tm,
517    now_in: i64,
518) -> usize {
519    let Some(&c) = date.get(sep_i) else {
520        return 0;
521    };
522    if !matches!(c, b':' | b'-' | b'/' | b'.') {
523        return 0;
524    }
525
526    let (num2, n2) = parse_long_prefix(&date[sep_i + 1..]);
527    if n2 == 0 {
528        return 0;
529    }
530    let mut pos = sep_i + 1 + n2;
531    let mut num3: i64 = -1;
532    if pos < date.len() && date[pos] == c && pos + 1 < date.len() && date[pos + 1].is_ascii_digit()
533    {
534        let (n3, rel) = parse_long_prefix(&date[pos + 1..]);
535        num3 = n3;
536        pos += 1 + rel;
537    }
538
539    match c {
540        b':' => {
541            let mut n3 = num3;
542            if n3 < 0 {
543                n3 = 0;
544            }
545            if set_time(num as i64, num2, n3, tm) == 0 {
546                if pos < date.len()
547                    && date[pos] == b'.'
548                    && pos + 1 < date.len()
549                    && date[pos + 1].is_ascii_digit()
550                    && is_date_known(tm)
551                {
552                    let (_, rel) = parse_long_prefix(&date[pos + 1..]);
553                    pos += 1 + rel;
554                }
555            } else {
556                return 0;
557            }
558        }
559        b'-' | b'/' | b'.' => {
560            let now = if now_in == 0 { get_time_sec() } else { now_in };
561            let mut now_tm_uninit = MaybeUninit::<tm>::uninit();
562            let refuse_future: Option<&tm> = unsafe {
563                let tt = now as time_t;
564                let p = compat::gmtime_r(&tt, now_tm_uninit.as_mut_ptr());
565                if p.is_null() {
566                    None
567                } else {
568                    Some(&*now_tm_uninit.as_ptr())
569                }
570            };
571
572            let y = num as i32;
573            let m = num2 as i32;
574            let d = if num3 < 0 { 0 } else { num3 as i32 };
575
576            if num > 70 {
577                if set_date(y, m, d, None, now, tm) == 0 {
578                    return pos;
579                }
580                if set_date(y, d, m, None, now, tm) == 0 {
581                    return pos;
582                }
583            }
584            if c != b'.' && set_date(d, y, m, refuse_future, now, tm) == 0 {
585                return pos;
586            }
587            if set_date(d, m, y, refuse_future, now, tm) == 0 {
588                return pos;
589            }
590            if c == b'.' && set_date(d, y, m, refuse_future, now, tm) == 0 {
591                return pos;
592            }
593            return 0;
594        }
595        _ => return 0,
596    }
597    pos
598}
599
600fn match_alpha(date: &[u8], tm: &mut tm, offset: &mut i32) -> usize {
601    for (i, name) in MONTH_NAMES.iter().enumerate() {
602        let m = match_string(date, name);
603        if m >= 3 {
604            tm.tm_mon = i as i32;
605            return m;
606        }
607    }
608
609    for (i, name) in WEEKDAY_NAMES.iter().enumerate() {
610        let m = match_string(date, name);
611        if m >= 3 {
612            tm.tm_wday = i as i32;
613            return m;
614        }
615    }
616
617    for tz in TIMEZONE_NAMES {
618        let m = match_string(date, tz.name);
619        if m >= 3 || m == tz.name.len() {
620            let off = tz.offset_hours + tz.dst;
621            if *offset == -1 {
622                *offset = 60 * off;
623            }
624            return m;
625        }
626    }
627
628    if match_string(date, "PM") == 2 {
629        tm.tm_hour = (tm.tm_hour % 12) + 12;
630        return 2;
631    }
632
633    if match_string(date, "AM") == 2 {
634        tm.tm_hour %= 12;
635        return 2;
636    }
637
638    if date.first() == Some(&b'T')
639        && date.get(1).is_some_and(|b| b.is_ascii_digit())
640        && tm.tm_hour == -1
641    {
642        tm.tm_min = 0;
643        tm.tm_sec = 0;
644        return 1;
645    }
646
647    skip_alpha(date)
648}
649
650fn match_digit(date: &[u8], tm: &mut tm, offset: &mut i32, tm_gmt: &mut i32) -> usize {
651    let (num, n) = parse_timestamp_prefix(date);
652    if n == 0 {
653        return 0;
654    }
655    let end = n;
656
657    if num >= 100_000_000 && nodate(tm) {
658        let tt = num as time_t;
659        let p = unsafe { compat::gmtime_r(&tt, tm) };
660        if !p.is_null() {
661            *tm_gmt = 1;
662            return end;
663        }
664    }
665
666    if let Some(&sep) = date.get(end) {
667        if matches!(sep, b':' | b'.' | b'/' | b'-')
668            && date.get(end + 1).is_some_and(|b| b.is_ascii_digit())
669        {
670            let m = match_multi_number(num, date, end, tm, 0);
671            if m != 0 {
672                return m;
673            }
674        }
675    }
676
677    let mut n_digits = 0usize;
678    loop {
679        n_digits += 1;
680        if n_digits >= date.len() || !date[n_digits].is_ascii_digit() {
681            break;
682        }
683    }
684
685    if n_digits == 8 || n_digits == 6 {
686        let num1 = (num / 10000) as i32;
687        let num2 = ((num % 10000) / 100) as i32;
688        let num3 = (num % 100) as i32;
689        if n_digits == 8 {
690            let _ = set_date(num1, num2, num3, None, get_time_sec(), tm);
691        } else if set_time(num1 as i64, num2 as i64, num3 as i64, tm) == 0
692            && date.get(end) == Some(&b'.')
693            && date.get(end + 1).is_some_and(|b| b.is_ascii_digit())
694        {
695            let (_, rel) = parse_uint_suffix(&date[end + 1..]);
696            return end + 1 + rel;
697        }
698        return end;
699    }
700
701    if maybeiso8601(tm) {
702        let mut num1 = num as u32;
703        let mut num2: u32 = 0;
704        if n_digits == 4 {
705            num1 = (num / 100) as u32;
706            num2 = (num % 100) as u32;
707        }
708        if (n_digits == 4 || n_digits == 2)
709            && !nodate(tm)
710            && set_time(num1 as i64, num2 as i64, 0, tm) == 0
711        {
712            return n_digits;
713        }
714        tm.tm_min = -1;
715        tm.tm_sec = -1;
716    }
717
718    if n_digits == 4 {
719        if num <= 1400 && *offset == -1 {
720            let minutes = (num % 100) as u32;
721            let hours = (num / 100) as u32;
722            *offset = (hours * 60 + minutes) as i32;
723        } else if num > 1900 && num < 2100 {
724            tm.tm_year = (num as i32) - 1900;
725        }
726        return n_digits;
727    }
728
729    if n_digits > 2 {
730        return n_digits;
731    }
732
733    if num > 0 && num < 32 && tm.tm_mday < 0 {
734        tm.tm_mday = num as i32;
735        return n_digits;
736    }
737
738    if n_digits == 2 && tm.tm_year < 0 {
739        if num < 10 && tm.tm_mday >= 0 {
740            tm.tm_year = (num as i32) + 100;
741            return n_digits;
742        }
743        if num >= 70 {
744            tm.tm_year = num as i32;
745            return n_digits;
746        }
747    }
748
749    if num > 0 && num < 13 && tm.tm_mon < 0 {
750        tm.tm_mon = (num as i32) - 1;
751    }
752
753    n_digits
754}