Skip to main content

grit_lib/
ident.rs

1//! Git author/committer identity lines (`ident` in Git's `fsck.c` / `commit.c`).
2//!
3//! Parses `Name <email> <unix-timestamp> <+HHMM>` with the same edge cases as Git:
4//! overflow, non-digit timestamps, and whitespace-only timestamps (sentinel handling).
5
6use crate::git_date::tm::date_overflows;
7use crate::objects::ObjectKind;
8
/// Classification of the date field of a signature line, used for display and filtering.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SignatureTimestamp {
    /// Seconds since the Unix epoch taken from the author/committer field; safe for
    /// `time_t` / display.
    Valid(i64),
    /// The date field was unparsable, overflowing, or whitespace-only. Git substitutes a
    /// sentinel in that case (epoch in headers, empty `%ad`, empty `%at` / `%ct` in format).
    Sentinel,
}
18
/// Result of successfully parsing the trailing `<unix> <+HHMM>` part of a signature.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedSignatureTimes {
    /// Seconds as written in the author/committer field (the tz offset is not applied).
    pub unix_seconds: i64,
    /// Signed offset in seconds derived from the `+HHMM` / `-HHMM` field.
    pub tz_offset_secs: i64,
    /// Byte range, within the original `ident` string, occupied by the `+HHMM` / `-HHMM` field.
    pub tz_hhmm_range: std::ops::Range<usize>,
}
29
/// Scan a decimal timestamp like Git's `parse_timestamp_from_buf` / `strtoumax`.
///
/// Starting at byte index `i`, consumes the whole run of ASCII digits and returns
/// `(value, end)`, where `end` is the index of the first byte after the run.
///
/// * Returns `None` when there is no digit at `i` (including `i` at or past the end of `bytes`).
/// * A run longer than 21 digits (Git uses a 24-byte buffer) is not evaluated numerically;
///   it is reported as the saturation marker `u128::MAX`. `end` still points past the
///   entire run, so the caller can go on to inspect the separator and timezone and then
///   classify the value as an overflow.
fn scan_decimal_timestamp(bytes: &[u8], i: usize) -> Option<(u128, usize)> {
    const MAX_DIGITS: usize = 21;
    let digit_count = bytes
        .get(i..)?
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    if digit_count == 0 {
        return None;
    }
    let end = i + digit_count;
    if digit_count > MAX_DIGITS {
        // Too long to be a representable timestamp: saturate, but report the true end of
        // the digit run rather than an index in the middle of it.
        return Some((u128::MAX, end));
    }
    // At most 21 digits, so the accumulated value is < 10^21 and cannot overflow `u128`.
    let value = bytes[i..end]
        .iter()
        .fold(0u128, |acc, &d| acc * 10 + u128::from(d - b'0'));
    Some((value, end))
}
50
/// Return the index of the first byte at or after `i` that is neither an ASCII space nor a
/// horizontal tab (the only whitespace Git's `fsck_ident` skips before the date).
///
/// If `i` is at or beyond the end of `bytes`, `i` is returned unchanged.
fn skip_fsck_date_leading_ws(bytes: &[u8], i: usize) -> usize {
    let blanks = bytes.get(i..).map_or(0, |rest| {
        rest.iter()
            .take_while(|&&b| b == b' ' || b == b'\t')
            .count()
    });
    i + blanks
}
58
/// Convert a `+HHMM` / `-HHMM` timezone field into a signed offset in seconds.
///
/// Only the first five bytes of `offset` are examined; anything after them is ignored.
/// Returns `None` when fewer than five bytes are present, when the first byte is not
/// `+` or `-`, or when any of the following four bytes is not an ASCII digit.
fn parse_tz_hhmm_offset(offset: &str) -> Option<i64> {
    let b = offset.as_bytes();
    if b.len() < 5 {
        return None;
    }
    let sign = match b[0] {
        b'+' => 1i64,
        b'-' => -1i64,
        _ => return None,
    };
    // Validate the digits byte by byte: `str::parse::<i64>` would also accept an embedded
    // sign (e.g. "+5" or "-1"), letting malformed fields such as "++500" through.
    let digits = &b[1..5];
    if !digits.iter().all(u8::is_ascii_digit) {
        return None;
    }
    let digit = |k: usize| i64::from(digits[k] - b'0');
    let hours = digit(0) * 10 + digit(1);
    let minutes = digit(2) * 10 + digit(3);
    Some(sign * (hours * 3600 + minutes * 60))
}
72
73/// Parse `<unix> <+HHMM>` after the closing `>` of the email (Git commit author/committer line).
74#[must_use]
75pub fn parse_signature_times(ident: &str) -> Option<ParsedSignatureTimes> {
76    match parse_signature_tail(ident)? {
77        SignatureTail::Valid(p) => Some(p),
78        SignatureTail::Overflow | SignatureTail::NonNumeric => None,
79    }
80}
81
82/// Distinguishes a non-numeric date field from a numeric field that fails Git's overflow rules.
83#[derive(Debug, Clone, PartialEq, Eq)]
84pub enum SignatureTail {
85    /// Well-formed timestamp and timezone.
86    Valid(ParsedSignatureTimes),
87    /// Digits and timezone present, but the number does not fit Git's `date_overflows` rules.
88    /// Default `%ad` shows the Unix epoch with `+0000` (t4212).
89    Overflow,
90    /// No leading digit after `>` (e.g. `totally_bogus`): `%ad` is empty, headers use epoch `+0000`.
91    NonNumeric,
92}
93
94/// Parse the date/time tail like [`parse_signature_times`], but preserve overflow vs non-numeric.
95#[must_use]
96pub fn parse_signature_tail(ident: &str) -> Option<SignatureTail> {
97    let bytes = ident.as_bytes();
98    let gt = ident.rfind('>')?;
99    let mut i = skip_fsck_date_leading_ws(bytes, gt + 1);
100    if i >= bytes.len() || !bytes[i].is_ascii_digit() {
101        return Some(SignatureTail::NonNumeric);
102    }
103    let (raw, after_digits) = scan_decimal_timestamp(bytes, i)?;
104    if after_digits >= bytes.len() || bytes[after_digits] != b' ' {
105        return None;
106    }
107    i = after_digits + 1;
108    if i + 5 > bytes.len() {
109        return None;
110    }
111    let tz_slice = ident.get(i..i + 5)?;
112    let tz_offset_secs = parse_tz_hhmm_offset(tz_slice)?;
113    let tz_hhmm_range = i..i + 5;
114    if raw == u128::MAX || raw > u64::MAX as u128 || date_overflows(raw as u64) {
115        return Some(SignatureTail::Overflow);
116    }
117    let unix_seconds = i64::try_from(raw).ok()?;
118    Some(SignatureTail::Valid(ParsedSignatureTimes {
119        unix_seconds,
120        tz_offset_secs,
121        tz_hhmm_range,
122    }))
123}
124
125/// Classify the date field for pretty-print / `%at` (t4212 whitespace commits).
126pub fn signature_timestamp_for_pretty(ident: &str) -> SignatureTimestamp {
127    match parse_signature_times(ident) {
128        Some(p) => SignatureTimestamp::Valid(p.unix_seconds),
129        None => SignatureTimestamp::Sentinel,
130    }
131}
132
133/// Unix timestamp for `--until` / `--since` filtering (committer), matching Git's `parse_date`.
134/// Sentinel timestamps (whitespace dates, overflow, etc.) behave like `0` for cutoff comparisons
135/// (see t4212 `rev-list --until`).
136#[must_use]
137pub fn committer_timestamp_for_until_filter(ident: &str) -> i64 {
138    match signature_timestamp_for_pretty(ident) {
139        SignatureTimestamp::Valid(ts) => ts,
140        SignatureTimestamp::Sentinel => 0,
141    }
142}
143
144/// Raw unix seconds as i64 for `%at` when valid; `None` when Git would print nothing.
145#[must_use]
146pub fn timestamp_for_at_ct(ts: SignatureTimestamp) -> Option<i64> {
147    match ts {
148        SignatureTimestamp::Valid(v) => Some(v),
149        SignatureTimestamp::Sentinel => None,
150    }
151}
152
153/// First fsck error Git would report for commit headers (tree/parents/author/committer), or `Ok`.
154/// Message text matches Git's `fsck.c` `report()` shape: `<camelCaseId>: <detail>`.
155pub fn fsck_commit_idents(data: &[u8]) -> Result<(), String> {
156    crate::fsck_standalone::fsck_object(ObjectKind::Commit, data).map_err(|e| e.report_line())
157}
158
159/// Committer seconds for ordering (`rev-list --date-order`, etc.): unknown/corrupt → `0`.
160#[must_use]
161pub fn committer_unix_seconds_for_ordering(ident: &str) -> i64 {
162    match signature_timestamp_for_pretty(ident) {
163        SignatureTimestamp::Valid(ts) => ts,
164        SignatureTimestamp::Sentinel => 0,
165    }
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    /// A date field that starts with a non-digit is classified as `NonNumeric`.
    #[test]
    fn non_numeric_author_date_is_non_numeric_tail() {
        let tail = parse_signature_tail("A <e@x> totally_bogus -0700");
        assert_eq!(tail, Some(SignatureTail::NonNumeric));
    }

    /// `18446744073709551617` is `u64::MAX + 2` (20 digits): it is scanned successfully
    /// but exceeds the `u64` range, so the tail is classified as `Overflow`.
    #[test]
    fn u128_max_digit_count_is_overflow_tail() {
        let tail = parse_signature_tail("A <e@x> 18446744073709551617 -0700");
        assert_eq!(tail, Some(SignatureTail::Overflow));
    }
}