Skip to main content

sley_core/
lib.rs

1use std::borrow::Borrow;
2use std::error::Error;
3use std::fmt;
4use std::ops::Deref;
5use std::path::{Path, PathBuf};
6use std::str::FromStr;
7use std::sync::Mutex;
8
/// Upstream Git version string associated with this crate.
// NOTE(review): presumably the Git release whose behaviour sley mirrors — confirm at the use sites.
pub const UPSTREAM_GIT_COMPAT_VERSION: &str = "2.54.0";
10
/// Process-wide slot holding the "original" working directory, if one has
/// been recorded via [`set_original_cwd`].
static ORIGINAL_CWD: Mutex<Option<PathBuf>> = Mutex::new(None);

/// Records the original working directory, or clears it when `path` is `None`.
///
/// A poisoned lock is recovered instead of being ignored: the guarded value is
/// a plain `Option<PathBuf>` that cannot be observed half-written, so a panic
/// on another thread must not cause this update to be silently dropped.
pub fn set_original_cwd(path: Option<PathBuf>) {
    let mut slot = ORIGINAL_CWD
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    *slot = path;
}

/// Returns a copy of the recorded original working directory, if any.
///
/// Like [`set_original_cwd`], this keeps working after the lock has been
/// poisoned rather than reporting `None` forever.
pub fn original_cwd() -> Option<PathBuf> {
    ORIGINAL_CWD
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .clone()
}
22
/// Named styles for rendering a Unix timestamp together with a `±HHMM`
/// timezone string.
///
/// Values are normally produced by [`DateMode::parse`] and turned into text by
/// [`DateMode::render`]. The `*Local` variants are the `-local` spellings of
/// their base mode; `render` formats those using `+0000` as the zone instead
/// of the caller-supplied one.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub enum DateMode {
    /// `default`: weekday, month, day, `HH:MM:SS`, year, then the offset.
    #[default]
    Default,
    /// `local` / `default-local`: the default layout without the trailing offset.
    Local,
    /// `raw`: the timestamp and the timezone separated by a space.
    Raw,
    /// `raw-local`.
    RawLocal,
    /// `unix`: the bare timestamp.
    Unix,
    /// `short`: `YYYY-MM-DD`.
    Short,
    /// `short-local`.
    ShortLocal,
    /// `iso` / `iso8601`: `YYYY-MM-DD HH:MM:SS ±HHMM`.
    Iso,
    /// `iso-local` / `iso8601-local`.
    IsoLocal,
    /// `iso-strict` / `iso8601-strict`: `YYYY-MM-DDTHH:MM:SS` followed by `Z`
    /// or `±HH:MM`.
    IsoStrict,
    /// `iso-strict-local` / `iso8601-strict-local`.
    IsoStrictLocal,
    /// `rfc` / `rfc2822`: `Www, D Mon YYYY HH:MM:SS ±HHMM`.
    Rfc2822,
    /// `rfc-local` / `rfc2822-local`.
    Rfc2822Local,
    /// `relative` (also accepted as `relative-local`): distance from the
    /// current time, e.g. "3 days ago".
    Relative,
    /// `human`: currently rendered with the same fields as `default`, always
    /// followed by the offset.
    Human,
    /// `human-local`.
    HumanLocal,
    /// `format:<template>`, `format-local:<template>` or `tformat:<template>`.
    Strftime {
        /// The text following the prefix, expanded with `%`-specifiers.
        template: String,
        /// `true` only for the `format-local:` spelling.
        local: bool,
    },
}
47
48impl DateMode {
49    pub fn parse(value: &str) -> Option<Self> {
50        if let Some(template) = value.strip_prefix("format:") {
51            return Some(Self::Strftime {
52                template: template.to_string(),
53                local: false,
54            });
55        }
56        if let Some(template) = value.strip_prefix("format-local:") {
57            return Some(Self::Strftime {
58                template: template.to_string(),
59                local: true,
60            });
61        }
62        if value == "tformat:" || value.starts_with("tformat:") {
63            return Some(Self::Strftime {
64                template: value["tformat:".len()..].to_string(),
65                local: false,
66            });
67        }
68        if value == "auto:" || value.starts_with("auto:") {
69            return Some(Self::Default);
70        }
71        Some(match value {
72            "default" => Self::Default,
73            "default-local" | "local" => Self::Local,
74            "raw" => Self::Raw,
75            "raw-local" => Self::RawLocal,
76            "unix" => Self::Unix,
77            "short" => Self::Short,
78            "short-local" => Self::ShortLocal,
79            "iso" | "iso8601" => Self::Iso,
80            "iso-local" | "iso8601-local" => Self::IsoLocal,
81            "iso-strict" | "iso8601-strict" => Self::IsoStrict,
82            "iso-strict-local" | "iso8601-strict-local" => Self::IsoStrictLocal,
83            "rfc" | "rfc2822" => Self::Rfc2822,
84            "rfc-local" | "rfc2822-local" => Self::Rfc2822Local,
85            "relative" | "relative-local" => Self::Relative,
86            "human" => Self::Human,
87            "human-local" => Self::HumanLocal,
88            _ => return None,
89        })
90    }
91
92    pub fn parse_atom_modifier(modifier: Option<&str>) -> Option<Self> {
93        modifier.map_or(Some(Self::Default), Self::parse)
94    }
95
96    pub fn render(&self, timestamp: i64, timezone: &str) -> Option<String> {
97        let tz = if self.is_local() { "+0000" } else { timezone };
98        let parts = DateParts::from_timestamp(timestamp, tz)?;
99        Some(match self {
100            Self::Default | Self::Local => {
101                let base = format!(
102                    "{} {} {} {:02}:{:02}:{:02} {}",
103                    parts.weekday,
104                    MONTHS_ABBR[(parts.month - 1) as usize],
105                    parts.day,
106                    parts.hour,
107                    parts.minute,
108                    parts.second,
109                    parts.year,
110                );
111                if self.is_local() {
112                    base
113                } else {
114                    format!("{base} {}", parts.timezone)
115                }
116            }
117            Self::Raw | Self::RawLocal => format!("{} {}", parts.timestamp, parts.timezone),
118            Self::Unix => parts.timestamp.to_string(),
119            Self::Short | Self::ShortLocal => {
120                format!("{:04}-{:02}-{:02}", parts.year, parts.month, parts.day)
121            }
122            Self::Iso | Self::IsoLocal => format!(
123                "{:04}-{:02}-{:02} {:02}:{:02}:{:02} {}",
124                parts.year,
125                parts.month,
126                parts.day,
127                parts.hour,
128                parts.minute,
129                parts.second,
130                parts.timezone,
131            ),
132            Self::IsoStrict | Self::IsoStrictLocal => format!(
133                "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}{}",
134                parts.year,
135                parts.month,
136                parts.day,
137                parts.hour,
138                parts.minute,
139                parts.second,
140                strict_timezone(parts.timezone),
141            ),
142            Self::Rfc2822 | Self::Rfc2822Local => format!(
143                "{}, {} {} {:04} {:02}:{:02}:{:02} {}",
144                parts.weekday,
145                parts.day,
146                MONTHS_ABBR[(parts.month - 1) as usize],
147                parts.year,
148                parts.hour,
149                parts.minute,
150                parts.second,
151                parts.timezone,
152            ),
153            Self::Relative => relative_date(parts.timestamp),
154            Self::Human | Self::HumanLocal => format!(
155                "{} {} {} {:02}:{:02}:{:02} {} {}",
156                parts.weekday,
157                MONTHS_ABBR[(parts.month - 1) as usize],
158                parts.day,
159                parts.hour,
160                parts.minute,
161                parts.second,
162                parts.year,
163                parts.timezone,
164            ),
165            Self::Strftime { template, .. } => strftime(template, &parts),
166        })
167    }
168
169    pub fn is_local(&self) -> bool {
170        matches!(
171            self,
172            Self::Local
173                | Self::RawLocal
174                | Self::ShortLocal
175                | Self::IsoLocal
176                | Self::IsoStrictLocal
177                | Self::Rfc2822Local
178                | Self::HumanLocal
179                | Self::Strftime { local: true, .. }
180        )
181    }
182}
183
/// Three-letter English month abbreviations, indexed by `month - 1`
/// (January is index 0).
const MONTHS_ABBR: [&str; 12] = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
];

/// Full English month names, indexed by `month - 1` (used for `%B`).
const MONTHS_FULL: [&str; 12] = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
];

/// Full English weekday names starting at Sunday (used for `%A`); the order
/// matches the abbreviated `Sun`..`Sat` table the date code indexes with.
const WEEKDAYS_FULL: [&str; 7] = [
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
];
212
/// A timestamp broken down into calendar fields for one timezone offset.
///
/// Built by `DateParts::from_timestamp`; the calendar fields describe the
/// instant shifted by the offset named in `timezone`.
struct DateParts<'a> {
    /// The original, unshifted timestamp in seconds since the Unix epoch.
    timestamp: i64,
    /// The `±HHMM` offset string the fields were computed for.
    timezone: &'a str,
    /// Three-letter weekday abbreviation (`Sun`..`Sat`).
    weekday: &'static str,
    /// Calendar year.
    year: i64,
    /// Month of the year, 1-based.
    month: u32,
    /// Day of the month, 1-based.
    day: u32,
    /// Hour of the day (0-23).
    hour: i64,
    /// Minute of the hour (0-59).
    minute: i64,
    /// Second of the minute (0-59).
    second: i64,
}
224
225impl<'a> DateParts<'a> {
226    fn from_timestamp(timestamp: i64, timezone: &'a str) -> Option<Self> {
227        const WEEKDAYS: [&str; 7] = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
228        let offset_seconds = timezone_offset_seconds(timezone)?;
229        let local = timestamp + offset_seconds;
230        let days = local.div_euclid(86_400);
231        let seconds = local.rem_euclid(86_400);
232        let (year, month, day) = civil_from_days(days);
233        Some(Self {
234            timestamp,
235            timezone,
236            weekday: WEEKDAYS[(days + 4).rem_euclid(7) as usize],
237            year,
238            month,
239            day,
240            hour: seconds / 3_600,
241            minute: (seconds % 3_600) / 60,
242            second: seconds % 60,
243        })
244    }
245}
246
/// Converts a `±HHMM` timezone string into a signed offset in seconds.
///
/// Returns `None` unless the input is exactly a sign followed by four ASCII
/// digits. The bytes are inspected directly, so multi-byte input can never
/// trigger a slicing panic and stray signs inside the digit groups (which
/// integer parsing would accept) are rejected.
fn timezone_offset_seconds(timezone: &str) -> Option<i64> {
    let &[sign, h1, h2, m1, m2] = timezone.as_bytes() else {
        return None;
    };
    let sign: i64 = match sign {
        b'+' => 1,
        b'-' => -1,
        _ => return None,
    };
    let digit = |byte: u8| {
        if byte.is_ascii_digit() {
            Some(i64::from(byte - b'0'))
        } else {
            None
        }
    };
    let hours = digit(h1)? * 10 + digit(h2)?;
    let minutes = digit(m1)? * 10 + digit(m2)?;
    Some(sign * (hours * 3_600 + minutes * 60))
}
260
/// Rewrites a `±HHMM` offset for strict ISO 8601 output.
///
/// A zero offset (with or without sign) becomes `Z`, any other five-byte
/// value gets a colon between hours and minutes, and everything else is
/// passed through unchanged.
fn strict_timezone(timezone: &str) -> String {
    let unsigned = timezone
        .strip_prefix(|ch: char| ch == '+' || ch == '-')
        .unwrap_or(timezone);
    match (unsigned, timezone.len()) {
        ("0000", _) => String::from("Z"),
        (_, 5) => {
            let (sign, hours, minutes) = (&timezone[..1], &timezone[1..3], &timezone[3..5]);
            format!("{}{}:{}", sign, hours, minutes)
        }
        _ => timezone.to_owned(),
    }
}
271
272fn strftime(template: &str, parts: &DateParts<'_>) -> String {
273    let weekday_index = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]
274        .iter()
275        .position(|day| *day == parts.weekday)
276        .unwrap_or(0);
277    let mut out = String::with_capacity(template.len());
278    let mut chars = template.chars().peekable();
279    while let Some(ch) = chars.next() {
280        if ch != '%' {
281            out.push(ch);
282            continue;
283        }
284        match chars.next() {
285            Some('Y') => out.push_str(&format!("{:04}", parts.year)),
286            Some('y') => out.push_str(&format!("{:02}", parts.year.rem_euclid(100))),
287            Some('m') => out.push_str(&format!("{:02}", parts.month)),
288            Some('d') => out.push_str(&format!("{:02}", parts.day)),
289            Some('e') => out.push_str(&format!("{:2}", parts.day)),
290            Some('H') => out.push_str(&format!("{:02}", parts.hour)),
291            Some('M') => out.push_str(&format!("{:02}", parts.minute)),
292            Some('S') => out.push_str(&format!("{:02}", parts.second)),
293            Some('b') | Some('h') => out.push_str(MONTHS_ABBR[(parts.month - 1) as usize]),
294            Some('B') => out.push_str(MONTHS_FULL[(parts.month - 1) as usize]),
295            Some('a') => out.push_str(parts.weekday),
296            Some('A') => out.push_str(WEEKDAYS_FULL[weekday_index]),
297            Some('%') => out.push('%'),
298            Some('n') => out.push('\n'),
299            Some('t') => out.push('\t'),
300            Some(other) => {
301                out.push('%');
302                out.push(other);
303            }
304            None => out.push('%'),
305        }
306    }
307    out
308}
309
/// Describes how long ago `timestamp` (seconds since the Unix epoch) was,
/// relative to the current system time.
///
/// Follows upstream Git's `show_date_relative`: each unit is derived from the
/// previously rounded one (seconds → minutes → hours → days), counts of exactly
/// one use the singular unit ("1 second ago"), spans under five years are
/// reported as years plus months, and longer spans as whole years. Timestamps
/// after "now" yield "in the future". If the system clock reads before the
/// epoch, `timestamp` itself is used as "now".
fn relative_date(timestamp: i64) -> String {
    /// `"<count> <unit>"`, pluralising the unit unless the count is exactly one.
    fn counted(count: u64, unit: &str) -> String {
        if count == 1 {
            format!("{count} {unit}")
        } else {
            format!("{count} {unit}s")
        }
    }

    /// Clock-independent core: describes `timestamp` as seen from `now`.
    fn describe(now: i64, timestamp: i64) -> String {
        if timestamp > now {
            return "in the future".to_string();
        }
        // `abs_diff` cannot overflow, unlike `now - timestamp` for extreme inputs.
        let seconds = now.abs_diff(timestamp);
        if seconds < 90 {
            return format!("{} ago", counted(seconds, "second"));
        }
        let minutes = (seconds + 30) / 60;
        if minutes < 90 {
            return format!("{} ago", counted(minutes, "minute"));
        }
        let hours = (minutes + 30) / 60;
        if hours < 36 {
            return format!("{} ago", counted(hours, "hour"));
        }
        let days = (hours + 12) / 24;
        if days < 14 {
            return format!("{} ago", counted(days, "day"));
        }
        // Weeks for roughly the past ten weeks.
        if days < 70 {
            return format!("{} ago", counted((days + 3) / 7, "week"));
        }
        // Months for roughly the past twelve months.
        if days < 365 {
            return format!("{} ago", counted((days + 15) / 30, "month"));
        }
        // Years and months for roughly the past five years.
        if days < 1825 {
            let total_months = (days * 12 * 2 + 365) / (365 * 2);
            let years = total_months / 12;
            let months = total_months % 12;
            if months == 0 {
                return format!("{} ago", counted(years, "year"));
            }
            return format!(
                "{}, {} ago",
                counted(years, "year"),
                counted(months, "month")
            );
        }
        format!("{} ago", counted((days + 183) / 365, "year"))
    }

    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|elapsed| elapsed.as_secs() as i64)
        .unwrap_or(timestamp);
    describe(now, timestamp)
}
354
/// Converts a day count relative to 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` triple, with `month` and `day` 1-based.
///
/// Works on 400-year eras whose internal year starts on 1 March, which puts
/// the leap day at the end of that internal year.
fn civil_from_days(days: i64) -> (i64, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097;

    // Re-base so that day 0 is 0000-03-01.
    let shifted = days + 719_468;
    let era = if shifted >= 0 {
        shifted / DAYS_PER_ERA
    } else {
        (shifted - (DAYS_PER_ERA - 1)) / DAYS_PER_ERA
    };
    let doe = shifted - era * DAYS_PER_ERA;
    let yoe = (doe - doe / 1460 + doe / 36_524 - doe / 146_096) / 365;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
    // Month index counted from March (0 = March, 11 = February).
    let shifted_month = (5 * doy + 2) / 153;
    let day = doy - (153 * shifted_month + 2) / 5 + 1;
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    };
    // January and February belong to the following civil year.
    let carry = if month <= 2 { 1 } else { 0 };
    let year = yoe + era * 400 + carry;
    (year, month as u32, day as u32)
}
369
/// Minimal trace2 event-target support (`GIT_TRACE2_EVENT`).
///
/// Upstream's trace2 event target writes one JSON object per line to the file
/// named by `GIT_TRACE2_EVENT`. sley emits only the `data` events the test
/// suite asserts on (`test_trace2_data` greps for the contiguous
/// `"category":"...","key":"...","value":"..."` triple), with the same field
/// order trace2's `fn_data_fl` produces. Unset/unwritable targets are
/// silently ignored, like upstream's best-effort tracing.
pub mod trace2 {
    use std::ffi::OsString;
    use std::fmt::Display;
    use std::fmt::Write as _;
    use std::fs::OpenOptions;
    use std::io::Write;

    /// Environment variable naming the JSON event target.
    const EVENT_VAR: &str = "GIT_TRACE2_EVENT";
    /// Environment variable naming the perf target.
    const PERF_VAR: &str = "GIT_TRACE2_PERF";

    /// Escapes `raw` for use inside a JSON string literal: quotes,
    /// backslashes, newline and tab get short escapes, other control
    /// characters below U+0020 become `\u00XX`.
    fn escape_json(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len());
        for ch in raw.chars() {
            match ch {
                '"' => out.push_str("\\\""),
                '\\' => out.push_str("\\\\"),
                '\n' => out.push_str("\\n"),
                '\t' => out.push_str("\\t"),
                ch if (ch as u32) < 0x20 => {
                    let _ = write!(out, "\\u{:04x}", ch as u32);
                }
                ch => out.push(ch),
            }
        }
        out
    }

    /// Returns the file target named by the environment variable `var`.
    ///
    /// Upstream accepts absolute paths (and fd/unix-socket forms sley does
    /// not support); only values starting with `/` are honored here. The
    /// original `OsString` is returned so that non-UTF-8 paths are opened
    /// exactly as given rather than through a lossy conversion.
    fn target_path(var: &str) -> Option<OsString> {
        let target = std::env::var_os(var)?;
        // A lossy conversion never adds or removes a leading '/', so it is
        // safe to use for this check only.
        if target.to_string_lossy().starts_with('/') {
            Some(target)
        } else {
            None
        }
    }

    /// Appends the line produced by `build_line` to the target named by
    /// `var`. The line is only built when the target is enabled; open and
    /// write failures are deliberately ignored (best-effort tracing).
    fn append_line(var: &str, build_line: impl FnOnce() -> String) {
        let Some(path) = target_path(var) else {
            return;
        };
        let line = build_line();
        if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(&path) {
            let _ = file.write_all(line.as_bytes());
        }
    }

    /// Create the trace2 targets when tracing is enabled, even if this command
    /// emits no data/region/perf events — git opens the `GIT_TRACE2_EVENT` and
    /// `GIT_TRACE2_PERF` files at startup, so consumers (and test cleanups that
    /// `rm` the file) can rely on their existence.
    pub fn touch() {
        for var in [EVENT_VAR, PERF_VAR] {
            if let Some(path) = target_path(var) {
                let _ = OpenOptions::new().create(true).append(true).open(&path);
            }
        }
    }

    /// Emit a trace2 `data` event (upstream `trace2_data_string` /
    /// `trace2_data_intmax`): a JSON line appended to the `GIT_TRACE2_EVENT`
    /// file when that target is enabled. The value is always written as a
    /// JSON string.
    pub fn data(category: &str, key: &str, value: impl Display) {
        append_line(EVENT_VAR, || {
            format!(
                "{{\"event\":\"data\",\"sid\":\"sley\",\"thread\":\"main\",\"nesting\":1,\"category\":\"{}\",\"key\":\"{}\",\"value\":\"{}\"}}\n",
                escape_json(category),
                escape_json(key),
                escape_json(&value.to_string()),
            )
        });
    }

    /// Emit a trace2 `counter` event. Git writes these for accumulated counters
    /// such as fsync hardware flushes when the event target is enabled.
    ///
    /// `count` is written unquoted, so it should render as a JSON number.
    pub fn counter(category: &str, name: &str, count: impl Display) {
        append_line(EVENT_VAR, || {
            format!(
                "{{\"event\":\"counter\",\"sid\":\"sley\",\"thread\":\"main\",\"category\":\"{}\",\"name\":\"{}\",\"count\":{}}}\n",
                escape_json(category),
                escape_json(name),
                count,
            )
        });
    }

    /// Emit a trace2 region enter/leave pair. This is the minimal event shape
    /// Git's `test_region` helper greps for when asserting sparse-index
    /// expansion and conversion behaviour.
    pub fn region(category: &str, label: &str) {
        region_event("region_enter", category, label);
        region_event("region_leave", category, label);
    }

    /// Appends a single region event named `event` to the event target.
    fn region_event(event: &str, category: &str, label: &str) {
        append_line(EVENT_VAR, || {
            format!(
                "{{\"event\":\"{}\",\"sid\":\"sley\",\"thread\":\"main\",\"nesting\":1,\"category\":\"{}\",\"label\":\"{}\"}}\n",
                escape_json(event),
                escape_json(category),
                escape_json(label),
            )
        });
    }

    /// Emit the trace2 perf payload used by Git's changed-path Bloom filter
    /// tests. This intentionally writes only the grep-stable statistics string.
    pub fn bloom_statistics(
        filter_not_present: usize,
        maybe: usize,
        definitely_not: usize,
        false_positive: usize,
    ) {
        append_line(PERF_VAR, || {
            format!(
                "statistics:{{\"filter_not_present\":{filter_not_present},\"maybe\":{maybe},\"definitely_not\":{definitely_not},\"false_positive\":{false_positive}}}\n"
            )
        });
    }

    /// Emit a compact trace2 perf `data` row for tests that extract the
    /// read-directory statistics with pipe-field parsing.
    pub fn perf_read_directory_data(key: &str, value: impl Display) {
        append_line(PERF_VAR, || {
            format!(
                "19:00:00.000000 file.c:1 | d0 | main | data | r1 | ? | ? | read_directory | ....{key}:{value}\n"
            )
        });
    }

    /// Emit a trace2 perf `data` row tagged to the `setup` category (git's
    /// `trace2_data_string("setup", ...)`), used for the
    /// `implicit-bare-repository:<dir>` marker the safe.bareRepository tests
    /// grep for. Only the grep-stable `<key>:<value>` tail is significant.
    pub fn perf_setup_data(key: &str, value: impl Display) {
        append_line(PERF_VAR, || {
            format!(
                "19:00:00.000000 setup.c:1 | d0 | main | data | r0 | ? | ? | setup | ....{key}:{value}\n"
            )
        });
    }
}
577
/// Hash algorithm used for object ids.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ObjectFormat {
    /// SHA-1: 20 raw bytes, 40 hex digits.
    Sha1,
    /// SHA-256: 32 raw bytes, 64 hex digits.
    Sha256,
}

impl ObjectFormat {
    /// Length of a digest in raw bytes.
    pub const fn raw_len(self) -> usize {
        self.hex_len() / 2
    }

    /// Length of a digest written as hexadecimal digits.
    pub const fn hex_len(self) -> usize {
        match self {
            Self::Sha1 => 40,
            Self::Sha256 => 64,
        }
    }

    /// Lowercase name of the format (`"sha1"` or `"sha256"`).
    pub const fn name(self) -> &'static str {
        match self {
            Self::Sha256 => "sha256",
            Self::Sha1 => "sha1",
        }
    }
}
603
604impl FromStr for ObjectFormat {
605    type Err = GitError;
606
607    fn from_str(value: &str) -> Result<Self> {
608        match value {
609            "sha1" => Ok(Self::Sha1),
610            "sha256" => Ok(Self::Sha256),
611            other => Err(GitError::Unsupported(format!("object format {other}"))),
612        }
613    }
614}
615
/// An object id: a digest tagged with the hash format that produced it.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ObjectId {
    /// Hash format; decides how many leading bytes of `bytes` are significant.
    format: ObjectFormat,
    /// Digest storage sized for the longest format. Only the first
    /// `format.raw_len()` bytes carry the digest; the constructors in this
    /// file leave the remainder zeroed.
    bytes: [u8; 32],
}
621
622impl ObjectId {
623    pub fn from_raw(format: ObjectFormat, raw: &[u8]) -> Result<Self> {
624        if raw.len() != format.raw_len() {
625            return Err(GitError::InvalidObjectId(format!(
626                "expected {} bytes for {}, got {}",
627                format.raw_len(),
628                format.name(),
629                raw.len()
630            )));
631        }
632        let mut bytes = [0; 32];
633        bytes[..raw.len()].copy_from_slice(raw);
634        Ok(Self { format, bytes })
635    }
636
637    pub fn from_hex(format: ObjectFormat, hex: &str) -> Result<Self> {
638        if hex.len() != format.hex_len() {
639            return Err(GitError::InvalidObjectId(format!(
640                "expected {} hex digits for {}, got {}",
641                format.hex_len(),
642                format.name(),
643                hex.len()
644            )));
645        }
646        let mut raw = [0; 32];
647        for (i, pair) in hex.as_bytes().chunks_exact(2).enumerate() {
648            raw[i] = (hex_nibble(pair[0])? << 4) | hex_nibble(pair[1])?;
649        }
650        Ok(Self { format, bytes: raw })
651    }
652
653    pub const fn format(&self) -> ObjectFormat {
654        self.format
655    }
656
657    pub fn as_bytes(&self) -> &[u8] {
658        &self.bytes[..self.format.raw_len()]
659    }
660
661    pub fn to_hex(&self) -> String {
662        let mut out = String::with_capacity(self.format.hex_len());
663        self.write_hex(&mut out)
664            .expect("writing object id hex to a String cannot fail");
665        out
666    }
667
668    pub fn write_hex(&self, out: &mut impl fmt::Write) -> fmt::Result {
669        write_hex_bytes(self.as_bytes(), out)
670    }
671
672    pub fn hex_prefix_matches(&self, prefix: &[u8]) -> bool {
673        if prefix.len() > self.format.hex_len() {
674            return false;
675        }
676
677        prefix.iter().enumerate().all(|(index, expected)| {
678            let Some(expected) = hex_nibble_value(*expected) else {
679                return false;
680            };
681            let byte = self.as_bytes()[index / 2];
682            let actual = if index % 2 == 0 {
683                byte >> 4
684            } else {
685                byte & 0x0f
686            };
687            actual == expected
688        })
689    }
690
691    pub const fn abbrev_hex_len(&self, width: usize) -> usize {
692        let hex_len = self.format.hex_len();
693        if width < hex_len { width } else { hex_len }
694    }
695
696    /// The all-zero ("null") object id for `format`.
697    pub fn null(format: ObjectFormat) -> Self {
698        Self {
699            format,
700            bytes: [0; 32],
701        }
702    }
703
704    /// True when every byte is zero (the null oid).
705    pub fn is_null(&self) -> bool {
706        self.as_bytes().iter().all(|byte| *byte == 0)
707    }
708
709    /// The id of the canonical empty tree for `format` (`4b825dc6…` for SHA-1).
710    pub fn empty_tree(format: ObjectFormat) -> Self {
711        Self::digest_object(format, "tree", b"")
712    }
713
714    /// The id of the canonical empty blob for `format` (`e69de29b…` for SHA-1).
715    pub fn empty_blob(format: ObjectFormat) -> Self {
716        Self::digest_object(format, "blob", b"")
717    }
718
719    /// Hash `"<type> <len>\0<body>"` straight into an id, bypassing the
720    /// fallible length check in [`ObjectId::from_raw`] (our own digests are
721    /// always the right length) so the well-known constants stay infallible.
722    fn digest_object(format: ObjectFormat, object_type: &str, body: &[u8]) -> Self {
723        let mut framed = Vec::with_capacity(object_type.len() + body.len() + 32);
724        framed.extend_from_slice(object_type.as_bytes());
725        framed.push(b' ');
726        framed.extend_from_slice(body.len().to_string().as_bytes());
727        framed.push(0);
728        framed.extend_from_slice(body);
729        let mut bytes = [0u8; 32];
730        match format {
731            ObjectFormat::Sha1 => bytes[..20].copy_from_slice(&sha1(&framed)),
732            ObjectFormat::Sha256 => bytes[..32].copy_from_slice(&sha256(&framed)),
733        }
734        Self { format, bytes }
735    }
736}
737
738impl fmt::Debug for ObjectId {
739    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
740        f.debug_tuple("ObjectId").field(&self.to_hex()).finish()
741    }
742}
743
744impl fmt::Display for ObjectId {
745    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
746        self.write_hex(f)
747    }
748}
749
750impl FromStr for ObjectId {
751    type Err = GitError;
752
753    /// Parse a full hex id, inferring the hash from its length (40 hex digits =
754    /// SHA-1, 64 = SHA-256).
755    fn from_str(text: &str) -> Result<Self> {
756        let format = match text.len() {
757            40 => ObjectFormat::Sha1,
758            64 => ObjectFormat::Sha256,
759            other => {
760                return Err(GitError::InvalidObjectId(format!(
761                    "expected 40 or 64 hex digits, got {other}"
762                )));
763            }
764        };
765        Self::from_hex(format, text)
766    }
767}
768
/// An owned, arbitrary byte sequence.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ByteString(Vec<u8>);

impl ByteString {
    /// Wraps anything that converts into a byte vector.
    pub fn new(bytes: impl Into<Vec<u8>>) -> Self {
        let owned: Vec<u8> = bytes.into();
        Self(owned)
    }

    /// Borrows the contents as a byte slice.
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_slice()
    }
}

impl From<&str> for ByteString {
    /// Copies the UTF-8 bytes of `value`.
    fn from(value: &str) -> Self {
        Self(Vec::from(value.as_bytes()))
    }
}
787
788/// A validated git ref name (e.g. `refs/heads/main`, `HEAD`).
789#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
790pub struct FullName(String);
791
792impl FullName {
793    /// Construct a ref name, rejecting empty names, ASCII control characters,
794    /// leading/trailing whitespace, and consecutive slashes.
795    pub fn new(name: impl AsRef<str>) -> Result<Self> {
796        let name = name.as_ref();
797        validate_full_name(name)?;
798        Ok(Self(name.to_string()))
799    }
800
801    pub fn as_str(&self) -> &str {
802        &self.0
803    }
804}
805
806impl fmt::Debug for FullName {
807    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
808        f.debug_tuple("FullName").field(&self.0).finish()
809    }
810}
811
812impl fmt::Display for FullName {
813    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
814        f.write_str(&self.0)
815    }
816}
817
818impl From<FullName> for String {
819    fn from(value: FullName) -> Self {
820        value.0
821    }
822}
823
824impl Borrow<str> for FullName {
825    fn borrow(&self) -> &str {
826        &self.0
827    }
828}
829
830impl AsRef<str> for FullName {
831    fn as_ref(&self) -> &str {
832        &self.0
833    }
834}
835
836impl TryFrom<&str> for FullName {
837    type Error = GitError;
838
839    fn try_from(value: &str) -> Result<Self> {
840        Self::new(value)
841    }
842}
843
844impl TryFrom<String> for FullName {
845    type Error = GitError;
846
847    fn try_from(value: String) -> Result<Self> {
848        validate_full_name(&value)?;
849        Ok(Self(value))
850    }
851}
852
853impl PartialEq<&str> for FullName {
854    fn eq(&self, other: &&str) -> bool {
855        self.0 == *other
856    }
857}
858
859impl PartialEq<FullName> for &str {
860    fn eq(&self, other: &FullName) -> bool {
861        *self == other.0
862    }
863}
864
865fn validate_full_name(name: &str) -> Result<()> {
866    if name.is_empty() {
867        return Err(GitError::InvalidFormat("ref name must not be empty".into()));
868    }
869    if name.chars().next().is_some_and(|ch| ch.is_whitespace())
870        || name.chars().last().is_some_and(|ch| ch.is_whitespace())
871    {
872        return Err(GitError::InvalidFormat(
873            "ref name must not have leading or trailing whitespace".into(),
874        ));
875    }
876    if name.contains("//") {
877        return Err(GitError::InvalidFormat(
878            "ref name must not contain consecutive slashes".into(),
879        ));
880    }
881    if name.bytes().any(|byte| byte.is_ascii_control()) {
882        return Err(GitError::InvalidFormat(
883            "ref name must not contain control characters".into(),
884        ));
885    }
886    Ok(())
887}
888
/// An owned byte string for git paths and similar on-disk identifiers, which
/// are not required to be valid UTF-8.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct BString(Vec<u8>);

impl BString {
    /// Wraps anything that converts into a byte vector.
    pub fn new(bytes: impl Into<Vec<u8>>) -> Self {
        let owned: Vec<u8> = bytes.into();
        Self(owned)
    }

    /// Copies `bytes` into a new byte string.
    pub fn from_bytes(bytes: &[u8]) -> Self {
        Self(Vec::from(bytes))
    }

    /// Borrows the contents as a byte slice.
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_slice()
    }

    /// Number of bytes stored.
    pub fn len(&self) -> usize {
        self.as_bytes().len()
    }

    /// Whether the byte string holds no bytes.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Consumes the byte string and returns the underlying vector.
    pub fn into_bytes(self) -> Vec<u8> {
        let Self(inner) = self;
        inner
    }
}

impl From<&str> for BString {
    fn from(v: &str) -> Self {
        Self(v.as_bytes().to_vec())
    }
}

impl From<&[u8]> for BString {
    fn from(v: &[u8]) -> Self {
        Self(v.to_vec())
    }
}

impl<const N: usize> From<&[u8; N]> for BString {
    fn from(v: &[u8; N]) -> Self {
        Self(v.to_vec())
    }
}

impl From<Vec<u8>> for BString {
    /// Takes ownership of the vector without copying.
    fn from(v: Vec<u8>) -> Self {
        Self::new(v)
    }
}

impl PartialEq<&[u8]> for BString {
    fn eq(&self, o: &&[u8]) -> bool {
        self.as_bytes() == *o
    }
}

impl<const N: usize> PartialEq<&[u8; N]> for BString {
    fn eq(&self, o: &&[u8; N]) -> bool {
        self.as_bytes() == &o[..]
    }
}

impl PartialEq<BString> for &[u8] {
    fn eq(&self, o: &BString) -> bool {
        o.as_bytes() == *self
    }
}

impl<const N: usize> PartialEq<BString> for &[u8; N] {
    fn eq(&self, o: &BString) -> bool {
        o.as_bytes() == &self[..]
    }
}

impl PartialEq<Vec<u8>> for BString {
    fn eq(&self, o: &Vec<u8>) -> bool {
        self.as_bytes() == o.as_slice()
    }
}

impl PartialEq<BString> for Vec<u8> {
    fn eq(&self, o: &BString) -> bool {
        self.as_slice() == o.as_bytes()
    }
}

impl fmt::Display for BString {
    /// Writes the bytes as text, substituting U+FFFD for invalid UTF-8.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = String::from_utf8_lossy(self.as_bytes());
        f.write_str(&text)
    }
}

impl Borrow<[u8]> for BString {
    fn borrow(&self) -> &[u8] {
        &self.0
    }
}

impl Deref for BString {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        &self.0
    }
}

impl AsRef<[u8]> for BString {
    fn as_ref(&self) -> &[u8] {
        &self.0
    }
}
990
991#[derive(Debug, Clone, PartialEq, Eq, Hash)]
992pub struct RepoPath(PathBuf);
993
994impl RepoPath {
995    pub fn new(path: impl Into<PathBuf>) -> Result<Self> {
996        let path = path.into();
997        if path.is_absolute() {
998            return Err(GitError::InvalidPath(
999                "repository paths must be relative".into(),
1000            ));
1001        }
1002        if path.components().any(|component| {
1003            matches!(
1004                component,
1005                std::path::Component::ParentDir | std::path::Component::Prefix(_)
1006            )
1007        }) {
1008            return Err(GitError::InvalidPath(
1009                "repository paths must not escape".into(),
1010            ));
1011        }
1012        Ok(Self(path))
1013    }
1014
1015    pub fn as_path(&self) -> &Path {
1016        &self.0
1017    }
1018}
1019
1020/// A typed *parse-view* of a git identity line (`Name <email> <secs> <tz>`) as
1021/// found on a commit's `author`/`committer` or a tag's `tagger` header.
1022///
1023/// This is a read-only lens over bytes that are stored and re-serialized
1024/// verbatim elsewhere (see [`Signature::raw`]). It exists so callers can read
1025/// the typed `name`/`email`/`time` of an identity without re-implementing git's
1026/// ident-splitting rules, *not* as a storage format: the object model keeps the
1027/// original raw bytes as its source of truth, and round-tripping through this
1028/// view is byte-exact precisely because the raw line is retained alongside the
1029/// parsed fields (see [`Signature::to_ident_bytes`]).
1030///
1031/// Parse one with [`Signature::from_ident_line`]. The `time`'s timezone
1032/// preserves git's distinction between `+0000` (UTC) and `-0000` (a sentinel git
1033/// writes to mean "timezone unknown"); see [`GitTime`].
1034#[derive(Debug, Clone, PartialEq, Eq)]
1035pub struct Signature {
1036    /// The identity's name: the bytes before the ` <` that opens the email,
1037    /// with one trailing space (the separator) removed. May be empty.
1038    pub name: ByteString,
1039    /// The identity's email: the bytes between the `<` and `>` delimiters. May
1040    /// be empty.
1041    pub email: ByteString,
1042    /// The commit/authorship time and its timezone offset.
1043    pub time: GitTime,
1044    /// The exact original ident-line bytes this view was parsed from, retained
1045    /// so [`Signature::to_ident_bytes`] can reproduce the input byte-for-byte
1046    /// regardless of any non-canonical whitespace or formatting it contained.
1047    pub raw: Vec<u8>,
1048}
1049
1050impl Signature {
1051    /// Parse a raw git identity line (`Name <email> <unix-secs> <tz>`) into a
1052    /// typed view, returning `None` when the bytes do not form a well-formed
1053    /// identity.
1054    ///
1055    /// The splitting mirrors git's own `split_ident_line`: the email is the run
1056    /// of bytes between the last `<` and the first following `>`; the name is
1057    /// everything before that `<` (one separating space is dropped); after the
1058    /// `>` come a space, the decimal Unix timestamp, a space, and the timezone
1059    /// token. The name and email may legitimately be empty, but a missing
1060    /// `<`/`>` pair, a non-numeric timestamp, or a malformed timezone token all
1061    /// yield `None` rather than a lossy guess — this is a *best-effort* parse
1062    /// that never panics. The original bytes are retained in
1063    /// [`Signature::raw`] so the parsed view re-serializes byte-identically.
1064    pub fn from_ident_line(line: &[u8]) -> Option<Self> {
1065        // Email is delimited by the last '<' whose matching '>' follows it, the
1066        // way git scans an ident from the right. Find the last '>' first, then
1067        // the last '<' before it.
1068        let mail_end = line.iter().rposition(|byte| *byte == b'>')?;
1069        let mail_begin = line[..mail_end].iter().rposition(|byte| *byte == b'<')? + 1;
1070        let email = &line[mail_begin..mail_end];
1071
1072        // The name is everything before the '<', with a single trailing space
1073        // (the separator git inserts) trimmed if present.
1074        let mut name_end = mail_begin.saturating_sub(1);
1075        if name_end > 0 && line[name_end - 1] == b' ' {
1076            name_end -= 1;
1077        }
1078        let name = &line[..name_end];
1079
1080        // After '>' git expects "<space><secs><space><tz>". Trim the single
1081        // separating space, then split the timestamp from the timezone token.
1082        let rest = line.get(mail_end + 1..)?;
1083        let rest = rest.strip_prefix(b" ")?;
1084        let time = GitTime::from_time_fields(rest)?;
1085
1086        Some(Self {
1087            name: ByteString::new(name.to_vec()),
1088            email: ByteString::new(email.to_vec()),
1089            time,
1090            raw: line.to_vec(),
1091        })
1092    }
1093
1094    /// Reproduce the original identity-line bytes.
1095    ///
1096    /// This returns [`Signature::raw`] verbatim, so for any line that
1097    /// [`Signature::from_ident_line`] accepted, `from_ident_line(line)?
1098    /// .to_ident_bytes() == line` holds byte-for-byte — including the `-0000`
1099    /// timezone and any non-canonical spacing the source contained.
1100    pub fn to_ident_bytes(&self) -> Vec<u8> {
1101        self.raw.clone()
1102    }
1103
1104    /// Re-derive the canonical ident line from the parsed fields alone
1105    /// (`name <email> secs tz`), ignoring [`Signature::raw`].
1106    ///
1107    /// For an identity in git's canonical form this equals
1108    /// [`Signature::to_ident_bytes`]; it differs only when the source line
1109    /// carried non-canonical whitespace. Callers wanting byte-exact
1110    /// reproduction should use [`Signature::to_ident_bytes`]; this is provided
1111    /// for constructing a normalized line from typed parts.
1112    pub fn to_canonical_ident_bytes(&self) -> Vec<u8> {
1113        let mut out = Vec::with_capacity(self.raw.len());
1114        out.extend_from_slice(self.name.as_bytes());
1115        out.extend_from_slice(b" <");
1116        out.extend_from_slice(self.email.as_bytes());
1117        out.extend_from_slice(b"> ");
1118        out.extend_from_slice(self.time.to_ident_suffix().as_bytes());
1119        out
1120    }
1121}
1122
/// A tolerant parse-view of a git identity line split git's way (ident.c's
/// `split_ident_line`). Unlike [`Signature::from_ident_line`] — which is a
/// strict, byte-exact round-trip parser — this mirrors how git's pretty-printer
/// recovers fields from *broken* idents: the email is the run between the
/// **first** `<` and the **first** following `>`, while the timestamp is located
/// by scanning **backwards** from the end of the line for the **last** `>`. That
/// split lets a corrupt ident like `Name <a@b>-<> 123 +0000` still surrender the
/// correct name (`Name`), email (`a@b`), and date (`123 +0000`).
pub struct IdentFields<'a> {
    /// Everything before the first `<` with all trailing whitespace trimmed.
    /// Empty when nothing but whitespace precedes the `<`.
    pub name: &'a [u8],
    /// The bytes between the first `<` and the first following `>`.
    pub email: &'a [u8],
    /// The decimal timestamp digit-run, or `None` when the line has no parseable
    /// `<digits> <±digits>` date tail (git's "person only" case).
    pub date: Option<&'a [u8]>,
    /// The timezone token (`±` plus digits), present iff `date` is.
    pub tz: Option<&'a [u8]>,
}

/// True for the whitespace bytes git's `isspace` recognizes (space, tab,
/// newline, carriage return). This deliberately excludes vertical tab (`0x0b`)
/// and form feed (`0x0c`), matching git's `sane_ctype` table — the distinction
/// that makes a vertical-tab-only date a sentinel rather than valid whitespace.
fn ident_isspace(byte: u8) -> bool {
    matches!(byte, b' ' | b'\t' | b'\n' | b'\r')
}

/// Split a git identity line the way ident.c's `split_ident_line` does.
///
/// Returns `None` only when the line has no `<`, or no `>` after the first
/// `<` (git's `status < 0`). Otherwise the name and email are always
/// reported; `date` and `tz` are `None` for the "person only" case where no
/// valid `<digits> <±digits>` tail follows the final `>`.
pub fn split_ident_line(line: &[u8]) -> Option<IdentFields<'_>> {
    /// Index of the first byte at or after `from` that fails `pred`
    /// (`line.len()` when every remaining byte passes).
    fn run_end(line: &[u8], from: usize, pred: impl Fn(u8) -> bool) -> usize {
        from + line[from..].iter().take_while(|&&byte| pred(byte)).count()
    }

    // The email opens just past the first '<'.
    let lt = line.iter().position(|&byte| byte == b'<')?;
    let mail_begin = lt + 1;

    // The name ends after the last non-space byte before '<'. When only
    // whitespace (or nothing) precedes '<', git reports "no human readable
    // name" by setting name_end = name_begin, i.e. an empty name — not the
    // whitespace run itself.
    let name_end = line[..lt]
        .iter()
        .rposition(|&byte| !ident_isspace(byte))
        .map_or(0, |last| last + 1);
    let name = &line[..name_end];

    // The email closes at the first '>' at or after mail_begin.
    let gt = mail_begin + line[mail_begin..].iter().position(|&byte| byte == b'>')?;
    let email = &line[mail_begin..gt];

    let person_only = IdentFields {
        name,
        email,
        date: None,
        tz: None,
    };

    // Date: start after the LAST '>' (git assumes the timestamp contains no
    // '>'). One is guaranteed to exist because `gt` was found above.
    let last_gt = line.iter().rposition(|&byte| byte == b'>').unwrap_or(gt);

    let date_begin = run_end(line, last_gt + 1, ident_isspace);
    let date_end = run_end(line, date_begin, |byte| byte.is_ascii_digit());
    if date_end == date_begin {
        return Some(person_only);
    }

    let tz_begin = run_end(line, date_end, ident_isspace);
    if !matches!(line.get(tz_begin), Some(b'+' | b'-')) {
        return Some(person_only);
    }
    let tz_end = run_end(line, tz_begin + 1, |byte| byte.is_ascii_digit());
    if tz_end == tz_begin + 1 {
        // A bare sign with no digits is not a timezone.
        return Some(person_only);
    }

    Some(IdentFields {
        name,
        email,
        date: Some(&line[date_begin..date_end]),
        tz: Some(&line[tz_begin..tz_end]),
    })
}
1234
/// Reports whether `seconds` is too large to be used as a timestamp.
///
/// A value is rejected when it does not fit in an `i64`, and also when it is
/// exactly `i64::MAX`; every smaller value is accepted. The doc on the
/// original states this mirrors git's `date_overflows` for a 64-bit signed
/// `time_t`.
fn ident_date_overflows(seconds: u64) -> bool {
    match i64::try_from(seconds) {
        Ok(fits) => fits == i64::MAX,
        Err(_) => true,
    }
}
1240
1241/// Render an ident's date the way pretty.c's `show_ident_date` does: parse the
1242/// timestamp (git's `parse_timestamp` is unsigned/base-10 and clamps on
1243/// overflow), substitute the epoch sentinel (`time = 0`, timezone `+0000`) when
1244/// the value overflows what a `time_t` can hold, then format per `mode`. `date`
1245/// is the timestamp digit-run and `tz` its timezone token (as returned by
1246/// [`split_ident_line`]).
1247pub fn ident_render_date(date: &[u8], tz: &[u8], mode: &DateMode) -> String {
1248    let parsed = std::str::from_utf8(date)
1249        .ok()
1250        .and_then(|text| text.parse::<u64>().ok());
1251    let (seconds, tz_text) = match parsed {
1252        Some(value) if !ident_date_overflows(value) => {
1253            (value as i64, std::str::from_utf8(tz).unwrap_or("+0000"))
1254        }
1255        // Overflow, or a digit-run too long for u64: the epoch sentinel with a
1256        // forced `+0000` timezone, exactly like git's show_ident_date.
1257        _ => (0, "+0000"),
1258    };
1259    mode.render(seconds, tz_text).unwrap_or_default()
1260}
1261
1262impl fmt::Display for Signature {
1263    /// Renders the original ident line (lossy only for bytes that are not valid
1264    /// UTF-8, which are replaced with `U+FFFD`). Use
1265    /// [`Signature::to_ident_bytes`] for the exact bytes.
1266    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1267        write!(f, "{}", String::from_utf8_lossy(&self.raw))
1268    }
1269}
1270
/// A git timestamp: Unix seconds together with a timezone offset.
///
/// The offset is kept as signed minutes east of UTC
/// ([`timezone_offset_minutes`]) plus a separate [`negative_utc`] flag. Git's
/// timezone tokens `+0000` and `-0000` are both zero minutes from UTC, yet git
/// uses `-0000` as a sentinel for "timezone unknown", and the difference is
/// part of a commit's byte-exact identity. A bare minute count cannot encode
/// the sign of zero, so the flag records it.
///
/// [`timezone_offset_minutes`]: GitTime::timezone_offset_minutes
/// [`negative_utc`]: GitTime::negative_utc
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GitTime {
    /// Seconds since the Unix epoch.
    pub seconds: i64,
    /// Minutes east of UTC (`+0530` is `330`, `-0500` is `-300`). Both `+0000`
    /// and `-0000` store zero here; [`GitTime::negative_utc`] separates them.
    pub timezone_offset_minutes: i16,
    /// Set only for the `-0000` token (negative sign, zero magnitude), git's
    /// "timezone unknown" sentinel. `false` for every non-zero offset.
    pub negative_utc: bool,
}

impl GitTime {
    /// Builds a time from seconds and a minute offset. A zero offset means
    /// plain `+0000`; use [`GitTime::with_negative_utc`] for `-0000`.
    pub const fn new(seconds: i64, timezone_offset_minutes: i16) -> Self {
        Self {
            negative_utc: false,
            timezone_offset_minutes,
            seconds,
        }
    }

    /// Builds a time carrying the `-0000` sentinel: zero minute offset with
    /// `negative_utc` set.
    pub const fn with_negative_utc(seconds: i64) -> Self {
        Self {
            negative_utc: true,
            timezone_offset_minutes: 0,
            seconds,
        }
    }

    /// Parses the `<secs> <tz>` tail of an ident line (what follows the `"> "`
    /// after the email). Returns `None` when the bytes are not UTF-8, contain
    /// no space, or either field fails to parse.
    fn from_time_fields(bytes: &[u8]) -> Option<Self> {
        std::str::from_utf8(bytes)
            .ok()
            .and_then(|text| text.split_once(' '))
            .and_then(|(seconds_text, tz_text)| {
                let seconds = seconds_text.parse::<i64>().ok()?;
                let (timezone_offset_minutes, negative_utc) = parse_timezone_token(tz_text)?;
                Some(Self {
                    seconds,
                    timezone_offset_minutes,
                    negative_utc,
                })
            })
    }

    /// Renders `<secs> <±HHMM>`, keeping the `-0000` sentinel.
    fn to_ident_suffix(self) -> String {
        let mut suffix = self.seconds.to_string();
        suffix.push(' ');
        suffix.push_str(&self.offset_token());
        suffix
    }

    /// The timezone token for this offset: a sign followed by zero-padded
    /// hours and minutes, e.g. `+0000`, `-0500`, `+0530`. Yields `-0000` when
    /// [`GitTime::negative_utc`] is set.
    pub fn offset_token(self) -> String {
        let total_minutes = self.timezone_offset_minutes.unsigned_abs();
        let (hours, minutes) = (total_minutes / 60, total_minutes % 60);
        let sign = match (self.negative_utc, self.timezone_offset_minutes < 0) {
            (false, false) => '+',
            _ => '-',
        };
        format!("{sign}{hours:02}{minutes:02}")
    }
}

/// Parses a git timezone token (`±HHMM`) into
/// `(minutes east of UTC, negative_utc)`.
///
/// The token must be exactly five bytes: `+` or `-`, then four ASCII digits of
/// which the last two are minutes. `negative_utc` is `true` only for a `-`
/// sign with zero magnitude (`-0000`). Anything else returns `None`.
fn parse_timezone_token(token: &str) -> Option<(i16, bool)> {
    let &[sign, h_tens, h_ones, m_tens, m_ones] = token.as_bytes() else {
        return None;
    };
    let is_negative = match sign {
        b'-' => true,
        b'+' => false,
        _ => return None,
    };
    let digit = |byte: u8| byte.is_ascii_digit().then(|| i16::from(byte - b'0'));
    let hours = digit(h_tens)? * 10 + digit(h_ones)?;
    let minutes = digit(m_tens)? * 10 + digit(m_ones)?;
    let magnitude = hours * 60 + minutes;
    Some(if is_negative {
        (-magnitude, magnitude == 0)
    } else {
        (magnitude, false)
    })
}
1379
/// A named capability with an optional associated value.
///
/// NOTE(review): presumably a protocol capability of the form `name` or
/// `name=value` — confirm against the code that constructs these.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Capability {
    /// The capability's name.
    pub name: String,
    /// The value attached to the capability, if it carries one.
    pub value: Option<String>,
}
1385
/// The kind of object a "missing object" error refers to. `Object` is the
/// generic case used when no more specific kind is given.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MissingObjectKind {
    Object,
    Blob,
    Tree,
    Commit,
    Tag,
}

impl MissingObjectKind {
    /// The lowercase name of this kind (`"object"`, `"blob"`, `"tree"`,
    /// `"commit"` or `"tag"`).
    pub const fn as_str(self) -> &'static str {
        match self {
            MissingObjectKind::Tag => "tag",
            MissingObjectKind::Commit => "commit",
            MissingObjectKind::Tree => "tree",
            MissingObjectKind::Blob => "blob",
            MissingObjectKind::Object => "object",
        }
    }
}

impl fmt::Display for MissingObjectKind {
    /// Writes the same text as [`MissingObjectKind::as_str`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label: &'static str = (*self).as_str();
        f.write_str(label)
    }
}
1412
/// The operation during which an object was found to be missing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MissingObjectContext {
    Read,
    Traversal,
    PackInstall,
    RevisionWalk,
    WorktreeMaterialize,
    RemoteBoundary,
}

impl MissingObjectContext {
    /// The kebab-case name of this context, e.g. `"pack-install"`.
    pub const fn as_str(self) -> &'static str {
        match self {
            MissingObjectContext::RemoteBoundary => "remote-boundary",
            MissingObjectContext::WorktreeMaterialize => "worktree-materialize",
            MissingObjectContext::RevisionWalk => "revision-walk",
            MissingObjectContext::PackInstall => "pack-install",
            MissingObjectContext::Traversal => "traversal",
            MissingObjectContext::Read => "read",
        }
    }
}

impl fmt::Display for MissingObjectContext {
    /// Writes the same text as [`MissingObjectContext::as_str`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label: &'static str = (*self).as_str();
        f.write_str(label)
    }
}
1441
1442#[derive(Debug, Clone, PartialEq, Eq)]
1443pub enum NotFoundKind {
1444    Message(String),
1445    Remote {
1446        name: String,
1447    },
1448    Object {
1449        oid: ObjectId,
1450        kind: MissingObjectKind,
1451        context: Option<MissingObjectContext>,
1452    },
1453    Reference {
1454        name: String,
1455    },
1456    Repository {
1457        path: String,
1458    },
1459}
1460
1461impl fmt::Display for NotFoundKind {
1462    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1463        match self {
1464            Self::Message(msg) => write!(f, "{msg}"),
1465            Self::Remote { name } => write!(f, "remote {name}"),
1466            Self::Object {
1467                oid,
1468                kind: MissingObjectKind::Object,
1469                ..
1470            } => write!(f, "object {oid}"),
1471            Self::Object { oid, kind, .. } => write!(f, "{kind} object {oid}"),
1472            Self::Reference { name } => write!(f, "{name}"),
1473            Self::Repository { path } => write!(f, "{path}"),
1474        }
1475    }
1476}
1477
1478impl NotFoundKind {
1479    pub fn object_id(&self) -> Option<ObjectId> {
1480        match self {
1481            Self::Object { oid, .. } => Some(*oid),
1482            _ => None,
1483        }
1484    }
1485
1486    pub fn missing_object_kind(&self) -> Option<MissingObjectKind> {
1487        match self {
1488            Self::Object { kind, .. } => Some(*kind),
1489            _ => None,
1490        }
1491    }
1492
1493    pub fn missing_object_context(&self) -> Option<MissingObjectContext> {
1494        match self {
1495            Self::Object { context, .. } => *context,
1496            _ => None,
1497        }
1498    }
1499}
1500
/// Git-compatible CLI exit status. See `git help exit-code` for the upstream taxonomy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CliExit {
    /// Success (exit 0).
    Ok,
    /// User-facing fatal error (exit 128).
    UserError,
    /// Invalid usage / bad arguments (exit 129).
    Usage,
    /// Command-specific exit code (e.g. grep returning 1 when no matches).
    Custom(i32),
}

impl CliExit {
    /// The numeric process exit code for this status; `Custom` passes its
    /// payload through unchanged.
    pub const fn code(self) -> i32 {
        match self {
            CliExit::Custom(explicit) => explicit,
            CliExit::Usage => 129,
            CliExit::UserError => 128,
            CliExit::Ok => 0,
        }
    }
}
1524
/// The crate's error type. Most variants carry a human-readable message; the
/// `Display` impl prefixes each with a short category label.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GitError {
    /// An I/O failure, stored as the rendered message of the underlying error.
    Io(String),
    /// An object id that could not be parsed or is otherwise invalid.
    InvalidObjectId(String),
    /// Malformed object content.
    InvalidObject(String),
    /// Input that violates an expected format.
    InvalidFormat(String),
    /// A path that is not acceptable (e.g. rejected by `RepoPath::new`).
    InvalidPath(String),
    /// A feature or input that is not supported.
    Unsupported(String),
    /// Something was looked up and not found; see [`NotFoundKind`].
    NotFound(NotFoundKind),
    /// A transaction failed.
    Transaction(String),
    /// A command failed.
    Command(String),
    /// Typed CLI exit with a user-facing message printed by the binary entrypoint.
    Cli(CliExit, String),
    /// Legacy explicit exit code; the message (if any) was already printed by the command.
    Exit(i32),
}

/// Result alias whose error type is fixed to [`GitError`].
pub type Result<T> = std::result::Result<T, GitError>;
1543
1544impl fmt::Display for GitError {
1545    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1546        match self {
1547            Self::Io(msg) => write!(f, "io error: {msg}"),
1548            Self::InvalidObjectId(msg) => write!(f, "invalid object id: {msg}"),
1549            Self::InvalidObject(msg) => write!(f, "invalid object: {msg}"),
1550            Self::InvalidFormat(msg) => write!(f, "invalid format: {msg}"),
1551            Self::InvalidPath(msg) => write!(f, "invalid path: {msg}"),
1552            Self::Unsupported(msg) => write!(f, "unsupported: {msg}"),
1553            Self::NotFound(kind) => write!(f, "not found: {kind}"),
1554            Self::Transaction(msg) => write!(f, "transaction failed: {msg}"),
1555            Self::Command(msg) => write!(f, "command failed: {msg}"),
1556            Self::Cli(_, msg) => f.write_str(msg),
1557            Self::Exit(code) => write!(f, "exit {code}"),
1558        }
1559    }
1560}
1561
1562impl Error for GitError {}
1563
1564impl GitError {
1565    pub fn usage(msg: impl Into<String>) -> Self {
1566        Self::Cli(CliExit::Usage, msg.into())
1567    }
1568
1569    pub fn user_error(msg: impl Into<String>) -> Self {
1570        Self::Cli(CliExit::UserError, msg.into())
1571    }
1572
1573    pub fn cli_exit(kind: CliExit, msg: impl Into<String>) -> Self {
1574        Self::Cli(kind, msg.into())
1575    }
1576
1577    pub fn cli_exit_code(&self) -> i32 {
1578        cli_exit_code(self)
1579    }
1580
1581    pub fn not_found(msg: impl Into<String>) -> Self {
1582        Self::NotFound(NotFoundKind::Message(msg.into()))
1583    }
1584
1585    pub fn remote_not_found(name: impl Into<String>) -> Self {
1586        Self::NotFound(NotFoundKind::Remote { name: name.into() })
1587    }
1588
1589    pub fn object_not_found(oid: ObjectId) -> Self {
1590        Self::object_kind_not_found(oid, MissingObjectKind::Object)
1591    }
1592
1593    pub fn object_kind_not_found(oid: ObjectId, kind: MissingObjectKind) -> Self {
1594        Self::NotFound(NotFoundKind::Object {
1595            oid,
1596            kind,
1597            context: None,
1598        })
1599    }
1600
1601    pub fn object_not_found_in(oid: ObjectId, context: MissingObjectContext) -> Self {
1602        Self::object_kind_not_found_in(oid, MissingObjectKind::Object, context)
1603    }
1604
1605    pub fn object_kind_not_found_in(
1606        oid: ObjectId,
1607        kind: MissingObjectKind,
1608        context: MissingObjectContext,
1609    ) -> Self {
1610        Self::NotFound(NotFoundKind::Object {
1611            oid,
1612            kind,
1613            context: Some(context),
1614        })
1615    }
1616
1617    pub fn reference_not_found(name: impl Into<String>) -> Self {
1618        Self::NotFound(NotFoundKind::Reference { name: name.into() })
1619    }
1620
1621    pub fn repository_not_found(path: impl Into<String>) -> Self {
1622        Self::NotFound(NotFoundKind::Repository { path: path.into() })
1623    }
1624
1625    pub fn not_found_kind(&self) -> Option<&NotFoundKind> {
1626        match self {
1627            Self::NotFound(kind) => Some(kind),
1628            _ => None,
1629        }
1630    }
1631}
1632
1633impl From<std::io::Error> for GitError {
1634    fn from(value: std::io::Error) -> Self {
1635        Self::Io(value.to_string())
1636    }
1637}
1638
1639/// Map a [`GitError`] to the process exit code the CLI should use.
1640pub fn cli_exit_code(err: &GitError) -> i32 {
1641    match err {
1642        GitError::Exit(code) => *code,
1643        GitError::Cli(kind, _) => kind.code(),
1644        // During migration, usage-style validation still returns `Command`; treat as
1645        // general failure until those call sites adopt `GitError::usage`.
1646        GitError::Command(_) => 1,
1647        _ => 1,
1648    }
1649}
1650
1651pub fn object_id_for_bytes(
1652    format: ObjectFormat,
1653    object_type: &str,
1654    body: &[u8],
1655) -> Result<ObjectId> {
1656    match format {
1657        // Hash the `"<type> <len>\0"` header and the body as separate updates so
1658        // the (potentially large) body is never copied into a combined buffer just
1659        // to feed the digest.
1660        ObjectFormat::Sha1 => ObjectId::from_raw(format, &sha1_object_digest(object_type, body)),
1661        ObjectFormat::Sha256 => {
1662            let mut framed = Vec::with_capacity(object_type.len() + body.len() + 32);
1663            framed.extend_from_slice(object_type.as_bytes());
1664            framed.push(b' ');
1665            framed.extend_from_slice(body.len().to_string().as_bytes());
1666            framed.push(0);
1667            framed.extend_from_slice(body);
1668            ObjectId::from_raw(format, &sha256(&framed))
1669        }
1670    }
1671}
1672
1673pub fn digest_bytes(format: ObjectFormat, bytes: &[u8]) -> Result<ObjectId> {
1674    match format {
1675        ObjectFormat::Sha1 => ObjectId::from_raw(format, &sha1(bytes)),
1676        ObjectFormat::Sha256 => ObjectId::from_raw(format, &sha256(bytes)),
1677    }
1678}
1679
/// An incremental hasher for a chosen [`ObjectFormat`]: create it with
/// [`StreamingDigest::new`], feed data with [`StreamingDigest::update`], and
/// obtain the resulting [`ObjectId`] with [`StreamingDigest::finalize`].
pub struct StreamingDigest {
    // The format requested at construction; passed to `ObjectId::from_raw`.
    format: ObjectFormat,
    // The concrete hasher state for that format.
    inner: StreamingDigestInner,
}

/// The concrete hasher behind a [`StreamingDigest`]. The SHA-1 variant wraps
/// either the in-crate `Sha1Hasher` or the `sha1` crate's hasher, selected at
/// compile time by the `fast-sha1` feature.
enum StreamingDigestInner {
    #[cfg(not(feature = "fast-sha1"))]
    Sha1(Sha1Hasher),
    #[cfg(feature = "fast-sha1")]
    Sha1(sha1::Sha1),
    Sha256(Sha256Hasher),
}

impl StreamingDigest {
    /// Creates a fresh hasher for `format`.
    pub fn new(format: ObjectFormat) -> Self {
        let inner = match format {
            #[cfg(not(feature = "fast-sha1"))]
            ObjectFormat::Sha1 => StreamingDigestInner::Sha1(Sha1Hasher::new()),
            #[cfg(feature = "fast-sha1")]
            ObjectFormat::Sha1 => {
                // `new` comes from the `Digest` trait, so it must be in scope.
                use sha1::Digest;
                StreamingDigestInner::Sha1(sha1::Sha1::new())
            }
            ObjectFormat::Sha256 => StreamingDigestInner::Sha256(Sha256Hasher::new()),
        };
        Self { format, inner }
    }

    /// Feeds `data` into the running digest.
    pub fn update(&mut self, data: &[u8]) {
        match &mut self.inner {
            #[cfg(not(feature = "fast-sha1"))]
            StreamingDigestInner::Sha1(hasher) => hasher.update(data),
            #[cfg(feature = "fast-sha1")]
            StreamingDigestInner::Sha1(hasher) => {
                use sha1::Digest;
                hasher.update(data);
            }
            StreamingDigestInner::Sha256(hasher) => hasher.update(data),
        }
    }

    /// Consumes the hasher and converts the final digest into an [`ObjectId`]
    /// of the format given to [`StreamingDigest::new`].
    ///
    /// # Errors
    ///
    /// Propagates any error from `ObjectId::from_raw`.
    pub fn finalize(self) -> Result<ObjectId> {
        match self.inner {
            #[cfg(not(feature = "fast-sha1"))]
            StreamingDigestInner::Sha1(hasher) => {
                ObjectId::from_raw(self.format, &hasher.finalize())
            }
            #[cfg(feature = "fast-sha1")]
            StreamingDigestInner::Sha1(hasher) => {
                use sha1::Digest;
                // Convert the crate's output type into a plain 20-byte array.
                let bytes: [u8; 20] = hasher.finalize().into();
                ObjectId::from_raw(self.format, &bytes)
            }
            StreamingDigestInner::Sha256(hasher) => {
                ObjectId::from_raw(self.format, &hasher.finalize())
            }
        }
    }
}
1739
/// Encodes `bytes` as lowercase hexadecimal, two characters per byte.
pub fn to_hex(bytes: &[u8]) -> String {
    let capacity = bytes.len() * 2;
    let mut hex = String::with_capacity(capacity);
    write_hex_bytes(bytes, &mut hex).expect("writing hex to a String cannot fail");
    hex
}

/// Writes the lowercase hexadecimal form of `bytes` to `out`, high nibble
/// first, stopping at the first write error.
fn write_hex_bytes(bytes: &[u8], out: &mut impl fmt::Write) -> fmt::Result {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    bytes.iter().try_for_each(|&byte| {
        let (high, low) = (usize::from(byte >> 4), usize::from(byte & 0x0f));
        out.write_char(char::from(DIGITS[high]))?;
        out.write_char(char::from(DIGITS[low]))
    })
}
1754
/// Decodes one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its value
/// `0..=15`; any other byte yields `None`.
fn hex_nibble_value(byte: u8) -> Option<u8> {
    // `char::from(u8)` maps the byte to the code point of the same value, and
    // `to_digit(16)` accepts only the ASCII hex digits in either case.
    char::from(byte)
        .to_digit(16)
        .and_then(|value| u8::try_from(value).ok())
}
1763
1764fn hex_nibble(byte: u8) -> Result<u8> {
1765    hex_nibble_value(byte)
1766        .ok_or_else(|| GitError::InvalidObjectId(format!("non-hex byte {:?}", byte as char)))
1767}
1768
1769// ---------------------------------------------------------------------------
1770// SHA-1
1771//
1772// The default is a pure-Rust streaming implementation that hashes 64-byte blocks
1773// straight from the caller's slices, so neither the body nor the framed object is
1774// copied just to be digested. Enabling the `fast-sha1` feature swaps in the
1775// RustCrypto `sha1` crate, which dispatches to ARMv8-SHA1 / x86 SHA-NI at runtime;
1776// the digests are byte-identical, so OIDs are unchanged either way.
1777// ---------------------------------------------------------------------------
1778
1779/// SHA-1 of a raw byte slice (already-framed object, bundle prerequisite, etc.).
1780#[cfg(not(feature = "fast-sha1"))]
1781fn sha1(input: &[u8]) -> [u8; 20] {
1782    let mut hasher = Sha1Hasher::new();
1783    hasher.update(input);
1784    hasher.finalize()
1785}
1786
/// One-shot SHA-1 digest of `input`, computed by the `sha1` crate backend that
/// the `fast-sha1` feature enables.
#[cfg(feature = "fast-sha1")]
fn sha1(input: &[u8]) -> [u8; 20] {
    use sha1::{Digest, Sha1};
    // `Digest::digest` is the one-shot form of new + update + finalize.
    Sha1::digest(input).into()
}
1795
1796/// SHA-1 of a git object framed as `"<type> <len>\0<body>"`, fed as separate
1797/// updates so the body is never copied into a combined buffer.
1798#[cfg(not(feature = "fast-sha1"))]
1799fn sha1_object_digest(object_type: &str, body: &[u8]) -> [u8; 20] {
1800    let mut hasher = Sha1Hasher::new();
1801    hasher.update(object_type.as_bytes());
1802    hasher.update(b" ");
1803    hasher.update(body.len().to_string().as_bytes());
1804    hasher.update(&[0u8]);
1805    hasher.update(body);
1806    hasher.finalize()
1807}
1808
/// SHA-1 of a git object in its framed form `"<type> <len>\0<body>"`, computed
/// by the `sha1` crate backend that the `fast-sha1` feature enables.
///
/// The header pieces and the body are fed as separate updates, so the body is
/// never copied into a combined buffer.
#[cfg(feature = "fast-sha1")]
fn sha1_object_digest(object_type: &str, body: &[u8]) -> [u8; 20] {
    use sha1::{Digest, Sha1};

    let length = body.len().to_string();
    let frame: [&[u8]; 5] = [
        object_type.as_bytes(),
        b" ",
        length.as_bytes(),
        &[0u8],
        body,
    ];

    let mut hasher = Sha1::new();
    for part in frame.iter() {
        hasher.update(part);
    }
    hasher.finalize().into()
}
1820
1821/// Streaming pure-Rust SHA-1: feeds full 64-byte blocks directly from each
1822/// `update` slice and buffers only the sub-block remainder, so large inputs are
1823/// hashed without an intermediate copy.
1824#[cfg(not(feature = "fast-sha1"))]
1825struct Sha1Hasher {
1826    state: [u32; 5],
1827    block: [u8; 64],
1828    block_len: usize,
1829    total_len: u64,
1830}
1831
1832#[cfg(not(feature = "fast-sha1"))]
1833impl Sha1Hasher {
1834    fn new() -> Self {
1835        Self {
1836            state: [0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0],
1837            block: [0u8; 64],
1838            block_len: 0,
1839            total_len: 0,
1840        }
1841    }
1842
1843    fn update(&mut self, mut data: &[u8]) {
1844        self.total_len = self.total_len.wrapping_add(data.len() as u64);
1845        if self.block_len > 0 {
1846            let take = (64 - self.block_len).min(data.len());
1847            self.block[self.block_len..self.block_len + take].copy_from_slice(&data[..take]);
1848            self.block_len += take;
1849            data = &data[take..];
1850            if self.block_len == 64 {
1851                let block = self.block;
1852                sha1_compress(&mut self.state, &block);
1853                self.block_len = 0;
1854            }
1855        }
1856        while data.len() >= 64 {
1857            sha1_compress(&mut self.state, &data[..64]);
1858            data = &data[64..];
1859        }
1860        if !data.is_empty() {
1861            self.block[..data.len()].copy_from_slice(data);
1862            self.block_len = data.len();
1863        }
1864    }
1865
1866    fn finalize(mut self) -> [u8; 20] {
1867        let bit_len = self.total_len.wrapping_mul(8);
1868        // 0x80, zero pad to a 56 mod 64 boundary, then the 64-bit big-endian length.
1869        // From a sub-block remainder this is at most two more blocks (128 bytes).
1870        let mut tail = [0u8; 128];
1871        tail[..self.block_len].copy_from_slice(&self.block[..self.block_len]);
1872        tail[self.block_len] = 0x80;
1873        let total = if self.block_len < 56 { 64 } else { 128 };
1874        tail[total - 8..total].copy_from_slice(&bit_len.to_be_bytes());
1875        sha1_compress(&mut self.state, &tail[..64]);
1876        if total == 128 {
1877            sha1_compress(&mut self.state, &tail[64..128]);
1878        }
1879        let mut out = [0u8; 20];
1880        out[0..4].copy_from_slice(&self.state[0].to_be_bytes());
1881        out[4..8].copy_from_slice(&self.state[1].to_be_bytes());
1882        out[8..12].copy_from_slice(&self.state[2].to_be_bytes());
1883        out[12..16].copy_from_slice(&self.state[3].to_be_bytes());
1884        out[16..20].copy_from_slice(&self.state[4].to_be_bytes());
1885        out
1886    }
1887}
1888
/// Runs the SHA-1 compression function over one block, updating `state` in place.
///
/// Only the first 64 bytes of `block` are read.
///
/// # Panics
///
/// Panics if `block` is shorter than 64 bytes.
#[cfg(not(feature = "fast-sha1"))]
fn sha1_compress(state: &mut [u32; 5], block: &[u8]) {
    // Message schedule: 16 big-endian words from the block, expanded to 80.
    let mut schedule = [0u32; 80];
    for (slot, bytes) in schedule.iter_mut().zip(block[..64].chunks_exact(4)) {
        *slot = u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
    }
    for t in 16..80 {
        schedule[t] =
            (schedule[t - 3] ^ schedule[t - 8] ^ schedule[t - 14] ^ schedule[t - 16]).rotate_left(1);
    }

    let [mut a, mut b, mut c, mut d, mut e] = *state;

    // Four stages of twenty rounds, each with its own mixing function and constant.
    for (round, &word) in schedule.iter().enumerate() {
        let (mix, constant): (u32, u32) = match round / 20 {
            0 => ((b & c) | (!b & d), 0x5a827999),
            1 => (b ^ c ^ d, 0x6ed9eba1),
            2 => ((b & c) | (b & d) | (c & d), 0x8f1bbcdc),
            _ => (b ^ c ^ d, 0xca62c1d6),
        };
        let next = a
            .rotate_left(5)
            .wrapping_add(mix)
            .wrapping_add(e)
            .wrapping_add(constant)
            .wrapping_add(word);
        e = d;
        d = c;
        c = b.rotate_left(30);
        b = a;
        a = next;
    }

    // Fold the working variables back into the chaining state.
    let mixed = [a, b, c, d, e];
    for (acc, add) in state.iter_mut().zip(mixed.iter()) {
        *acc = acc.wrapping_add(*add);
    }
}
1938
1939fn sha256(input: &[u8]) -> [u8; 32] {
1940    let mut hasher = Sha256Hasher::new();
1941    hasher.update(input);
1942    hasher.finalize()
1943}
1944
/// Incremental pure-Rust SHA-256.
///
/// Complete 64-byte blocks are compressed straight out of the slice handed to
/// `update`; only a trailing partial block is buffered between calls.
struct Sha256Hasher {
    /// The eight chaining words.
    state: [u32; 8],
    /// Holds the bytes of a block that is not yet complete.
    block: [u8; 64],
    /// Number of valid bytes at the front of `block` (always below 64 between calls).
    block_len: usize,
    /// Total bytes fed so far, wrapping on overflow.
    total_len: u64,
}

impl Sha256Hasher {
    /// The 64 SHA-256 round constants, one per round of `compress`.
    const K: [u32; 64] = [
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4,
        0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe,
        0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f,
        0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
        0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc,
        0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
        0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116,
        0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7,
        0xc67178f2,
    ];

    /// Creates a hasher holding the SHA-256 initial state and no buffered input.
    fn new() -> Self {
        Self {
            state: [
                0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c,
                0x1f83d9ab, 0x5be0cd19,
            ],
            block: [0u8; 64],
            block_len: 0,
            total_len: 0,
        }
    }

    /// Absorbs `data`; may be called any number of times with slices of any length.
    fn update(&mut self, mut data: &[u8]) {
        self.total_len = self.total_len.wrapping_add(data.len() as u64);

        // Top up a partially filled buffer before touching whole blocks.
        if self.block_len > 0 {
            let room = 64 - self.block_len;
            let (head, rest) = data.split_at(room.min(data.len()));
            self.block[self.block_len..][..head.len()].copy_from_slice(head);
            self.block_len += head.len();
            data = rest;
            if self.block_len < 64 {
                // The input ran out before the buffered block was complete.
                return;
            }
            // `compress` borrows all of `self`, so hand it a copy of the buffer.
            let full = self.block;
            self.compress(&full);
            self.block_len = 0;
        }

        // The buffer is empty here: compress full blocks in place and stash
        // whatever is left over (possibly nothing) for the next call.
        let mut blocks = data.chunks_exact(64);
        for chunk in &mut blocks {
            self.compress(chunk);
        }
        let leftover = blocks.remainder();
        self.block[..leftover.len()].copy_from_slice(leftover);
        self.block_len = leftover.len();
    }

    /// Applies the final padding and returns the 32-byte big-endian digest.
    fn finalize(mut self) -> [u8; 32] {
        let bit_len = self.total_len.wrapping_mul(8);
        let used = self.block_len;

        // Padding is a 0x80 marker, zeros up to 56 mod 64, then the message
        // length in bits as a big-endian u64. That fits in one block when the
        // remainder is under 56 bytes and needs a second block otherwise.
        let mut tail = [0u8; 128];
        tail[..used].copy_from_slice(&self.block[..used]);
        tail[used] = 0x80;
        let padded_len = if used < 56 { 64 } else { 128 };
        tail[padded_len - 8..padded_len].copy_from_slice(&bit_len.to_be_bytes());
        for chunk in tail[..padded_len].chunks_exact(64) {
            self.compress(chunk);
        }

        let mut digest = [0u8; 32];
        for (dst, word) in digest.chunks_exact_mut(4).zip(self.state.iter()) {
            dst.copy_from_slice(&word.to_be_bytes());
        }
        digest
    }

    /// Runs the SHA-256 compression function over the first 64 bytes of `chunk`.
    ///
    /// Panics if `chunk` is shorter than 64 bytes.
    fn compress(&mut self, chunk: &[u8]) {
        // Message schedule: 16 big-endian words from the chunk, expanded to 64.
        let mut schedule = [0u32; 64];
        for (slot, bytes) in schedule.iter_mut().zip(chunk[..64].chunks_exact(4)) {
            *slot = u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
        }
        for t in 16..64 {
            let (x, y) = (schedule[t - 15], schedule[t - 2]);
            let sigma0 = x.rotate_right(7) ^ x.rotate_right(18) ^ (x >> 3);
            let sigma1 = y.rotate_right(17) ^ y.rotate_right(19) ^ (y >> 10);
            schedule[t] = schedule[t - 16]
                .wrapping_add(sigma0)
                .wrapping_add(schedule[t - 7])
                .wrapping_add(sigma1);
        }

        let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = self.state;

        for (&constant, &word) in Self::K.iter().zip(schedule.iter()) {
            let big_sigma1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
            let choose = (e & f) ^ (!e & g);
            let t1 = h
                .wrapping_add(big_sigma1)
                .wrapping_add(choose)
                .wrapping_add(constant)
                .wrapping_add(word);
            let big_sigma0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
            let majority = (a & b) ^ (a & c) ^ (b & c);
            let t2 = big_sigma0.wrapping_add(majority);

            h = g;
            g = f;
            f = e;
            e = d.wrapping_add(t1);
            d = c;
            c = b;
            b = a;
            a = t1.wrapping_add(t2);
        }

        // Fold the working variables back into the chaining state.
        let mixed = [a, b, c, d, e, f, g, h];
        for (acc, add) in self.state.iter_mut().zip(mixed.iter()) {
            *acc = acc.wrapping_add(*add);
        }
    }
}
2087
#[cfg(test)]
mod tests {
    use super::*;

    /// Hex SHA-1 object id of the blob `hello\n`, shared by the object-id tests.
    const HELLO_BLOB_SHA1_HEX: &str = "ce013625030ba8dba906f756967f9e9ca394464a";

    // ----- object ids ------------------------------------------------------

    #[test]
    fn sha1_blob_matches_git_known_value() {
        let hashed = object_id_for_bytes(ObjectFormat::Sha1, "blob", b"hello\n");
        let oid = hashed.expect("known blob should hash as sha1");
        assert_eq!(oid.to_hex(), HELLO_BLOB_SHA1_HEX);
    }

    #[test]
    fn sha256_blob_matches_git_known_value() {
        let hashed = object_id_for_bytes(ObjectFormat::Sha256, "blob", b"hello\n");
        let oid = hashed.expect("known blob should hash as sha256");
        assert_eq!(
            oid.to_hex(),
            "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4"
        );
    }

    #[test]
    fn object_id_round_trips_hex() {
        let oid =
            ObjectId::from_hex(ObjectFormat::Sha1, HELLO_BLOB_SHA1_HEX).expect("valid sha1 hex");
        assert_eq!(oid.to_hex(), HELLO_BLOB_SHA1_HEX);
    }

    #[test]
    fn object_id_writes_hex_without_allocating_in_the_writer() {
        // Uppercase input must still be rendered in lowercase.
        let oid = ObjectId::from_hex(
            ObjectFormat::Sha1,
            "CE013625030BA8DBA906F756967F9E9CA394464A",
        )
        .expect("valid uppercase sha1 hex");

        let mut written = String::new();
        oid.write_hex(&mut written)
            .expect("writing object id hex to a String should not fail");

        assert_eq!(written, HELLO_BLOB_SHA1_HEX);
        assert_eq!(oid.to_hex(), written);
        assert_eq!(format!("{oid}"), written);
    }

    #[test]
    fn object_id_matches_hex_prefixes_by_nibble() {
        let oid =
            ObjectId::from_hex(ObjectFormat::Sha1, HELLO_BLOB_SHA1_HEX).expect("valid sha1 hex");

        // Empty, odd-length, mixed-case and full-length prefixes all match.
        let matching: [&[u8]; 5] = [
            b"",
            b"c",
            b"ce013",
            b"CE013625",
            b"ce013625030ba8dba906f756967f9e9ca394464a",
        ];
        for prefix in matching {
            assert!(oid.hex_prefix_matches(prefix));
        }

        // A wrong nibble or a non-hex character is rejected.
        let mismatching: [&[u8]; 3] = [b"d", b"ce014", b"ce01x"];
        for prefix in mismatching {
            assert!(!oid.hex_prefix_matches(prefix));
        }

        // A prefix longer than the id itself can never match.
        let mut overlong = oid.to_hex();
        overlong.push('0');
        assert!(!oid.hex_prefix_matches(overlong.as_bytes()));
    }

    #[test]
    fn object_id_abbrev_hex_len_clamps_to_format_width() {
        let null_sha1 = ObjectId::null(ObjectFormat::Sha1);
        let null_sha256 = ObjectId::null(ObjectFormat::Sha256);

        assert_eq!(null_sha1.abbrev_hex_len(0), 0);
        assert_eq!(null_sha1.abbrev_hex_len(12), 12);
        assert_eq!(null_sha1.abbrev_hex_len(80), ObjectFormat::Sha1.hex_len());
        assert_eq!(
            null_sha256.abbrev_hex_len(80),
            ObjectFormat::Sha256.hex_len()
        );
    }

    // ----- signatures ------------------------------------------------------

    #[test]
    fn signature_parses_a_normal_ident_and_round_trips() {
        let ident = b"A U Thor <author@example.com> 1700000000 +0000";
        let parsed = Signature::from_ident_line(ident).expect("well-formed ident parses");

        assert_eq!(parsed.name.as_bytes(), b"A U Thor");
        assert_eq!(parsed.email.as_bytes(), b"author@example.com");
        assert_eq!(parsed.time.seconds, 1_700_000_000);
        assert_eq!(parsed.time.timezone_offset_minutes, 0);
        assert!(!parsed.time.negative_utc);

        // Both the raw and the canonical rendering reproduce the input bytes.
        assert_eq!(parsed.to_ident_bytes(), ident);
        assert_eq!(parsed.to_canonical_ident_bytes(), ident);
    }

    #[test]
    fn signature_parses_positive_half_hour_offset() {
        let ident = b"Half Hour <hh@example.com> 1500000000 +0530";
        let parsed = Signature::from_ident_line(ident).expect("offset ident parses");

        assert_eq!(parsed.time.timezone_offset_minutes, 330);
        assert!(!parsed.time.negative_utc);
        assert_eq!(parsed.time.offset_token(), "+0530");
        assert_eq!(parsed.to_ident_bytes(), ident);
        assert_eq!(parsed.to_canonical_ident_bytes(), ident);
    }

    #[test]
    fn signature_parses_negative_offset() {
        let ident = b"Western <w@example.com> 1500000000 -0500";
        let parsed = Signature::from_ident_line(ident).expect("negative offset parses");

        assert_eq!(parsed.time.timezone_offset_minutes, -300);
        assert!(!parsed.time.negative_utc);
        assert_eq!(parsed.time.offset_token(), "-0500");
        assert_eq!(parsed.to_ident_bytes(), ident);
    }

    #[test]
    fn signature_preserves_negative_zero_timezone_distinct_from_positive_zero() {
        let minus_zero_line = b"Unknown Zone <uz@example.com> 1500000000 -0000";
        let plus_zero_line = b"Known Zone <kz@example.com> 1500000000 +0000";

        let minus_zero = Signature::from_ident_line(minus_zero_line).expect("-0000 parses");
        let plus_zero = Signature::from_ident_line(plus_zero_line).expect("+0000 parses");

        // Numerically both offsets are zero minutes from UTC...
        assert_eq!(minus_zero.time.timezone_offset_minutes, 0);
        assert_eq!(plus_zero.time.timezone_offset_minutes, 0);
        // ...yet the sentinel flag keeps the two times unequal.
        assert!(minus_zero.time.negative_utc);
        assert!(!plus_zero.time.negative_utc);
        assert_ne!(minus_zero.time, plus_zero.time);

        // Re-serializing keeps the distinction, byte for byte.
        assert_eq!(minus_zero.time.offset_token(), "-0000");
        assert_eq!(plus_zero.time.offset_token(), "+0000");
        assert_eq!(minus_zero.to_ident_bytes(), minus_zero_line);
        assert_eq!(plus_zero.to_ident_bytes(), plus_zero_line);
        assert_eq!(minus_zero.to_canonical_ident_bytes(), minus_zero_line);
        assert_eq!(plus_zero.to_canonical_ident_bytes(), plus_zero_line);
        assert_ne!(minus_zero.to_ident_bytes(), plus_zero.to_ident_bytes());
    }

    #[test]
    fn signature_handles_empty_name_and_email() {
        // git allows the name and/or the email to be empty; the angle
        // brackets alone are enough to anchor the parse.
        let ident = b" <> 0 +0000";
        let parsed = Signature::from_ident_line(ident).expect("empty name/email parses");

        assert_eq!(parsed.name.as_bytes(), b"");
        assert_eq!(parsed.email.as_bytes(), b"");
        assert_eq!(parsed.time.seconds, 0);
        assert_eq!(parsed.to_ident_bytes(), ident);
    }

    #[test]
    fn signature_keeps_angle_brackets_inside_the_name() {
        // The *last* '<'/'>' pair delimits the email, so angle brackets that
        // occur earlier stay part of the name and the line round-trips exactly.
        let ident = b"Weird <Name> <weird@example.com> 1 +0000";
        let parsed = Signature::from_ident_line(ident).expect("bracketed name parses");

        assert_eq!(parsed.name.as_bytes(), b"Weird <Name>");
        assert_eq!(parsed.email.as_bytes(), b"weird@example.com");
        assert_eq!(parsed.to_ident_bytes(), ident);
    }

    #[test]
    fn signature_round_trips_non_canonical_whitespace_via_raw() {
        // Two spaces before the email is not git's canonical spelling, but the
        // parsed view must still reproduce the line byte-for-byte from `raw`.
        // (Normalizing the spacing is the canonical renderer's job only.)
        let ident = b"Spaced  <spaced@example.com> 5 +0000";
        let parsed = Signature::from_ident_line(ident).expect("non-canonical ident parses");

        // Exactly one separator space is trimmed, so the extra one stays in the name.
        assert_eq!(parsed.name.as_bytes(), b"Spaced ");
        assert_eq!(parsed.to_ident_bytes(), ident);
    }

    #[test]
    fn signature_rejects_malformed_idents() {
        let malformed: [&[u8]; 5] = [
            // No email delimiters.
            b"No Email Here 0 +0000",
            // Missing the time tail entirely.
            b"A U Thor <a@example.com>",
            // Non-numeric timestamp.
            b"A U Thor <a@example.com> later +0000",
            // Malformed timezone token (wrong width).
            b"A U Thor <a@example.com> 0 +00",
            // Timezone token missing a sign.
            b"A U Thor <a@example.com> 0 0000",
        ];
        for ident in malformed {
            assert!(Signature::from_ident_line(ident).is_none());
        }
    }

    #[test]
    fn git_time_constructors_set_the_sentinel() {
        assert!(!GitTime::new(0, 0).negative_utc);
        assert_eq!(GitTime::new(0, 330).offset_token(), "+0530");

        let unknown_zone = GitTime::with_negative_utc(42);
        assert!(unknown_zone.negative_utc);
        assert_eq!(unknown_zone.seconds, 42);
        assert_eq!(unknown_zone.offset_token(), "-0000");
    }

    // ----- ref names -------------------------------------------------------

    #[test]
    fn full_name_accepts_valid_ref_names() {
        let expected = "refs/heads/main";
        let name = FullName::new("refs/heads/main").expect("valid ref name");

        assert_eq!(name.as_str(), expected);
        assert_eq!(name, expected);
        assert_eq!(format!("{name}"), expected);
        assert_eq!(String::from(name.clone()), expected);
        let borrowed: &str = name.borrow();
        assert_eq!(borrowed, expected);
    }

    #[test]
    fn full_name_rejects_invalid_ref_names() {
        let invalid = [
            "",
            " refs/heads/main",
            "refs/heads/main ",
            "refs//heads/main",
            "refs/heads/\nmain",
        ];
        for candidate in invalid {
            assert!(FullName::new(candidate).is_err());
        }
    }

    // ----- exit codes and errors -------------------------------------------

    #[test]
    fn cli_exit_codes_match_git_taxonomy() {
        let expectations = [
            (CliExit::Ok, 0),
            (CliExit::UserError, 128),
            (CliExit::Usage, 129),
            (CliExit::Custom(1), 1),
            (CliExit::Custom(5), 5),
        ];
        for (exit, code) in expectations {
            assert_eq!(exit.code(), code);
        }
    }

    #[test]
    fn git_error_cli_exit_code_mapping() {
        // An explicit exit status is passed through unchanged.
        assert_eq!(GitError::Exit(129).cli_exit_code(), 129);
        assert_eq!(GitError::Exit(128).cli_exit_code(), 128);

        // The CLI-flavoured constructors carry their own status.
        let usage = GitError::usage("unknown option");
        assert_eq!(usage.cli_exit_code(), 129);
        let user_error = GitError::user_error("not a git repository");
        assert_eq!(user_error.cli_exit_code(), 128);
        let custom = GitError::cli_exit(CliExit::Custom(2), "diff found changes");
        assert_eq!(custom.cli_exit_code(), 2);

        // These errors map to exit status 1.
        assert_eq!(GitError::Command("bad value".into()).cli_exit_code(), 1);
        assert_eq!(GitError::not_found("missing ref").cli_exit_code(), 1);
    }

    #[test]
    fn git_error_cli_displays_message_only() {
        let usage = GitError::usage("unknown option `--foo'");
        assert_eq!(usage.to_string(), "unknown option `--foo'");
    }

    // ----- byte strings ----------------------------------------------------

    #[test]
    fn bstring_round_trips_bytes_and_displays_lossily() {
        // 0xFF is not valid UTF-8, so Display has to substitute U+FFFD.
        let raw = b"src/\xFF.txt";
        let path = BString::from_bytes(raw);

        assert_eq!(path.as_bytes(), raw);
        let borrowed: &[u8] = path.borrow();
        assert_eq!(borrowed, raw.as_slice());
        assert_eq!(format!("{path}"), "src/\u{FFFD}.txt");
        assert_eq!(path, raw);
        assert_eq!(path.clone().into_bytes(), raw.to_vec());
    }

    // ----- ident splitting -------------------------------------------------

    #[test]
    fn split_ident_line_parses_well_formed_ident() {
        let fields = split_ident_line(b"A U Thor <author@example.com> 1112911993 -0700")
            .expect("well formed ident should parse");

        assert_eq!(fields.name, b"A U Thor");
        assert_eq!(fields.email, b"author@example.com");
        assert_eq!(fields.date, Some(&b"1112911993"[..]));
        assert_eq!(fields.tz, Some(&b"-0700"[..]));
    }

    #[test]
    fn split_ident_line_recovers_broken_email() {
        // With junk after the '>', the email still ends at the first '>',
        // while the timestamp is located by scanning back from the end for
        // the last '>'.
        let fields = split_ident_line(b"A U Thor <author@example.com>-<> 1112911993 -0700")
            .expect("broken-email ident should parse");

        assert_eq!(fields.name, b"A U Thor");
        assert_eq!(fields.email, b"author@example.com");
        assert_eq!(fields.date, Some(&b"1112911993"[..]));
        assert_eq!(fields.tz, Some(&b"-0700"[..]));
    }

    #[test]
    fn split_ident_line_non_numeric_date_is_person_only() {
        let fields = split_ident_line(b"A U Thor <author@example.com> totally_bogus -0700")
            .expect("ident without numeric date should still parse person");

        assert_eq!(fields.email, b"author@example.com");
        assert_eq!(fields.date, None);
        assert_eq!(fields.tz, None);
    }

    #[test]
    fn split_ident_line_whitespace_date_is_person_only() {
        // Only spaces follow the '>', so there is no timestamp to find.
        let spaces_only = split_ident_line(b"A U Thor <author@example.com>    ")
            .expect("ident with trailing whitespace should parse person");
        assert_eq!(spaces_only.date, None);

        // A vertical tab does not count as git whitespace: it ends the
        // space-skip and, not being a digit, produces no date either.
        let vertical_tab = split_ident_line(b"A U Thor <author@example.com>   \x0b")
            .expect("ident with non-git-whitespace suffix should parse person");
        assert_eq!(vertical_tab.date, None);
    }

    #[test]
    fn split_ident_line_requires_angle_brackets() {
        let outcome = split_ident_line(b"no brackets here 123 +0000");
        assert!(outcome.is_none());
    }

    // ----- date rendering --------------------------------------------------

    #[test]
    fn ident_render_date_overflow_is_epoch_sentinel() {
        // 2^64 + 1 (clamps in the u64 parse) and 2^64 - 2 (fits u64 but is past
        // time_t) must both render the epoch sentinel with a forced +0000 zone.
        for timestamp in [b"18446744073709551617", b"18446744073709551614"] {
            assert_eq!(
                ident_render_date(timestamp, b"-0700", &DateMode::Default),
                "Thu Jan 1 00:00:00 1970 +0000"
            );
        }
    }

    #[test]
    fn ident_render_date_valid_value_uses_original_timezone() {
        let rendered = ident_render_date(b"0", b"+0000", &DateMode::Default);
        assert_eq!(rendered, "Thu Jan 1 00:00:00 1970 +0000");
    }
}