Skip to main content

grit_lib/
fsck_standalone.rs

1//! Standalone object fsck for `hash-object` and similar entry points.
2//!
3//! Mirrors the buffer-safe checks in Git's `fsck.c` (`verify_headers`,
4//! `fsck_commit`, `fsck_tag_standalone`, `fsck_tree`) so error messages match
5//! `error: object fails fsck: <camelCaseId>: <detail>`.
6
7use crate::check_ref_format::{check_refname_format, RefNameOptions};
8use crate::git_date::tm::date_overflows;
9use crate::objects::{ObjectId, ObjectKind};
10
/// One fsck diagnostic: a message id plus the explanatory text that follows it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FsckError {
    /// Message id in camelCase, for example `missingTree`.
    pub id: &'static str,
    /// Free-form explanation printed after `id: `.
    pub detail: String,
}

impl FsckError {
    /// Build a diagnostic from a message id and anything convertible into the detail text.
    #[must_use]
    pub fn new(id: &'static str, detail: impl Into<String>) -> Self {
        let detail = detail.into();
        Self { id, detail }
    }

    /// Render `<id>: <detail>`, the part that follows `error: object fails fsck: `.
    #[must_use]
    pub fn report_line(&self) -> String {
        let Self { id, detail } = self;
        format!("{id}: {detail}")
    }
}
36
37/// Validate raw object bytes the same way `git hash-object` does before hashing.
38///
39/// Returns `Ok(())` when the object is well-formed, or the first fsck error Git
40/// would report for truncated or malformed buffers.
41pub fn fsck_object(kind: ObjectKind, data: &[u8]) -> Result<(), FsckError> {
42    match kind {
43        ObjectKind::Blob => Ok(()),
44        ObjectKind::Commit => fsck_commit(data),
45        ObjectKind::Tag => fsck_tag(data),
46        ObjectKind::Tree => fsck_tree(data),
47    }
48}
49
50fn verify_headers(data: &[u8], nul_msg_id: &'static str) -> Result<(), FsckError> {
51    for (i, &b) in data.iter().enumerate() {
52        if b == 0 {
53            return Err(FsckError::new(
54                nul_msg_id,
55                format!("unterminated header: NUL at offset {i}"),
56            ));
57        }
58        if b == b'\n' && i + 1 < data.len() && data[i + 1] == b'\n' {
59            return Ok(());
60        }
61    }
62    if !data.is_empty() && data[data.len() - 1] == b'\n' {
63        Ok(())
64    } else {
65        Err(FsckError::new("unterminatedHeader", "unterminated header"))
66    }
67}
68
/// True when `b` is an ASCII digit or one of the lowercase hex letters `a`..=`f`.
fn is_hex_lower(b: u8) -> bool {
    b.is_ascii_digit() || (b'a'..=b'f').contains(&b)
}
72
73/// Parse a 40-character lowercase hex object id at the start of `buf`, requiring
74/// the next byte to be `\n`. Returns bytes consumed (41).
75fn parse_oid_line(buf: &[u8], bad_sha1_id: &'static str) -> Result<usize, FsckError> {
76    if buf.len() < 41 {
77        return Err(FsckError::new(
78            bad_sha1_id,
79            format!(
80                "invalid '{}' line format - bad sha1",
81                line_kind(bad_sha1_id)
82            ),
83        ));
84    }
85    let hex = &buf[..40];
86    if !hex.iter().copied().all(is_hex_lower) {
87        return Err(FsckError::new(
88            bad_sha1_id,
89            format!(
90                "invalid '{}' line format - bad sha1",
91                line_kind(bad_sha1_id)
92            ),
93        ));
94    }
95    if buf[40] != b'\n' {
96        return Err(FsckError::new(
97            bad_sha1_id,
98            format!(
99                "invalid '{}' line format - bad sha1",
100                line_kind(bad_sha1_id)
101            ),
102        ));
103    }
104    let hex_str = std::str::from_utf8(hex).map_err(|_| {
105        FsckError::new(
106            bad_sha1_id,
107            format!(
108                "invalid '{}' line format - bad sha1",
109                line_kind(bad_sha1_id)
110            ),
111        )
112    })?;
113    hex_str.parse::<ObjectId>().map_err(|_| {
114        FsckError::new(
115            bad_sha1_id,
116            format!(
117                "invalid '{}' line format - bad sha1",
118                line_kind(bad_sha1_id)
119            ),
120        )
121    })?;
122    Ok(41)
123}
124
/// Map a "bad sha1" message id to the header name used in its error text.
///
/// `badObjectSha1` gives `object`, `badParentSha1` gives `parent`, and every other
/// id falls back to `tree`.
fn line_kind(bad_sha1_id: &'static str) -> &'static str {
    if bad_sha1_id == "badObjectSha1" {
        "object"
    } else if bad_sha1_id == "badParentSha1" {
        "parent"
    } else {
        "tree"
    }
}
132
133fn fsck_ident(
134    data: &[u8],
135    start: usize,
136    buffer_end: usize,
137    oid_line: &'static str,
138) -> Result<usize, FsckError> {
139    let mut p = start;
140    if p >= buffer_end {
141        return Err(FsckError::new(
142            "missingEmail",
143            format!("invalid {oid_line} line - missing email"),
144        ));
145    }
146
147    let line_end = data[p..buffer_end]
148        .iter()
149        .position(|&b| b == b'\n')
150        .map(|rel| p + rel)
151        .ok_or_else(|| {
152            FsckError::new(
153                "missingEmail",
154                format!("invalid {oid_line} line - missing email"),
155            )
156        })?;
157
158    let ident_end = line_end;
159
160    if data[p] == b'<' {
161        return Err(FsckError::new(
162            "missingNameBeforeEmail",
163            format!("invalid {oid_line} line - missing space before email"),
164        ));
165    }
166
167    // Name: scan until '<' (Git `fsck_ident`).
168    loop {
169        if p >= ident_end || data[p] == b'\n' {
170            return Err(FsckError::new(
171                "missingEmail",
172                format!("invalid {oid_line} line - missing email"),
173            ));
174        }
175        if data[p] == b'>' {
176            return Err(FsckError::new(
177                "badName",
178                format!("invalid {oid_line} line - bad name"),
179            ));
180        }
181        if data[p] == b'<' {
182            break;
183        }
184        p += 1;
185    }
186
187    if p == start || data[p - 1] != b' ' {
188        return Err(FsckError::new(
189            "missingSpaceBeforeEmail",
190            format!("invalid {oid_line} line - missing space before email"),
191        ));
192    }
193    p += 1; // skip '<'
194
195    // Email (may be empty between `<>`).
196    loop {
197        if p >= ident_end || data[p] == b'<' || data[p] == b'\n' {
198            return Err(FsckError::new(
199                "badEmail",
200                format!("invalid {oid_line} line - bad email"),
201            ));
202        }
203        if data[p] == b'>' {
204            break;
205        }
206        p += 1;
207    }
208    p += 1; // skip '>'
209
210    if p >= ident_end || data[p] != b' ' {
211        return Err(FsckError::new(
212            "missingSpaceBeforeDate",
213            format!("invalid {oid_line} line - missing space before date"),
214        ));
215    }
216    p += 1;
217
218    while p < ident_end && (data[p] == b' ' || data[p] == b'\t') {
219        p += 1;
220    }
221
222    if p >= ident_end || !data[p].is_ascii_digit() {
223        return Err(FsckError::new(
224            "badDate",
225            format!("invalid {oid_line} line - bad date"),
226        ));
227    }
228
229    if data[p] == b'0' && p + 1 < ident_end && data[p + 1] != b' ' {
230        return Err(FsckError::new(
231            "zeroPaddedDate",
232            format!("invalid {oid_line} line - zero-padded date"),
233        ));
234    }
235
236    let ts_start = p;
237    while p < ident_end && data[p].is_ascii_digit() {
238        p += 1;
239    }
240    let ts_len = p - ts_start;
241    if ts_len > 21 {
242        return Err(FsckError::new(
243            "badDateOverflow",
244            format!("invalid {oid_line} line - date causes integer overflow"),
245        ));
246    }
247    let ts_str = std::str::from_utf8(&data[ts_start..p])
248        .map_err(|_| FsckError::new("badDate", format!("invalid {oid_line} line - bad date")))?;
249    let raw: u128 = ts_str
250        .parse()
251        .map_err(|_| FsckError::new("badDate", format!("invalid {oid_line} line - bad date")))?;
252    if raw > u64::MAX as u128 || date_overflows(raw as u64) {
253        return Err(FsckError::new(
254            "badDateOverflow",
255            format!("invalid {oid_line} line - date causes integer overflow"),
256        ));
257    }
258
259    if p >= ident_end || data[p] != b' ' {
260        return Err(FsckError::new(
261            "badDate",
262            format!("invalid {oid_line} line - bad date"),
263        ));
264    }
265    p += 1;
266
267    // Timezone: `[+-]HHMM` then newline (Git allows e.g. `-1430`).
268    if p + 5 > ident_end
269        || (data[p] != b'+' && data[p] != b'-')
270        || !data[p + 1..p + 5].iter().all(|b| b.is_ascii_digit())
271        || data[p + 5] != b'\n'
272    {
273        return Err(FsckError::new(
274            "badTimezone",
275            format!("invalid {oid_line} line - bad time zone"),
276        ));
277    }
278
279    Ok(line_end + 1)
280}
281
282fn fsck_commit(data: &[u8]) -> Result<(), FsckError> {
283    verify_headers(data, "nulInHeader")?;
284
285    let buffer_end = data.len();
286    let mut i = 0usize;
287
288    if i >= buffer_end || !data[i..].starts_with(b"tree ") {
289        return Err(FsckError::new(
290            "missingTree",
291            "invalid format - expected 'tree' line",
292        ));
293    }
294    i += 5;
295    let n = parse_oid_line(&data[i..], "badTreeSha1")?;
296    i += n;
297
298    while i < buffer_end && data[i..].starts_with(b"parent ") {
299        i += 7;
300        let n = parse_oid_line(&data[i..], "badParentSha1")?;
301        i += n;
302    }
303
304    let mut author_count = 0usize;
305    while i < buffer_end && data[i..].starts_with(b"author ") {
306        author_count += 1;
307        i += 7;
308        i = fsck_ident(data, i, buffer_end, "author/committer")?;
309    }
310
311    if author_count < 1 {
312        return Err(FsckError::new(
313            "missingAuthor",
314            "invalid format - expected 'author' line",
315        ));
316    }
317    if author_count > 1 {
318        return Err(FsckError::new(
319            "multipleAuthors",
320            "invalid format - multiple 'author' lines",
321        ));
322    }
323
324    if i >= buffer_end || !data[i..].starts_with(b"committer ") {
325        return Err(FsckError::new(
326            "missingCommitter",
327            "invalid format - expected 'committer' line",
328        ));
329    }
330    i += 10;
331    fsck_ident(data, i, buffer_end, "author/committer")?;
332
333    if data.contains(&0) {
334        return Err(FsckError::new(
335            "nulInCommit",
336            "NUL byte in the commit object body",
337        ));
338    }
339
340    Ok(())
341}
342
343/// Byte offset immediately after the newline that terminates the `tagger` line.
344fn parse_tag_headers_through_tagger(data: &[u8]) -> Result<usize, FsckError> {
345    verify_headers(data, "nulInHeader")?;
346
347    let buffer_end = data.len();
348    let mut i = 0usize;
349
350    if i >= buffer_end || !data[i..].starts_with(b"object ") {
351        return Err(FsckError::new(
352            "missingObject",
353            "invalid format - expected 'object' line",
354        ));
355    }
356    i += 7;
357    let n = parse_oid_line(&data[i..], "badObjectSha1")?;
358    i += n;
359
360    if i >= buffer_end || !data[i..].starts_with(b"type ") {
361        return Err(FsckError::new(
362            "missingTypeEntry",
363            "invalid format - expected 'type' line",
364        ));
365    }
366    i += 5;
367    let type_start = i;
368    let eol = data[type_start..buffer_end]
369        .iter()
370        .position(|&b| b == b'\n')
371        .map(|rel| type_start + rel)
372        .ok_or_else(|| {
373            FsckError::new(
374                "missingType",
375                "invalid format - unexpected end after 'type' line",
376            )
377        })?;
378
379    if ObjectKind::from_tag_type_field(&data[type_start..eol]).is_none() {
380        return Err(FsckError::new("badType", "invalid 'type' value"));
381    }
382    i = eol + 1;
383
384    if i >= buffer_end || !data[i..].starts_with(b"tag ") {
385        return Err(FsckError::new(
386            "missingTagEntry",
387            "invalid format - expected 'tag' line",
388        ));
389    }
390    i += 4;
391    let tag_start = i;
392    let eol = data[tag_start..buffer_end]
393        .iter()
394        .position(|&b| b == b'\n')
395        .map(|rel| tag_start + rel)
396        .ok_or_else(|| {
397            FsckError::new(
398                "missingTag",
399                "invalid format - unexpected end after 'type' line",
400            )
401        })?;
402
403    let tag_name = std::str::from_utf8(&data[tag_start..eol])
404        .map_err(|_| FsckError::new("badTagName", "invalid 'tag' name"))?;
405    let refname = format!("refs/tags/{tag_name}");
406    if check_refname_format(&refname, &RefNameOptions::default()).is_err() {
407        return Err(FsckError::new(
408            "badTagName",
409            format!("invalid 'tag' name: {tag_name}"),
410        ));
411    }
412    i = eol + 1;
413
414    if i >= buffer_end || !data[i..].starts_with(b"tagger ") {
415        return Err(FsckError::new(
416            "missingTaggerEntry",
417            "invalid format - expected 'tagger' line",
418        ));
419    }
420    i += 7;
421    fsck_ident(data, i, buffer_end, "author/committer")
422}
423
424fn fsck_tag(data: &[u8]) -> Result<(), FsckError> {
425    parse_tag_headers_through_tagger(data).map(|_| ())
426}
427
428/// Parse tag headers for `git mktag`, matching Git `fsck_tag_standalone` severities:
429/// `badTagName` and `missingTaggerEntry` are INFO→WARN: fatal only when `strict` is true.
430///
431/// Returns `(tagged_oid, tagged_type, header_end_offset, check_trailer)`.
432///
433/// When `check_trailer` is true, pass `header_end_offset` to [`fsck_tag_mktag_trailer_from`].
434/// After a lenient recovery from a broken `tagger` line (`--no-strict`), it is false because the
435/// cursor is already past the header/body boundary.
436pub fn parse_tag_for_mktag(
437    data: &[u8],
438    strict: bool,
439    on_warn: &mut impl FnMut(&FsckError),
440) -> Result<(ObjectId, ObjectKind, usize, bool), FsckError> {
441    verify_headers(data, "nulInHeader")?;
442
443    let buffer_end = data.len();
444    let mut i = 0usize;
445
446    if i >= buffer_end || !data[i..].starts_with(b"object ") {
447        return Err(FsckError::new(
448            "missingObject",
449            "invalid format - expected 'object' line",
450        ));
451    }
452    i += 7;
453    let n = parse_oid_line(&data[i..], "badObjectSha1")?;
454    let tagged_oid = std::str::from_utf8(&data[i..i + 40])
455        .map_err(|_| FsckError::new("badObjectSha1", "invalid 'object' line format - bad sha1"))?
456        .parse::<ObjectId>()
457        .map_err(|_| FsckError::new("badObjectSha1", "invalid 'object' line format - bad sha1"))?;
458    i += n;
459
460    if i >= buffer_end || !data[i..].starts_with(b"type ") {
461        return Err(FsckError::new(
462            "missingTypeEntry",
463            "invalid format - expected 'type' line",
464        ));
465    }
466    i += 5;
467    let type_start = i;
468    let type_eol = data[type_start..buffer_end]
469        .iter()
470        .position(|&b| b == b'\n')
471        .map(|rel| type_start + rel)
472        .ok_or_else(|| {
473            FsckError::new(
474                "missingType",
475                "invalid format - unexpected end after 'type' line",
476            )
477        })?;
478
479    let tagged_kind = ObjectKind::from_tag_type_field(&data[type_start..type_eol])
480        .ok_or_else(|| FsckError::new("badType", "invalid 'type' value"))?;
481    i = type_eol + 1;
482
483    if i >= buffer_end || !data[i..].starts_with(b"tag ") {
484        return Err(FsckError::new(
485            "missingTagEntry",
486            "invalid format - expected 'tag' line",
487        ));
488    }
489    i += 4;
490    let tag_start = i;
491    let tag_eol = data[tag_start..buffer_end]
492        .iter()
493        .position(|&b| b == b'\n')
494        .map(|rel| tag_start + rel)
495        .ok_or_else(|| {
496            FsckError::new(
497                "missingTag",
498                "invalid format - unexpected end after 'type' line",
499            )
500        })?;
501
502    let tag_name = std::str::from_utf8(&data[tag_start..tag_eol])
503        .map_err(|_| FsckError::new("badTagName", "invalid 'tag' name"))?;
504    let refname = format!("refs/tags/{tag_name}");
505    if check_refname_format(&refname, &RefNameOptions::default()).is_err() {
506        let e = FsckError::new("badTagName", format!("invalid 'tag' name: {tag_name}"));
507        if strict {
508            return Err(e);
509        }
510        on_warn(&e);
511    }
512    i = tag_eol + 1;
513
514    if i >= buffer_end {
515        let e = FsckError::new(
516            "missingTaggerEntry",
517            "invalid format - expected 'tagger' line",
518        );
519        if strict {
520            return Err(e);
521        }
522        on_warn(&e);
523        return Ok((tagged_oid, tagged_kind, i, true));
524    }
525
526    let tg_line_start = i;
527    let tg_eol = data[tg_line_start..buffer_end]
528        .iter()
529        .position(|&b| b == b'\n')
530        .map(|rel| tg_line_start + rel)
531        .ok_or_else(|| FsckError::new("unterminatedHeader", "unterminated header"))?;
532    let tg_line = &data[tg_line_start..tg_eol];
533
534    let missing_tagger = || {
535        FsckError::new(
536            "missingTaggerEntry",
537            "invalid format - expected 'tagger' line",
538        )
539    };
540
541    if tg_line == b"tagger" || !tg_line.starts_with(b"tagger ") {
542        let e = missing_tagger();
543        if strict {
544            return Err(e);
545        }
546        on_warn(&e);
547        i = tg_eol + 1;
548    } else {
549        i = tg_line_start + b"tagger ".len();
550        match fsck_ident(data, i, buffer_end, "author/committer") {
551            Ok(next) => {
552                i = next;
553                return Ok((tagged_oid, tagged_kind, i, true));
554            }
555            Err(e) => {
556                if strict {
557                    return Err(e);
558                }
559                on_warn(&e);
560                let tail = &data[tg_line_start..buffer_end];
561                i = if let Some(pos) = tail.windows(2).position(|w| w == b"\n\n") {
562                    tg_line_start + pos + 2
563                } else {
564                    buffer_end
565                };
566                return Ok((tagged_oid, tagged_kind, i, false));
567            }
568        }
569    }
570
571    Ok((tagged_oid, tagged_kind, i, true))
572}
573
574fn skip_tag_gpgsig_headers(data: &[u8], mut i: usize) -> Result<usize, FsckError> {
575    let buffer_end = data.len();
576    if i < buffer_end
577        && (data[i..].starts_with(b"gpgsig ") || data[i..].starts_with(b"gpgsig-sha256 "))
578    {
579        let sig_start = i;
580        let sig_eol = data[sig_start..buffer_end]
581            .iter()
582            .position(|&b| b == b'\n')
583            .map(|rel| sig_start + rel)
584            .ok_or_else(|| {
585                FsckError::new(
586                    "badGpgsig",
587                    "invalid format - unexpected end after 'gpgsig' or 'gpgsig-sha256' line",
588                )
589            })?;
590        i = sig_eol + 1;
591        while i < buffer_end && data[i] == b' ' {
592            let cont_eol = data[i..buffer_end]
593                .iter()
594                .position(|&b| b == b'\n')
595                .map(|rel| i + rel)
596                .ok_or_else(|| {
597                    FsckError::new(
598                        "badHeaderContinuation",
599                        "invalid format - unexpected end in 'gpgsig' or 'gpgsig-sha256' continuation line",
600                    )
601                })?;
602            i = cont_eol + 1;
603        }
604    }
605    Ok(i)
606}
607
608/// After `tagger` (or immediately after `tag` when tagger was omitted under `--no-strict`),
609/// validate optional `gpgsig` headers and the blank line before the body.
610pub fn fsck_tag_mktag_trailer_from(data: &[u8], start: usize) -> Result<(), FsckError> {
611    let buffer_end = data.len();
612    let i = skip_tag_gpgsig_headers(data, start)?;
613
614    if i < buffer_end && data[i] != b'\n' {
615        return Err(FsckError::new(
616            "extraHeaderEntry",
617            "invalid format - extra header(s) after 'tagger'",
618        ));
619    }
620
621    Ok(())
622}
623
624/// Trailing tag headers after `tagger` as enforced by `git mktag` / `fsck_tag_standalone`:
625/// optional `gpgsig` / `gpgsig-sha256` (+ continuations), then the blank line before the body.
626pub fn fsck_tag_mktag_trailer(data: &[u8]) -> Result<(), FsckError> {
627    let buffer_end = data.len();
628    let mut i = parse_tag_headers_through_tagger(data)?;
629
630    i = skip_tag_gpgsig_headers(data, i)?;
631
632    if i < buffer_end && data[i] != b'\n' {
633        return Err(FsckError::new(
634            "extraHeaderEntry",
635            "invalid format - extra header(s) after 'tagger'",
636        ));
637    }
638
639    Ok(())
640}
641
642fn fsck_tree(data: &[u8]) -> Result<(), FsckError> {
643    if parse_tree_gently(data).is_err() {
644        return Err(FsckError::new("badTree", "cannot be parsed as a tree"));
645    }
646    Ok(())
647}
648
649fn parse_tree_gently(data: &[u8]) -> Result<(), ()> {
650    let mut pos = 0usize;
651    while pos < data.len() {
652        let sp = data[pos..].iter().position(|&b| b == b' ').ok_or(())?;
653        let mode_bytes = &data[pos..pos + sp];
654        let mode_ok = std::str::from_utf8(mode_bytes)
655            .ok()
656            .and_then(|s| u32::from_str_radix(s, 8).ok())
657            .is_some();
658        if !mode_ok {
659            return Err(());
660        }
661        pos += sp + 1;
662
663        let nul = data[pos..].iter().position(|&b| b == 0).ok_or(())?;
664        pos += nul + 1;
665
666        if pos + 20 > data.len() {
667            return Err(());
668        }
669        if ObjectId::from_bytes(&data[pos..pos + 20]).is_err() {
670            return Err(());
671        }
672        pos += 20;
673    }
674    Ok(())
675}
676
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `fsck_object` on input that is expected to fail and return the message id.
    fn failing_id(kind: ObjectKind, data: &[u8]) -> &'static str {
        match fsck_object(kind, data) {
            Ok(()) => panic!("expected an fsck failure"),
            Err(err) => err.id,
        }
    }

    #[test]
    fn empty_commit_is_unterminated_header() {
        assert_eq!(failing_id(ObjectKind::Commit, b""), "unterminatedHeader");
    }

    #[test]
    fn commit_missing_tree_matches_git() {
        // Only a blank line: headers are terminated, but there is no `tree` line.
        assert_eq!(failing_id(ObjectKind::Commit, b"\n\n"), "missingTree");
    }

    #[test]
    fn tree_truncated_is_bad_tree() {
        // Four id bytes instead of twenty.
        let truncated = b"100644 foo\0\x01\x01\x01\x01";
        assert_eq!(failing_id(ObjectKind::Tree, truncated), "badTree");
    }
}