Skip to main content

grit_lib/
fsck_standalone.rs

1//! Standalone object fsck for `hash-object` and similar entry points.
2//!
3//! Mirrors the buffer-safe checks in Git's `fsck.c` (`verify_headers`,
4//! `fsck_commit`, `fsck_tag_standalone`, `fsck_tree`) so error messages match
5//! `error: object fails fsck: <camelCaseId>: <detail>`.
6
7use crate::check_ref_format::{check_refname_format, RefNameOptions};
8use crate::git_date::tm::date_overflows;
9use crate::objects::{ObjectId, ObjectKind};
10
/// Git-compatible fsck failure for loose object validation.
///
/// The [`Display`](std::fmt::Display) form is `<id>: <detail>`, i.e. the text
/// that follows `error: object fails fsck: ` in Git's output. The type also
/// implements [`std::error::Error`], so it can be propagated with `?` into
/// boxed or wrapped error types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FsckError {
    /// CamelCase message id (e.g. `missingTree`).
    pub id: &'static str,
    /// Human-readable detail after `id: `.
    pub detail: String,
}

impl FsckError {
    /// Builds an error from a message id and its detail text.
    fn new(id: &'static str, detail: impl Into<String>) -> Self {
        Self {
            id,
            detail: detail.into(),
        }
    }

    /// Full line after `error: object fails fsck: ` (matches Git).
    ///
    /// Equivalent to formatting the error with `{}`.
    #[must_use]
    pub fn report_line(&self) -> String {
        self.to_string()
    }
}

impl std::fmt::Display for FsckError {
    /// Writes `<id>: <detail>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}: {}", self.id, self.detail)
    }
}

impl std::error::Error for FsckError {}
34
35/// Validate raw object bytes the same way `git hash-object` does before hashing.
36///
37/// Returns `Ok(())` when the object is well-formed, or the first fsck error Git
38/// would report for truncated or malformed buffers.
39pub fn fsck_object(kind: ObjectKind, data: &[u8]) -> Result<(), FsckError> {
40    match kind {
41        ObjectKind::Blob => Ok(()),
42        ObjectKind::Commit => fsck_commit(data),
43        ObjectKind::Tag => fsck_tag(data),
44        ObjectKind::Tree => fsck_tree(data),
45    }
46}
47
48fn verify_headers(data: &[u8], nul_msg_id: &'static str) -> Result<(), FsckError> {
49    for (i, &b) in data.iter().enumerate() {
50        if b == 0 {
51            return Err(FsckError::new(
52                nul_msg_id,
53                format!("unterminated header: NUL at offset {i}"),
54            ));
55        }
56        if b == b'\n' && i + 1 < data.len() && data[i + 1] == b'\n' {
57            return Ok(());
58        }
59    }
60    if !data.is_empty() && data[data.len() - 1] == b'\n' {
61        Ok(())
62    } else {
63        Err(FsckError::new("unterminatedHeader", "unterminated header"))
64    }
65}
66
/// Returns `true` for an ASCII decimal digit or a lowercase `a`..=`f`.
fn is_hex_lower(b: u8) -> bool {
    b.is_ascii_digit() || (b'a'..=b'f').contains(&b)
}
70
71/// Parse a 40-character lowercase hex object id at the start of `buf`, requiring
72/// the next byte to be `\n`. Returns bytes consumed (41).
73fn parse_oid_line(buf: &[u8], bad_sha1_id: &'static str) -> Result<usize, FsckError> {
74    if buf.len() < 41 {
75        return Err(FsckError::new(
76            bad_sha1_id,
77            format!(
78                "invalid '{}' line format - bad sha1",
79                line_kind(bad_sha1_id)
80            ),
81        ));
82    }
83    let hex = &buf[..40];
84    if !hex.iter().copied().all(is_hex_lower) {
85        return Err(FsckError::new(
86            bad_sha1_id,
87            format!(
88                "invalid '{}' line format - bad sha1",
89                line_kind(bad_sha1_id)
90            ),
91        ));
92    }
93    if buf[40] != b'\n' {
94        return Err(FsckError::new(
95            bad_sha1_id,
96            format!(
97                "invalid '{}' line format - bad sha1",
98                line_kind(bad_sha1_id)
99            ),
100        ));
101    }
102    let hex_str = std::str::from_utf8(hex).map_err(|_| {
103        FsckError::new(
104            bad_sha1_id,
105            format!(
106                "invalid '{}' line format - bad sha1",
107                line_kind(bad_sha1_id)
108            ),
109        )
110    })?;
111    hex_str.parse::<ObjectId>().map_err(|_| {
112        FsckError::new(
113            bad_sha1_id,
114            format!(
115                "invalid '{}' line format - bad sha1",
116                line_kind(bad_sha1_id)
117            ),
118        )
119    })?;
120    Ok(41)
121}
122
/// Maps a bad-sha1 message id to the header name used in its error text.
///
/// `badObjectSha1` gives `object`, `badParentSha1` gives `parent`, and any
/// other id gives `tree`.
fn line_kind(bad_sha1_id: &'static str) -> &'static str {
    if bad_sha1_id == "badObjectSha1" {
        "object"
    } else if bad_sha1_id == "badParentSha1" {
        "parent"
    } else {
        "tree"
    }
}
130
131fn fsck_ident(
132    data: &[u8],
133    start: usize,
134    buffer_end: usize,
135    oid_line: &'static str,
136) -> Result<usize, FsckError> {
137    let mut p = start;
138    if p >= buffer_end {
139        return Err(FsckError::new(
140            "missingEmail",
141            format!("invalid {oid_line} line - missing email"),
142        ));
143    }
144
145    let line_end = data[p..buffer_end]
146        .iter()
147        .position(|&b| b == b'\n')
148        .map(|rel| p + rel)
149        .ok_or_else(|| {
150            FsckError::new(
151                "missingEmail",
152                format!("invalid {oid_line} line - missing email"),
153            )
154        })?;
155
156    if data[p] == b'<' {
157        return Err(FsckError::new(
158            "missingNameBeforeEmail",
159            format!("invalid {oid_line} line - missing space before email"),
160        ));
161    }
162
163    let ident_end = line_end;
164    while p < ident_end {
165        if data[p] == b'\n' {
166            return Err(FsckError::new(
167                "missingEmail",
168                format!("invalid {oid_line} line - missing email"),
169            ));
170        }
171        if data[p] == b'>' {
172            return Err(FsckError::new(
173                "badName",
174                format!("invalid {oid_line} line - bad name"),
175            ));
176        }
177        if data[p] == b'<' {
178            break;
179        }
180        p += 1;
181    }
182
183    if p >= ident_end {
184        return Err(FsckError::new(
185            "missingEmail",
186            format!("invalid {oid_line} line - missing email"),
187        ));
188    }
189
190    if p == start || data[p - 1] != b' ' {
191        return Err(FsckError::new(
192            "missingSpaceBeforeEmail",
193            format!("invalid {oid_line} line - missing space before email"),
194        ));
195    }
196    p += 1; // skip '<'
197
198    let email_start = p;
199    while p < ident_end {
200        if data[p] == b'<' || data[p] == b'\n' {
201            return Err(FsckError::new(
202                "badEmail",
203                format!("invalid {oid_line} line - bad email"),
204            ));
205        }
206        if data[p] == b'>' {
207            break;
208        }
209        p += 1;
210    }
211
212    if p >= ident_end || p == email_start {
213        return Err(FsckError::new(
214            "badEmail",
215            format!("invalid {oid_line} line - bad email"),
216        ));
217    }
218    p += 1; // skip '>'
219
220    if p >= ident_end || data[p] != b' ' {
221        return Err(FsckError::new(
222            "missingSpaceBeforeDate",
223            format!("invalid {oid_line} line - missing space before date"),
224        ));
225    }
226    p += 1;
227
228    while p < ident_end && (data[p] == b' ' || data[p] == b'\t') {
229        p += 1;
230    }
231
232    if p >= ident_end || !data[p].is_ascii_digit() {
233        return Err(FsckError::new(
234            "badDate",
235            format!("invalid {oid_line} line - bad date"),
236        ));
237    }
238
239    if data[p] == b'0' && p + 1 < ident_end && data[p + 1] != b' ' {
240        return Err(FsckError::new(
241            "zeroPaddedDate",
242            format!("invalid {oid_line} line - zero-padded date"),
243        ));
244    }
245
246    let ts_start = p;
247    while p < ident_end && data[p].is_ascii_digit() {
248        p += 1;
249    }
250    let ts_len = p - ts_start;
251    if ts_len > 21 {
252        return Err(FsckError::new(
253            "badDateOverflow",
254            format!("invalid {oid_line} line - date causes integer overflow"),
255        ));
256    }
257    let ts_str = std::str::from_utf8(&data[ts_start..p])
258        .map_err(|_| FsckError::new("badDate", format!("invalid {oid_line} line - bad date")))?;
259    let raw: u128 = ts_str
260        .parse()
261        .map_err(|_| FsckError::new("badDate", format!("invalid {oid_line} line - bad date")))?;
262    if raw > u64::MAX as u128 || date_overflows(raw as u64) {
263        return Err(FsckError::new(
264            "badDateOverflow",
265            format!("invalid {oid_line} line - date causes integer overflow"),
266        ));
267    }
268
269    if p >= ident_end || data[p] != b' ' {
270        return Err(FsckError::new(
271            "badDate",
272            format!("invalid {oid_line} line - bad date"),
273        ));
274    }
275    p += 1;
276
277    if p + 5 > ident_end
278        || (data[p] != b'+' && data[p] != b'-')
279        || !data[p + 1..p + 5].iter().all(|b| b.is_ascii_digit())
280        || data[p + 5] != b'\n'
281    {
282        return Err(FsckError::new(
283            "badTimezone",
284            format!("invalid {oid_line} line - bad time zone"),
285        ));
286    }
287
288    Ok(line_end + 1)
289}
290
291fn fsck_commit(data: &[u8]) -> Result<(), FsckError> {
292    verify_headers(data, "nulInHeader")?;
293
294    let buffer_end = data.len();
295    let mut i = 0usize;
296
297    if i >= buffer_end || !data[i..].starts_with(b"tree ") {
298        return Err(FsckError::new(
299            "missingTree",
300            "invalid format - expected 'tree' line",
301        ));
302    }
303    i += 5;
304    let n = parse_oid_line(&data[i..], "badTreeSha1")?;
305    i += n;
306
307    while i < buffer_end && data[i..].starts_with(b"parent ") {
308        i += 7;
309        let n = parse_oid_line(&data[i..], "badParentSha1")?;
310        i += n;
311    }
312
313    let mut author_count = 0usize;
314    while i < buffer_end && data[i..].starts_with(b"author ") {
315        author_count += 1;
316        i += 7;
317        i = fsck_ident(data, i, buffer_end, "author/committer")?;
318    }
319
320    if author_count < 1 {
321        return Err(FsckError::new(
322            "missingAuthor",
323            "invalid format - expected 'author' line",
324        ));
325    }
326    if author_count > 1 {
327        return Err(FsckError::new(
328            "multipleAuthors",
329            "invalid format - multiple 'author' lines",
330        ));
331    }
332
333    if i >= buffer_end || !data[i..].starts_with(b"committer ") {
334        return Err(FsckError::new(
335            "missingCommitter",
336            "invalid format - expected 'committer' line",
337        ));
338    }
339    i += 10;
340    fsck_ident(data, i, buffer_end, "author/committer")?;
341
342    if data.contains(&0) {
343        return Err(FsckError::new(
344            "nulInCommit",
345            "NUL byte in the commit object body",
346        ));
347    }
348
349    Ok(())
350}
351
352fn object_type_from_tag_type_line(s: &str) -> Option<ObjectKind> {
353    match s {
354        "blob" => Some(ObjectKind::Blob),
355        "tree" => Some(ObjectKind::Tree),
356        "commit" => Some(ObjectKind::Commit),
357        "tag" => Some(ObjectKind::Tag),
358        _ => None,
359    }
360}
361
362fn fsck_tag(data: &[u8]) -> Result<(), FsckError> {
363    verify_headers(data, "nulInHeader")?;
364
365    let buffer_end = data.len();
366    let mut i = 0usize;
367
368    if i >= buffer_end || !data[i..].starts_with(b"object ") {
369        return Err(FsckError::new(
370            "missingObject",
371            "invalid format - expected 'object' line",
372        ));
373    }
374    i += 7;
375    let n = parse_oid_line(&data[i..], "badObjectSha1")?;
376    i += n;
377
378    if i >= buffer_end || !data[i..].starts_with(b"type ") {
379        return Err(FsckError::new(
380            "missingTypeEntry",
381            "invalid format - expected 'type' line",
382        ));
383    }
384    i += 5;
385    let type_start = i;
386    let eol = data[type_start..buffer_end]
387        .iter()
388        .position(|&b| b == b'\n')
389        .map(|rel| type_start + rel)
390        .ok_or_else(|| {
391            FsckError::new(
392                "missingType",
393                "invalid format - unexpected end after 'type' line",
394            )
395        })?;
396
397    let type_str = std::str::from_utf8(&data[type_start..eol])
398        .map_err(|_| FsckError::new("badType", "invalid 'type' value"))?;
399    if object_type_from_tag_type_line(type_str).is_none() {
400        return Err(FsckError::new("badType", "invalid 'type' value"));
401    }
402    i = eol + 1;
403
404    if i >= buffer_end || !data[i..].starts_with(b"tag ") {
405        return Err(FsckError::new(
406            "missingTagEntry",
407            "invalid format - expected 'tag' line",
408        ));
409    }
410    i += 4;
411    let tag_start = i;
412    let eol = data[tag_start..buffer_end]
413        .iter()
414        .position(|&b| b == b'\n')
415        .map(|rel| tag_start + rel)
416        .ok_or_else(|| {
417            FsckError::new(
418                "missingTag",
419                "invalid format - unexpected end after 'type' line",
420            )
421        })?;
422
423    let tag_name = std::str::from_utf8(&data[tag_start..eol])
424        .map_err(|_| FsckError::new("badTagName", "invalid 'tag' name"))?;
425    let refname = format!("refs/tags/{tag_name}");
426    if check_refname_format(&refname, &RefNameOptions::default()).is_err() {
427        return Err(FsckError::new(
428            "badTagName",
429            format!("invalid 'tag' name: {tag_name}"),
430        ));
431    }
432    i = eol + 1;
433
434    if i >= buffer_end || !data[i..].starts_with(b"tagger ") {
435        return Err(FsckError::new(
436            "missingTaggerEntry",
437            "invalid format - expected 'tagger' line",
438        ));
439    }
440    i += 7;
441    fsck_ident(data, i, buffer_end, "author/committer")?;
442
443    Ok(())
444}
445
446fn fsck_tree(data: &[u8]) -> Result<(), FsckError> {
447    if parse_tree_gently(data).is_err() {
448        return Err(FsckError::new("badTree", "cannot be parsed as a tree"));
449    }
450    Ok(())
451}
452
453fn parse_tree_gently(data: &[u8]) -> Result<(), ()> {
454    let mut pos = 0usize;
455    while pos < data.len() {
456        let sp = data[pos..].iter().position(|&b| b == b' ').ok_or(())?;
457        let mode_bytes = &data[pos..pos + sp];
458        let mode_ok = std::str::from_utf8(mode_bytes)
459            .ok()
460            .and_then(|s| u32::from_str_radix(s, 8).ok())
461            .is_some();
462        if !mode_ok {
463            return Err(());
464        }
465        pos += sp + 1;
466
467        let nul = data[pos..].iter().position(|&b| b == 0).ok_or(())?;
468        pos += nul + 1;
469
470        if pos + 20 > data.len() {
471            return Err(());
472        }
473        if ObjectId::from_bytes(&data[pos..pos + 20]).is_err() {
474            return Err(());
475        }
476        pos += 20;
477    }
478    Ok(())
479}
480
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `fsck_object` on input that must be rejected and returns the
    /// message id of the resulting error.
    fn rejected_id(kind: ObjectKind, data: &[u8]) -> &'static str {
        fsck_object(kind, data)
            .expect_err("object should fail fsck")
            .id
    }

    #[test]
    fn empty_commit_is_unterminated_header() {
        // An empty buffer has no terminating newline at all.
        assert_eq!(rejected_id(ObjectKind::Commit, b""), "unterminatedHeader");
    }

    #[test]
    fn commit_missing_tree_matches_git() {
        // Headers terminate correctly, but the first line is not `tree `.
        assert_eq!(rejected_id(ObjectKind::Commit, b"\n\n"), "missingTree");
    }

    #[test]
    fn tree_truncated_is_bad_tree() {
        // Only four of the twenty object-id bytes follow the entry name.
        let truncated = b"100644 foo\0\x01\x01\x01\x01";
        assert_eq!(rejected_id(ObjectKind::Tree, truncated), "badTree");
    }
}