1use crate::check_ref_format::{check_refname_format, RefNameOptions};
8use crate::git_date::tm::date_overflows;
9use crate::objects::{ObjectId, ObjectKind};
10
/// A single fsck finding: a stable message identifier paired with descriptive text.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FsckError {
    /// Short camelCase identifier naming the kind of problem (for example `missingTree`).
    pub id: &'static str,
    /// Human-readable description of what was wrong with the object.
    pub detail: String,
}
19
20impl FsckError {
21 #[must_use]
23 pub fn new(id: &'static str, detail: impl Into<String>) -> Self {
24 Self {
25 id,
26 detail: detail.into(),
27 }
28 }
29
30 #[must_use]
32 pub fn report_line(&self) -> String {
33 format!("{}: {}", self.id, self.detail)
34 }
35}
36
37pub fn fsck_object(kind: ObjectKind, data: &[u8]) -> Result<(), FsckError> {
42 match kind {
43 ObjectKind::Blob => Ok(()),
44 ObjectKind::Commit => fsck_commit(data),
45 ObjectKind::Tag => fsck_tag(data),
46 ObjectKind::Tree => fsck_tree(data),
47 }
48}
49
50fn verify_headers(data: &[u8], nul_msg_id: &'static str) -> Result<(), FsckError> {
51 for (i, &b) in data.iter().enumerate() {
52 if b == 0 {
53 return Err(FsckError::new(
54 nul_msg_id,
55 format!("unterminated header: NUL at offset {i}"),
56 ));
57 }
58 if b == b'\n' && i + 1 < data.len() && data[i + 1] == b'\n' {
59 return Ok(());
60 }
61 }
62 if !data.is_empty() && data[data.len() - 1] == b'\n' {
63 Ok(())
64 } else {
65 Err(FsckError::new("unterminatedHeader", "unterminated header"))
66 }
67}
68
/// Returns `true` when `b` is an ASCII digit or a lowercase `a`–`f` hex letter.
fn is_hex_lower(b: u8) -> bool {
    b.is_ascii_digit() || (b'a'..=b'f').contains(&b)
}
72
73fn parse_oid_line(buf: &[u8], bad_sha1_id: &'static str) -> Result<usize, FsckError> {
76 if buf.len() < 41 {
77 return Err(FsckError::new(
78 bad_sha1_id,
79 format!(
80 "invalid '{}' line format - bad sha1",
81 line_kind(bad_sha1_id)
82 ),
83 ));
84 }
85 let hex = &buf[..40];
86 if !hex.iter().copied().all(is_hex_lower) {
87 return Err(FsckError::new(
88 bad_sha1_id,
89 format!(
90 "invalid '{}' line format - bad sha1",
91 line_kind(bad_sha1_id)
92 ),
93 ));
94 }
95 if buf[40] != b'\n' {
96 return Err(FsckError::new(
97 bad_sha1_id,
98 format!(
99 "invalid '{}' line format - bad sha1",
100 line_kind(bad_sha1_id)
101 ),
102 ));
103 }
104 let hex_str = std::str::from_utf8(hex).map_err(|_| {
105 FsckError::new(
106 bad_sha1_id,
107 format!(
108 "invalid '{}' line format - bad sha1",
109 line_kind(bad_sha1_id)
110 ),
111 )
112 })?;
113 hex_str.parse::<ObjectId>().map_err(|_| {
114 FsckError::new(
115 bad_sha1_id,
116 format!(
117 "invalid '{}' line format - bad sha1",
118 line_kind(bad_sha1_id)
119 ),
120 )
121 })?;
122 Ok(41)
123}
124
/// Maps a bad-sha1 message id to the header name used in its error text.
///
/// `badObjectSha1` yields `object`, `badParentSha1` yields `parent`, and every other id
/// falls back to `tree`.
fn line_kind(bad_sha1_id: &'static str) -> &'static str {
    if bad_sha1_id == "badObjectSha1" {
        "object"
    } else if bad_sha1_id == "badParentSha1" {
        "parent"
    } else {
        "tree"
    }
}
132
133fn fsck_ident(
134 data: &[u8],
135 start: usize,
136 buffer_end: usize,
137 oid_line: &'static str,
138) -> Result<usize, FsckError> {
139 let mut p = start;
140 if p >= buffer_end {
141 return Err(FsckError::new(
142 "missingEmail",
143 format!("invalid {oid_line} line - missing email"),
144 ));
145 }
146
147 let line_end = data[p..buffer_end]
148 .iter()
149 .position(|&b| b == b'\n')
150 .map(|rel| p + rel)
151 .ok_or_else(|| {
152 FsckError::new(
153 "missingEmail",
154 format!("invalid {oid_line} line - missing email"),
155 )
156 })?;
157
158 let ident_end = line_end;
159
160 if data[p] == b'<' {
161 return Err(FsckError::new(
162 "missingNameBeforeEmail",
163 format!("invalid {oid_line} line - missing space before email"),
164 ));
165 }
166
167 loop {
169 if p >= ident_end || data[p] == b'\n' {
170 return Err(FsckError::new(
171 "missingEmail",
172 format!("invalid {oid_line} line - missing email"),
173 ));
174 }
175 if data[p] == b'>' {
176 return Err(FsckError::new(
177 "badName",
178 format!("invalid {oid_line} line - bad name"),
179 ));
180 }
181 if data[p] == b'<' {
182 break;
183 }
184 p += 1;
185 }
186
187 if p == start || data[p - 1] != b' ' {
188 return Err(FsckError::new(
189 "missingSpaceBeforeEmail",
190 format!("invalid {oid_line} line - missing space before email"),
191 ));
192 }
193 p += 1; loop {
197 if p >= ident_end || data[p] == b'<' || data[p] == b'\n' {
198 return Err(FsckError::new(
199 "badEmail",
200 format!("invalid {oid_line} line - bad email"),
201 ));
202 }
203 if data[p] == b'>' {
204 break;
205 }
206 p += 1;
207 }
208 p += 1; if p >= ident_end || data[p] != b' ' {
211 return Err(FsckError::new(
212 "missingSpaceBeforeDate",
213 format!("invalid {oid_line} line - missing space before date"),
214 ));
215 }
216 p += 1;
217
218 while p < ident_end && (data[p] == b' ' || data[p] == b'\t') {
219 p += 1;
220 }
221
222 if p >= ident_end || !data[p].is_ascii_digit() {
223 return Err(FsckError::new(
224 "badDate",
225 format!("invalid {oid_line} line - bad date"),
226 ));
227 }
228
229 if data[p] == b'0' && p + 1 < ident_end && data[p + 1] != b' ' {
230 return Err(FsckError::new(
231 "zeroPaddedDate",
232 format!("invalid {oid_line} line - zero-padded date"),
233 ));
234 }
235
236 let ts_start = p;
237 while p < ident_end && data[p].is_ascii_digit() {
238 p += 1;
239 }
240 let ts_len = p - ts_start;
241 if ts_len > 21 {
242 return Err(FsckError::new(
243 "badDateOverflow",
244 format!("invalid {oid_line} line - date causes integer overflow"),
245 ));
246 }
247 let ts_str = std::str::from_utf8(&data[ts_start..p])
248 .map_err(|_| FsckError::new("badDate", format!("invalid {oid_line} line - bad date")))?;
249 let raw: u128 = ts_str
250 .parse()
251 .map_err(|_| FsckError::new("badDate", format!("invalid {oid_line} line - bad date")))?;
252 if raw > u64::MAX as u128 || date_overflows(raw as u64) {
253 return Err(FsckError::new(
254 "badDateOverflow",
255 format!("invalid {oid_line} line - date causes integer overflow"),
256 ));
257 }
258
259 if p >= ident_end || data[p] != b' ' {
260 return Err(FsckError::new(
261 "badDate",
262 format!("invalid {oid_line} line - bad date"),
263 ));
264 }
265 p += 1;
266
267 if p + 5 > ident_end
269 || (data[p] != b'+' && data[p] != b'-')
270 || !data[p + 1..p + 5].iter().all(|b| b.is_ascii_digit())
271 || data[p + 5] != b'\n'
272 {
273 return Err(FsckError::new(
274 "badTimezone",
275 format!("invalid {oid_line} line - bad time zone"),
276 ));
277 }
278
279 Ok(line_end + 1)
280}
281
282fn fsck_commit(data: &[u8]) -> Result<(), FsckError> {
283 verify_headers(data, "nulInHeader")?;
284
285 let buffer_end = data.len();
286 let mut i = 0usize;
287
288 if i >= buffer_end || !data[i..].starts_with(b"tree ") {
289 return Err(FsckError::new(
290 "missingTree",
291 "invalid format - expected 'tree' line",
292 ));
293 }
294 i += 5;
295 let n = parse_oid_line(&data[i..], "badTreeSha1")?;
296 i += n;
297
298 while i < buffer_end && data[i..].starts_with(b"parent ") {
299 i += 7;
300 let n = parse_oid_line(&data[i..], "badParentSha1")?;
301 i += n;
302 }
303
304 let mut author_count = 0usize;
305 while i < buffer_end && data[i..].starts_with(b"author ") {
306 author_count += 1;
307 i += 7;
308 i = fsck_ident(data, i, buffer_end, "author/committer")?;
309 }
310
311 if author_count < 1 {
312 return Err(FsckError::new(
313 "missingAuthor",
314 "invalid format - expected 'author' line",
315 ));
316 }
317 if author_count > 1 {
318 return Err(FsckError::new(
319 "multipleAuthors",
320 "invalid format - multiple 'author' lines",
321 ));
322 }
323
324 if i >= buffer_end || !data[i..].starts_with(b"committer ") {
325 return Err(FsckError::new(
326 "missingCommitter",
327 "invalid format - expected 'committer' line",
328 ));
329 }
330 i += 10;
331 fsck_ident(data, i, buffer_end, "author/committer")?;
332
333 if data.contains(&0) {
334 return Err(FsckError::new(
335 "nulInCommit",
336 "NUL byte in the commit object body",
337 ));
338 }
339
340 Ok(())
341}
342
343fn parse_tag_headers_through_tagger(data: &[u8]) -> Result<usize, FsckError> {
345 verify_headers(data, "nulInHeader")?;
346
347 let buffer_end = data.len();
348 let mut i = 0usize;
349
350 if i >= buffer_end || !data[i..].starts_with(b"object ") {
351 return Err(FsckError::new(
352 "missingObject",
353 "invalid format - expected 'object' line",
354 ));
355 }
356 i += 7;
357 let n = parse_oid_line(&data[i..], "badObjectSha1")?;
358 i += n;
359
360 if i >= buffer_end || !data[i..].starts_with(b"type ") {
361 return Err(FsckError::new(
362 "missingTypeEntry",
363 "invalid format - expected 'type' line",
364 ));
365 }
366 i += 5;
367 let type_start = i;
368 let eol = data[type_start..buffer_end]
369 .iter()
370 .position(|&b| b == b'\n')
371 .map(|rel| type_start + rel)
372 .ok_or_else(|| {
373 FsckError::new(
374 "missingType",
375 "invalid format - unexpected end after 'type' line",
376 )
377 })?;
378
379 if ObjectKind::from_tag_type_field(&data[type_start..eol]).is_none() {
380 return Err(FsckError::new("badType", "invalid 'type' value"));
381 }
382 i = eol + 1;
383
384 if i >= buffer_end || !data[i..].starts_with(b"tag ") {
385 return Err(FsckError::new(
386 "missingTagEntry",
387 "invalid format - expected 'tag' line",
388 ));
389 }
390 i += 4;
391 let tag_start = i;
392 let eol = data[tag_start..buffer_end]
393 .iter()
394 .position(|&b| b == b'\n')
395 .map(|rel| tag_start + rel)
396 .ok_or_else(|| {
397 FsckError::new(
398 "missingTag",
399 "invalid format - unexpected end after 'type' line",
400 )
401 })?;
402
403 let tag_name = std::str::from_utf8(&data[tag_start..eol])
404 .map_err(|_| FsckError::new("badTagName", "invalid 'tag' name"))?;
405 let refname = format!("refs/tags/{tag_name}");
406 if check_refname_format(&refname, &RefNameOptions::default()).is_err() {
407 return Err(FsckError::new(
408 "badTagName",
409 format!("invalid 'tag' name: {tag_name}"),
410 ));
411 }
412 i = eol + 1;
413
414 if i >= buffer_end || !data[i..].starts_with(b"tagger ") {
415 return Err(FsckError::new(
416 "missingTaggerEntry",
417 "invalid format - expected 'tagger' line",
418 ));
419 }
420 i += 7;
421 fsck_ident(data, i, buffer_end, "author/committer")
422}
423
424fn fsck_tag(data: &[u8]) -> Result<(), FsckError> {
425 parse_tag_headers_through_tagger(data).map(|_| ())
426}
427
428pub fn parse_tag_for_mktag(
437 data: &[u8],
438 strict: bool,
439 on_warn: &mut impl FnMut(&FsckError),
440) -> Result<(ObjectId, ObjectKind, usize, bool), FsckError> {
441 verify_headers(data, "nulInHeader")?;
442
443 let buffer_end = data.len();
444 let mut i = 0usize;
445
446 if i >= buffer_end || !data[i..].starts_with(b"object ") {
447 return Err(FsckError::new(
448 "missingObject",
449 "invalid format - expected 'object' line",
450 ));
451 }
452 i += 7;
453 let n = parse_oid_line(&data[i..], "badObjectSha1")?;
454 let tagged_oid = std::str::from_utf8(&data[i..i + 40])
455 .map_err(|_| FsckError::new("badObjectSha1", "invalid 'object' line format - bad sha1"))?
456 .parse::<ObjectId>()
457 .map_err(|_| FsckError::new("badObjectSha1", "invalid 'object' line format - bad sha1"))?;
458 i += n;
459
460 if i >= buffer_end || !data[i..].starts_with(b"type ") {
461 return Err(FsckError::new(
462 "missingTypeEntry",
463 "invalid format - expected 'type' line",
464 ));
465 }
466 i += 5;
467 let type_start = i;
468 let type_eol = data[type_start..buffer_end]
469 .iter()
470 .position(|&b| b == b'\n')
471 .map(|rel| type_start + rel)
472 .ok_or_else(|| {
473 FsckError::new(
474 "missingType",
475 "invalid format - unexpected end after 'type' line",
476 )
477 })?;
478
479 let tagged_kind = ObjectKind::from_tag_type_field(&data[type_start..type_eol])
480 .ok_or_else(|| FsckError::new("badType", "invalid 'type' value"))?;
481 i = type_eol + 1;
482
483 if i >= buffer_end || !data[i..].starts_with(b"tag ") {
484 return Err(FsckError::new(
485 "missingTagEntry",
486 "invalid format - expected 'tag' line",
487 ));
488 }
489 i += 4;
490 let tag_start = i;
491 let tag_eol = data[tag_start..buffer_end]
492 .iter()
493 .position(|&b| b == b'\n')
494 .map(|rel| tag_start + rel)
495 .ok_or_else(|| {
496 FsckError::new(
497 "missingTag",
498 "invalid format - unexpected end after 'type' line",
499 )
500 })?;
501
502 let tag_name = std::str::from_utf8(&data[tag_start..tag_eol])
503 .map_err(|_| FsckError::new("badTagName", "invalid 'tag' name"))?;
504 let refname = format!("refs/tags/{tag_name}");
505 if check_refname_format(&refname, &RefNameOptions::default()).is_err() {
506 let e = FsckError::new("badTagName", format!("invalid 'tag' name: {tag_name}"));
507 if strict {
508 return Err(e);
509 }
510 on_warn(&e);
511 }
512 i = tag_eol + 1;
513
514 if i >= buffer_end {
515 let e = FsckError::new(
516 "missingTaggerEntry",
517 "invalid format - expected 'tagger' line",
518 );
519 if strict {
520 return Err(e);
521 }
522 on_warn(&e);
523 return Ok((tagged_oid, tagged_kind, i, true));
524 }
525
526 let tg_line_start = i;
527 let tg_eol = data[tg_line_start..buffer_end]
528 .iter()
529 .position(|&b| b == b'\n')
530 .map(|rel| tg_line_start + rel)
531 .ok_or_else(|| FsckError::new("unterminatedHeader", "unterminated header"))?;
532 let tg_line = &data[tg_line_start..tg_eol];
533
534 let missing_tagger = || {
535 FsckError::new(
536 "missingTaggerEntry",
537 "invalid format - expected 'tagger' line",
538 )
539 };
540
541 if tg_line == b"tagger" || !tg_line.starts_with(b"tagger ") {
542 let e = missing_tagger();
543 if strict {
544 return Err(e);
545 }
546 on_warn(&e);
547 i = tg_eol + 1;
548 } else {
549 i = tg_line_start + b"tagger ".len();
550 match fsck_ident(data, i, buffer_end, "author/committer") {
551 Ok(next) => {
552 i = next;
553 return Ok((tagged_oid, tagged_kind, i, true));
554 }
555 Err(e) => {
556 if strict {
557 return Err(e);
558 }
559 on_warn(&e);
560 let tail = &data[tg_line_start..buffer_end];
561 i = if let Some(pos) = tail.windows(2).position(|w| w == b"\n\n") {
562 tg_line_start + pos + 2
563 } else {
564 buffer_end
565 };
566 return Ok((tagged_oid, tagged_kind, i, false));
567 }
568 }
569 }
570
571 Ok((tagged_oid, tagged_kind, i, true))
572}
573
574fn skip_tag_gpgsig_headers(data: &[u8], mut i: usize) -> Result<usize, FsckError> {
575 let buffer_end = data.len();
576 if i < buffer_end
577 && (data[i..].starts_with(b"gpgsig ") || data[i..].starts_with(b"gpgsig-sha256 "))
578 {
579 let sig_start = i;
580 let sig_eol = data[sig_start..buffer_end]
581 .iter()
582 .position(|&b| b == b'\n')
583 .map(|rel| sig_start + rel)
584 .ok_or_else(|| {
585 FsckError::new(
586 "badGpgsig",
587 "invalid format - unexpected end after 'gpgsig' or 'gpgsig-sha256' line",
588 )
589 })?;
590 i = sig_eol + 1;
591 while i < buffer_end && data[i] == b' ' {
592 let cont_eol = data[i..buffer_end]
593 .iter()
594 .position(|&b| b == b'\n')
595 .map(|rel| i + rel)
596 .ok_or_else(|| {
597 FsckError::new(
598 "badHeaderContinuation",
599 "invalid format - unexpected end in 'gpgsig' or 'gpgsig-sha256' continuation line",
600 )
601 })?;
602 i = cont_eol + 1;
603 }
604 }
605 Ok(i)
606}
607
608pub fn fsck_tag_mktag_trailer_from(data: &[u8], start: usize) -> Result<(), FsckError> {
611 let buffer_end = data.len();
612 let i = skip_tag_gpgsig_headers(data, start)?;
613
614 if i < buffer_end && data[i] != b'\n' {
615 return Err(FsckError::new(
616 "extraHeaderEntry",
617 "invalid format - extra header(s) after 'tagger'",
618 ));
619 }
620
621 Ok(())
622}
623
624pub fn fsck_tag_mktag_trailer(data: &[u8]) -> Result<(), FsckError> {
627 let buffer_end = data.len();
628 let mut i = parse_tag_headers_through_tagger(data)?;
629
630 i = skip_tag_gpgsig_headers(data, i)?;
631
632 if i < buffer_end && data[i] != b'\n' {
633 return Err(FsckError::new(
634 "extraHeaderEntry",
635 "invalid format - extra header(s) after 'tagger'",
636 ));
637 }
638
639 Ok(())
640}
641
642fn fsck_tree(data: &[u8]) -> Result<(), FsckError> {
643 if parse_tree_gently(data).is_err() {
644 return Err(FsckError::new("badTree", "cannot be parsed as a tree"));
645 }
646 Ok(())
647}
648
649fn parse_tree_gently(data: &[u8]) -> Result<(), ()> {
650 let mut pos = 0usize;
651 while pos < data.len() {
652 let sp = data[pos..].iter().position(|&b| b == b' ').ok_or(())?;
653 let mode_bytes = &data[pos..pos + sp];
654 let mode_ok = std::str::from_utf8(mode_bytes)
655 .ok()
656 .and_then(|s| u32::from_str_radix(s, 8).ok())
657 .is_some();
658 if !mode_ok {
659 return Err(());
660 }
661 pos += sp + 1;
662
663 let nul = data[pos..].iter().position(|&b| b == 0).ok_or(())?;
664 pos += nul + 1;
665
666 if pos + 20 > data.len() {
667 return Err(());
668 }
669 if ObjectId::from_bytes(&data[pos..pos + 20]).is_err() {
670 return Err(());
671 }
672 pos += 20;
673 }
674 Ok(())
675}
676
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the message id reported when `data` is checked as `kind`.
    ///
    /// Panics if the check unexpectedly succeeds.
    fn failure_id(kind: ObjectKind, data: &[u8]) -> &'static str {
        fsck_object(kind, data).unwrap_err().id
    }

    #[test]
    fn empty_commit_is_unterminated_header() {
        assert_eq!(failure_id(ObjectKind::Commit, b""), "unterminatedHeader");
    }

    #[test]
    fn commit_missing_tree_matches_git() {
        assert_eq!(failure_id(ObjectKind::Commit, b"\n\n"), "missingTree");
    }

    #[test]
    fn tree_truncated_is_bad_tree() {
        // Only four bytes follow the name where twenty are required.
        assert_eq!(
            failure_id(ObjectKind::Tree, b"100644 foo\0\x01\x01\x01\x01"),
            "badTree"
        );
    }
}