1use crate::check_ref_format::{check_refname_format, RefNameOptions};
8use crate::git_date::tm::date_overflows;
9use crate::objects::{ObjectId, ObjectKind};
10
/// A single problem found while checking an object.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`], so it can be
/// propagated with `?` into boxed/dynamic error types and printed with `{}`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FsckError {
    /// Short message identifier, e.g. `missingTree` or `badDate`.
    pub id: &'static str,
    /// Human-readable explanation of what is wrong.
    pub detail: String,
}

impl FsckError {
    /// Builds an error from a message id and any string-like detail text.
    fn new(id: &'static str, detail: impl Into<String>) -> Self {
        Self {
            id,
            detail: detail.into(),
        }
    }

    /// Renders the error as `"<id>: <detail>"`.
    ///
    /// Produces the same text as the `Display` implementation.
    #[must_use]
    pub fn report_line(&self) -> String {
        self.to_string()
    }
}

impl std::fmt::Display for FsckError {
    /// Writes `"<id>: <detail>"`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}: {}", self.id, self.detail)
    }
}

impl std::error::Error for FsckError {}
34
35pub fn fsck_object(kind: ObjectKind, data: &[u8]) -> Result<(), FsckError> {
40 match kind {
41 ObjectKind::Blob => Ok(()),
42 ObjectKind::Commit => fsck_commit(data),
43 ObjectKind::Tag => fsck_tag(data),
44 ObjectKind::Tree => fsck_tree(data),
45 }
46}
47
48fn verify_headers(data: &[u8], nul_msg_id: &'static str) -> Result<(), FsckError> {
49 for (i, &b) in data.iter().enumerate() {
50 if b == 0 {
51 return Err(FsckError::new(
52 nul_msg_id,
53 format!("unterminated header: NUL at offset {i}"),
54 ));
55 }
56 if b == b'\n' && i + 1 < data.len() && data[i + 1] == b'\n' {
57 return Ok(());
58 }
59 }
60 if !data.is_empty() && data[data.len() - 1] == b'\n' {
61 Ok(())
62 } else {
63 Err(FsckError::new("unterminatedHeader", "unterminated header"))
64 }
65}
66
/// Returns `true` for ASCII `0`-`9` and lowercase `a`-`f` only.
fn is_hex_lower(b: u8) -> bool {
    b.is_ascii_digit() || (b'a'..=b'f').contains(&b)
}
70
71fn parse_oid_line(buf: &[u8], bad_sha1_id: &'static str) -> Result<usize, FsckError> {
74 if buf.len() < 41 {
75 return Err(FsckError::new(
76 bad_sha1_id,
77 format!(
78 "invalid '{}' line format - bad sha1",
79 line_kind(bad_sha1_id)
80 ),
81 ));
82 }
83 let hex = &buf[..40];
84 if !hex.iter().copied().all(is_hex_lower) {
85 return Err(FsckError::new(
86 bad_sha1_id,
87 format!(
88 "invalid '{}' line format - bad sha1",
89 line_kind(bad_sha1_id)
90 ),
91 ));
92 }
93 if buf[40] != b'\n' {
94 return Err(FsckError::new(
95 bad_sha1_id,
96 format!(
97 "invalid '{}' line format - bad sha1",
98 line_kind(bad_sha1_id)
99 ),
100 ));
101 }
102 let hex_str = std::str::from_utf8(hex).map_err(|_| {
103 FsckError::new(
104 bad_sha1_id,
105 format!(
106 "invalid '{}' line format - bad sha1",
107 line_kind(bad_sha1_id)
108 ),
109 )
110 })?;
111 hex_str.parse::<ObjectId>().map_err(|_| {
112 FsckError::new(
113 bad_sha1_id,
114 format!(
115 "invalid '{}' line format - bad sha1",
116 line_kind(bad_sha1_id)
117 ),
118 )
119 })?;
120 Ok(41)
121}
122
/// Maps a bad-sha1 message id to the header keyword used in its error text.
///
/// Ids other than `badObjectSha1` and `badParentSha1` fall back to `"tree"`.
fn line_kind(bad_sha1_id: &'static str) -> &'static str {
    const NAMED: [(&str, &str); 2] = [
        ("badObjectSha1", "object"),
        ("badParentSha1", "parent"),
    ];

    NAMED
        .iter()
        .find(|(id, _)| *id == bad_sha1_id)
        .map_or("tree", |&(_, keyword)| keyword)
}
130
131fn fsck_ident(
132 data: &[u8],
133 start: usize,
134 buffer_end: usize,
135 oid_line: &'static str,
136) -> Result<usize, FsckError> {
137 let mut p = start;
138 if p >= buffer_end {
139 return Err(FsckError::new(
140 "missingEmail",
141 format!("invalid {oid_line} line - missing email"),
142 ));
143 }
144
145 let line_end = data[p..buffer_end]
146 .iter()
147 .position(|&b| b == b'\n')
148 .map(|rel| p + rel)
149 .ok_or_else(|| {
150 FsckError::new(
151 "missingEmail",
152 format!("invalid {oid_line} line - missing email"),
153 )
154 })?;
155
156 if data[p] == b'<' {
157 return Err(FsckError::new(
158 "missingNameBeforeEmail",
159 format!("invalid {oid_line} line - missing space before email"),
160 ));
161 }
162
163 let ident_end = line_end;
164 while p < ident_end {
165 if data[p] == b'\n' {
166 return Err(FsckError::new(
167 "missingEmail",
168 format!("invalid {oid_line} line - missing email"),
169 ));
170 }
171 if data[p] == b'>' {
172 return Err(FsckError::new(
173 "badName",
174 format!("invalid {oid_line} line - bad name"),
175 ));
176 }
177 if data[p] == b'<' {
178 break;
179 }
180 p += 1;
181 }
182
183 if p >= ident_end {
184 return Err(FsckError::new(
185 "missingEmail",
186 format!("invalid {oid_line} line - missing email"),
187 ));
188 }
189
190 if p == start || data[p - 1] != b' ' {
191 return Err(FsckError::new(
192 "missingSpaceBeforeEmail",
193 format!("invalid {oid_line} line - missing space before email"),
194 ));
195 }
196 p += 1; let email_start = p;
199 while p < ident_end {
200 if data[p] == b'<' || data[p] == b'\n' {
201 return Err(FsckError::new(
202 "badEmail",
203 format!("invalid {oid_line} line - bad email"),
204 ));
205 }
206 if data[p] == b'>' {
207 break;
208 }
209 p += 1;
210 }
211
212 if p >= ident_end || p == email_start {
213 return Err(FsckError::new(
214 "badEmail",
215 format!("invalid {oid_line} line - bad email"),
216 ));
217 }
218 p += 1; if p >= ident_end || data[p] != b' ' {
221 return Err(FsckError::new(
222 "missingSpaceBeforeDate",
223 format!("invalid {oid_line} line - missing space before date"),
224 ));
225 }
226 p += 1;
227
228 while p < ident_end && (data[p] == b' ' || data[p] == b'\t') {
229 p += 1;
230 }
231
232 if p >= ident_end || !data[p].is_ascii_digit() {
233 return Err(FsckError::new(
234 "badDate",
235 format!("invalid {oid_line} line - bad date"),
236 ));
237 }
238
239 if data[p] == b'0' && p + 1 < ident_end && data[p + 1] != b' ' {
240 return Err(FsckError::new(
241 "zeroPaddedDate",
242 format!("invalid {oid_line} line - zero-padded date"),
243 ));
244 }
245
246 let ts_start = p;
247 while p < ident_end && data[p].is_ascii_digit() {
248 p += 1;
249 }
250 let ts_len = p - ts_start;
251 if ts_len > 21 {
252 return Err(FsckError::new(
253 "badDateOverflow",
254 format!("invalid {oid_line} line - date causes integer overflow"),
255 ));
256 }
257 let ts_str = std::str::from_utf8(&data[ts_start..p])
258 .map_err(|_| FsckError::new("badDate", format!("invalid {oid_line} line - bad date")))?;
259 let raw: u128 = ts_str
260 .parse()
261 .map_err(|_| FsckError::new("badDate", format!("invalid {oid_line} line - bad date")))?;
262 if raw > u64::MAX as u128 || date_overflows(raw as u64) {
263 return Err(FsckError::new(
264 "badDateOverflow",
265 format!("invalid {oid_line} line - date causes integer overflow"),
266 ));
267 }
268
269 if p >= ident_end || data[p] != b' ' {
270 return Err(FsckError::new(
271 "badDate",
272 format!("invalid {oid_line} line - bad date"),
273 ));
274 }
275 p += 1;
276
277 if p + 5 > ident_end
278 || (data[p] != b'+' && data[p] != b'-')
279 || !data[p + 1..p + 5].iter().all(|b| b.is_ascii_digit())
280 || data[p + 5] != b'\n'
281 {
282 return Err(FsckError::new(
283 "badTimezone",
284 format!("invalid {oid_line} line - bad time zone"),
285 ));
286 }
287
288 Ok(line_end + 1)
289}
290
291fn fsck_commit(data: &[u8]) -> Result<(), FsckError> {
292 verify_headers(data, "nulInHeader")?;
293
294 let buffer_end = data.len();
295 let mut i = 0usize;
296
297 if i >= buffer_end || !data[i..].starts_with(b"tree ") {
298 return Err(FsckError::new(
299 "missingTree",
300 "invalid format - expected 'tree' line",
301 ));
302 }
303 i += 5;
304 let n = parse_oid_line(&data[i..], "badTreeSha1")?;
305 i += n;
306
307 while i < buffer_end && data[i..].starts_with(b"parent ") {
308 i += 7;
309 let n = parse_oid_line(&data[i..], "badParentSha1")?;
310 i += n;
311 }
312
313 let mut author_count = 0usize;
314 while i < buffer_end && data[i..].starts_with(b"author ") {
315 author_count += 1;
316 i += 7;
317 i = fsck_ident(data, i, buffer_end, "author/committer")?;
318 }
319
320 if author_count < 1 {
321 return Err(FsckError::new(
322 "missingAuthor",
323 "invalid format - expected 'author' line",
324 ));
325 }
326 if author_count > 1 {
327 return Err(FsckError::new(
328 "multipleAuthors",
329 "invalid format - multiple 'author' lines",
330 ));
331 }
332
333 if i >= buffer_end || !data[i..].starts_with(b"committer ") {
334 return Err(FsckError::new(
335 "missingCommitter",
336 "invalid format - expected 'committer' line",
337 ));
338 }
339 i += 10;
340 fsck_ident(data, i, buffer_end, "author/committer")?;
341
342 if data.contains(&0) {
343 return Err(FsckError::new(
344 "nulInCommit",
345 "NUL byte in the commit object body",
346 ));
347 }
348
349 Ok(())
350}
351
352fn object_type_from_tag_type_line(s: &str) -> Option<ObjectKind> {
353 match s {
354 "blob" => Some(ObjectKind::Blob),
355 "tree" => Some(ObjectKind::Tree),
356 "commit" => Some(ObjectKind::Commit),
357 "tag" => Some(ObjectKind::Tag),
358 _ => None,
359 }
360}
361
362fn fsck_tag(data: &[u8]) -> Result<(), FsckError> {
363 verify_headers(data, "nulInHeader")?;
364
365 let buffer_end = data.len();
366 let mut i = 0usize;
367
368 if i >= buffer_end || !data[i..].starts_with(b"object ") {
369 return Err(FsckError::new(
370 "missingObject",
371 "invalid format - expected 'object' line",
372 ));
373 }
374 i += 7;
375 let n = parse_oid_line(&data[i..], "badObjectSha1")?;
376 i += n;
377
378 if i >= buffer_end || !data[i..].starts_with(b"type ") {
379 return Err(FsckError::new(
380 "missingTypeEntry",
381 "invalid format - expected 'type' line",
382 ));
383 }
384 i += 5;
385 let type_start = i;
386 let eol = data[type_start..buffer_end]
387 .iter()
388 .position(|&b| b == b'\n')
389 .map(|rel| type_start + rel)
390 .ok_or_else(|| {
391 FsckError::new(
392 "missingType",
393 "invalid format - unexpected end after 'type' line",
394 )
395 })?;
396
397 let type_str = std::str::from_utf8(&data[type_start..eol])
398 .map_err(|_| FsckError::new("badType", "invalid 'type' value"))?;
399 if object_type_from_tag_type_line(type_str).is_none() {
400 return Err(FsckError::new("badType", "invalid 'type' value"));
401 }
402 i = eol + 1;
403
404 if i >= buffer_end || !data[i..].starts_with(b"tag ") {
405 return Err(FsckError::new(
406 "missingTagEntry",
407 "invalid format - expected 'tag' line",
408 ));
409 }
410 i += 4;
411 let tag_start = i;
412 let eol = data[tag_start..buffer_end]
413 .iter()
414 .position(|&b| b == b'\n')
415 .map(|rel| tag_start + rel)
416 .ok_or_else(|| {
417 FsckError::new(
418 "missingTag",
419 "invalid format - unexpected end after 'type' line",
420 )
421 })?;
422
423 let tag_name = std::str::from_utf8(&data[tag_start..eol])
424 .map_err(|_| FsckError::new("badTagName", "invalid 'tag' name"))?;
425 let refname = format!("refs/tags/{tag_name}");
426 if check_refname_format(&refname, &RefNameOptions::default()).is_err() {
427 return Err(FsckError::new(
428 "badTagName",
429 format!("invalid 'tag' name: {tag_name}"),
430 ));
431 }
432 i = eol + 1;
433
434 if i >= buffer_end || !data[i..].starts_with(b"tagger ") {
435 return Err(FsckError::new(
436 "missingTaggerEntry",
437 "invalid format - expected 'tagger' line",
438 ));
439 }
440 i += 7;
441 fsck_ident(data, i, buffer_end, "author/committer")?;
442
443 Ok(())
444}
445
446fn fsck_tree(data: &[u8]) -> Result<(), FsckError> {
447 if parse_tree_gently(data).is_err() {
448 return Err(FsckError::new("badTree", "cannot be parsed as a tree"));
449 }
450 Ok(())
451}
452
453fn parse_tree_gently(data: &[u8]) -> Result<(), ()> {
454 let mut pos = 0usize;
455 while pos < data.len() {
456 let sp = data[pos..].iter().position(|&b| b == b' ').ok_or(())?;
457 let mode_bytes = &data[pos..pos + sp];
458 let mode_ok = std::str::from_utf8(mode_bytes)
459 .ok()
460 .and_then(|s| u32::from_str_radix(s, 8).ok())
461 .is_some();
462 if !mode_ok {
463 return Err(());
464 }
465 pos += sp + 1;
466
467 let nul = data[pos..].iter().position(|&b| b == 0).ok_or(())?;
468 pos += nul + 1;
469
470 if pos + 20 > data.len() {
471 return Err(());
472 }
473 if ObjectId::from_bytes(&data[pos..pos + 20]).is_err() {
474 return Err(());
475 }
476 pos += 20;
477 }
478 Ok(())
479}
480
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the checker on `raw` and returns the id of the error it must report.
    fn failure_id(kind: ObjectKind, raw: &[u8]) -> &'static str {
        fsck_object(kind, raw).unwrap_err().id
    }

    /// An empty commit has no LF-terminated header at all.
    #[test]
    fn empty_commit_is_unterminated_header() {
        assert_eq!(failure_id(ObjectKind::Commit, b""), "unterminatedHeader");
    }

    /// A commit consisting only of a blank line lacks the leading `tree` header.
    #[test]
    fn commit_missing_tree_matches_git() {
        assert_eq!(failure_id(ObjectKind::Commit, b"\n\n"), "missingTree");
    }

    /// A tree entry whose object id is cut short cannot be parsed.
    #[test]
    fn tree_truncated_is_bad_tree() {
        assert_eq!(
            failure_id(ObjectKind::Tree, b"100644 foo\0\x01\x01\x01\x01"),
            "badTree"
        );
    }
}