use std::io::Read;
use std::path::Path;
use git_lfs_git::AttrSet;
use git_lfs_git::cat_file::CatFileBatch;
use git_lfs_git::scanner::{scan_pointers, scan_tree_blobs};
use git_lfs_pointer::{MAX_POINTER_SIZE, Oid, Pointer};
use git_lfs_store::Store;
use sha2::{Digest, Sha256};
use crate::fetch::fetch_filter_set;
/// Reports whether `cwd` is inside a Git repository.
///
/// The answer is simply whether `git_lfs_git::git_dir` succeeds for `cwd`;
/// the directory it resolves (or the error it reports) is discarded.
fn is_in_git_repo(cwd: &Path) -> bool {
    let lookup = git_lfs_git::git_dir(cwd);
    lookup.is_ok()
}
/// Errors that can abort an fsck run.
///
/// The first three variants forward the wrapped error's own message
/// (`#[error(transparent)]`) and are created implicitly by `?` through the
/// generated `From` impls.
#[derive(Debug, thiserror::Error)]
pub enum FsckError {
    /// An error reported by the `git_lfs_git` crate.
    #[error(transparent)]
    Git(#[from] git_lfs_git::Error),

    /// A filesystem or other I/O failure.
    #[error(transparent)]
    Io(#[from] std::io::Error),

    /// An error raised by the shared helpers in `crate::fetch`.
    #[error(transparent)]
    Fetch(#[from] crate::fetch::FetchCommandError),

    /// A free-form message, displayed verbatim.
    #[error("{0}")]
    Other(String),
}
/// Selects which checks an fsck run performs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// Only verify the stored objects referenced by pointers.
    Objects,
    /// Only verify that LFS-tracked blobs in the tree are valid pointers.
    Pointers,
    /// Perform both the object and the pointer checks.
    Both,
}
/// Settings that control a single fsck run.
#[derive(Debug, Clone)]
pub struct Options {
    /// Which checks to perform.
    pub mode: Mode,
    /// When `true`, problems are reported but corrupt objects are left where
    /// they are instead of being moved into the store's `bad` directory.
    pub dry_run: bool,
}
/// Checks the LFS objects and/or pointers reachable from `refspec`.
///
/// `refspec` defaults to `HEAD` when `None`. Depending on `opts.mode`:
///
/// * **Objects** – every pointer found by `scan_pointers` whose path passes
///   the `lfs.fetchinclude` / `lfs.fetchexclude` filters is checked with
///   `verify_object`. Missing and corrupt objects are reported on stdout and
///   remembered for repair.
/// * **Pointers** – every non-symlink tree blob that the tree's
///   `.gitattributes` mark as LFS-tracked must parse as a pointer, and that
///   pointer must be canonical. Violations are reported on stdout.
///
/// Unless `opts.dry_run` is set, objects that failed verification are moved
/// into `<store root>/bad`.
///
/// # Returns
///
/// * `Ok(128)` when `cwd` is not inside a Git repository.
/// * `Ok(0)` when no problem was found.
/// * `Ok(1)` when at least one problem was reported.
///
/// # Errors
///
/// Returns `FsckError::Other` when the ref cannot be resolved, and forwards
/// any Git, I/O or fetch-helper error hit along the way.
pub fn run(cwd: &Path, refspec: Option<&str>, opts: &Options) -> Result<i32, FsckError> {
    if !is_in_git_repo(cwd) {
        println!("Not in a Git repository.");
        return Ok(128);
    }

    let lfs_root = git_lfs_git::lfs_dir(cwd)?;
    let store = Store::new(lfs_root);

    let r = refspec.unwrap_or("HEAD");
    if !crate::fetch::is_resolvable_ref(cwd, r) {
        let message = format!("Git can't resolve ref: {r:?}");
        return Err(FsckError::Other(message));
    }

    // Spell the mode out exhaustively so a new variant forces a decision here.
    let (check_objects, check_pointers) = match opts.mode {
        Mode::Objects => (true, false),
        Mode::Pointers => (false, true),
        Mode::Both => (true, true),
    };

    let mut damaged: Vec<Oid> = Vec::new();
    let mut non_canonical_count: usize = 0;
    let mut unexpected_count: usize = 0;

    // ---- Object pass: hash every referenced object in the local store. ----
    if check_objects {
        let include_set = fetch_filter_set(cwd, "lfs.fetchinclude")?;
        let exclude_set = fetch_filter_set(cwd, "lfs.fetchexclude")?;
        let pointers = scan_pointers(cwd, &[r], &[])?;

        for entry in &pointers {
            let selected = crate::fetch::path_passes_filter(
                entry.path.as_deref(),
                &include_set,
                &exclude_set,
            );
            if !selected {
                continue;
            }

            // Prefer the path for display; fall back to the OID when the
            // scanner did not record one.
            let display_name = || match entry.path.as_deref() {
                Some(p) => p.display().to_string(),
                None => entry.oid.to_string(),
            };

            match verify_object(&store, entry.oid, entry.size)? {
                ObjectVerify::Ok => continue,
                ObjectVerify::Missing => {
                    let name = display_name();
                    println!(
                        "objects: openError: {name} ({}) could not be checked: no such file",
                        entry.oid
                    );
                }
                ObjectVerify::Corrupt => {
                    let name = display_name();
                    println!("objects: corruptObject: {name} ({}) is corrupt", entry.oid);
                }
            }
            // Only the two failure arms fall through to here.
            damaged.push(entry.oid);
        }
    }

    // ---- Pointer pass: tracked blobs must be well-formed pointers. ----
    if check_pointers {
        let blobs = scan_tree_blobs(cwd, r)?;
        let mut batch = CatFileBatch::spawn(cwd)?;
        let attrs = build_tree_attrs(cwd, &blobs, &mut batch)?;

        for blob in &blobs {
            // Mode 120000 is a symlink; those are never pointers.
            if blob.mode == "120000" {
                continue;
            }

            let path_str = blob.path.to_string_lossy().replace('\\', "/");
            if !attrs.is_lfs_tracked(&path_str) {
                continue;
            }

            // Too large to be a pointer: no need to read the blob at all.
            if (blob.size as usize) >= MAX_POINTER_SIZE {
                println!(
                    "pointer: unexpectedGitObject: \"{path_str}\" (treeish {}) should have been a pointer but was not",
                    blob.blob_oid,
                );
                unexpected_count += 1;
                continue;
            }

            let content = match batch.read(&blob.blob_oid)? {
                Some(found) => found,
                None => continue,
            };

            match Pointer::parse(&content.content) {
                Ok(p) => {
                    if !p.canonical {
                        println!(
                            "pointer: nonCanonicalPointer: Pointer for {} (blob {}) was not canonical",
                            p.oid, blob.blob_oid,
                        );
                        non_canonical_count += 1;
                    }
                }
                Err(_) => {
                    println!(
                        "pointer: unexpectedGitObject: \"{path_str}\" (treeish {}) should have been a pointer but was not",
                        blob.blob_oid,
                    );
                    unexpected_count += 1;
                }
            }
        }
    }

    if damaged.is_empty() && non_canonical_count == 0 && unexpected_count == 0 {
        println!("Git LFS fsck OK");
        return Ok(0);
    }

    // Repair only applies to damaged objects, and never during a dry run.
    let should_repair = !opts.dry_run && !damaged.is_empty();
    if !should_repair {
        return Ok(1);
    }

    let bad_dir = store.root().join("bad");
    println!(
        "objects: repair: moving corrupt objects to {}",
        bad_dir.display()
    );
    std::fs::create_dir_all(&bad_dir)?;

    for oid in &damaged {
        let src = store.object_path(*oid);
        let dst = bad_dir.join(oid.to_string());
        if let Err(e) = std::fs::rename(&src, &dst) {
            // A missing source (object never existed locally, or the same OID
            // was already moved) is not an error.
            if e.kind() != std::io::ErrorKind::NotFound {
                return Err(FsckError::Io(e));
            }
        }
    }

    Ok(1)
}
/// Outcome of verifying one stored object against its pointer.
#[derive(Debug, PartialEq, Eq)]
enum ObjectVerify {
    /// The object needs no attention.
    Ok,
    /// The object file does not exist in the store.
    Missing,
    /// The object file exists but its length or SHA-256 digest is wrong.
    Corrupt,
}
/// Builds an attribute set from the `.gitattributes` blobs listed in `blobs`.
///
/// Every blob whose file name is `.gitattributes` is read through `batch` and
/// added to the set, anchored at the directory that contains it (with `\`
/// rewritten to `/`; the empty string when the path has no parent). Files are
/// applied in order of increasing path depth. Blobs for which `batch` returns
/// nothing are skipped.
///
/// `cwd` is not used by the body; the binding below only marks it as
/// intentionally ignored.
///
/// # Errors
///
/// A failing `batch.read` is wrapped with `std::io::Error::other` and returned.
fn build_tree_attrs(
    cwd: &Path,
    blobs: &[git_lfs_git::scanner::TreeBlob],
    batch: &mut CatFileBatch,
) -> std::io::Result<AttrSet> {
    let mut attrs = AttrSet::empty();
    let _ = cwd;

    // Pick out the attribute files, then order them shallowest-first.
    let mut attr_files: Vec<&git_lfs_git::scanner::TreeBlob> = Vec::new();
    for candidate in blobs {
        let is_attr_file = candidate
            .path
            .file_name()
            .is_some_and(|n| n == ".gitattributes");
        if is_attr_file {
            attr_files.push(candidate);
        }
    }
    attr_files.sort_by_key(|b| b.path.components().count());

    for file in attr_files {
        let loaded = batch
            .read(&file.blob_oid)
            .map_err(std::io::Error::other)?;
        if let Some(content) = loaded {
            let dir = match file.path.parent() {
                Some(p) => p.to_string_lossy().replace('\\', "/"),
                None => String::new(),
            };
            attrs.add_buffer_at(&content.content, &dir);
        }
    }

    Ok(attrs)
}
/// Verifies that the object `oid` in `store` exists and matches its pointer.
///
/// The object's bytes are streamed through SHA-256 while being counted, then
/// both the byte count and the digest are compared with `size` and `oid`.
///
/// # Returns
///
/// * `ObjectVerify::Ok` when `oid` is `Oid::EMPTY`, when the object file is
///   absent but `size` is zero, or when both length and digest match.
/// * `ObjectVerify::Missing` when the object file is absent and `size` is
///   non-zero.
/// * `ObjectVerify::Corrupt` when the length or the digest differs.
///
/// # Errors
///
/// Returns any error from opening the object other than `NotFound`, and any
/// read error other than `Interrupted` (interrupted reads are retried).
fn verify_object(store: &Store, oid: Oid, size: u64) -> std::io::Result<ObjectVerify> {
    // The empty object is accepted without touching the store.
    if oid == Oid::EMPTY {
        return Ok(ObjectVerify::Ok);
    }

    let mut file = match store.open(oid) {
        Ok(f) => f,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            // A zero-length object that is absent is not reported as missing.
            if size == 0 {
                return Ok(ObjectVerify::Ok);
            }
            return Ok(ObjectVerify::Missing);
        }
        Err(e) => return Err(e),
    };

    let mut hasher = Sha256::new();
    let mut buf = [0u8; 64 * 1024];
    let mut total: u64 = 0;
    loop {
        let n = match file.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            // `Read::read` documents `Interrupted` as non-fatal: retry rather
            // than aborting the whole fsck because a signal arrived mid-read.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        hasher.update(&buf[..n]);
        total += n as u64;
    }

    let computed: [u8; 32] = hasher.finalize().into();
    if total != size || Oid::from_bytes(computed) != oid {
        Ok(ObjectVerify::Corrupt)
    } else {
        Ok(ObjectVerify::Ok)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates a store rooted at `lfs/` inside a fresh temporary directory.
    ///
    /// The `TempDir` is handed back so the caller can keep it alive for the
    /// duration of the test.
    fn fixture() -> (TempDir, Store) {
        let dir = TempDir::new().unwrap();
        let lfs_root = dir.path().join("lfs");
        (dir, Store::new(lfs_root))
    }

    #[test]
    fn verify_object_ok_for_well_formed_object() {
        let (_tmp, store) = fixture();
        let mut payload: &[u8] = b"hello";
        let (oid, size) = store.insert(&mut payload).unwrap();

        let outcome = verify_object(&store, oid, size).unwrap();
        assert_eq!(outcome, ObjectVerify::Ok);
    }

    #[test]
    fn verify_object_missing_for_unknown_oid() {
        let (_tmp, store) = fixture();
        // Nothing was inserted, so this OID cannot be in the store.
        let oid = "1111111111111111111111111111111111111111111111111111111111111111"
            .parse::<Oid>()
            .unwrap();

        let outcome = verify_object(&store, oid, 1).unwrap();
        assert_eq!(outcome, ObjectVerify::Missing);
    }

    #[test]
    fn verify_object_corrupt_when_size_lies() {
        let (_tmp, store) = fixture();
        let mut payload: &[u8] = b"hello";
        let (oid, _) = store.insert(&mut payload).unwrap();

        // The stored content is intact, but the claimed size is wrong.
        let outcome = verify_object(&store, oid, 99).unwrap();
        assert_eq!(outcome, ObjectVerify::Corrupt);
    }

    #[test]
    fn verify_object_corrupt_when_content_was_tampered() {
        let (_tmp, store) = fixture();
        let mut payload: &[u8] = b"hello";
        let (oid, size) = store.insert(&mut payload).unwrap();

        // Same length, different bytes: only the digest can catch this.
        std::fs::write(store.object_path(oid), b"world").unwrap();

        let outcome = verify_object(&store, oid, size).unwrap();
        assert_eq!(outcome, ObjectVerify::Corrupt);
    }

    #[test]
    fn verify_object_handles_empty_oid() {
        let (_tmp, store) = fixture();

        let outcome = verify_object(&store, Oid::EMPTY, 0).unwrap();
        assert_eq!(outcome, ObjectVerify::Ok);
    }
}