use crate::error::Result;
use crate::hash::Hash;
use crate::object::ObjectType;
use crate::store::Store;
use std::collections::HashSet;
use std::fs;
/// Summary of what a garbage-collection pass removed, or would remove in a dry run.
///
/// The `Default` value has both counters at zero, which is the state a pass starts from.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GcStats {
    /// Number of unreachable object files found by the sweep.
    ///
    /// In a dry run the files are counted here but left on disk.
    pub objects_deleted: usize,
    /// Combined on-disk size, in bytes, of the files counted in `objects_deleted`.
    pub bytes_freed: u64,
}
impl Store {
    /// Runs a mark-and-sweep garbage collection over the object store.
    ///
    /// Objects reachable from any ref are kept. Every other object file whose
    /// path parses as a hash is counted in the returned [`GcStats`] and, unless
    /// `dry_run` is `true`, deleted from disk.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while listing refs, reading objects, or
    /// performing the filesystem operations of the sweep.
    pub fn gc(&self, dry_run: bool) -> Result<GcStats> {
        let reachable = self.mark_reachable()?;
        self.sweep(&reachable, dry_run)
    }

    /// Collects the hashes of all objects reachable from the store's refs.
    fn mark_reachable(&self) -> Result<HashSet<Hash>> {
        let mut reachable = HashSet::new();
        let refs = self.refs().list()?;
        for (_name, hash) in refs {
            self.mark_object(&hash, &mut reachable)?;
        }
        Ok(reachable)
    }

    /// Adds `hash`, and everything reachable from it through trees, to `reachable`.
    ///
    /// Objects already in `reachable` are not visited again, and hashes whose
    /// object file does not exist are skipped without error. Only objects whose
    /// header type is [`ObjectType::Tree`] are descended into.
    ///
    /// The traversal uses an explicit worklist rather than recursion, so the
    /// nesting depth of trees is not limited by the call stack.
    fn mark_object(&self, hash: &Hash, reachable: &mut HashSet<Hash>) -> Result<()> {
        let mut pending = vec![*hash];
        while let Some(current) = pending.pop() {
            if reachable.contains(&current) {
                continue;
            }
            let obj_path = self.object_path(&current);
            if !obj_path.exists() {
                continue;
            }
            reachable.insert(current);

            let header = self.read_object_header(&obj_path)?;
            if header.object_type == ObjectType::Tree {
                let tree = self.get_tree(&current)?;
                for entry in tree {
                    pending.push(entry.hash);
                }
            }
        }
        Ok(())
    }

    /// Walks the object directory and removes every object not in `reachable`.
    ///
    /// The directory scanned is `<root>/objects/<algorithm>`; each subdirectory
    /// is treated as a shard, and an object's hash is rebuilt by concatenating
    /// the shard directory name with the file name. Files whose combined name
    /// does not parse as a hash are left untouched.
    ///
    /// When `dry_run` is `true` nothing is deleted, but the returned stats
    /// still describe what would have been removed.
    fn sweep(&self, reachable: &HashSet<Hash>, dry_run: bool) -> Result<GcStats> {
        let mut stats = GcStats {
            objects_deleted: 0,
            bytes_freed: 0,
        };
        let objects_dir = self.root().join("objects").join(self.algorithm().as_str());
        if !objects_dir.exists() {
            return Ok(stats);
        }

        for shard_entry in fs::read_dir(&objects_dir)? {
            let shard_path = shard_entry?.path();
            if !shard_path.is_dir() {
                continue;
            }

            // The shard name is the same for every file in this directory, so
            // resolve it once instead of once per object.
            let prefix = shard_path
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("");

            for obj_entry in fs::read_dir(&shard_path)? {
                let obj_path = obj_entry?.path();
                if !obj_path.is_file() {
                    continue;
                }

                let suffix = obj_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
                let hash_str = format!("{prefix}{suffix}");
                // Anything that is not a well-formed hash is not ours to delete.
                let Ok(hash) = Hash::from_hex(&hash_str) else {
                    continue;
                };
                if reachable.contains(&hash) {
                    continue;
                }

                let metadata = fs::metadata(&obj_path)?;
                stats.bytes_freed += metadata.len();
                stats.objects_deleted += 1;
                if !dry_run {
                    fs::remove_file(&obj_path)?;
                }
            }

            // Best-effort cleanup: drop the shard directory if the sweep left
            // it empty. Failure here is deliberately ignored.
            if !dry_run
                && let Ok(mut entries) = fs::read_dir(&shard_path)
                && entries.next().is_none()
            {
                let _ = fs::remove_dir(&shard_path);
            }
        }
        Ok(stats)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hash::Algorithm;
    use tempfile::TempDir;

    /// Initialises an empty Blake3 store inside a fresh temporary directory.
    ///
    /// The `TempDir` handle is returned together with the store so that it is
    /// not dropped before the calling test finishes.
    fn fresh_store() -> (TempDir, Store) {
        let dir = TempDir::new().unwrap();
        let store = Store::init(dir.path(), Algorithm::Blake3).unwrap();
        (dir, store)
    }

    /// Collecting a store with no objects reports nothing deleted.
    #[test]
    fn test_gc_empty_store() {
        let (_dir, store) = fresh_store();

        let report = store.gc(false).unwrap();

        assert_eq!(report.objects_deleted, 0);
        assert_eq!(report.bytes_freed, 0);
    }

    /// A blob that a ref points at survives collection.
    #[test]
    fn test_gc_with_ref() {
        let (_dir, store) = fresh_store();
        let kept = store.put_blob(b"test data".as_ref()).unwrap();
        store.refs().add("myref", &kept).unwrap();

        let report = store.gc(false).unwrap();

        assert_eq!(report.objects_deleted, 0);
    }

    /// A blob with no ref is removed and accounted for in the stats.
    #[test]
    fn test_gc_unreferenced_blob() {
        let (_dir, store) = fresh_store();
        let orphan = store.put_blob(b"orphan data".as_ref()).unwrap();
        assert!(store.object_path(&orphan).exists());

        let report = store.gc(false).unwrap();

        assert_eq!(report.objects_deleted, 1);
        assert!(report.bytes_freed > 0);
        assert!(!store.object_path(&orphan).exists());
    }

    /// A dry run reports the orphan but leaves it on disk; a real run then removes it.
    #[test]
    fn test_gc_dry_run() {
        let (_dir, store) = fresh_store();
        let orphan = store.put_blob(b"orphan".as_ref()).unwrap();

        let preview = store.gc(true).unwrap();
        assert_eq!(preview.objects_deleted, 1);
        assert!(preview.bytes_freed > 0);
        assert!(store.object_path(&orphan).exists());

        let actual = store.gc(false).unwrap();
        assert_eq!(actual.objects_deleted, 1);
        assert!(!store.object_path(&orphan).exists());
    }

    /// Blobs referenced from a ref'd tree are kept; an unrelated blob is collected.
    #[test]
    fn test_gc_tree_reachability() {
        use crate::tree::{EntryType, TreeEntry, file_modes};

        let (_dir, store) = fresh_store();
        let blob1 = store.put_blob(b"file1".as_ref()).unwrap();
        let blob2 = store.put_blob(b"file2".as_ref()).unwrap();
        let orphan = store.put_blob(b"orphan".as_ref()).unwrap();

        // Builds a regular-file blob entry with the given hash and name.
        let blob_entry = |hash: Hash, name: &str| {
            TreeEntry::new(
                EntryType::Blob,
                file_modes::REGULAR,
                hash,
                name.to_string(),
            )
            .unwrap()
        };
        let entries = vec![blob_entry(blob1, "file1"), blob_entry(blob2, "file2")];
        let tree = store.put_tree(entries).unwrap();
        store.refs().add("mytree", &tree).unwrap();

        let report = store.gc(false).unwrap();

        assert_eq!(report.objects_deleted, 1);
        assert!(store.object_path(&tree).exists());
        assert!(store.object_path(&blob1).exists());
        assert!(store.object_path(&blob2).exists());
        assert!(!store.object_path(&orphan).exists());
    }

    /// An object becomes collectable once the only ref to it is removed.
    #[test]
    fn test_gc_after_ref_removed() {
        let (_dir, store) = fresh_store();
        let blob = store.put_blob(b"data".as_ref()).unwrap();
        store.refs().add("ref1", &blob).unwrap();

        let while_referenced = store.gc(false).unwrap();
        assert_eq!(while_referenced.objects_deleted, 0);

        store.refs().remove("ref1").unwrap();

        let after_removal = store.gc(false).unwrap();
        assert_eq!(after_removal.objects_deleted, 1);
        assert!(!store.object_path(&blob).exists());
    }
}