use std::collections::HashSet;
use crate::error::VcsError;
use crate::hash::ObjectId;
use crate::object::Object;
use crate::store::Store;
/// Options controlling a garbage-collection run via `gc_with_options`.
#[derive(Clone, Debug, Default)]
pub struct GcOptions {
/// When `true`, `gc_with_options` only computes which objects are
/// unreachable and reports them; nothing is deleted from the store.
pub dry_run: bool,
}
/// Summary of a garbage-collection pass.
#[derive(Clone, Debug, Default)]
pub struct GcReport {
/// Number of object ids in the reachable set computed by `mark_reachable`.
pub reachable: usize,
/// Ids of objects that were deleted by `gc`, or that would be deleted in a
/// dry run of `gc_with_options`. `gc_report` always leaves this empty.
pub deleted: Vec<ObjectId>,
}
/// Computes the set of object ids reachable from `roots`.
///
/// The walk is an iterative depth-first traversal driven by an explicit
/// stack. Every id taken off the stack is recorded in the result *before*
/// the store is consulted, so an id that is referenced but absent from the
/// store still appears in the returned set; it simply contributes no
/// outgoing references.
///
/// References followed per object kind:
/// - commits: schema, optional migration, parents, optional protocol,
///   data sets and complements;
/// - migrations: source and target;
/// - tags: the tag target;
/// - data sets: their schema;
/// - complements: their migration and data set;
/// - schemas, protocols and expressions: nothing.
///
/// # Errors
///
/// Propagates any error returned by `store.get` for an object the store
/// reports as present.
pub fn mark_reachable(
    store: &dyn Store,
    roots: &[ObjectId],
) -> Result<HashSet<ObjectId>, VcsError> {
    let mut visited = HashSet::new();
    let mut pending: Vec<ObjectId> = roots.to_vec();

    while let Some(current) = pending.pop() {
        // Skip ids already seen, and ids the store does not hold. The insert
        // runs first, so a missing id is still recorded as visited.
        if !visited.insert(current) || !store.has(&current) {
            continue;
        }

        // Children are queued in a fixed order so the traversal order stays
        // deterministic for a given store.
        match store.get(&current)? {
            Object::Commit(commit) => {
                pending.push(commit.schema_id);
                pending.extend(commit.migration_id);
                pending.extend(commit.parents);
                pending.extend(commit.protocol_id);
                pending.extend(commit.data_ids);
                pending.extend(commit.complement_ids);
            }
            Object::Migration { src, tgt, .. } => {
                pending.push(src);
                pending.push(tgt);
            }
            Object::Tag(tag) => pending.push(tag.target),
            Object::DataSet(dataset) => pending.push(dataset.schema_id),
            Object::Complement(complement) => {
                pending.push(complement.migration_id);
                pending.push(complement.data_id);
            }
            // These object kinds hold no outgoing references that are
            // followed here.
            Object::Schema(_) | Object::Protocol(_) | Object::Expr(_) => {}
        }
    }

    Ok(visited)
}
pub fn collect_roots(store: &dyn Store) -> Result<Vec<ObjectId>, VcsError> {
let mut roots = Vec::new();
if let Some(id) = crate::store::resolve_head(store)? {
roots.push(id);
}
for (_, id) in store.list_refs("refs/heads/")? {
roots.push(id);
}
for (_, id) in store.list_refs("refs/tags/")? {
roots.push(id);
}
if let Some(id) = store.get_ref("refs/stash")? {
roots.push(id);
}
roots.dedup();
Ok(roots)
}
pub fn gc(store: &mut dyn Store) -> Result<GcReport, VcsError> {
let roots = collect_roots(store)?;
let reachable = mark_reachable(store, &roots)?;
let all_objects = store.list_objects()?;
let mut deleted = Vec::new();
for id in all_objects {
if !reachable.contains(&id) {
store.delete_object(&id)?;
deleted.push(id);
}
}
Ok(GcReport {
reachable: reachable.len(),
deleted,
})
}
/// Runs garbage collection according to `options`.
///
/// With `options.dry_run` unset this is exactly `gc`. With it set, the
/// unreachable objects are computed and returned in `GcReport::deleted`,
/// but nothing is removed from the store.
///
/// # Errors
///
/// Propagates errors from root collection, marking, listing and (outside a
/// dry run) deletion.
pub fn gc_with_options(store: &mut dyn Store, options: &GcOptions) -> Result<GcReport, VcsError> {
    if !options.dry_run {
        return gc(store);
    }

    let root_ids = collect_roots(store)?;
    let live = mark_reachable(store, &root_ids)?;

    // Dry run: record what a real sweep would remove, but leave it in place.
    let mut would_delete = Vec::new();
    for id in store.list_objects()? {
        if !live.contains(&id) {
            would_delete.push(id);
        }
    }

    Ok(GcReport {
        reachable: live.len(),
        deleted: would_delete,
    })
}
/// Reports how many object ids are reachable, without modifying the store.
///
/// Only the `reachable` count is filled in; `deleted` is always empty and
/// unreachable objects are not enumerated.
///
/// # Errors
///
/// Propagates errors from root collection and marking.
pub fn gc_report(store: &dyn Store) -> Result<GcReport, VcsError> {
    let root_ids = collect_roots(store)?;
    let live_count = mark_reachable(store, &root_ids)?.len();
    Ok(GcReport {
        reachable: live_count,
        deleted: Vec::new(),
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::MemStore;
    use crate::error::VcsError;
    use crate::object::CommitObject;

    /// Builds a schema for protocol `"test"` in which every map is empty.
    fn empty_schema() -> panproto_schema::Schema {
        use std::collections::HashMap;

        panproto_schema::Schema {
            protocol: "test".into(),
            vertices: HashMap::new(),
            edges: HashMap::new(),
            hyper_edges: HashMap::new(),
            constraints: HashMap::new(),
            required: HashMap::new(),
            nsids: HashMap::new(),
            variants: HashMap::new(),
            orderings: HashMap::new(),
            recursion_points: HashMap::new(),
            spans: HashMap::new(),
            usage_modes: HashMap::new(),
            nominal: HashMap::new(),
            coercions: HashMap::new(),
            mergers: HashMap::new(),
            defaults: HashMap::new(),
            policies: HashMap::new(),
            outgoing: HashMap::new(),
            incoming: HashMap::new(),
            between: HashMap::new(),
        }
    }

    /// Fixture: a parentless commit on `schema_id` with timestamp 100,
    /// message "initial", and no migration, protocol, data or complements.
    /// Tests override individual fields with struct-update syntax.
    fn root_commit(schema_id: ObjectId) -> CommitObject {
        CommitObject {
            schema_id,
            parents: vec![],
            migration_id: None,
            protocol: "test".into(),
            author: "test".into(),
            timestamp: 100,
            message: "initial".into(),
            renames: vec![],
            protocol_id: None,
            data_ids: vec![],
            complement_ids: vec![],
        }
    }

    #[test]
    fn mark_reachable_follows_commits() -> Result<(), VcsError> {
        let mut store = MemStore::new();
        let schema_id = store.put(&Object::Schema(Box::new(empty_schema())))?;

        let c0_id = store.put(&Object::Commit(root_commit(schema_id)))?;
        let c1 = CommitObject {
            parents: vec![c0_id],
            timestamp: 200,
            message: "second".into(),
            ..root_commit(schema_id)
        };
        let c1_id = store.put(&Object::Commit(c1))?;

        // Marking from the tip must reach the parent and the shared schema.
        let reachable = mark_reachable(&store, &[c1_id])?;
        assert!(reachable.contains(&c1_id));
        assert!(reachable.contains(&c0_id));
        assert!(reachable.contains(&schema_id));
        Ok(())
    }

    #[test]
    fn gc_deletes_unreachable() -> Result<(), VcsError> {
        let mut store = MemStore::new();
        let schema_id = store.put(&Object::Schema(Box::new(empty_schema())))?;
        let c0_id = store.put(&Object::Commit(root_commit(schema_id)))?;
        store.set_ref("refs/heads/main", c0_id)?;

        // A commit that no ref points at.
        let orphan_schema_id = store.put(&Object::Schema(Box::new(empty_schema())))?;
        let orphan = CommitObject {
            timestamp: 300,
            message: "orphan".into(),
            ..root_commit(orphan_schema_id)
        };
        let orphan_id = store.put(&Object::Commit(orphan))?;
        assert!(store.has(&orphan_id));

        let report = gc(&mut store)?;
        assert_eq!(report.reachable, 2);
        assert!(report.deleted.contains(&orphan_id));
        assert!(!store.has(&orphan_id));
        Ok(())
    }

    #[test]
    fn gc_report_counts_reachable() -> Result<(), VcsError> {
        let mut store = MemStore::new();
        let schema_id = store.put(&Object::Schema(Box::new(empty_schema())))?;
        let c0_id = store.put(&Object::Commit(root_commit(schema_id)))?;
        store.set_ref("refs/heads/main", c0_id)?;

        let report = gc_report(&store)?;
        assert_eq!(report.reachable, 2);
        Ok(())
    }

    #[test]
    fn gc_marks_data_complement_protocol_reachable() -> Result<(), VcsError> {
        use crate::object::{ComplementObject, DataSetObject};

        let mut store = MemStore::new();
        let schema_id = store.put(&Object::Schema(Box::new(empty_schema())))?;

        let protocol = panproto_schema::Protocol {
            name: "test-proto".into(),
            ..Default::default()
        };
        let protocol_id = store.put(&Object::Protocol(Box::new(protocol)))?;

        let data_id = store.put(&Object::DataSet(DataSetObject {
            schema_id,
            data: vec![1, 2, 3],
            record_count: 1,
        }))?;

        // The migration id here is never stored in `store`.
        let complement_id = store.put(&Object::Complement(ComplementObject {
            migration_id: ObjectId::from_bytes([99; 32]),
            data_id,
            complement: vec![4, 5, 6],
        }))?;

        let c0 = CommitObject {
            protocol_id: Some(protocol_id),
            data_ids: vec![data_id],
            complement_ids: vec![complement_id],
            ..root_commit(schema_id)
        };
        let c0_id = store.put(&Object::Commit(c0))?;
        store.set_ref("refs/heads/main", c0_id)?;

        let reachable = mark_reachable(&store, &[c0_id])?;
        assert!(reachable.contains(&protocol_id));
        assert!(reachable.contains(&data_id));
        assert!(reachable.contains(&complement_id));
        assert!(reachable.contains(&schema_id));
        Ok(())
    }
}