use std::collections::HashSet;
use crate::error::VcsError;
use crate::hash::ObjectId;
use crate::object::Object;
use crate::store::Store;
/// Settings that control how a garbage-collection run behaves.
///
/// The `Default` value has `dry_run` set to `false`, i.e. a real collection.
#[derive(Debug, Clone, Default)]
pub struct GcOptions {
    /// When `true`, [`gc_with_options`] only computes which objects are
    /// unreachable and reports them; nothing is deleted from the store.
    pub dry_run: bool,
}
/// Summary of a garbage-collection pass.
#[derive(Debug, Clone, Default)]
pub struct GcReport {
    /// Size of the reachable set computed by [`mark_reachable`].
    ///
    /// Ids that are referenced but missing from the store are still counted,
    /// because they are marked before the store is consulted.
    pub reachable: usize,
    /// Ids of the unreachable objects: the ones removed by [`gc`], or the ones
    /// that would be removed for a dry run of [`gc_with_options`]. Always empty
    /// in the report produced by [`gc_report`].
    pub deleted: Vec<ObjectId>,
}
/// Computes the set of object ids reachable from `roots`.
///
/// The object graph is walked with an explicit work stack. Every id taken off
/// the stack is marked first; an id that is not present in the store stays in
/// the returned set but is not expanded any further. Objects that are present
/// are loaded and every id they reference is scheduled for a visit.
///
/// # Errors
///
/// Returns the error from `store.get` if loading a present object fails.
pub fn mark_reachable(
    store: &dyn Store,
    roots: &[ObjectId],
) -> Result<HashSet<ObjectId>, VcsError> {
    let mut visited = HashSet::new();
    let mut pending: Vec<ObjectId> = Vec::from(roots);

    while let Some(current) = pending.pop() {
        // Skip ids we have already handled. Ids the store does not hold are
        // still marked (by the `insert`) but cannot be expanded.
        if !visited.insert(current) || !store.has(&current) {
            continue;
        }

        match store.get(&current)? {
            Object::Commit(commit) => {
                // A commit references its schema, optional migration/protocol,
                // its parents, and all attached data/complement/log/theory ids.
                pending.push(commit.schema_id);
                pending.extend(commit.migration_id);
                pending.extend(commit.parents);
                pending.extend(commit.protocol_id);
                pending.extend(commit.data_ids);
                pending.extend(commit.complement_ids);
                pending.extend(commit.edit_log_ids);
                pending.extend(
                    commit
                        .theory_ids
                        .into_iter()
                        .map(|(_, theory_id)| theory_id),
                );
                pending.extend(commit.cst_complement_ids);
            }
            Object::Migration { src, tgt, .. } => pending.extend([src, tgt]),
            Object::SchemaTree(tree) => match tree.as_ref() {
                crate::object::SchemaTreeObject::SingleLeaf { file_schema_id } => {
                    pending.push(*file_schema_id);
                }
                crate::object::SchemaTreeObject::Directory { .. } => {
                    // Both file and sub-tree entries carry a child id to follow.
                    for (_, entry) in tree.sorted_entries() {
                        match entry {
                            crate::object::SchemaTreeEntry::File(child)
                            | crate::object::SchemaTreeEntry::Tree(child) => pending.push(*child),
                        }
                    }
                }
            },
            Object::Tag(tag) => pending.push(tag.target),
            Object::DataSet(dataset) => pending.push(dataset.schema_id),
            Object::Complement(complement) => {
                pending.extend([complement.migration_id, complement.data_id]);
            }
            Object::EditLog(edit_log) => {
                pending.extend([
                    edit_log.schema_id,
                    edit_log.data_id,
                    edit_log.final_complement,
                ]);
            }
            // These variants are not followed any further by this traversal.
            Object::Protocol(_)
            | Object::Expr(_)
            | Object::Theory(_)
            | Object::TheoryMorphism(_)
            | Object::CstComplement(_)
            | Object::FileSchema(_)
            | Object::FlatSchema(_) => {}
        }
    }

    Ok(visited)
}
pub fn collect_roots(store: &dyn Store) -> Result<Vec<ObjectId>, VcsError> {
let mut roots = Vec::new();
if let Some(id) = crate::store::resolve_head(store)? {
roots.push(id);
}
for (_, id) in store.list_refs("refs/heads/")? {
roots.push(id);
}
for (_, id) in store.list_refs("refs/tags/")? {
roots.push(id);
}
if let Some(id) = store.get_ref("refs/stash")? {
roots.push(id);
}
roots.dedup();
Ok(roots)
}
/// Runs a full garbage collection on `store`.
///
/// Computes the reachable set from the roots returned by [`collect_roots`],
/// then deletes every object listed by the store that is not in that set.
///
/// # Errors
///
/// Propagates the first error from root collection, traversal, object
/// listing, or deletion. Objects deleted before a failing deletion stay
/// deleted; no report is returned in that case.
pub fn gc(store: &mut dyn Store) -> Result<GcReport, VcsError> {
    let roots = collect_roots(store)?;
    let live = mark_reachable(store, &roots)?;
    let candidates = store.list_objects()?;

    let mut deleted = Vec::new();
    for id in candidates {
        if live.contains(&id) {
            continue;
        }
        store.delete_object(&id)?;
        deleted.push(id);
    }

    let reachable = live.len();
    Ok(GcReport { reachable, deleted })
}
/// Runs garbage collection according to `options`.
///
/// Without `dry_run` this simply delegates to [`gc`]. With `dry_run` set, the
/// reachable set is computed the same way but nothing is deleted; the
/// report's `deleted` field then lists the objects that are not reachable.
///
/// # Errors
///
/// Propagates any error from root collection, traversal, object listing, or
/// (for a real run) deletion.
pub fn gc_with_options(store: &mut dyn Store, options: &GcOptions) -> Result<GcReport, VcsError> {
    if !options.dry_run {
        return gc(store);
    }

    let roots = collect_roots(store)?;
    let live = mark_reachable(store, &roots)?;

    // Dry run: record the unreachable ids without touching the store.
    let mut deleted: Vec<ObjectId> = Vec::new();
    for id in store.list_objects()? {
        if !live.contains(&id) {
            deleted.push(id);
        }
    }

    Ok(GcReport {
        reachable: live.len(),
        deleted,
    })
}
/// Reports how many ids are reachable from the current roots without
/// modifying the store.
///
/// Unreachable objects are not enumerated here: the returned report's
/// `deleted` list is always empty.
///
/// # Errors
///
/// Propagates any error from root collection or traversal.
pub fn gc_report(store: &dyn Store) -> Result<GcReport, VcsError> {
    let roots = collect_roots(store)?;
    let reachable = mark_reachable(store, &roots)?.len();
    Ok(GcReport {
        reachable,
        deleted: Vec::new(),
    })
}
// Unit tests for the garbage collector, run against the in-memory `MemStore`.
#[cfg(test)]
mod tests {
use super::*;
use crate::MemStore;
use crate::error::VcsError;
use crate::object::CommitObject;
// Builds a schema for protocol "test" in which every collection field is empty.
fn empty_schema() -> panproto_schema::Schema {
panproto_schema::Schema {
protocol: "test".into(),
vertices: std::collections::HashMap::new(),
edges: std::collections::HashMap::new(),
hyper_edges: std::collections::HashMap::new(),
constraints: std::collections::HashMap::new(),
required: std::collections::HashMap::new(),
nsids: std::collections::HashMap::new(),
entries: Vec::new(),
variants: std::collections::HashMap::new(),
orderings: std::collections::HashMap::new(),
recursion_points: std::collections::HashMap::new(),
spans: std::collections::HashMap::new(),
usage_modes: std::collections::HashMap::new(),
nominal: std::collections::HashMap::new(),
coercions: std::collections::HashMap::new(),
mergers: std::collections::HashMap::new(),
defaults: std::collections::HashMap::new(),
policies: std::collections::HashMap::new(),
outgoing: std::collections::HashMap::new(),
incoming: std::collections::HashMap::new(),
between: std::collections::HashMap::new(),
}
}
// Starting from the newer of two chained commits, the traversal must reach
// the commit itself, its parent, and the schema both commits point at.
#[test]
fn mark_reachable_follows_commits() -> Result<(), VcsError> {
let mut store = MemStore::new();
let schema_id = crate::tree::store_schema_as_tree(&mut store, empty_schema())?;
let c0 = CommitObject::builder(schema_id, "test", "test", "initial")
.timestamp(100)
.build();
let c0_id = store.put(&Object::Commit(c0))?;
// c1 has c0 as its only parent.
let c1 = CommitObject::builder(schema_id, "test", "test", "second")
.parents(vec![c0_id])
.timestamp(200)
.build();
let c1_id = store.put(&Object::Commit(c1))?;
let reachable = mark_reachable(&store, &[c1_id])?;
assert!(reachable.contains(&c1_id));
assert!(reachable.contains(&c0_id));
assert!(reachable.contains(&schema_id));
Ok(())
}
// A commit that no ref points at must be deleted by `gc`, while the commit
// behind `refs/heads/main` is kept.
#[test]
fn gc_deletes_unreachable() -> Result<(), VcsError> {
let mut store = MemStore::new();
let schema_id = crate::tree::store_schema_as_tree(&mut store, empty_schema())?;
let c0 = CommitObject::builder(schema_id, "test", "test", "initial")
.timestamp(100)
.build();
let c0_id = store.put(&Object::Commit(c0))?;
store.set_ref("refs/heads/main", c0_id)?;
// The orphan commit is stored but never referenced by any ref.
// NOTE(review): the same empty schema is stored again here; presumably this
// yields the same id as `schema_id` — confirm against `store_schema_as_tree`.
let orphan_schema_id = crate::tree::store_schema_as_tree(&mut store, empty_schema())?;
let orphan = CommitObject::builder(orphan_schema_id, "test", "test", "orphan")
.timestamp(300)
.build();
let orphan_id = store.put(&Object::Commit(orphan))?;
assert!(store.has(&orphan_id));
let report = gc(&mut store)?;
// NOTE(review): 3 is presumably the commit plus the objects written by
// `store_schema_as_tree` — confirm against that helper.
assert_eq!(report.reachable, 3);
assert!(report.deleted.contains(&orphan_id));
assert!(!store.has(&orphan_id));
Ok(())
}
// `gc_report` must count the reachable ids for a single commit on main.
#[test]
fn gc_report_counts_reachable() -> Result<(), VcsError> {
let mut store = MemStore::new();
let schema_id = crate::tree::store_schema_as_tree(&mut store, empty_schema())?;
let c0 = CommitObject::builder(schema_id, "test", "test", "initial")
.timestamp(100)
.build();
let c0_id = store.put(&Object::Commit(c0))?;
store.set_ref("refs/heads/main", c0_id)?;
let report = gc_report(&store)?;
// Same expected count as in `gc_deletes_unreachable` for one rooted commit.
assert_eq!(report.reachable, 3);
Ok(())
}
// Theory and CST-complement objects referenced from a rooted commit must
// survive a real `gc` run.
#[test]
fn gc_marks_theory_ids_and_cst_complements_reachable() -> Result<(), VcsError> {
use crate::object::CstComplementObject;
use std::collections::BTreeMap;
let mut store = MemStore::new();
let schema_id = crate::tree::store_schema_as_tree(&mut store, empty_schema())?;
let theory = panproto_gat::Theory::new(
"ThTest",
vec![panproto_gat::Sort::simple("Vertex")],
vec![],
vec![],
);
let theory_id = store.put(&Object::Theory(Box::new(theory)))?;
// `data_id` is an arbitrary id that this test never puts into the store.
let cst = CstComplementObject {
data_id: ObjectId::from_bytes([77; 32]),
cst_complement: vec![1, 2, 3],
};
let cst_id = store.put(&Object::CstComplement(cst))?;
let mut theory_ids = BTreeMap::new();
theory_ids.insert("ThTest".to_owned(), theory_id);
let commit = CommitObject::builder(schema_id, "test", "test", "initial")
.timestamp(100)
.theory_ids(theory_ids)
.cst_complement_ids(vec![cst_id])
.build();
let commit_id = store.put(&Object::Commit(commit))?;
store.set_ref("refs/heads/main", commit_id)?;
let report = gc(&mut store)?;
// Neither object may be reported as deleted, and both must still exist.
assert!(!report.deleted.contains(&theory_id));
assert!(!report.deleted.contains(&cst_id));
assert!(store.has(&theory_id));
assert!(store.has(&cst_id));
Ok(())
}
// Protocol, data-set and complement objects referenced from a commit must be
// part of the reachable set, along with the commit's schema.
#[test]
fn gc_marks_data_complement_protocol_reachable() -> Result<(), VcsError> {
use crate::object::{ComplementObject, DataSetObject};
let mut store = MemStore::new();
let schema_id = crate::tree::store_schema_as_tree(&mut store, empty_schema())?;
let protocol = panproto_schema::Protocol {
name: "test-proto".into(),
..Default::default()
};
let protocol_id = store.put(&Object::Protocol(Box::new(protocol)))?;
let dataset = DataSetObject {
schema_id,
data: vec![1, 2, 3],
record_count: 1,
};
let data_id = store.put(&Object::DataSet(dataset))?;
// `migration_id` is an arbitrary id that this test never puts into the
// store; the traversal marks such ids but does not try to load them.
let complement = ComplementObject {
migration_id: ObjectId::from_bytes([99; 32]),
data_id,
complement: vec![4, 5, 6],
};
let complement_id = store.put(&Object::Complement(complement))?;
let c0 = CommitObject::builder(schema_id, "test", "test", "initial")
.timestamp(100)
.protocol_id(protocol_id)
.data_ids(vec![data_id])
.complement_ids(vec![complement_id])
.build();
let c0_id = store.put(&Object::Commit(c0))?;
store.set_ref("refs/heads/main", c0_id)?;
let reachable = mark_reachable(&store, &[c0_id])?;
assert!(reachable.contains(&protocol_id));
assert!(reachable.contains(&data_id));
assert!(reachable.contains(&complement_id));
assert!(reachable.contains(&schema_id));
Ok(())
}
}