use std::collections::HashSet;
use std::fs;
use std::path::{Component, Path, PathBuf};
use crate::StoreError;
/// File name that `index_path` appends to a corpus directory.
const INDEX_FILE: &str = "index.sqlite";
/// File directly under the store root whose contents `latest_name` reads as the active corpus name.
const POINTER_FILE: &str = "latest";
/// Scratch file that `set_latest` writes first and then renames over `POINTER_FILE`.
const POINTER_TEMP: &str = "latest.publishing";
/// Builds the directory path for the corpus called `name` under `root`.
///
/// This is pure path arithmetic: nothing is created or checked on disk and
/// `name` is not validated. Because the name is pushed onto `root`, an absolute
/// `name` replaces `root` entirely (standard `PathBuf::push` behaviour).
#[must_use]
pub fn corpus_dir(root: &Path, name: &str) -> PathBuf {
    let mut dir = root.to_path_buf();
    dir.push(name);
    dir
}
/// Builds the path of the index file belonging to the corpus `name` under `root`.
///
/// The result is the corpus directory (see [`corpus_dir`]) with `INDEX_FILE`
/// appended. No filesystem access happens here.
#[must_use]
pub fn index_path(root: &Path, name: &str) -> PathBuf {
    let mut path = corpus_dir(root, name);
    path.push(INDEX_FILE);
    path
}
pub fn set_latest(root: &Path, name: &str) -> Result<(), StoreError> {
fs::create_dir_all(root)?;
let temp = root.join(POINTER_TEMP);
fs::write(&temp, name)?;
fs::rename(&temp, root.join(POINTER_FILE))?;
Ok(())
}
/// Reads the pointer file (`POINTER_FILE`) under `root` and returns the corpus
/// name stored in it.
///
/// Surrounding whitespace is trimmed first. The result is `None` when the file
/// cannot be read as UTF-8 text, or when the trimmed content is anything other
/// than one plain path component (empty, containing a separator, `.` / `..`,
/// or starting with a root or prefix).
#[must_use]
pub fn latest_name(root: &Path) -> Option<String> {
    let contents = fs::read_to_string(root.join(POINTER_FILE)).ok()?;
    let candidate = contents.trim();

    let mut parts = Path::new(candidate).components();
    let Some(Component::Normal(first)) = parts.next() else {
        return None;
    };
    // `components()` normalises input such as a trailing separator away, so the
    // single component must additionally equal the whole trimmed string.
    if parts.next().is_some() || first != candidate {
        return None;
    }
    Some(candidate.to_owned())
}
/// Resolves the index file of the corpus the pointer under `root` currently names.
///
/// Returns `None` whenever [`latest_name`] yields no name; otherwise the name
/// is passed to [`index_path`]. The returned path is not checked for existence.
#[must_use]
pub fn latest_index_path(root: &Path) -> Option<PathBuf> {
    let name = latest_name(root)?;
    Some(index_path(root, &name))
}
/// Selects whether [`cleanup`] only reports what it would delete or actually deletes it.
#[derive(Debug, Clone, Copy)]
pub enum CleanupMode {
/// Classify directories and measure their sizes without removing anything.
DryRun,
/// Classify directories, then remove every one that ended up in the removable list.
Execute,
}
/// Outcome of one [`cleanup`] call.
#[derive(Debug)]
pub struct CleanupReport {
/// `true` when the call was made with [`CleanupMode::Execute`]; set from the
/// requested mode even if there was nothing to delete.
pub executed: bool,
/// Directories that were neither in the caller's keep list nor named by the
/// latest pointer. In execute mode these are the directories that were removed.
pub removable: Vec<CleanupEntry>,
/// Directories left alone, each with the reason it was protected.
pub protected: Vec<CleanupEntry>,
/// Sum of `bytes` over `removable`, accumulated with saturating addition.
pub bytes_removable: u64,
}
/// One directory examined by [`cleanup`].
#[derive(Debug)]
pub struct CleanupEntry {
/// Path of the directory as returned by the directory listing of the store root.
pub dir: PathBuf,
/// Size measured by `directory_bytes`: the summed length of the entries below
/// `dir`, where anything that cannot be read contributes nothing.
pub bytes: u64,
/// Fixed explanation of why the directory was classified the way it was.
pub reason: &'static str,
}
/// Classifies every corpus directory directly under `root` and, in
/// [`CleanupMode::Execute`], deletes the ones nothing protects.
///
/// A directory is protected when its name appears in `keep` (checked first) or
/// equals the name returned by [`latest_name`]; every other directory is
/// removable. Only entries whose own file type is "directory" and whose name is
/// valid UTF-8 are considered; everything else under `root` is skipped and
/// appears in neither list. Sizes come from `directory_bytes`.
///
/// A `root` that does not exist yields an empty report rather than an error.
///
/// NOTE: when the pointer file is missing, unreadable or invalid,
/// [`latest_name`] returns `None` and no directory is protected as "active";
/// callers that must not lose the current corpus should also pass it in `keep`.
///
/// # Errors
///
/// Returns an error when `root` cannot be listed for any reason other than not
/// existing, when reading a directory entry fails, or when removing a directory
/// fails. Removal stops at the first failure, so directories earlier in the
/// list have already been deleted at that point.
pub fn cleanup(
    root: &Path,
    keep: &[&str],
    mode: CleanupMode,
) -> Result<CleanupReport, StoreError> {
    let execute = matches!(mode, CleanupMode::Execute);
    let kept: HashSet<&str> = keep.iter().copied().collect();
    let active = latest_name(root);
    let mut removable = Vec::new();
    let mut protected = Vec::new();
    let mut bytes_removable: u64 = 0;

    // List the directory directly instead of probing with `exists()` first:
    // `exists()` also answers `false` for permission and other metadata errors,
    // which would make a destructive maintenance call silently report "nothing
    // to do", and the separate check could race with the listing.
    let entries = match fs::read_dir(root) {
        Ok(entries) => entries,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            return Ok(CleanupReport {
                executed: execute,
                removable,
                protected,
                bytes_removable,
            });
        }
        Err(err) => return Err(err.into()),
    };

    for entry in entries {
        let entry = entry?;
        if !entry.file_type().is_ok_and(|kind| kind.is_dir()) {
            continue;
        }
        let dir = entry.path();
        // Keep the `OsString` alive so the name can be borrowed instead of copied.
        let file_name = entry.file_name();
        let Some(name) = file_name.to_str() else {
            continue;
        };
        let bytes = directory_bytes(&dir);

        let protection = if kept.contains(name) {
            Some("kept by caller")
        } else if active.as_deref() == Some(name) {
            Some("active latest pointer")
        } else {
            None
        };

        match protection {
            Some(reason) => protected.push(CleanupEntry { dir, bytes, reason }),
            None => {
                bytes_removable = bytes_removable.saturating_add(bytes);
                removable.push(CleanupEntry {
                    dir,
                    bytes,
                    reason: "not wanted and not the latest pointer",
                });
            }
        }
    }

    if execute {
        for entry in &removable {
            fs::remove_dir_all(&entry.dir)?;
        }
    }

    Ok(CleanupReport {
        executed: execute,
        removable,
        protected,
        bytes_removable,
    })
}
/// Adds up the lengths of everything stored below `dir`.
///
/// Sub-directories are descended into; every other entry contributes the
/// length reported by its metadata. The walk is best-effort: a directory that
/// cannot be listed, or an entry whose type or metadata cannot be read, simply
/// contributes nothing. The sum saturates at `u64::MAX` instead of overflowing.
fn directory_bytes(dir: &Path) -> u64 {
    let mut total: u64 = 0;
    // Explicit work list of directories still to visit, in place of recursion.
    let mut pending = vec![dir.to_path_buf()];

    while let Some(current) = pending.pop() {
        let Ok(entries) = fs::read_dir(&current) else {
            continue;
        };
        for entry in entries.flatten() {
            match entry.file_type() {
                Ok(kind) if kind.is_dir() => pending.push(entry.path()),
                Ok(_) => {
                    if let Ok(metadata) = entry.metadata() {
                        total = total.saturating_add(metadata.len());
                    }
                }
                Err(_) => {}
            }
        }
    }

    total
}