use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use clap::{Args, CommandFactory, Parser, Subcommand};
use snapdir_catalog::{
ancestors_json_line, locations_json_line, revisions_json_line, Catalog, SystemClock,
};
use snapdir_core::{
cache, expand_excludes, snapshot_id, walk, Blake3Hasher, Blake3KeyedHasher, ExcludeMatcher,
FollowMode, Hasher, Manifest, ManifestEntry, Md5Hasher, PathMode, PathType, Sha256Hasher,
Store, WalkOptions,
};
use snapdir_stores::{
resolve_adapter, Adapter, B2Store, ExternalStore, FileStore, GcsStore, S3Store,
};
// Top-level command-line definition for the `snapdir` binary, parsed by clap.
//
// Plain `//` comments are used on purpose: clap's derive reads `///` doc
// comments as help text, so adding them here could alter the generated
// `--help`, man page and completion output.
#[derive(Debug, Parser)]
#[command(
name = "snapdir",
bin_name = "snapdir",
version,
propagate_version = true,
about = "Content-addressable directory snapshots.",
long_about = None
)]
pub struct Cli {
// Options shared by all subcommands, flattened into this parser.
#[command(flatten)]
pub globals: GlobalArgs,
// The subcommand selected on the command line; dispatched by `Cli::run`.
#[command(subcommand)]
pub command: Command,
}
// Options that may be given alongside any subcommand (every flag is declared
// with `global = true`).
//
// Plain `//` comments are used on purpose: clap's derive treats `///` doc
// comments on fields as help text, so adding them would change `--help`.
#[derive(Debug, Args)]
// A CLI options struct naturally carries many independent on/off flags.
#[allow(clippy::struct_excessive_bools)]
pub struct GlobalArgs {
// Local cache directory; also read from SNAPDIR_CACHE_DIR. When absent,
// `Cli::cache_dir` derives a default from XDG_CACHE_HOME / HOME.
#[arg(long, global = true, value_name = "DIR", env = "SNAPDIR_CACHE_DIR")]
pub cache_dir: Option<PathBuf>,
// Catalog name or path; also read from SNAPDIR_CATALOG. `Cli::catalog_db_path`
// treats a value containing a path separator as a path, and any other
// non-empty value as a name stored inside the cache directory.
#[arg(long, global = true, value_name = "NAME", env = "SNAPDIR_CATALOG")]
pub catalog: Option<String>,
// Store URI; `Cli::resolve_store` picks the storage adapter from it.
#[arg(long, global = true, value_name = "URI")]
pub store: Option<String>,
// Snapshot id; required by fetch, checkout, pull, verify and ancestors.
#[arg(long, global = true, value_name = "ID")]
pub id: Option<String>,
// Exclude pattern applied when a manifest is built.
#[arg(long, global = true, value_name = "PATTERN")]
pub exclude: Option<String>,
// Parsed but not read anywhere in this section of the file.
#[arg(long, global = true, value_name = "PATTERN")]
pub paths: Option<String>,
// Parsed but not read anywhere in this section of the file.
#[arg(long, global = true)]
pub linked: bool,
// Parsed but not read anywhere in this section of the file.
#[arg(long, global = true)]
pub force: bool,
// Honoured by `verify-cache` (passed to `cache::verify_cache`); `verify`
// rejects it with an error.
#[arg(long, global = true)]
pub purge: bool,
// Parsed but not read anywhere in this section of the file.
#[arg(long, global = true)]
pub keep: bool,
// Parsed but not read anywhere in this section of the file.
#[arg(long, global = true)]
pub dryrun: bool,
// Enables extra stderr output in `fetch` and `verify-cache --purge`.
#[arg(long, global = true)]
pub verbose: bool,
// Parsed but not read anywhere in this section of the file.
#[arg(long, global = true)]
pub debug: bool,
// Location filter for `ancestors`; location selector for `revisions`
// (which falls back to `--store` when this is absent).
#[arg(long, global = true, value_name = "DIR|STORE")]
pub location: Option<String>,
}
// The subcommands understood by the CLI; `Cli::run` dispatches on these.
//
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments on variants and fields into help text.
#[derive(Debug, Subcommand)]
pub enum Command {
// Prints the manifest of a directory and logs a "manifest" catalog event.
Manifest {
// Selects `PathMode::Absolute` instead of `PathMode::Relative`.
#[arg(long)]
absolute: bool,
// Selects `FollowMode::NoFollow` for the directory walk.
#[arg(long)]
no_follow: bool,
// Hash implementation name: "b3sum" (default), "md5sum" or "sha256sum".
#[arg(long, value_name = "NAME")]
checksum_bin: Option<String>,
// Subcommand-level exclude pattern; takes precedence over the global one.
#[arg(long, value_name = "PATTERN")]
exclude: Option<String>,
// Directory to manifest; defaults to the current directory.
path: Option<PathBuf>,
},
// Prints the snapshot id of a directory.
Id {
path: Option<PathBuf>,
},
// Saves a snapshot of a directory into the local cache.
Stage {
dir: Option<PathBuf>,
},
// Pushes a snapshot of a directory to the `--store` backend.
Push {
path: Option<PathBuf>,
},
// Downloads the snapshot named by `--id` from the store into the local cache.
Fetch,
// Runs `fetch` followed by `checkout` into the given path.
Pull {
path: Option<PathBuf>,
},
// Materialises the cached snapshot named by `--id` into a directory.
Checkout {
dir: Option<PathBuf>,
},
// Fetches the manifest and objects of `--id` from the store into a scratch dir.
Verify,
// Verifies the objects in the local cache (optionally purging corrupt ones).
VerifyCache,
// Flushes the local cache.
FlushCache,
// Catalog queries, printed as one JSON line per record.
Locations,
Ancestors,
Revisions,
// Prints default settings and SNAPDIR-related environment variables.
Defaults,
// Prints "snapdir <version>".
Version,
// Hidden: writes a shell completion script for the given shell to stdout.
#[command(hide = true)]
Completions {
shell: clap_complete::Shell,
},
// Hidden: renders the man page to stdout.
#[command(hide = true)]
Man,
}
impl Cli {
pub fn run(&self) -> Result<()> {
match &self.command {
Command::Manifest {
absolute,
no_follow,
checksum_bin,
exclude,
path,
} => {
let exclude = exclude.as_deref().or(self.globals.exclude.as_deref());
let manifest = self.build_manifest(
path.as_deref(),
*absolute,
*no_follow,
checksum_bin.as_deref(),
exclude,
)?;
println!("{manifest}");
let id = snapshot_id(&manifest, &Blake3Hasher::new());
let abs = resolve_root(path.as_deref())
.context("resolving the manifested directory path")?;
self.log_event("manifest", &id, &abs.to_string_lossy())?;
Ok(())
}
Command::Id { path } => {
let exclude = self.globals.exclude.as_deref();
let manifest = self.build_manifest(path.as_deref(), false, false, None, exclude)?;
let id = snapshot_id(&manifest, &Blake3Hasher::new());
println!("{id}");
Ok(())
}
Command::Push { path } => self.run_push(path.as_deref()),
Command::Fetch => self.run_fetch(),
Command::Checkout { dir } => self.run_checkout(dir.as_deref()),
Command::Pull { path } => self.run_pull(path.as_deref()),
Command::Verify => self.run_verify(),
Command::Stage { dir } => self.run_stage(dir.as_deref()),
Command::VerifyCache => self.run_verify_cache(),
Command::FlushCache => self.run_flush_cache(),
Command::Locations => self.run_locations(),
Command::Ancestors => self.run_ancestors(),
Command::Revisions => self.run_revisions(),
Command::Version => {
println!("snapdir {}", env!("CARGO_PKG_VERSION"));
Ok(())
}
Command::Defaults => run_defaults(),
Command::Completions { shell } => {
let mut cmd = Cli::command();
clap_complete::generate(*shell, &mut cmd, "snapdir", &mut std::io::stdout());
Ok(())
}
Command::Man => {
clap_mangen::Man::new(Cli::command())
.render(&mut std::io::stdout())
.context("rendering the man page")?;
Ok(())
}
}
}
}
/// Prints the effective defaults, one per line, sorted and de-duplicated.
///
/// The output contains the binary path (as `SNAPDIR_BIN_PATH` and
/// `SNAPDIR_MANIFEST_BIN_PATH`), the current `SNAPDIR_MANIFEST_CONTEXT` and
/// `SNAPDIR_MANIFEST_EXCLUDE` values (empty when unset), and every environment
/// variable whose name contains `SNAPDIR` but not `VERSION`, rewritten by
/// `reformat_env_default`.
///
/// # Errors
///
/// Fails when the path of the running executable cannot be determined.
fn run_defaults() -> Result<()> {
    let bin_path = std::env::current_exe()
        .context("resolving the running binary path")?
        .display()
        .to_string();
    let manifest_context = std::env::var("SNAPDIR_MANIFEST_CONTEXT").unwrap_or_default();
    let manifest_exclude = std::env::var("SNAPDIR_MANIFEST_EXCLUDE").unwrap_or_default();

    let mut lines = vec![
        format!("SNAPDIR_MANIFEST_BIN_PATH={bin_path}"),
        format!("SNAPDIR_MANIFEST_CONTEXT={manifest_context}"),
        format!("SNAPDIR_MANIFEST_EXCLUDE={manifest_exclude}"),
        format!("SNAPDIR_BIN_PATH={bin_path}"),
    ];

    // `std::env::vars()` panics as soon as *any* entry in the environment is
    // not valid Unicode, even one unrelated to snapdir. Iterate the OS strings
    // and convert lossily so `defaults` never aborts on such an environment.
    for (key, value) in std::env::vars_os() {
        let key = key.to_string_lossy();
        if !key.contains("SNAPDIR") || key.contains("VERSION") {
            continue;
        }
        lines.push(reformat_env_default(&key, &value.to_string_lossy()));
    }

    // Sorting makes the output independent of environment iteration order;
    // dedup removes entries that were produced more than once.
    lines.sort();
    lines.dedup();
    for line in lines {
        println!("{line}");
    }
    Ok(())
}
impl Cli {
/// Snapshots `path` (default: the current directory) and pushes it to the
/// backend selected by `--store`.
///
/// Prints the snapshot id on success and then hands a `push` event to
/// `log_event`.
fn run_push(&self, path: Option<&Path>) -> Result<()> {
    let remote = self.resolve_store()?;
    let exclude = self.globals.exclude.as_deref();
    let manifest = self.build_manifest(path, false, false, None, exclude)?;
    let source = resolve_root(path).context("resolving push path")?;
    let id = snapshot_id(&manifest, &Blake3Hasher::new());

    remote
        .push(&manifest, &source)
        .with_context(|| format!("pushing snapshot {id} to store"))?;
    println!("{id}");

    // The store URI doubles as the catalog location of the pushed snapshot.
    let store_url = self.globals.store.as_deref();
    let store_url = store_url.context("missing --store option")?;
    self.log_event("push", &id, store_url)
}
/// Downloads the snapshot named by `--id` from the store and saves it into
/// the local cache.
///
/// Objects are first fetched into a scratch directory, which is removed when
/// this function returns, and then pushed into the cache store. With
/// `--verbose`, `SAVED: <id>` is written to stderr.
fn run_fetch(&self) -> Result<()> {
    let remote = self.resolve_store()?;
    let id = self.require_id()?;
    let manifest = remote
        .get_manifest(id)
        .with_context(|| format!("fetching manifest {id} from store"))?;

    let staging = ScratchDir::new("fetch")?;
    let staging_path = staging.path();
    remote
        .fetch_files(&manifest, staging_path)
        .with_context(|| format!("fetching objects for snapshot {id}"))?;

    let local = self.cache_store();
    local
        .push(&manifest, staging_path)
        .with_context(|| format!("saving snapshot {id} to the local cache"))?;

    if self.globals.verbose {
        eprintln!("SAVED: {id}");
    }
    Ok(())
}
/// Materialises the cached snapshot named by `--id` into `dir` (default: the
/// current directory) and then applies the permissions recorded in its
/// manifest.
///
/// Only the local cache is consulted; the snapshot must already be present
/// there.
fn run_checkout(&self, dir: Option<&Path>) -> Result<()> {
    let id = self.require_id()?;
    let local = self.cache_store();
    let manifest = local.get_manifest(id).with_context(|| {
        format!("manifest {id} not found locally; did you forget to fetch it?")
    })?;

    let target = resolve_root(dir).context("resolving checkout destination")?;
    local
        .fetch_files(&manifest, &target)
        .with_context(|| format!("checking out snapshot {id} to {}", target.display()))?;

    restore_permissions(&manifest, &target)
}
/// Runs `fetch` and, only if that succeeds, `checkout` into `path`.
fn run_pull(&self, path: Option<&Path>) -> Result<()> {
    self.run_fetch()
        .and_then(|()| self.run_checkout(path))
}
/// Checks the snapshot named by `--id` in the store by fetching its manifest
/// and all of its objects into a scratch directory that is removed afterwards.
///
/// `--purge` is rejected up front: it only applies to `verify-cache`.
fn run_verify(&self) -> Result<()> {
    let purge_requested = self.globals.purge;
    if purge_requested {
        anyhow::bail!(
            "snapdir: `verify` does not support --purge; use `verify-cache --purge` to remove corrupt objects from the local cache"
        );
    }

    let remote = self.resolve_store()?;
    let id = self.require_id()?;
    let manifest = remote
        .get_manifest(id)
        .with_context(|| format!("verifying manifest {id}"))?;

    let staging = ScratchDir::new("verify")?;
    remote
        .fetch_files(&manifest, staging.path())
        .with_context(|| format!("verifying objects for snapshot {id}"))?;
    Ok(())
}
/// Snapshots `path` (default: the current directory) into the local cache.
///
/// Prints the snapshot id on success and then hands a `stage` event, located
/// at the snapshotted directory, to `log_event`.
fn run_stage(&self, path: Option<&Path>) -> Result<()> {
    let exclude = self.globals.exclude.as_deref();
    let manifest = self.build_manifest(path, false, false, None, exclude)?;
    let source = resolve_root(path).context("resolving stage path")?;
    let id = snapshot_id(&manifest, &Blake3Hasher::new());

    let local = self.cache_store();
    local
        .push(&manifest, &source)
        .with_context(|| format!("staging snapshot {id} into the local cache"))?;
    println!("{id}");

    let location = source.to_string_lossy();
    self.log_event("stage", &id, &location)
}
/// Verifies the objects in the local cache and reports corrupt ones on stderr.
///
/// `--purge` is forwarded to `cache::verify_cache`; with `--purge --verbose`
/// each purged checksum is listed as well. Returns an error naming the number
/// of corrupt objects unless the report is clean.
fn run_verify_cache(&self) -> Result<()> {
    let globals = &self.globals;
    let dir = self.cache_dir();
    let report = cache::verify_cache(&dir, globals.purge, &Blake3Hasher::new())
        .with_context(|| format!("verifying cache at {}", dir.display()))?;

    for checksum in &report.corrupt {
        eprintln!("Checksum mismatch for {checksum}");
    }

    let list_purged = globals.purge && globals.verbose;
    if list_purged {
        for checksum in &report.purged {
            eprintln!("purged {checksum}");
        }
    }

    if !report.is_clean() {
        anyhow::bail!(
            "snapdir: {} corrupt object(s) in the cache",
            report.corrupt.len()
        );
    }
    Ok(())
}
/// Flushes the local cache directory via `cache::flush_cache`.
fn run_flush_cache(&self) -> Result<()> {
    let dir = self.cache_dir();
    let outcome = cache::flush_cache(&dir);
    outcome.with_context(|| format!("flushing cache at {}", dir.display()))?;
    Ok(())
}
/// Prints every location known to the catalog, one JSON line per record.
fn run_locations(&self) -> Result<()> {
    let catalog = self.open_catalog()?;
    let records = catalog
        .locations()
        .context("querying catalog locations")?;
    for record in records {
        let line = locations_json_line(&record);
        println!("{line}");
    }
    Ok(())
}
/// Prints the catalog ancestors of the snapshot named by `--id`, one JSON line
/// per record; `--location`, when given, is passed through to the query.
fn run_ancestors(&self) -> Result<()> {
    let catalog = self.open_catalog()?;
    let id = self.require_id()?;
    let records = catalog
        .ancestors(id, self.globals.location.as_deref())
        .with_context(|| format!("querying catalog ancestors of {id}"))?;
    for record in records {
        let line = ancestors_json_line(&record);
        println!("{line}");
    }
    Ok(())
}
/// Prints the catalog revisions recorded at a location, one JSON line per
/// record.
///
/// The location is `--location` when given, otherwise `--store`; it is an
/// error when neither is present.
fn run_revisions(&self) -> Result<()> {
    let catalog = self.open_catalog()?;
    let globals = &self.globals;
    let location = globals
        .location
        .as_deref()
        .or_else(|| globals.store.as_deref())
        .context("missing --location option")?;
    let records = catalog
        .revisions(location)
        .with_context(|| format!("querying catalog revisions at {location}"))?;
    for record in records {
        let line = revisions_json_line(&record);
        println!("{line}");
    }
    Ok(())
}
fn log_event(&self, event: &str, id: &str, location: &str) -> Result<()> {
let Some(db) = self.catalog_db_path() else {
return Ok(());
};
let catalog =
Catalog::open(&db).with_context(|| format!("opening catalog at {}", db.display()))?;
catalog
.log(event, id, location, &SystemClock)
.with_context(|| format!("recording catalog event {event} for {id}"))?;
Ok(())
}
/// Opens the configured catalog, creating the directory that contains its
/// database file first.
///
/// Fails when no catalog is configured (neither `--catalog` nor
/// `SNAPDIR_CATALOG`).
fn open_catalog(&self) -> Result<Catalog> {
    let db_path = self
        .catalog_db_path()
        .context("error: Missing SNAPDIR_CATALOG or --catalog")?;

    match db_path.parent() {
        Some(dir) => std::fs::create_dir_all(dir)
            .with_context(|| format!("creating catalog directory {}", dir.display()))?,
        None => {}
    }

    let opened = Catalog::open(&db_path);
    opened.with_context(|| format!("opening catalog at {}", db_path.display()))
}
/// Resolves the catalog database path, or `None` when no catalog is configured
/// (missing or empty value).
///
/// A value containing the platform path separator is used as the path itself;
/// any other value is a catalog name stored as `<name>-catalog.redb` inside
/// the cache directory.
fn catalog_db_path(&self) -> Option<PathBuf> {
    let name = self
        .globals
        .catalog
        .as_deref()
        .filter(|value| !value.is_empty())?;

    let looks_like_path = name.contains(std::path::MAIN_SEPARATOR);
    let db = if looks_like_path {
        PathBuf::from(name)
    } else {
        let file_name = format!("{name}-catalog.redb");
        self.cache_dir().join(file_name)
    };
    Some(db)
}
/// Builds the store backend named by `--store`.
///
/// Fails when `--store` is missing, when its protocol cannot be resolved to an
/// adapter, or when the adapter's store cannot be constructed.
fn resolve_store(&self) -> Result<Box<dyn Store>> {
    let uri = self.globals.store.as_deref();
    let uri = uri.context("missing --store option")?;
    let adapter = resolve_adapter(uri).context("resolving --store protocol")?;
    store_for_adapter(&adapter, uri)
}
/// Returns a `FileStore` rooted at the local cache directory.
fn cache_store(&self) -> FileStore {
    let root = self.cache_dir();
    FileStore::from_root(root)
}
/// Returns the local cache directory.
///
/// Precedence: the `--cache-dir` / `SNAPDIR_CACHE_DIR` value, then
/// `$XDG_CACHE_HOME/snapdir`, then `$HOME/.cache/snapdir`.
///
/// An `XDG_CACHE_HOME` that is set but empty is treated like an unset one, as
/// the XDG Base Directory specification requires; previously it produced the
/// bogus path `/snapdir`.
fn cache_dir(&self) -> PathBuf {
    if let Some(dir) = &self.globals.cache_dir {
        return dir.clone();
    }
    let base = match std::env::var("XDG_CACHE_HOME") {
        Ok(dir) if !dir.is_empty() => dir,
        // Unset, empty, or not valid Unicode: fall back to the HOME default.
        _ => {
            let home = std::env::var("HOME").unwrap_or_default();
            format!("{home}/.cache")
        }
    };
    PathBuf::from(format!("{base}/snapdir"))
}
/// Returns the `--id` value, or an error when it was not supplied.
fn require_id(&self) -> Result<&str> {
    let id = self.globals.id.as_deref();
    id.context("missing --id option")
}
/// Walks `path` (default: the current directory) and returns its manifest.
///
/// * `absolute` selects absolute instead of relative paths in the manifest.
/// * `no_follow` selects `FollowMode::NoFollow`; the expanded exclude pattern
///   can force the same mode.
/// * `checksum_bin` picks the hasher: `b3sum` (also the default), `md5sum` or
///   `sha256sum`; any other name is an error. For `b3sum`, a non-empty
///   `SNAPDIR_MANIFEST_CONTEXT` selects `Blake3KeyedHasher` with that value.
/// * `exclude` is expanded by `expand_excludes` together with the runtime
///   cache paths before being compiled into a matcher.
fn build_manifest(
    &self,
    path: Option<&Path>,
    absolute: bool,
    no_follow: bool,
    checksum_bin: Option<&str>,
    exclude: Option<&str>,
) -> Result<Manifest> {
    let root = resolve_root(path).context("resolving manifest path")?;

    let (home_cache, cache_dir) = exclude_runtime_paths(self.globals.cache_dir.as_deref());
    let expanded = expand_excludes(exclude.unwrap_or(""), &home_cache, &cache_dir);
    let matcher = expanded
        .pattern
        .as_ref()
        .map(|pattern| ExcludeMatcher::new(pattern).context("compiling --exclude pattern"))
        .transpose()?;

    let options = WalkOptions {
        follow: if no_follow || expanded.forces_no_follow {
            FollowMode::NoFollow
        } else {
            FollowMode::Follow
        },
        path_mode: if absolute {
            PathMode::Absolute
        } else {
            PathMode::Relative
        },
        exclude: matcher,
    };

    match checksum_bin.unwrap_or("b3sum") {
        "b3sum" => {
            let context = std::env::var("SNAPDIR_MANIFEST_CONTEXT").unwrap_or_default();
            if context.is_empty() {
                walk_with(&root, &options, &Blake3Hasher::new())
            } else {
                walk_with(&root, &options, &Blake3KeyedHasher::new(context))
            }
        }
        "md5sum" => walk_with(&root, &options, &Md5Hasher::new()),
        "sha256sum" => walk_with(&root, &options, &Sha256Hasher::new()),
        other => anyhow::bail!("snapdir: unsupported --checksum-bin '{other}'"),
    }
}
}
/// Constructs the concrete store for `adapter`, pointed at `store_url`.
///
/// The S3 and B2 adapters both take their optional endpoint from
/// `SNAPDIR_S3_STORE_ENDPOINT_URL`; B2 additionally takes a region from
/// `SNAPDIR_B2_REGION`, falling back to `AWS_REGION`.
/// NOTE(review): B2 reusing the S3 endpoint variable looks deliberate, but
/// confirm against the store documentation.
fn store_for_adapter(adapter: &Adapter, store_url: &str) -> Result<Box<dyn Store>> {
    let store: Box<dyn Store> = match adapter {
        Adapter::File => Box::new(FileStore::new(store_url)),
        Adapter::S3 => {
            let endpoint = std::env::var("SNAPDIR_S3_STORE_ENDPOINT_URL").ok();
            let connected = S3Store::connect(store_url, endpoint.as_deref())
                .with_context(|| format!("connecting to S3 store {store_url}"))?;
            Box::new(connected)
        }
        Adapter::B2 => {
            let endpoint = std::env::var("SNAPDIR_S3_STORE_ENDPOINT_URL").ok();
            let region = std::env::var("SNAPDIR_B2_REGION")
                .ok()
                .or_else(|| std::env::var("AWS_REGION").ok());
            let connected = B2Store::connect(store_url, endpoint.as_deref(), region.as_deref())
                .with_context(|| format!("connecting to B2 store {store_url}"))?;
            Box::new(connected)
        }
        Adapter::Gcs => {
            let connected = GcsStore::connect(store_url)
                .with_context(|| format!("connecting to GCS store {store_url}"))?;
            Box::new(connected)
        }
        Adapter::External { .. } => {
            let shim = ExternalStore::new(store_url)
                .with_context(|| format!("resolving external store for {store_url}"))?;
            Box::new(shim)
        }
    };
    Ok(store)
}
/// Renders an environment variable as a lower-case, dash-separated line.
///
/// The pair is first joined as `KEY=VALUE`. A leading `_SNAPDIR_` or
/// `SNAPDIR_` is replaced by `--`; other keys are kept whole. Finally every
/// `_` becomes `-` and the text is lower-cased. Note that this last step is
/// applied to the whole line, so the value is transformed as well.
fn reformat_env_default(key: &str, value: &str) -> String {
    let assignment = format!("{key}={value}");
    // Try the underscore-prefixed spelling first, then the plain one.
    let as_flag = ["_SNAPDIR_", "SNAPDIR_"]
        .iter()
        .find_map(|prefix| assignment.strip_prefix(*prefix))
        .map(|rest| format!("--{rest}"));
    as_flag
        .unwrap_or(assignment)
        .replace('_', "-")
        .to_lowercase()
}
/// Runs `walk` with the given hasher and attaches the walked root to any error.
fn walk_with<H>(root: &Path, options: &WalkOptions, hasher: &H) -> Result<Manifest>
where
    H: Hasher,
{
    let outcome = walk(root, options, hasher);
    outcome.with_context(|| format!("walking {}", root.display()))
}
fn restore_permissions(manifest: &Manifest, dest: &Path) -> Result<()> {
for entry in manifest.entries() {
if entry.path_type == PathType::Directory {
continue;
}
apply_mode(dest, entry)?;
}
let mut dirs: Vec<&_> = manifest
.entries()
.iter()
.filter(|e| e.path_type == PathType::Directory)
.collect();
dirs.sort_by_key(|e| std::cmp::Reverse(e.path.len()));
for entry in dirs {
apply_mode(dest, entry)?;
}
Ok(())
}
fn apply_mode(dest: &Path, entry: &ManifestEntry) -> Result<()> {
let rel = entry.path.strip_prefix("./").unwrap_or(&entry.path);
let rel = rel.strip_suffix('/').unwrap_or(rel);
let target = if rel.is_empty() {
dest.to_path_buf()
} else {
dest.join(rel)
};
let mode = u32::from_str_radix(&entry.permissions, 8)
.with_context(|| format!("invalid permissions {:?}", entry.permissions))?;
std::fs::set_permissions(&target, std::fs::Permissions::from_mode(mode))
.with_context(|| format!("setting permissions on {}", target.display()))?;
Ok(())
}
struct ScratchDir {
path: PathBuf,
}
impl ScratchDir {
fn new(tag: &str) -> Result<Self> {
use std::sync::atomic::{AtomicU64, Ordering};
static COUNTER: AtomicU64 = AtomicU64::new(0);
let n = COUNTER.fetch_add(1, Ordering::Relaxed);
let path =
std::env::temp_dir().join(format!("snapdir-cli-{tag}-{}-{n}", std::process::id()));
std::fs::create_dir_all(&path)
.with_context(|| format!("creating scratch dir {}", path.display()))?;
Ok(Self { path })
}
fn path(&self) -> &Path {
&self.path
}
}
impl Drop for ScratchDir {
fn drop(&mut self) {
let _ = std::fs::remove_dir_all(&self.path);
}
}
/// Turns an optional path argument into an absolute path.
///
/// `None` means the current directory. An absolute path is returned as is; a
/// relative one is joined onto the current directory. No normalisation or
/// symlink resolution is performed.
fn resolve_root(path: Option<&Path>) -> Result<PathBuf> {
    let working_dir = || std::env::current_dir().context("getting current directory");

    let candidate = match path {
        Some(given) => given.to_path_buf(),
        None => working_dir()?,
    };
    if !candidate.is_absolute() {
        let base = working_dir()?;
        return Ok(base.join(candidate));
    }
    Ok(candidate)
}
/// Returns the two runtime paths handed to the exclude expansion:
/// `("$HOME/.cache/", <snapdir cache directory>)`.
///
/// The cache directory is `cache_dir` when given, otherwise
/// `$XDG_CACHE_HOME/snapdir`, otherwise `$HOME/.cache/snapdir`. An unset
/// `HOME` is treated as the empty string.
///
/// An `XDG_CACHE_HOME` that is set but empty is treated like an unset one, as
/// the XDG Base Directory specification requires; previously it yielded
/// `/snapdir`.
fn exclude_runtime_paths(cache_dir: Option<&Path>) -> (String, String) {
    let home = std::env::var("HOME").unwrap_or_default();
    let home_cache = format!("{home}/.cache/");
    let cache_dir = match cache_dir {
        Some(dir) => dir.display().to_string(),
        None => {
            let base = std::env::var("XDG_CACHE_HOME")
                .ok()
                .filter(|value| !value.is_empty())
                .unwrap_or_else(|| format!("{home}/.cache"));
            format!("{base}/snapdir")
        }
    };
    (home_cache, cache_dir)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Built-in schemes map to their adapter; unknown schemes become external.
    #[test]
    fn remote_store_routing_resolves_every_scheme_to_its_adapter() {
        let builtin = [
            ("file:///long/term/x", Adapter::File),
            ("s3://bucket/path", Adapter::S3),
            ("b2://bucket/path", Adapter::B2),
            ("gs://bucket/path", Adapter::Gcs),
        ];
        for (uri, expected) in builtin {
            assert_eq!(resolve_adapter(uri).unwrap(), expected);
        }

        let gcs = resolve_adapter("gs://bucket/path").unwrap();
        assert_eq!(gcs.name(), "gcs");
        assert_eq!(gcs.store_binary(), "snapdir-gcs-store");

        let third_party = resolve_adapter("xyz://bucket/path").unwrap();
        let expected = Adapter::External {
            name: "xyz".to_owned(),
        };
        assert_eq!(third_party, expected);
        assert!(!third_party.is_builtin());
        assert_eq!(third_party.store_binary(), "snapdir-xyz-store");
    }

    /// A `file://` store is constructed successfully; looking up an id that
    /// was never stored reports an error.
    #[test]
    fn remote_store_routing_file_builds_filestore_without_io() {
        let uri = "file:///tmp/snapdir-routing-test";
        let adapter = resolve_adapter(uri).unwrap();
        let store = store_for_adapter(&adapter, uri).unwrap();
        let unknown_id = "0".repeat(64);
        assert!(store.get_manifest(&unknown_id).is_err());
    }

    /// An unknown scheme is routed to the external shim named after it.
    #[test]
    fn remote_store_routing_external_builds_shim_for_third_party_scheme() {
        let uri = "xyz://bucket/base";
        let adapter = resolve_adapter(uri).unwrap();
        let shim = ExternalStore::new(uri).unwrap();
        assert_eq!(shim.binary(), Path::new("snapdir-xyz-store"));
        assert!(store_for_adapter(&adapter, uri).is_ok());
    }

    /// A malformed scheme is rejected.
    #[test]
    fn remote_store_routing_rejects_invalid_protocol() {
        let outcome = resolve_adapter("NotAScheme://x");
        assert!(outcome.is_err());
    }
}