#![allow(
clippy::too_many_lines,
clippy::similar_names,
clippy::items_after_statements,
clippy::manual_let_else,
clippy::doc_markdown
)]
use std::collections::BTreeSet;
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use std::process::{Command, Output};
/// Returns the path `assert_cmd` resolves for the Cargo-built binary named
/// `snapdir`.
fn snapdir_bin() -> PathBuf {
    use assert_cmd::cargo::cargo_bin;

    cargo_bin("snapdir")
}
/// Creates a scratch directory under the system temp dir and returns its path.
///
/// The directory name combines `tag`, the current process id and the current
/// time expressed as nanoseconds since the Unix epoch.
///
/// # Panics
///
/// Panics if the system clock reads earlier than the Unix epoch or if the
/// directory cannot be created.
fn temp_dir(tag: &str) -> PathBuf {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap();
    let leaf = format!(
        "snapdir-syncsplit-{tag}-{}-{:?}",
        std::process::id(),
        since_epoch.as_nanos()
    );
    let path = std::env::temp_dir().join(leaf);
    fs::create_dir_all(&path).expect("create temp dir");
    path
}
/// Renders `dir` as a `file://` URL string by prefixing the (lossily
/// converted) path text; no escaping is applied.
fn file_url(dir: &Path) -> String {
    format!("file://{}", dir.to_string_lossy())
}
/// Runs the `snapdir` binary with `args` and returns its captured output
/// without inspecting the exit status.
///
/// The child sees `SNAPDIR_CACHE_DIR` set to `cache`, and has
/// `SNAPDIR_STORE` and `SNAPDIR_OBJECTS_STORE` removed from its environment
/// so values inherited from the caller's shell cannot affect the run.
///
/// # Panics
///
/// Panics if the process cannot be run.
fn run_raw(args: &[&str], cache: &Path) -> Output {
    let mut cmd = Command::new(snapdir_bin());
    cmd.args(args).env("SNAPDIR_CACHE_DIR", cache);
    for inherited in ["SNAPDIR_STORE", "SNAPDIR_OBJECTS_STORE"] {
        cmd.env_remove(inherited);
    }
    cmd.output().expect("run snapdir")
}
fn run_ok(args: &[&str], cache: &Path) -> String {
let out = run_raw(args, cache);
assert!(
out.status.success(),
"snapdir {args:?} exited {:?}\nstderr: {}",
out.status.code(),
String::from_utf8_lossy(&out.stderr),
);
String::from_utf8(out.stdout)
.expect("stdout is UTF-8")
.trim_end()
.to_owned()
}
/// Collects the paths of all regular files beneath `dir`, relative to `dir`.
///
/// Directories are descended into; entries that are neither directories nor
/// regular files are ignored. Directories that cannot be read and entries
/// whose type cannot be determined are skipped silently, so a missing `dir`
/// yields an empty set.
fn collect_files(dir: &Path) -> BTreeSet<PathBuf> {
    let mut found = BTreeSet::new();
    // Explicit work list of directories still to scan, seeded with the root.
    let mut pending = vec![dir.to_path_buf()];
    while let Some(current) = pending.pop() {
        if let Ok(entries) = fs::read_dir(&current) {
            for entry in entries.flatten() {
                match entry.file_type() {
                    Ok(kind) if kind.is_dir() => pending.push(entry.path()),
                    Ok(kind) if kind.is_file() => {
                        let path = entry.path();
                        found.insert(path.strip_prefix(dir).unwrap().to_path_buf());
                    }
                    _ => {}
                }
            }
        }
    }
    found
}
/// Counts the regular files found under `pool`'s `.objects` directory.
fn count_pool_objects(pool: &Path) -> usize {
    let objects_root = pool.join(".objects");
    collect_files(&objects_root).len()
}
/// Counts the regular files found under `loc`'s `.manifests` directory.
fn count_manifests(loc: &Path) -> usize {
    let manifests_root = loc.join(".manifests");
    collect_files(&manifests_root).len()
}
/// Reports whether any file under `loc`'s `.manifests` directory has a
/// relative path whose components, concatenated with no separator, equal
/// `id`.
///
/// Path components that are not valid UTF-8 are left out of the
/// concatenation.
fn dest_serves_manifest_id(loc: &Path, id: &str) -> bool {
    for rel in collect_files(&loc.join(".manifests")) {
        let mut flattened = String::new();
        for part in rel.components() {
            if let Some(text) = part.as_os_str().to_str() {
                flattened.push_str(text);
            }
        }
        if flattened == id {
            return true;
        }
    }
    false
}
/// Materialises `leaves` (relative path, contents) under `dir`.
///
/// Parent directories are created as needed, each file is written and set to
/// mode `0o644`, and finally every directory from `dir` downward is set to
/// `0o755` via `set_dir_perms_recursive`.
///
/// # Panics
///
/// Panics on any filesystem error.
fn build_tree(dir: &Path, leaves: &[(&str, &[u8])]) {
    for &(rel, bytes) in leaves {
        let target = dir.join(rel);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&target, bytes).unwrap();
        let file_mode = fs::Permissions::from_mode(0o644);
        fs::set_permissions(&target, file_mode).unwrap();
    }
    set_dir_perms_recursive(dir);
}
/// Sets `dir` and every directory beneath it to mode `0o755`; files are left
/// untouched.
///
/// # Panics
///
/// Panics if permissions cannot be set, `dir` cannot be listed, or an
/// entry's file type cannot be read.
fn set_dir_perms_recursive(dir: &Path) {
    let dir_mode = fs::Permissions::from_mode(0o755);
    fs::set_permissions(dir, dir_mode).unwrap();
    let subdirs = fs::read_dir(dir)
        .unwrap()
        .flatten()
        .filter(|child| child.file_type().unwrap().is_dir());
    for subdir in subdirs {
        set_dir_perms_recursive(&subdir.path());
    }
}
/// Returns the fixed fixture tree used by the tests: four files (relative
/// path, contents), three of them in nested directories.
fn sample_leaves() -> &'static [(&'static str, &'static [u8])] {
    const LEAVES: &[(&str, &[u8])] = &[
        ("top.txt", b"alpha-body"),
        ("sub/one.txt", b"bravo-body-bravo"),
        ("sub/two.bin", b"charlie!!!"),
        ("sub/deep/three.dat", b"delta-delta-delta"),
    ];
    LEAVES
}
/// Asserts that each `(relative path, contents)` pair in `expected` exists
/// under `dest` with exactly those bytes.
///
/// Files present under `dest` but not listed in `expected` are not checked.
///
/// # Panics
///
/// Panics if a listed file cannot be read or its contents differ.
fn assert_tree_contents(dest: &Path, expected: &[(&str, &[u8])]) {
    for &(rel, bytes) in expected {
        let actual = match fs::read(dest.join(rel)) {
            Ok(data) => data,
            Err(e) => panic!("read {rel} from dest {}: {e}", dest.display()),
        };
        assert_eq!(actual.as_slice(), bytes, "contents of {rel} must match source");
    }
}
/// Extracts the number written immediately before the first occurrence of
/// `word` in `line`.
///
/// The text preceding `word` is split on whitespace and its last token is
/// parsed as a `usize`. Returns `None` if `word` does not occur, nothing
/// precedes it, or that token is not a number.
fn parse_count(line: &str, word: &str) -> Option<usize> {
    let (before, _) = line.split_once(word)?;
    let token = before.split_whitespace().next_back()?;
    token.parse().ok()
}
/// Returns the first line of `stderr` that contains the text `copied`.
///
/// # Panics
///
/// Panics, echoing the whole of `stderr`, when no line matches.
fn summary_line(stderr: &str) -> &str {
    for line in stderr.lines() {
        if line.contains("copied") {
            return line;
        }
    }
    panic!("expected a sync summary line with a copied count:\n{stderr}")
}
/// Removes one file from `pool`'s `.objects` directory and returns the
/// absolute path that was deleted.
///
/// The file chosen is the first in the sorted set of relative paths returned
/// by `collect_files`.
///
/// # Panics
///
/// Panics if the pool holds no object files or the removal fails.
fn delete_one_object(pool: &Path) -> PathBuf {
    let objects_root = pool.join(".objects");
    let victim = match collect_files(&objects_root).into_iter().next() {
        Some(rel) => objects_root.join(rel),
        None => panic!("pool {} has no objects to delete", pool.display()),
    };
    if let Err(e) = fs::remove_file(&victim) {
        panic!("remove {}: {e}", victim.display());
    }
    victim
}
/// Both sides split: pushes the sample tree with separate manifest and object
/// locations, syncs it to a destination that also uses separate manifest and
/// object locations, then pulls from the destination and checks that the tree
/// contents and the snapshot id round-trip.
#[test]
fn sync_split_both_sides_round_trips() {
// One scratch directory per role so the assertions can tell where each
// artifact landed.
let src = temp_dir("b-src");
let src_mani = temp_dir("b-srcmani");
let src_pool = temp_dir("b-srcpool");
let dst_mani = temp_dir("b-dstmani");
let dst_pool = temp_dir("b-dstpool");
let cache = temp_dir("b-cache");
let leaves = sample_leaves();
build_tree(&src, leaves);
let src_str = src.to_string_lossy().into_owned();
let src_mani_url = file_url(&src_mani);
let src_pool_url = file_url(&src_pool);
let dst_mani_url = file_url(&dst_mani);
let dst_pool_url = file_url(&dst_pool);
// Push the tree; the trimmed stdout is used as the snapshot id from here on.
let src_id = run_ok(
&[
"push",
"--objects-store",
&src_pool_url,
"--store",
&src_mani_url,
&src_str,
],
&cache,
);
assert_eq!(src_id.len(), 64, "snapshot id is 64 hex chars");
// Every sample leaf has different contents, so one object file per leaf is
// expected in the source pool.
assert_eq!(
count_pool_objects(&src_pool),
leaves.len(),
"source pool must hold one blob per distinct file"
);
// The sync under test, with both sides given explicit object locations.
let out = run_raw(
&[
"sync",
"--id",
&src_id,
"--from",
&src_mani_url,
"--from-objects",
&src_pool_url,
"--to",
&dst_mani_url,
"--to-objects",
&dst_pool_url,
],
&cache,
);
assert!(
out.status.success(),
"both-sides-split sync must succeed\nstderr: {}",
String::from_utf8_lossy(&out.stderr)
);
assert_eq!(
String::from_utf8(out.stdout).unwrap().trim_end(),
src_id,
"sync must print the snapshot id to stdout"
);
// Placement checks: manifest under --to, objects under --to-objects, and no
// objects under the manifest location.
assert_eq!(
count_manifests(&dst_mani),
1,
"the manifest must land in the dest manifest location's .manifests/"
);
assert_eq!(
count_pool_objects(&dst_pool),
leaves.len(),
"every blob must land in the dest pool"
);
assert_eq!(
count_pool_objects(&dst_mani),
0,
"no objects may land in the dest MANIFEST location (split: objects go to --to-objects)"
);
// Pull from the destination using a separate cache directory from the one
// used for push and sync.
let dest = temp_dir("b-out");
let dest_str = dest.to_string_lossy().into_owned();
let pullcache = temp_dir("b-pullcache");
run_ok(
&[
"pull",
"--objects-store",
&dst_pool_url,
"--store",
&dst_mani_url,
"--id",
&src_id,
&dest_str,
],
&pullcache,
);
assert_tree_contents(&dest, leaves);
// Re-computing the id of the pulled tree must give the original snapshot id.
assert_eq!(
run_ok(&["id", &dest_str], &pullcache),
src_id,
"tree pulled from the dest split must re-manifest to the source id"
);
}
/// When the destination object pool already holds every blob, a split sync
/// must report zero copied objects, leave the pool's file count unchanged,
/// and still publish the manifest to the destination manifest location.
#[test]
fn sync_split_dest_pool_has_blobs_recopies_zero_objects() {
let src = temp_dir("a-src");
let src_mani = temp_dir("a-srcmani");
let src_pool = temp_dir("a-srcpool");
let dst_mani = temp_dir("a-dstmani");
let dst_pool = temp_dir("a-dstpool");
let cache = temp_dir("a-cache");
let leaves = sample_leaves();
build_tree(&src, leaves);
let src_str = src.to_string_lossy().into_owned();
let src_mani_url = file_url(&src_mani);
let src_pool_url = file_url(&src_pool);
let dst_mani_url = file_url(&dst_mani);
let dst_pool_url = file_url(&dst_pool);
// Source side: push with separate manifest and object locations.
let src_id = run_ok(
&[
"push",
"--objects-store",
&src_pool_url,
"--store",
&src_mani_url,
&src_str,
],
&cache,
);
// Pre-seed: push the same tree with the DEST pool as the object store but a
// throwaway manifest location, so the dest pool gets the blobs while the
// dest manifest location stays empty (both asserted below).
let seed_mani = temp_dir("a-seedmani");
let seed_mani_url = file_url(&seed_mani);
run_ok(
&[
"push",
"--objects-store",
&dst_pool_url,
"--store",
&seed_mani_url,
&src_str,
],
&cache,
);
let dst_pool_objects_before = count_pool_objects(&dst_pool);
assert_eq!(
dst_pool_objects_before,
leaves.len(),
"the pre-seed must put every blob into the dest pool"
);
assert_eq!(
count_manifests(&dst_mani),
0,
"dest manifest location must start without the snapshot manifest"
);
// The sync under test.
let out = run_raw(
&[
"sync",
"--id",
&src_id,
"--from",
&src_mani_url,
"--from-objects",
&src_pool_url,
"--to",
&dst_mani_url,
"--to-objects",
&dst_pool_url,
],
&cache,
);
assert!(
out.status.success(),
"cross-pool sync must succeed\nstderr: {}",
String::from_utf8_lossy(&out.stderr)
);
// The summary is taken from stderr: the first line containing "copied".
let stderr = String::from_utf8(out.stderr).unwrap();
let summary = summary_line(&stderr);
assert_eq!(
count_pool_objects(&dst_pool),
dst_pool_objects_before,
"no objects may be re-uploaded when the dest pool already holds them:\n{summary}"
);
let copied = parse_count(summary, "copied")
.unwrap_or_else(|| panic!("no copied count in summary:\n{summary}"));
assert_eq!(
copied, 0,
"every blob is already in the dest pool => 0 objects copied:\n{summary}"
);
// A skipped count is optional: it is only checked when the summary line
// carries a number before the word "skipped".
if let Some(skipped) = parse_count(summary, "skipped") {
assert_eq!(
skipped,
leaves.len(),
"all {} present blobs must be reported skipped:\n{summary}",
leaves.len()
);
}
assert_eq!(
count_manifests(&dst_mani),
1,
"the manifest must be copied to the dest manifest location even when 0 objects copied"
);
// End-to-end check: the destination must be able to serve the snapshot.
let dest = temp_dir("a-out");
let dest_str = dest.to_string_lossy().into_owned();
let pullcache = temp_dir("a-pullcache");
run_ok(
&[
"pull",
"--objects-store",
&dst_pool_url,
"--store",
&dst_mani_url,
"--id",
&src_id,
&dest_str,
],
&pullcache,
);
assert_tree_contents(&dest, leaves);
assert_eq!(
run_ok(&["id", &dest_str], &pullcache),
src_id,
"the zero-object sync must still leave the dest fully serving the snapshot"
);
}
/// Source split, destination colocated: the sync is given `--from-objects`
/// but no `--to-objects`, and the test expects both the objects and the
/// manifest to land under the single `--to` location.
#[test]
fn sync_split_source_only_dest_colocated() {
let src = temp_dir("fo-src");
let src_mani = temp_dir("fo-srcmani");
let src_pool = temp_dir("fo-srcpool");
let dst = temp_dir("fo-dst");
let cache = temp_dir("fo-cache");
let leaves = sample_leaves();
build_tree(&src, leaves);
let src_str = src.to_string_lossy().into_owned();
let src_mani_url = file_url(&src_mani);
let src_pool_url = file_url(&src_pool);
let dst_url = file_url(&dst);
let src_id = run_ok(
&[
"push",
"--objects-store",
&src_pool_url,
"--store",
&src_mani_url,
&src_str,
],
&cache,
);
// No --to-objects here: the destination is a single location.
let out = run_raw(
&[
"sync",
"--id",
&src_id,
"--from",
&src_mani_url,
"--from-objects",
&src_pool_url,
"--to",
&dst_url,
],
&cache,
);
assert!(
out.status.success(),
"source-split/dest-colocated sync must succeed\nstderr: {}",
String::from_utf8_lossy(&out.stderr)
);
assert_eq!(
count_pool_objects(&dst),
leaves.len(),
"colocated dest must hold every object under its own .objects/"
);
assert_eq!(
count_manifests(&dst),
1,
"colocated dest must hold the manifest under its own .manifests/"
);
// Pull with only --store, i.e. no separate object store on the pull either.
let dest = temp_dir("fo-out");
let dest_str = dest.to_string_lossy().into_owned();
let pullcache = temp_dir("fo-pullcache");
run_ok(
&["pull", "--store", &dst_url, "--id", &src_id, &dest_str],
&pullcache,
);
assert_tree_contents(&dest, leaves);
assert_eq!(
run_ok(&["id", &dest_str], &pullcache),
src_id,
"colocated dest must fully serve the snapshot"
);
}
/// Source colocated, destination split: the tree is pushed with only
/// `--store`, then synced with `--to` and `--to-objects` pointing at
/// different directories. The test expects the manifest under `--to`, the
/// objects under `--to-objects`, and no objects under `--to`.
#[test]
fn sync_split_dest_only_source_colocated() {
let src = temp_dir("to-src");
let src_store = temp_dir("to-srcstore");
let dst_mani = temp_dir("to-dstmani");
let dst_pool = temp_dir("to-dstpool");
let cache = temp_dir("to-cache");
let leaves = sample_leaves();
build_tree(&src, leaves);
let src_str = src.to_string_lossy().into_owned();
let src_store_url = file_url(&src_store);
let dst_mani_url = file_url(&dst_mani);
let dst_pool_url = file_url(&dst_pool);
// Push without --objects-store; the objects are expected under the store's
// own .objects directory.
let src_id = run_ok(&["push", "--store", &src_store_url, &src_str], &cache);
assert_eq!(
count_pool_objects(&src_store),
leaves.len(),
"colocated source must hold its objects colocated"
);
// No --from-objects here: the source is a single location.
let out = run_raw(
&[
"sync",
"--id",
&src_id,
"--from",
&src_store_url,
"--to",
&dst_mani_url,
"--to-objects",
&dst_pool_url,
],
&cache,
);
assert!(
out.status.success(),
"source-colocated/dest-split sync must succeed\nstderr: {}",
String::from_utf8_lossy(&out.stderr)
);
assert_eq!(
count_pool_objects(&dst_pool),
leaves.len(),
"split dest must write objects to --to-objects"
);
assert_eq!(
count_manifests(&dst_mani),
1,
"split dest must write the manifest to --to"
);
assert_eq!(
count_pool_objects(&dst_mani),
0,
"split dest must NOT write objects into the manifest location"
);
// End-to-end check: pull from the split destination and compare.
let dest = temp_dir("to-out");
let dest_str = dest.to_string_lossy().into_owned();
let pullcache = temp_dir("to-pullcache");
run_ok(
&[
"pull",
"--objects-store",
&dst_pool_url,
"--store",
&dst_mani_url,
"--id",
&src_id,
&dest_str,
],
&pullcache,
);
assert_tree_contents(&dest, leaves);
assert_eq!(
run_ok(&["id", &dest_str], &pullcache),
src_id,
"split dest must fully serve the snapshot"
);
}
/// Running the same split sync twice: the second run must succeed, report
/// zero copied objects, and leave the set of files in both destination
/// locations unchanged.
#[test]
fn sync_split_dest_manifest_present_is_noop() {
let src = temp_dir("noop-src");
let src_mani = temp_dir("noop-srcmani");
let src_pool = temp_dir("noop-srcpool");
let dst_mani = temp_dir("noop-dstmani");
let dst_pool = temp_dir("noop-dstpool");
let cache = temp_dir("noop-cache");
let leaves = sample_leaves();
build_tree(&src, leaves);
let src_str = src.to_string_lossy().into_owned();
let src_mani_url = file_url(&src_mani);
let src_pool_url = file_url(&src_pool);
let dst_mani_url = file_url(&dst_mani);
let dst_pool_url = file_url(&dst_pool);
let src_id = run_ok(
&[
"push",
"--objects-store",
&src_pool_url,
"--store",
&src_mani_url,
&src_str,
],
&cache,
);
// The argument list is built once and reused so both runs are identical.
let sync_args = [
"sync",
"--id",
&src_id,
"--from",
&src_mani_url,
"--from-objects",
&src_pool_url,
"--to",
&dst_mani_url,
"--to-objects",
&dst_pool_url,
];
// First run populates the destination; snapshot its file listings.
run_ok(&sync_args, &cache);
let dst_mani_after_first = collect_files(&dst_mani);
let dst_pool_after_first = collect_files(&dst_pool);
assert!(
!dst_mani_after_first.is_empty() && !dst_pool_after_first.is_empty(),
"the first sync must populate both dest sides"
);
// Second run: same arguments, same cache directory.
let out2 = run_raw(&sync_args, &cache);
assert!(out2.status.success(), "the no-op sync must succeed");
let stderr2 = String::from_utf8(out2.stderr).unwrap();
let summary2 = summary_line(&stderr2);
let copied2 = parse_count(summary2, "copied")
.unwrap_or_else(|| panic!("no copied count in second summary:\n{summary2}"));
assert_eq!(
copied2, 0,
"a sync whose dest manifest is already present must copy 0 objects:\n{summary2}"
);
// NOTE: these comparisons are over the sets of relative file paths returned
// by collect_files; file contents are not compared.
assert_eq!(
collect_files(&dst_mani),
dst_mani_after_first,
"the no-op sync must leave the dest manifest location unchanged"
);
assert_eq!(
collect_files(&dst_pool),
dst_pool_after_first,
"the no-op sync must leave the dest pool unchanged"
);
}
/// Failure path: one object is deleted from the source pool before syncing.
/// The sync must exit non-zero and must not leave a manifest in the
/// destination, and a follow-up `fetch` of that id from the destination must
/// fail too.
#[test]
fn sync_split_missing_source_object_errors_and_writes_no_dest_manifest() {
let src = temp_dir("miss-src");
let src_mani = temp_dir("miss-srcmani");
let src_pool = temp_dir("miss-srcpool");
let dst_mani = temp_dir("miss-dstmani");
let dst_pool = temp_dir("miss-dstpool");
let cache = temp_dir("miss-cache");
let leaves = sample_leaves();
build_tree(&src, leaves);
let src_str = src.to_string_lossy().into_owned();
let src_mani_url = file_url(&src_mani);
let src_pool_url = file_url(&src_pool);
let dst_mani_url = file_url(&dst_mani);
let dst_pool_url = file_url(&dst_pool);
let src_id = run_ok(
&[
"push",
"--objects-store",
&src_pool_url,
"--store",
&src_mani_url,
&src_str,
],
&cache,
);
// Corrupt the source: remove one object file from the source pool.
let removed = delete_one_object(&src_pool);
assert!(
!removed.exists(),
"the referenced blob must actually be gone from the source pool"
);
let out = run_raw(
&[
"sync",
"--id",
&src_id,
"--from",
&src_mani_url,
"--from-objects",
&src_pool_url,
"--to",
&dst_mani_url,
"--to-objects",
&dst_pool_url,
],
&cache,
);
assert!(
!out.status.success(),
"a sync with a missing source object must FAIL (non-zero exit), stderr: {}",
String::from_utf8_lossy(&out.stderr)
);
// Two on-disk checks of the same expectation: no file at all under the
// dest .manifests directory, and none whose path spells the failed id.
assert_eq!(
count_manifests(&dst_mani),
0,
"a FAILED sync must NOT publish the manifest to the dest manifest location (manifest-last)"
);
assert!(
!dest_serves_manifest_id(&dst_mani, &src_id),
"the dest .manifests/ tree must NOT physically contain the failed id {src_id}"
);
// Behavioural probe through the CLI, using a fresh cache directory rather
// than the one used by the push and sync above.
let probecache = temp_dir("miss-probecache");
let probe = run_raw(
&[
"fetch",
"--objects-store",
&dst_pool_url,
"--store",
&dst_mani_url,
"--id",
&src_id,
],
&probecache,
);
assert!(
!probe.status.success(),
"the dest must not serve a manifest for the failed sync's id"
);
}
/// A split sync must not add or remove files on the source side: the file
/// listings of the source pool and source manifest location are compared
/// before and after, and the objects are expected in the destination pool.
#[test]
fn sync_split_source_pool_untouched_objects_land_in_dest_pool() {
let src = temp_dir("ro-src");
let src_mani = temp_dir("ro-srcmani");
let src_pool = temp_dir("ro-srcpool");
let dst_mani = temp_dir("ro-dstmani");
let dst_pool = temp_dir("ro-dstpool");
let cache = temp_dir("ro-cache");
let leaves = sample_leaves();
build_tree(&src, leaves);
let src_str = src.to_string_lossy().into_owned();
let src_mani_url = file_url(&src_mani);
let src_pool_url = file_url(&src_pool);
let dst_mani_url = file_url(&dst_mani);
let dst_pool_url = file_url(&dst_pool);
let src_id = run_ok(
&[
"push",
"--objects-store",
&src_pool_url,
"--store",
&src_mani_url,
&src_str,
],
&cache,
);
// Snapshot the source-side file listings before the sync.
let src_pool_before = collect_files(&src_pool);
let src_mani_before = collect_files(&src_mani);
assert!(
!src_pool_before.is_empty(),
"source pool must hold the blobs before the sync"
);
run_ok(
&[
"sync",
"--id",
&src_id,
"--from",
&src_mani_url,
"--from-objects",
&src_pool_url,
"--to",
&dst_mani_url,
"--to-objects",
&dst_pool_url,
],
&cache,
);
// NOTE: collect_files returns relative file paths only, so these two checks
// detect added or removed files but not modified file contents.
assert_eq!(
collect_files(&src_pool),
src_pool_before,
"the source pool must be byte-for-byte unchanged (read-only) after the sync"
);
assert_eq!(
collect_files(&src_mani),
src_mani_before,
"the source manifest location must be unchanged after the sync"
);
assert_eq!(
count_pool_objects(&dst_pool),
leaves.len(),
"objects must be written to the DEST pool"
);
// Sanity check on the fixture itself: the two pools are different paths.
assert_ne!(
dst_pool, src_pool,
"source and dest pools must be distinct directories in this test"
);
}
/// A `custom://` URL passed as `--from-objects` or as `--to-objects` must
/// make the sync fail with a message containing "in-process" or
/// "not supported", and must not leave a manifest in the destination.
#[test]
fn sync_split_external_objects_uri_rejected_per_side() {
let src = temp_dir("ext-src");
let src_mani = temp_dir("ext-srcmani");
let src_pool = temp_dir("ext-srcpool");
let dst_mani = temp_dir("ext-dstmani");
let dst_pool = temp_dir("ext-dstpool");
let cache = temp_dir("ext-cache");
let leaves = sample_leaves();
build_tree(&src, leaves);
let src_str = src.to_string_lossy().into_owned();
let src_mani_url = file_url(&src_mani);
let src_pool_url = file_url(&src_pool);
let dst_mani_url = file_url(&dst_mani);
let dst_pool_url = file_url(&dst_pool);
let src_id = run_ok(
&[
"push",
"--objects-store",
&src_pool_url,
"--store",
&src_mani_url,
&src_str,
],
&cache,
);
// Case 1: the custom:// URL is on the source side (--from-objects).
let from_ext = run_raw(
&[
"sync",
"--id",
&src_id,
"--from",
&src_mani_url,
"--from-objects",
"custom://external/source/pool",
"--to",
&dst_mani_url,
"--to-objects",
&dst_pool_url,
],
&cache,
);
assert!(
!from_ext.status.success(),
"an external --from-objects URL must be rejected (non-zero exit)"
);
let from_err = String::from_utf8_lossy(&from_ext.stderr);
assert!(
from_err.contains("in-process") || from_err.contains("not supported"),
"the rejection must name the in-process-only contract, got:\n{from_err}"
);
// The rejected run must leave both destination locations empty.
assert_eq!(
count_manifests(&dst_mani),
0,
"a rejected external --from-objects sync must not publish a dest manifest"
);
assert_eq!(
count_pool_objects(&dst_pool),
0,
"a rejected external --from-objects sync must not write dest objects"
);
// Case 2: the custom:// URL is on the destination side (--to-objects).
let to_ext = run_raw(
&[
"sync",
"--id",
&src_id,
"--from",
&src_mani_url,
"--from-objects",
&src_pool_url,
"--to",
&dst_mani_url,
"--to-objects",
"custom://external/dest/pool",
],
&cache,
);
assert!(
!to_ext.status.success(),
"an external --to-objects URL must be rejected (non-zero exit)"
);
let to_err = String::from_utf8_lossy(&to_ext.stderr);
assert!(
to_err.contains("in-process") || to_err.contains("not supported"),
"the rejection must name the in-process-only contract, got:\n{to_err}"
);
// Only the manifest location is checked for this case.
assert_eq!(
count_manifests(&dst_mani),
0,
"a rejected external --to-objects sync must not publish a dest manifest"
);
}
/// Passing a global `--objects-store` to `sync` alongside `--from-objects`
/// and `--to-objects` must not change routing: the test expects objects in
/// the `--to-objects` pool, none in the directory named by the global flag,
/// and a destination that can serve the snapshot.
#[test]
fn sync_split_objects_flags_independent_of_global_objects_store() {
let src = temp_dir("ind-src");
let src_mani = temp_dir("ind-srcmani");
let src_pool = temp_dir("ind-srcpool");
let dst_mani = temp_dir("ind-dstmani");
let dst_pool = temp_dir("ind-dstpool");
// An empty directory that takes no other part in the scenario; it is only
// ever passed as the global --objects-store.
let bogus_global = temp_dir("ind-bogusglobal");
let cache = temp_dir("ind-cache");
let leaves = sample_leaves();
build_tree(&src, leaves);
let src_str = src.to_string_lossy().into_owned();
let src_mani_url = file_url(&src_mani);
let src_pool_url = file_url(&src_pool);
let dst_mani_url = file_url(&dst_mani);
let dst_pool_url = file_url(&dst_pool);
let bogus_global_url = file_url(&bogus_global);
let src_id = run_ok(
&[
"push",
"--objects-store",
&src_pool_url,
"--store",
&src_mani_url,
&src_str,
],
&cache,
);
// The sync under test: global --objects-store plus both per-side flags.
let out = run_raw(
&[
"sync",
"--objects-store",
&bogus_global_url,
"--id",
&src_id,
"--from",
&src_mani_url,
"--from-objects",
&src_pool_url,
"--to",
&dst_mani_url,
"--to-objects",
&dst_pool_url,
],
&cache,
);
assert!(
out.status.success(),
"sync must succeed routing by the per-side flags, ignoring the global \
--objects-store\nstderr: {}",
String::from_utf8_lossy(&out.stderr)
);
assert_eq!(
count_pool_objects(&dst_pool),
leaves.len(),
"objects must land in --to-objects, not the global --objects-store pool"
);
assert_eq!(
count_pool_objects(&bogus_global),
0,
"the global --objects-store pool must be untouched by sync (ignored)"
);
assert_eq!(
count_manifests(&dst_mani),
1,
"the manifest must land in --to"
);
// Beyond the count, check that a manifest file's path spells the synced id.
assert!(
dest_serves_manifest_id(&dst_mani, &src_id),
"the dest .manifests/ tree must physically hold the synced id {src_id}"
);
// End-to-end check: pull from the destination and compare.
let dest = temp_dir("ind-out");
let dest_str = dest.to_string_lossy().into_owned();
let pullcache = temp_dir("ind-pullcache");
run_ok(
&[
"pull",
"--objects-store",
&dst_pool_url,
"--store",
&dst_mani_url,
"--id",
&src_id,
&dest_str,
],
&pullcache,
);
assert_tree_contents(&dest, leaves);
assert_eq!(
run_ok(&["id", &dest_str], &pullcache),
src_id,
"the per-side-routed sync must leave the dest fully serving the snapshot"
);
}