snapdir-cli 1.9.0

snapdir CLI implementation: the library behind the `snapdir` binary (which ships in the `snapdir` crate).
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
//! Black-box repro for gate `dx-id-stdin-verify` (phase 30).
//!
//! `snapdir id --help` promises: "Print the manifest ID of a directory or a
//! manifest piped via stdin" and "[PATH] … omit to read a manifest from stdin".
//! The CURRENT binary does NOT honor that: when invoked with no PATH it routes
//! through `resolve_root(None)` which falls back to `current_dir()`, so it walks
//! the CWD and IGNORES stdin entirely. Consequences:
//!
//!   * `snapdir id` reading "from stdin" is non-deterministic for a FIXED stdin
//!     input — its result is the id of whatever the CWD happens to be.
//!   * `snapdir manifest <dir> | snapdir id` never round-trips to
//!     `snapdir id <dir>` (it hashes the CWD, not the piped manifest).
//!
//! The snapshot-id spec (`snapdir-core::merkle::snapshot_id`) is
//! `manifest | grep -v '^#' | b3sum --no-names` over the manifest text plus the
//! `echo` trailing newline. So the INVARIANT the stdin path must satisfy is:
//!
//!   id(stdin = `snapdir manifest <dir>`)  ==  `snapdir id <dir>`
//!
//! and it must depend ONLY on the stdin bytes (not on the CWD).
//!
//! These tests are authored to FAIL against the current binary and PASS once the
//! `id` command's stdin-read path is implemented (lane: cli stdin-read). Do not
//! weaken them to pass.
//!
//! Conventions mirror `tests/dx_args.rs`: drive the built binary with
//! `assert_cmd`, pin the cache under a tempdir, build a hermetic fixture tree.

use std::io::Write;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::process::{Command, Stdio};

use assert_cmd::prelude::*;
use assert_fs::prelude::*;
use assert_fs::TempDir;

/// Builds a [`Command`] for the compiled `snapdir` binary with
/// `SNAPDIR_CACHE_DIR` pointed at `cache`, so a test only ever uses the cache
/// directory it was handed and never the real one.
///
/// Panics if the `snapdir` binary cannot be located.
fn snapdir(cache: &Path) -> Command {
    let mut command = Command::cargo_bin("snapdir").expect("snapdir binary built");
    command.env("SNAPDIR_CACHE_DIR", cache);
    command
}

/// Populates `dir` with the fixed fixture used by these tests:
///
/// * `a.txt` containing `hello`, mode `0644`
/// * `sub/b.txt` containing `world!!`, mode `0600`
/// * `sub/` and the root directory, both mode `0755`
///
/// Every mode is set explicitly so `id`/`manifest` output is reproducible.
fn build_tree(dir: &TempDir) {
    let top_file = dir.child("a.txt");
    top_file.write_str("hello").unwrap();
    let nested_file = dir.child("sub/b.txt");
    nested_file.write_str("world!!").unwrap();
    let sub_dir = dir.child("sub");

    // Files first, then their parent directory, then the root.
    for (path, mode) in [
        (top_file.path(), 0o644),
        (nested_file.path(), 0o600),
        (sub_dir.path(), 0o755),
        (dir.path(), 0o755),
    ] {
        std::fs::set_permissions(path, std::fs::Permissions::from_mode(mode)).unwrap();
    }
}

/// Runs `snapdir id <args>` from `cwd` with `stdin_bytes` piped to its stdin
/// and returns the trimmed stdout.
///
/// stderr is captured (not discarded) so that a failing run reports why it
/// failed in the assertion message.
///
/// # Panics
///
/// Panics if the process cannot be spawned or waited on, if writing stdin
/// fails for any reason other than the child having closed its end of the pipe,
/// if the exit status is not success, or if stdout is not valid UTF-8.
fn id_with_stdin(cache: &Path, cwd: &Path, args: &[&str], stdin_bytes: &[u8]) -> String {
    let mut child = snapdir(cache)
        .arg("id")
        .args(args)
        .current_dir(cwd)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("spawn snapdir id");

    {
        let mut pipe = child
            .stdin
            .take()
            .expect("child stdin was configured as piped");
        // A child that exits without draining stdin closes the pipe under us.
        // That must surface through the assertions on its output (here and in
        // the caller), not as a racy write panic, so BrokenPipe is tolerated.
        match pipe.write_all(stdin_bytes) {
            Ok(()) => {}
            Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => {}
            Err(err) => panic!("writing stdin to `snapdir id` failed: {err}"),
        }
        // `pipe` is dropped at the end of this scope, closing stdin so the
        // child observes EOF.
    }

    let out = child.wait_with_output().expect("wait for snapdir id");
    assert!(
        out.status.success(),
        "`snapdir id` (stdin) must succeed; status: {}; stderr: {}",
        out.status,
        String::from_utf8_lossy(&out.stderr)
    );
    String::from_utf8(out.stdout)
        .expect("`snapdir id` stdout must be UTF-8")
        .trim()
        .to_owned()
}

/// Runs `snapdir manifest <tree>` and `snapdir id <tree>` against the fixture
/// and returns `(manifest_bytes, id_dir)`: the raw stdout bytes of the
/// manifest run and the whitespace-trimmed stdout of the id run.
///
/// Panics if either command cannot be run or exits unsuccessfully.
fn manifest_and_id(cache: &Path, tree: &Path) -> (Vec<u8>, String) {
    let manifest_run = snapdir(cache).arg("manifest").arg(tree).output().unwrap();
    assert!(manifest_run.status.success(), "manifest <tree> must succeed");

    let id_run = snapdir(cache).arg("id").arg(tree).output().unwrap();
    assert!(id_run.status.success(), "id <tree> must succeed");

    let id_text = String::from_utf8(id_run.stdout).unwrap();
    (manifest_run.stdout, id_text.trim().to_owned())
}

/// INVARIANT 1 — fixed-input determinism.
///
/// The same manifest bytes are piped to `snapdir id` from two unrelated,
/// non-empty working directories, each run with its own cold cache. Both runs
/// must print the same id. A binary that walks the CWD instead of reading stdin
/// fails this, because the two directories have different contents.
#[test]
fn id_from_stdin_depends_only_on_stdin_not_cwd() {
    let fixture = TempDir::new().unwrap();
    build_tree(&fixture);
    let producer_cache = TempDir::new().unwrap();
    let (manifest_bytes, _) = manifest_and_id(producer_cache.path(), fixture.path());

    // Pipes the shared manifest to `id` from a fresh cwd that holds exactly one
    // file, using a distinct cold cache to rule out any cache leakage.
    let id_from_cwd_holding = |file_name: &str, contents: &str| {
        let cwd = TempDir::new().unwrap();
        cwd.child(file_name).write_str(contents).unwrap();
        let cold_cache = TempDir::new().unwrap();
        id_with_stdin(cold_cache.path(), cwd.path(), &[], &manifest_bytes)
    };

    let from_a = id_from_cwd_holding("alpha.txt", "A");
    let from_b = id_from_cwd_holding("beta.txt", "BBBB");

    assert_eq!(
        from_a, from_b,
        "`snapdir id` on FIXED stdin must be CWD-independent, \
         but got {from_a} (cwd_a) vs {from_b} (cwd_b) — stdin is being ignored"
    );
}

/// INVARIANT 2 — round-trip.
///
/// The output of `snapdir manifest <tree>`, piped into a bare `snapdir id`,
/// must print the same id as `snapdir id <tree>`. The consuming `id` runs from
/// a directory that is deliberately not the fixture tree, so a binary that
/// hashes the CWD instead of the piped manifest is exposed.
#[test]
fn manifest_piped_to_id_round_trips_to_id_dir() {
    let fixture = TempDir::new().unwrap();
    build_tree(&fixture);
    let producer_cache = TempDir::new().unwrap();
    let (manifest_bytes, id_dir) = manifest_and_id(producer_cache.path(), fixture.path());

    // Working directory for the consumer: unrelated to the fixture, non-empty.
    let elsewhere = TempDir::new().unwrap();
    elsewhere.child("noise.txt").write_str("noise").unwrap();

    let consumer_cache = TempDir::new().unwrap();
    let piped = id_with_stdin(
        consumer_cache.path(),
        elsewhere.path(),
        &[],
        &manifest_bytes,
    );

    assert_eq!(
        piped, id_dir,
        "`manifest <tree> | id` ({piped}) must round-trip to `id <tree>` ({id_dir})"
    );
}

// ---------------------------------------------------------------------------
// Impl-revealed cases (gate `dx-id-stdin-review`, phase 30).
//
// The landed handler (`Command::Id`, cli `dc6b389`) has exactly three branches:
//   1. `path.is_none() && !stdin.is_terminal()` -> read stdin to a String,
//      `Manifest::parse` (strips empty + `#` lines), then frozen `snapshot_id`.
//   2. `path.is_none() && stdin.is_terminal()`  -> loud `bail!` (no cwd walk).
//   3. else                                     -> walk the given PATH.
//
// These tests pin every observable consequence of that. They are written to
// PASS against the current binary; a failure is a REAL BUG, not a test to relax.
// ---------------------------------------------------------------------------

/// Runs `snapdir id <args>` from `cwd` with stdin attached to the given
/// `Stdio` source and returns the full `Output` (status, stdout, stderr).
///
/// Unlike `id_with_stdin`, this does NOT assert success, so error-path tests
/// can inspect both the exit status and the message.
///
/// `stdin_bytes`, when `Some`, is written to the child's stdin and the pipe is
/// then closed; this requires `stdin` to be `Stdio::piped()`. When `None`,
/// nothing is written.
///
/// # Panics
///
/// Panics if the process cannot be spawned or waited on, if `stdin_bytes` is
/// `Some` but `stdin` was not piped, or if writing stdin fails for any reason
/// other than the child having closed its end of the pipe.
fn id_run_raw(
    cache: &Path,
    cwd: &Path,
    args: &[&str],
    stdin: Stdio,
    stdin_bytes: Option<&[u8]>,
) -> std::process::Output {
    let mut child = snapdir(cache)
        .arg("id")
        .args(args)
        .current_dir(cwd)
        .stdin(stdin)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("spawn snapdir id");

    if let Some(bytes) = stdin_bytes {
        let mut pipe = child
            .stdin
            .take()
            .expect("`stdin_bytes` requires `stdin` to be `Stdio::piped()`");
        // A child that rejects its input and exits early closes the pipe; the
        // caller wants that child's status and stderr, not a write panic here.
        match pipe.write_all(bytes) {
            Ok(()) => {}
            Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => {}
            Err(err) => panic!("writing stdin to `snapdir id` failed: {err}"),
        }
        // `pipe` is dropped here, closing stdin so the child observes EOF.
    }

    child.wait_with_output().expect("wait for snapdir id")
}

/// KEYSTONE (strengthened round-trip): `manifest <dir> | id` == `id <dir>`,
/// compared as exact strings, with the consuming `id` launched from an
/// UNRELATED working directory.
///
/// The cwd here is neither the fixture tree nor empty, so the round-trip's
/// independence from the cwd is explicit: a regression to walking the cwd
/// would hash the decoy contents instead. The id is also sanity-checked to be
/// 64 hex characters, so a blank line cannot pass.
#[test]
fn round_trip_keystone_is_cwd_independent_and_byte_identical() {
    let fixture = TempDir::new().unwrap();
    build_tree(&fixture);
    let producer_cache = TempDir::new().unwrap();
    let (manifest_bytes, id_dir) = manifest_and_id(producer_cache.path(), fixture.path());

    // Decoy working directory with its own distinct, non-empty contents.
    let decoy_cwd = TempDir::new().unwrap();
    let decoy_file = decoy_cwd.child("decoy/keystone.txt");
    decoy_file.write_str("totally different bytes").unwrap();

    // The consumer gets a fresh cold cache and runs from the decoy cwd.
    let consumer_cache = TempDir::new().unwrap();
    let piped = id_with_stdin(
        consumer_cache.path(),
        decoy_cwd.path(),
        &[],
        &manifest_bytes,
    );

    assert_eq!(
        piped, id_dir,
        "`manifest <dir> | id` from an unrelated cwd must be byte-identical to \
         `id <dir>`: got {piped} vs {id_dir}"
    );

    // Shape check on the reference id itself.
    assert_eq!(
        id_dir.len(),
        64,
        "snapshot id must be 64 hex chars: {id_dir}"
    );
    let all_hex = id_dir.chars().all(|c| c.is_ascii_hexdigit());
    assert!(all_hex, "snapshot id must be hex: {id_dir}");
}

/// No PATH, stdin attached to `/dev/null` (`Stdio::null()`): the run must not
/// walk the cwd.
///
/// This test does not attach a terminal, so it does not exercise the "bare
/// `snapdir id` on a TTY bails loudly" branch; it substitutes `/dev/null` for
/// "no manifest on stdin".
///
/// IMPL NOTE (pinned, surprising): the handler keys off `is_terminal()`, NOT
/// off emptiness. `/dev/null` is not a terminal, so the run takes the
/// stdin-read branch, parses the empty string, and succeeds with the
/// EMPTY-MANIFEST id. Asserted here: exit success, the same id as an empty
/// pipe produces, and an id that differs from the id of the non-empty cwd.
#[test]
fn id_no_path_with_dev_null_is_empty_manifest_id_not_cwd_walk() {
    // Non-empty cwd: if the binary walked it, the walk's id would be printed.
    let cwd = TempDir::new().unwrap();
    let marker = cwd.child("walked.txt");
    marker
        .write_str("if you see this id, you walked the cwd")
        .unwrap();

    // Reference 1: what an actual walk of the cwd prints.
    let cache = TempDir::new().unwrap();
    let walk = snapdir(cache.path())
        .arg("id")
        .arg(cwd.path())
        .output()
        .unwrap();
    assert!(walk.status.success());
    let cwd_id = String::from_utf8(walk.stdout).unwrap().trim().to_owned();

    // Reference 2: the id printed for an empty pipe (parse("") -> empty manifest).
    let empty_cache = TempDir::new().unwrap();
    let empty_id = id_with_stdin(empty_cache.path(), cwd.path(), &[], b"");

    // Subject: stdin is /dev/null — not a TTY, and not a pipe carrying bytes.
    let null_cache = TempDir::new().unwrap();
    let out = id_run_raw(null_cache.path(), cwd.path(), &[], Stdio::null(), None);
    let got = String::from_utf8(out.stdout).unwrap().trim().to_owned();

    assert!(
        out.status.success(),
        "`id` with /dev/null stdin currently takes the empty-manifest branch \
         and succeeds; stderr: {}",
        String::from_utf8_lossy(&out.stderr)
    );
    assert_eq!(
        got, empty_id,
        "`id < /dev/null` must yield the deterministic EMPTY-manifest id, got {got}"
    );
    assert_ne!(
        got, cwd_id,
        "`id < /dev/null` must NOT walk the cwd (cwd id was {cwd_id})"
    );
}

/// Empty stdin delivered through a real PIPE that is closed without any bytes
/// (as opposed to `/dev/null`).
///
/// Pinned behavior: the empty input is parsed as an empty manifest and the run
/// exits 0. Two independent runs, each with its own cold cache, must print the
/// same id, and that id must differ from the id of the non-empty cwd — i.e.
/// the result comes from the (empty) byte stream, not from a cwd walk.
#[test]
fn id_empty_piped_stdin_is_deterministic_empty_manifest_id() {
    // Non-empty cwd, so a silent walk would be distinguishable.
    let cwd = TempDir::new().unwrap();
    let filler = cwd.child("noise.txt");
    filler.write_str("noise noise noise").unwrap();

    // What a real walk of that cwd prints.
    let cache = TempDir::new().unwrap();
    let walk = snapdir(cache.path())
        .arg("id")
        .arg(cwd.path())
        .output()
        .unwrap();
    assert!(walk.status.success());
    let cwd_id = String::from_utf8(walk.stdout).unwrap().trim().to_owned();

    // Each empty-pipe run gets a cache of its own.
    let run_with_empty_pipe = || {
        let cold_cache = TempDir::new().unwrap();
        id_with_stdin(cold_cache.path(), cwd.path(), &[], b"")
    };
    let first = run_with_empty_pipe();
    let second = run_with_empty_pipe();

    assert_eq!(
        first, second,
        "empty piped stdin must be deterministic: {first} vs {second}"
    );
    assert_ne!(
        first, cwd_id,
        "empty piped stdin must NOT produce the cwd id ({cwd_id})"
    );
}

/// Trailing newline handling.
///
/// Per the frozen `snapshot_id` contract as described in this file, the
/// manifest is split with `str::lines()` (which ignores one trailing newline)
/// and exactly one `\n` is appended before hashing. So the same manifest piped
/// WITH and WITHOUT its single trailing newline must print the same id, and
/// that id must equal `id <dir>`. This is why `manifest <dir> | id` can
/// round-trip even though `manifest` ends its output with a newline.
#[test]
fn id_trailing_newline_is_normalized_to_snapshot_id_contract() {
    let fixture = TempDir::new().unwrap();
    build_tree(&fixture);
    let producer_cache = TempDir::new().unwrap();
    let (with_nl, id_dir) = manifest_and_id(producer_cache.path(), fixture.path());

    // Precondition: the emitted manifest ends in '\n' ...
    assert_eq!(
        with_nl.last(),
        Some(&b'\n'),
        "`manifest <dir>` output is expected to end with a single newline"
    );
    // ... and dropping that one byte leaves a content line at the end, i.e.
    // there was exactly one trailing newline. Internal line breaks are untouched.
    let no_nl = &with_nl[..with_nl.len() - 1];
    assert_ne!(
        no_nl.last(),
        Some(&b'\n'),
        "fixture must not end with a blank line; stripping one '\\n' must leave a content line"
    );

    let cwd = TempDir::new().unwrap();
    let cache_with = TempDir::new().unwrap();
    let cache_without = TempDir::new().unwrap();
    let id_with = id_with_stdin(cache_with.path(), cwd.path(), &[], &with_nl);
    let id_without = id_with_stdin(cache_without.path(), cwd.path(), &[], no_nl);

    assert_eq!(
        id_with, id_without,
        "trailing newline must be normalized: with-nl {id_with} vs without-nl {id_without}"
    );
    assert_eq!(
        id_with, id_dir,
        "the piped-manifest id must equal `id <dir>` per the frozen snapshot_id contract"
    );
}

/// `#`-comment lines are ignored when computing the id.
///
/// The snapshot id is defined over the comment-STRIPPED manifest (this file
/// describes `Manifest::parse` as dropping `^#` lines, mirroring `id <dir>`).
/// So wrapping the real manifest in extra `#` lines must print the same id as
/// the plain manifest, and both must equal `id <dir>`.
#[test]
fn id_comment_lines_are_stripped_and_do_not_affect_the_id() {
    let fixture = TempDir::new().unwrap();
    build_tree(&fixture);
    let producer_cache = TempDir::new().unwrap();
    let (manifest_bytes, id_dir) = manifest_and_id(producer_cache.path(), fixture.path());

    // Two leading comments, the untouched manifest, then one trailing comment.
    let commented: Vec<u8> = [
        &b"# snapdir manifest header comment\n"[..],
        &b"# generated-by: adversary review fixture\n"[..],
        &manifest_bytes[..],
        &b"# trailing comment after the entries\n"[..],
    ]
    .concat();

    let cwd = TempDir::new().unwrap();
    let plain_cache = TempDir::new().unwrap();
    let commented_cache = TempDir::new().unwrap();
    let id_plain = id_with_stdin(plain_cache.path(), cwd.path(), &[], &manifest_bytes);
    let id_commented = id_with_stdin(commented_cache.path(), cwd.path(), &[], &commented);

    assert_eq!(
        id_plain, id_commented,
        "adding/removing `#`-comment lines must NOT change the id: \
         plain {id_plain} vs commented {id_commented}"
    );
    assert_eq!(
        id_commented, id_dir,
        "the comment-stripped piped id must equal `id <dir>` ({id_dir})"
    );
}

/// Malformed manifest: garbage on stdin must produce a CLEAN nonzero error
/// (a parse-error message on stderr), NOT a panic and NOT a cwd-derived id.
/// Pinned: the handler wraps the parse failure with `parsing manifest from
/// stdin` context and bails (nonzero exit, nothing on stdout).
///
/// "Not a panic" is checked explicitly: an unwinding Rust panic still exits
/// with a status code (conventionally 101) and prints `panicked at` on
/// stderr, so checking only that a status code exists would not catch it.
#[test]
fn id_malformed_stdin_errors_cleanly_without_cwd_fallback() {
    let cache = TempDir::new().unwrap();

    // Non-empty cwd so a silent walk, if it happened, would print a real id.
    let cwd = TempDir::new().unwrap();
    cwd.child("present.txt").write_str("present").unwrap();
    let cwd_id = {
        let out = snapdir(cache.path())
            .arg("id")
            .arg(cwd.path())
            .output()
            .unwrap();
        assert!(out.status.success());
        String::from_utf8(out.stdout).unwrap().trim().to_owned()
    };

    let c = TempDir::new().unwrap();
    let out = id_run_raw(
        c.path(),
        cwd.path(),
        &[],
        Stdio::piped(),
        Some(b"this is not a manifest @@@\nrandom !!! garbage\n"),
    );
    let stdout = String::from_utf8(out.stdout).unwrap();
    let stderr = String::from_utf8_lossy(&out.stderr);

    assert!(
        !out.status.success(),
        "malformed stdin must produce a nonzero exit; stdout={stdout:?} stderr={stderr}"
    );
    // Killed by a signal (e.g. SIGABRT/SIGSEGV) => no exit code at all.
    assert!(
        out.status.code().is_some(),
        "malformed stdin must exit cleanly (a code), not via a signal/panic; stderr={stderr}"
    );
    // An unwinding panic DOES yield an exit code, so rule it out separately;
    // otherwise a panic whose message mentions "manifest"/"parse" would pass
    // the stderr check below.
    assert!(
        out.status.code() != Some(101) && !stderr.contains("panicked at"),
        "malformed stdin must be rejected by error handling, not by a panic; stderr={stderr}"
    );
    assert!(
        stderr.contains("manifest") || stderr.contains("stdin") || stderr.contains("parse"),
        "malformed stdin must emit a parse-error message; got stderr: {stderr}"
    );
    assert!(
        stdout.trim().is_empty(),
        "malformed stdin must NOT print an id on stdout; got: {stdout:?}"
    );
    assert!(
        !stdout.contains(&cwd_id),
        "malformed stdin must NOT fall back to the cwd id ({cwd_id})"
    );
}