cordance-scan 0.1.1

Cordance repository scanners. Deterministic surface classification.
Documentation
//! Integration tests for `cordance_scan::walker`.

use std::io::Write;

use camino::Utf8PathBuf;
use cordance_core::source::SourceClass;
use cordance_scan::walker::walk;

/// Locates the `minimal-rust` scan fixture used by the tests in this file.
///
/// The path is resolved relative to this crate's manifest directory
/// (expected to be `<workspace>/crates/cordance-scan`), climbing two levels
/// before descending into `fixtures/scan/minimal-rust`.
fn fixture_root() -> Utf8PathBuf {
    // Cargo bakes CARGO_MANIFEST_DIR in at compile time.
    let mut root = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    root.push("../../fixtures/scan/minimal-rust");
    root
}

/// Walks the `minimal-rust` fixture and checks that a handful of well-known
/// files are present, carry the expected `SourceClass`, and that nothing in
/// the fixture is reported as blocked.
#[test]
fn walk_minimal_rust_fixture() {
    let root = fixture_root();
    let records = walk(&root).expect("walk should succeed");

    let total = records.len();
    assert!(total >= 5, "expected >= 5 records, got {}", total);

    // Architecture decision record under docs/adr/.
    let adr_class = records
        .iter()
        .find(|rec| rec.path.as_str().contains("0001-test.md"))
        .map(|rec| &rec.class)
        .expect("docs/adr/0001-test.md should be present");
    assert_eq!(
        adr_class,
        &SourceClass::ProjectAdr,
        "ADR should be classified as ProjectAdr"
    );

    // `deny.toml` is matched exactly, not by substring.
    let deny_class = records
        .iter()
        .find(|rec| rec.path.as_str() == "deny.toml")
        .map(|rec| &rec.class)
        .expect("deny.toml should be present");
    assert_eq!(
        deny_class,
        &SourceClass::ProjectReleaseGate,
        "deny.toml should be ProjectReleaseGate"
    );

    // Ordinary crate source.
    let lib_class = records
        .iter()
        .find(|rec| rec.path.as_str().contains("src/lib.rs"))
        .map(|rec| &rec.class)
        .expect("src/lib.rs should be present");
    assert_eq!(
        lib_class,
        &SourceClass::ProjectSourceCode,
        "src/lib.rs should be ProjectSourceCode"
    );

    assert!(
        records.iter().all(|rec| !rec.blocked),
        "no files in the fixture should be blocked"
    );
}

/// Seeds a temporary directory with a `.env` file and a `README.md`, walks
/// it, and checks that `.env` is reported as blocked with an empty sha256
/// while `README.md` is not blocked.
#[test]
fn blocked_path_not_read() {
    let dir = tempfile::tempdir().expect("tempdir");

    // (file name, contents, create-failure message, write-failure message)
    // `.env` is expected to be blocked; `README.md` is expected not to be.
    let seeds: [(&str, &[u8], &str, &str); 2] = [
        (
            ".env",
            &b"SECRET=hunter2\n"[..],
            "create .env",
            "write .env",
        ),
        (
            "README.md",
            &b"# README\n"[..],
            "create README",
            "write README",
        ),
    ];
    for (name, contents, create_msg, write_msg) in seeds {
        let mut file = std::fs::File::create(dir.path().join(name)).expect(create_msg);
        file.write_all(contents).expect(write_msg);
        // `file` is dropped (and closed) at the end of each iteration.
    }

    let root = Utf8PathBuf::from(dir.path().to_str().expect("tempdir path is utf-8"));
    let records = walk(&root).expect("walk should succeed");

    let env_record = records
        .iter()
        .find(|rec| rec.path.as_str().contains(".env"))
        .expect(".env record should be present");
    assert!(env_record.blocked, ".env should be blocked");
    assert!(
        env_record.sha256.is_empty(),
        ".env sha256 must be empty (content not read)"
    );

    let readme_record = records
        .iter()
        .find(|rec| rec.path.as_str().contains("README.md"))
        .expect("README.md record should be present");
    assert!(!readme_record.blocked, "README.md should not be blocked");
}

/// Walks the fixture and checks that the returned records are already in
/// ascending path order by comparing them against a sorted copy.
#[test]
fn results_are_sorted_by_path() {
    let root = fixture_root();
    let records = walk(&root).expect("walk should succeed");

    let observed: Vec<&str> = records.iter().map(|rec| rec.path.as_str()).collect();
    let expected = {
        let mut ordered = observed.clone();
        ordered.sort_unstable();
        ordered
    };
    assert_eq!(observed, expected, "records must be sorted by path");
}

/// Regression guard for the small-repo code path of `walk`.
///
/// The minimal-rust fixture holds far fewer files than `PARALLEL_THRESHOLD`
/// (256), so walking it goes through the sequential branch that must keep
/// working after the parallel refactor.
#[test]
fn sequential_path_works_for_small_fixture() {
    let root = fixture_root();
    let records = walk(&root).expect("walk should succeed");

    assert!(records.len() >= 5, "expected >= 5 records");

    // A record is faulty when it is neither blocked nor hashed.
    let missing_hash = records
        .iter()
        .any(|rec| !rec.blocked && rec.sha256.is_empty());
    assert!(
        !missing_hash,
        "every non-blocked record must have a sha256"
    );
}

/// On Unix, create an unreadable file and assert the walker records it as a
/// `BlockedSurface` with an explanatory `blocked_reason` rather than emitting
/// a phantom record with an empty sha256.
///
/// The test is a no-op in two situations where an unreadable file cannot be
/// produced:
///
/// * on non-Unix targets, because chmod-style permission tricks aren't
///   reliably honoured there (the hash-failure path is still exercised
///   through unit tests against the hasher);
/// * when the process can still open the file after `chmod 000`, which is
///   what happens when the tests run as root or with `CAP_DAC_OVERRIDE`
///   (typical for container-based CI). Without this check the `blocked`
///   assertion below would fail spuriously.
#[test]
fn hash_failure_records_as_blocked_surface() {
    if !cfg!(unix) {
        return;
    }
    let dir = tempfile::tempdir().expect("tempdir");
    let p = dir.path().join("locked.bin");
    std::fs::write(&p, b"x").expect("write locked.bin");

    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o000))
            .expect("chmod 000 on locked.bin");
    }

    // Probe: if we can still open the file, permission bits are not being
    // enforced for this process, so the hash-failure path cannot be
    // triggered. Restore the mode and skip.
    if std::fs::File::open(&p).is_ok() {
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let _ = std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644));
        }
        return;
    }

    let root: Utf8PathBuf = dir.path().to_str().expect("tempdir path is utf-8").into();

    let records = walk(&root).expect("walk should succeed");

    // Restore permissions before asserting so the file is left in a normal
    // state for tempdir cleanup even if an assertion below fails.
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let _ = std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644));
    }

    let locked = records
        .iter()
        .find(|r| r.path.as_str().ends_with("locked.bin"));

    // The walker may or may not surface the file depending on filesystem
    // semantics. If it did, it must be marked blocked with an empty hash and
    // a path-free `hash failed` reason.
    if let Some(r) = locked {
        assert!(
            r.blocked,
            "expected blocked record for unreadable file, got {r:?}"
        );
        assert_eq!(r.class, SourceClass::BlockedSurface);
        assert!(
            r.sha256.is_empty(),
            "blocked hash-failure records must not carry a fake sha256"
        );
        let reason = r
            .blocked_reason
            .as_deref()
            .expect("blocked record must carry a reason");
        assert!(
            reason.starts_with("hash failed"),
            "unexpected blocked_reason: {reason:?}"
        );
        // Round-2 redteam HIGH: blocked_reason must NOT leak the on-disk
        // absolute path. `pack.json` is a shareable build artefact.
        let abs_str = p.to_string_lossy();
        assert!(
            !reason.contains(abs_str.as_ref()),
            "blocked_reason leaks absolute path: reason={reason:?} path={abs_str}"
        );
        assert!(
            !reason.contains("locked.bin"),
            "blocked_reason still contains file name fragment: {reason:?}"
        );
    }
}

/// Round-2 redteam HIGH: even if a hash failure produces a custom error with
/// a path embedded, the walker must redact it before surfacing the message
/// to callers. This is the path-disclosure-prevention contract test.
///
/// The test returns early without asserting when the file is still openable
/// after `chmod 000` (running as root or with `CAP_DAC_OVERRIDE`), because
/// no hash failure can be provoked in that environment and the `blocked`
/// assertion would fail for reasons unrelated to the contract under test.
#[test]
#[cfg(unix)]
fn hash_failure_blocked_reason_does_not_leak_path() {
    use std::os::unix::fs::PermissionsExt;

    let dir = tempfile::tempdir().expect("tempdir");
    let p = dir.path().join("unreadable.bin");
    std::fs::write(&p, b"secret").expect("seed file");
    std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o000)).expect("chmod 000");

    // Probe: privileged processes bypass permission bits, in which case the
    // file is hashed normally and there is nothing to check.
    if std::fs::File::open(&p).is_ok() {
        let _ = std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644));
        return;
    }

    let root: Utf8PathBuf = dir.path().to_str().expect("tempdir path is utf-8").into();

    let records = walk(&root).expect("walk");

    // Restore permissions for cleanup regardless of assertion outcome.
    let _ = std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644));

    let Some(r) = records
        .iter()
        .find(|r| r.path.as_str().ends_with("unreadable.bin"))
    else {
        // Filesystem didn't surface the file at all — nothing to assert.
        return;
    };
    assert!(r.blocked, "unreadable file must be blocked: {r:?}");
    let reason = r
        .blocked_reason
        .as_deref()
        .expect("blocked record must carry a reason");
    // Neither the full file path nor the tempdir prefix may appear in the
    // reason string.
    let abs_str = p.to_string_lossy();
    assert!(
        !reason.contains(abs_str.as_ref()),
        "blocked_reason leaks absolute path: {reason:?}"
    );
    let dir_str = dir.path().to_string_lossy();
    assert!(
        !reason.contains(dir_str.as_ref()),
        "blocked_reason leaks tempdir prefix: {reason:?}"
    );
}

/// Exercises the rayon-parallel branch of `walk` by generating a synthetic
/// repository with more files than `PARALLEL_THRESHOLD`.
///
/// Checks that all 300 files are reported, that none is blocked, that each
/// carries a non-empty sha256, and that the output is still ordered by path
/// even though hashing ran in parallel.
#[test]
fn parallel_path_handles_300_files() {
    let dir = tempfile::tempdir().expect("tempdir");
    let root: Utf8PathBuf = dir.path().to_str().expect("tempdir path is utf-8").into();

    // Zero-padded names keep lexical order aligned with numeric order.
    (0..300).for_each(|i| {
        let name = format!("file_{i:03}.txt");
        let body = format!("content {i}");
        let mut file = std::fs::File::create(dir.path().join(name)).expect("create");
        file.write_all(body.as_bytes()).expect("write");
    });

    let records = walk(&root).expect("walk should succeed");

    assert_eq!(records.len(), 300, "expected exactly 300 records");

    let readable_count = records.iter().filter(|rec| !rec.blocked).count();
    assert_eq!(
        readable_count,
        300,
        "no synthetic file should be blocked"
    );
    assert!(
        records
            .iter()
            .filter(|rec| !rec.blocked)
            .all(|rec| !rec.sha256.is_empty()),
        "every non-blocked record must carry a real sha256"
    );

    // Output order must remain deterministic regardless of parallel hashing:
    // compare every record with its successor.
    for (prev, next) in records.iter().zip(records.iter().skip(1)) {
        assert!(
            prev.path <= next.path,
            "records must be sorted by path: {} > {}",
            prev.path,
            next.path
        );
    }
}