// floe-cli 0.5.6
//
// CLI for Floe, a YAML-driven technical ingestion tool.
//! Delegation behavior of the lean `floe` binary toward the `floe-duckdb`
//! companion. These tests build against default features (no `duckdb`), so the
//! test binary is the lean variant that must delegate.

use assert_cmd::Command;
use predicates::prelude::*;
use std::fs;
use tempfile::tempdir;

/// Writes a minimal config whose accepted sink has `format: duckdb` and
/// returns the path of the written file.
///
/// The file is created as `config.yml` inside `dir`. The report, input and
/// output locations it references (`report`, `in.csv`, `out.duckdb`) are all
/// derived from `dir`; none of them is created here.
///
/// Paths are embedded in double-quoted YAML scalars, where `\` introduces an
/// escape sequence and `"` terminates the scalar. Both characters are escaped
/// before interpolation so that paths containing them (for example Windows
/// paths such as `C:\Users\...`) yield a valid config that round-trips to the
/// same path. Paths without those characters are written unchanged.
///
/// # Panics
///
/// Panics if the config file cannot be written.
fn write_duckdb_config(dir: &std::path::Path) -> std::path::PathBuf {
    // Render a path so it survives YAML double-quoted scalar parsing.
    // Backslashes are handled first so the escapes added for quotes are not
    // themselves doubled.
    let yaml_escaped = |path: std::path::PathBuf| {
        path.display()
            .to_string()
            .replace('\\', "\\\\")
            .replace('"', "\\\"")
    };

    let config_path = dir.join("config.yml");
    let body = format!(
        r#"version: "0.1"
report:
  path: "{report}"
entities:
  - name: customers
    source:
      format: csv
      path: "{input}"
    sink:
      accepted:
        format: duckdb
        path: "{out}"
        duckdb:
          table: customers
          schema: main
    policy:
      severity: warn
    schema:
      columns:
        - name: id
          type: string
"#,
        report = yaml_escaped(dir.join("report")),
        input = yaml_escaped(dir.join("in.csv")),
        out = yaml_escaped(dir.join("out.duckdb")),
    );
    fs::write(&config_path, body).expect("write config");
    config_path
}

/// With a DuckDB accepted sink and no companion reachable through PATH, the
/// run must fail, and stderr must mention the DuckDB sink, the `floe-duckdb`
/// companion and the `--features duckdb` build option.
#[test]
fn lean_run_with_duckdb_sink_and_no_companion_errors_clearly() {
    let workspace = tempdir().expect("tempdir");
    let config = write_duckdb_config(workspace.path());

    // The only PATH entry is a freshly created, empty directory, so a
    // `floe-duckdb` installed elsewhere on the host cannot be discovered.
    let path_dir = workspace.path().join("emptybin");
    fs::create_dir_all(&path_dir).expect("mkdir");

    let mut floe = Command::new(assert_cmd::cargo::cargo_bin!("floe"));
    floe.arg("run")
        .arg("-c")
        .arg(&config)
        .env("PATH", &path_dir);

    floe.assert()
        .failure()
        .stderr(predicate::str::contains("DuckDB sink"))
        .stderr(predicate::str::contains("floe-duckdb"))
        .stderr(predicate::str::contains("--features duckdb"));
}

/// A `floe-duckdb` executable sitting in the working directory must not be
/// treated as the companion: the run still has to fail and name `floe-duckdb`
/// on stderr.
#[test]
fn lean_run_does_not_resolve_companion_from_cwd() {
    let workspace = tempdir().expect("tempdir");
    let root = workspace.path();
    let config = write_duckdb_config(root);

    // Plant a decoy with the companion's name in the directory the binary is
    // launched from. It exits 0, so if the resolver trusted the CWD
    // (git-lfs CVE GHSA-6rw3-3whw-jvjj) the `.failure()` assertion below
    // would trip.
    let decoy = root.join("floe-duckdb");
    fs::write(&decoy, "#!/bin/sh\nexit 0\n").expect("write fake companion");
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        fs::set_permissions(&decoy, fs::Permissions::from_mode(0o755)).unwrap();
    }

    // PATH holds nothing but an empty directory.
    let path_dir = root.join("emptybin");
    fs::create_dir_all(&path_dir).expect("mkdir");

    let mut floe = Command::new(assert_cmd::cargo::cargo_bin!("floe"));
    floe.arg("run")
        .arg("-c")
        .arg(&config)
        .current_dir(root)
        .env("PATH", &path_dir);

    floe.assert()
        .failure()
        .stderr(predicate::str::contains("floe-duckdb"));
}

/// A relative PATH entry must not lead to the companion: with `PATH=bin` and
/// an executable at `./bin/floe-duckdb`, the run has to fail without the
/// stub's marker ever reaching stdout.
#[cfg(unix)]
#[test]
fn lean_run_does_not_resolve_companion_from_relative_path_entry() {
    use std::os::unix::fs::PermissionsExt;

    let workspace = tempdir().expect("tempdir");
    let root = workspace.path();
    let config = write_duckdb_config(root);

    // "bin" as a PATH entry is resolved against the CWD, which makes
    // ./bin/floe-duckdb attacker-plantable (git-lfs CVE GHSA-6rw3-3whw-jvjj).
    // The resolver is expected to ignore non-absolute PATH entries, so this
    // stub must never run.
    let relative_bin = root.join("bin");
    fs::create_dir_all(&relative_bin).expect("mkdir");
    let hijacker = relative_bin.join("floe-duckdb");
    fs::write(&hijacker, "#!/bin/sh\necho HIJACKED\nexit 0\n").expect("write stub");
    fs::set_permissions(&hijacker, fs::Permissions::from_mode(0o755)).unwrap();

    let mut floe = Command::new(assert_cmd::cargo::cargo_bin!("floe"));
    floe.arg("run")
        .arg("-c")
        .arg(&config)
        .current_dir(root)
        .env("PATH", "bin");

    floe.assert()
        .failure()
        .stdout(predicate::str::contains("HIJACKED").not())
        .stderr(predicate::str::contains("floe-duckdb"));
}

/// When the first PATH entry holds a non-executable `floe-duckdb` and a later
/// entry holds an executable one, the run must succeed and stdout must carry
/// the marker printed by the executable stub.
#[cfg(unix)]
#[test]
fn lean_run_skips_non_executable_companion_and_continues_search() {
    use std::os::unix::fs::PermissionsExt;

    let workspace = tempdir().expect("tempdir");
    let root = workspace.path();
    let config = write_duckdb_config(root);

    // Earlier PATH entry: a file with the right name but mode 0644. It cannot
    // be re-execed, so the resolver should pass over it the way a shell does
    // when looking up a command.
    let earlier_dir = root.join("first");
    fs::create_dir_all(&earlier_dir).expect("mkdir");
    let plain_file = earlier_dir.join("floe-duckdb");
    fs::write(&plain_file, "not executable\n").expect("write non-exec");
    fs::set_permissions(&plain_file, fs::Permissions::from_mode(0o644)).unwrap();

    // Later PATH entry: an executable stub that prints a marker.
    let later_dir = root.join("second");
    fs::create_dir_all(&later_dir).expect("mkdir");
    let runnable = later_dir.join("floe-duckdb");
    fs::write(&runnable, "#!/bin/sh\necho DELEGATED_OK\nexit 0\n").expect("write stub");
    fs::set_permissions(&runnable, fs::Permissions::from_mode(0o755)).unwrap();

    let search_path = std::env::join_paths([&earlier_dir, &later_dir]).expect("join paths");

    let mut floe = Command::new(assert_cmd::cargo::cargo_bin!("floe"));
    floe.arg("run")
        .arg("-c")
        .arg(&config)
        .env("PATH", &search_path);

    floe.assert()
        .success()
        .stdout(predicate::str::contains("DELEGATED_OK"));
}

/// Writes a config whose ONLY DuckDB reference is the rejected sink and
/// returns the path of the written file.
///
/// Rejected sinks accept `csv` only, so this is an invalid config — it must
/// NOT trigger companion delegation. The accepted sink uses `parquet`.
///
/// The file is created as `rejected-config.yml` inside `dir`; every path it
/// references is derived from `dir` and none of them is created here.
///
/// Paths are embedded in double-quoted YAML scalars, where `\` introduces an
/// escape sequence and `"` terminates the scalar. Both characters are escaped
/// before interpolation so that paths containing them (for example Windows
/// paths such as `C:\Users\...`) yield well-formed YAML. Paths without those
/// characters are written unchanged.
///
/// # Panics
///
/// Panics if the config file cannot be written.
fn write_rejected_only_duckdb_config(dir: &std::path::Path) -> std::path::PathBuf {
    // Render a path so it survives YAML double-quoted scalar parsing.
    // Backslashes are handled first so the escapes added for quotes are not
    // themselves doubled.
    let yaml_escaped = |path: std::path::PathBuf| {
        path.display()
            .to_string()
            .replace('\\', "\\\\")
            .replace('"', "\\\"")
    };

    let config_path = dir.join("rejected-config.yml");
    let body = format!(
        r#"version: "0.1"
report:
  path: "{report}"
entities:
  - name: customers
    source:
      format: csv
      path: "{input}"
    sink:
      accepted:
        format: parquet
        path: "{accepted}"
      rejected:
        format: duckdb
        path: "{rejected}"
    policy:
      severity: warn
    schema:
      columns:
        - name: id
          type: string
"#,
        report = yaml_escaped(dir.join("report")),
        input = yaml_escaped(dir.join("in.csv")),
        accepted = yaml_escaped(dir.join("accepted.parquet")),
        rejected = yaml_escaped(dir.join("rejected.duckdb")),
    );
    fs::write(&config_path, body).expect("write config");
    config_path
}

/// Rejected sinks accept `csv` only, which makes a `rejected.format: duckdb`
/// config invalid. Even with a companion available on PATH, the lean binary
/// must NOT hand off to it; the run has to fail with the actual
/// unsupported-rejected-sink validation error instead of a misleading
/// companion-install hint.
#[cfg(unix)]
#[test]
fn lean_run_with_only_rejected_duckdb_sink_does_not_delegate() {
    use std::os::unix::fs::PermissionsExt;

    let workspace = tempdir().expect("tempdir");
    let root = workspace.path();
    let config = write_rejected_only_duckdb_config(root);

    // Put a runnable stub companion on PATH. Its marker on stdout would
    // reveal an (incorrect) re-exec.
    let companion_dir = root.join("bin");
    fs::create_dir_all(&companion_dir).expect("mkdir");
    let companion = companion_dir.join("floe-duckdb");
    fs::write(&companion, "#!/bin/sh\necho DELEGATED_OK\nexit 0\n").expect("write stub");
    fs::set_permissions(&companion, fs::Permissions::from_mode(0o755)).unwrap();

    let mut floe = Command::new(assert_cmd::cargo::cargo_bin!("floe"));
    floe.arg("run")
        .arg("-c")
        .arg(&config)
        .env("PATH", &companion_dir);

    floe.assert()
        .failure()
        .stdout(predicate::str::contains("DELEGATED_OK").not())
        .stderr(predicate::str::contains("sink.rejected.format=duckdb"));
}

/// With an executable `floe-duckdb` in the sole PATH directory, a run against
/// a DuckDB-sink config must succeed and stdout must contain the marker the
/// stub prints.
#[cfg(unix)]
#[test]
fn lean_run_delegates_to_companion_on_path() {
    use std::os::unix::fs::PermissionsExt;

    let workspace = tempdir().expect("tempdir");
    let root = workspace.path();
    let config = write_duckdb_config(root);

    // The stub stands in for the real floe-duckdb: it prints a marker and
    // exits 0. Seeing the marker shows the lean binary located the companion
    // on PATH and re-execed it.
    let companion_dir = root.join("bin");
    fs::create_dir_all(&companion_dir).expect("mkdir");
    let companion = companion_dir.join("floe-duckdb");
    fs::write(&companion, "#!/bin/sh\necho DELEGATED_OK\nexit 0\n").expect("write stub");
    fs::set_permissions(&companion, fs::Permissions::from_mode(0o755)).unwrap();

    let mut floe = Command::new(assert_cmd::cargo::cargo_bin!("floe"));
    floe.arg("run")
        .arg("-c")
        .arg(&config)
        .env("PATH", &companion_dir);

    floe.assert()
        .success()
        .stdout(predicate::str::contains("DELEGATED_OK"));
}