floe-core 0.3.2

Core library for Floe, a YAML-driven technical ingestion tool.
Documentation
use std::fs;
use std::path::{Path, PathBuf};

use floe_core::{run, RunOptions};

fn write_csv(dir: &Path, name: &str, contents: &str) -> PathBuf {
    let path = dir.join(name);
    fs::write(&path, contents).expect("write csv");
    path
}

fn write_config(dir: &Path, contents: &str) -> PathBuf {
    let path = dir.join("config.yml");
    fs::write(&path, contents).expect("write config");
    path
}

#[test]
fn composite_unique_checks_across_files_and_reports_samples() {
    let temp_dir = tempfile::TempDir::new().expect("temp dir");
    let root = temp_dir.path();
    let input_dir = root.join("in");
    let accepted_dir = root.join("out/accepted/customer");
    let rejected_dir = root.join("out/rejected/customer");
    let report_dir = root.join("report");

    fs::create_dir_all(&input_dir).expect("create input dir");
    write_csv(
        &input_dir,
        "batch1.csv",
        "id;country;email\n1;fr;alice@demo\n2;;bob@demo\n3;us;carol@demo\n",
    );
    write_csv(
        &input_dir,
        "batch2.csv",
        "id;country;email\n1;fr;alice_new@demo\n2;;bob_new@demo\n4;us;carol@demo\n",
    );

    let yaml = format!(
        r#"version: "0.1"
report:
  path: "{report_dir}"
entities:
  - name: "customer"
    source:
      format: "csv"
      path: "{input_dir}"
    sink:
      accepted:
        format: "parquet"
        path: "{accepted_dir}"
      rejected:
        format: "csv"
        path: "{rejected_dir}"
    policy:
      severity: "reject"
    schema:
      primary_key: ["id", "country"]
      unique_keys:
        - ["id", "country"]
        - ["email"]
      columns:
        - name: "id"
          type: "string"
        - name: "country"
          type: "string"
        - name: "email"
          type: "string"
"#,
        report_dir = report_dir.display(),
        input_dir = input_dir.display(),
        accepted_dir = accepted_dir.display(),
        rejected_dir = rejected_dir.display(),
    );
    let config_path = write_config(root, &yaml);

    let outcome = run(
        &config_path,
        RunOptions {
            run_id: Some("it-composite-unique".to_string()),
            entities: Vec::new(),
            dry_run: false,
        },
    )
    .expect("run config");

    let report = &outcome.entity_outcomes[0].report;
    assert_eq!(report.results.rows_total, 6);
    assert_eq!(report.results.accepted_total, 2);
    assert_eq!(report.results.rejected_total, 4);

    assert_eq!(report.unique_constraints.len(), 2);
    let tuple_constraint = report
        .unique_constraints
        .iter()
        .find(|constraint| constraint.columns == vec!["id".to_string(), "country".to_string()])
        .expect("tuple unique constraint");
    assert_eq!(tuple_constraint.duplicates_count, 1);
    assert!(!tuple_constraint.samples.is_empty());

    let email_constraint = report
        .unique_constraints
        .iter()
        .find(|constraint| constraint.columns == vec!["email".to_string()])
        .expect("email unique constraint");
    assert_eq!(email_constraint.duplicates_count, 1);
    assert!(!email_constraint.samples.is_empty());
}