rust-data-processing 0.3.0

Schema-first ingestion (CSV, JSON, Parquet, Excel) into an in-memory DataSet, plus Polars-backed pipelines, SQL, profiling, validation, and map/reduce-style processing.
use rust_data_processing::ingestion::json::{ingest_json_from_path, ingest_json_from_str};
use rust_data_processing::pipeline_spec::PipelineBundle;
use rust_data_processing::types::{Schema, Value};

/// Returns the schema shared by every JSON ingestion test in this file.
///
/// The schema is requested from the `people` repo fixture bundle under the
/// path `schemas/people_json.schema.json`.
///
/// NOTE(review): the `expect_` prefix suggests this panics when the schema
/// cannot be loaded — confirm against `PipelineBundle`.
fn people_schema_nested() -> Schema {
    const FIXTURE_NAME: &str = "people";
    const SCHEMA_PATH: &str = "schemas/people_json.schema.json";

    PipelineBundle::from_repo_fixture(FIXTURE_NAME).expect_schema(SCHEMA_PATH)
}

/// Ingesting `tests/fixtures/people.json` with the shared schema succeeds and
/// yields the two expected rows.
#[test]
fn ingest_json_array_from_path_happy_path() {
    let people =
        ingest_json_from_path("tests/fixtures/people.json", &people_schema_nested()).unwrap();

    assert_eq!(people.row_count(), 2);

    // Columns 0 and 1 are presumably `id` and the nested user name; the
    // schema file itself is not visible from this test.
    let first = &people.rows[0];
    assert_eq!(first[0], Value::Int64(1));
    assert_eq!(first[1], Value::Utf8(String::from("Ada")));

    let second = &people.rows[1];
    assert_eq!(second[1], Value::Utf8(String::from("Grace")));
}

/// Ingesting newline-delimited JSON objects from a string succeeds and
/// produces one row per object.
#[test]
fn ingest_json_ndjson_happy_path() {
    // One JSON object per line. The raw string deliberately starts and ends
    // with a newline, so the input also carries leading/trailing blank space.
    let ndjson = r#"
{"id":1,"user":{"name":"Ada"},"score":98.5,"active":true}
{"id":2,"user":{"name":"Grace"},"score":87.25,"active":false}
"#;

    let parsed = ingest_json_from_str(ndjson, &people_schema_nested()).unwrap();

    assert_eq!(parsed.row_count(), 2);

    let first_row = &parsed.rows[0];
    assert_eq!(first_row[1], Value::Utf8("Ada".to_owned()));
}

/// A record lacking the `active` key must be rejected with an error that
/// mentions both the schema mismatch and the missing field.
#[test]
fn ingest_json_errors_on_missing_field() {
    // The only record in the array omits `active`.
    let without_active = r#"[{"id":1,"user":{"name":"Ada"},"score":98.5}]"#;

    let rendered = ingest_json_from_str(without_active, &people_schema_nested())
        .unwrap_err()
        .to_string();

    assert!(rendered.contains("schema mismatch"));
    assert!(rendered.contains("missing required field 'active'"));
}

/// A record whose `id` is a string instead of a number must be rejected with
/// an error that names the parse failure and the offending column.
#[test]
fn ingest_json_errors_on_type_mismatch() {
    // `id` carries the string "nope"; the happy-path tests expect it to
    // ingest as `Value::Int64`.
    let string_id = r#"[{"id":"nope","user":{"name":"Ada"},"score":98.5,"active":true}]"#;

    let rendered = ingest_json_from_str(string_id, &people_schema_nested())
        .unwrap_err()
        .to_string();

    assert!(rendered.contains("failed to parse value"));
    assert!(rendered.contains("column 'id'"));
}