taxa-core 0.1.0

taxa engine core: manifest model, formula AST→Polars Expr, bounded query generators over Polars.
//! Manifest inference parity — mirrors `test_inference.py::test_infer_dataset_shapes`.

use polars::prelude::*;
use taxa_core::{infer_dataset, FrameSource};

fn frame() -> FrameSource {
    let df = df![
        "owner" => ["microsoft", "microsoft", "google", "facebook"],
        "repo" => ["vscode", "terminal", "tensorflow", "react"],
        "language" => ["TypeScript", "C++", "C++", "JavaScript"],
        "dt" => Series::new("dt".into(), [20454i32, 20454, 20454, 20454]).cast(&DataType::Date).unwrap(),
        "stars" => [163000i64, 95000, 186000, 228000],
        "forks" => [28800i64, 8300, 74300, 46600],
    ]
    .unwrap();
    FrameSource::new(df)
}

#[test]
fn infers_shapes() {
    // The inferred manifest is expected in the canonical frames/views layout:
    // one frame called `"main"` holding the inferred id/timestamp/metrics, with
    // axes, filters and the default size-by kept at the dataset level. `views`
    // is not spelled out in the manifest, so the defaults are resolved below.
    let manifest = infer_dataset(&frame(), Some("repos"), "repo").unwrap();
    let main_frame = manifest.frame("main").expect("single frame `main`");

    // Entity id: `repo`, the text column whose values are all unique.
    assert_eq!(main_frame.id_column, "repo");

    // `dt` is picked up as the timestamp, so the resolved default views must
    // contain a `series` entry.
    assert_eq!(main_frame.timestamp.as_deref(), Some("dt"));
    let views = manifest.resolved_views().unwrap();
    assert!(views.contains_key("series"));

    // Each numeric column becomes a `sum` metric, alongside a built-in `count`.
    let metric_ids: std::collections::HashSet<&str> = main_frame
        .metrics
        .iter()
        .map(|m| m.id.as_str())
        .collect();
    assert!(["count", "stars", "forks"]
        .iter()
        .all(|id| metric_ids.contains(id)));
    let find_metric = |id: &str| main_frame.metrics.iter().find(|m| m.id == id).unwrap();
    assert_eq!(find_metric("stars").agg, "sum");
    assert_eq!(find_metric("count").agg, "count");

    // Low-cardinality categoricals feed the hierarchy axis and the filters;
    // the entity column is the last (leaf) level of the hierarchy.
    let hierarchy = manifest.axis("hierarchy").unwrap();
    let leaf = hierarchy.levels.last().unwrap();
    assert_eq!(leaf, "repo");
    let filter_ids: std::collections::HashSet<&str> =
        manifest.filters.iter().map(|f| f.id.as_str()).collect();
    assert!(["owner", "language"]
        .iter()
        .all(|id| filter_ids.contains(id)));

    // Either numeric metric is accepted as the default size-by.
    assert!(matches!(
        manifest.default_size_by.as_deref(),
        Some("stars") | Some("forks")
    ));
}