taxa-core 0.1.0

taxa engine core: manifest model, formula AST→Polars Expr, bounded query generators over Polars.
//! Engine output → frontend shape. The D3 frontend reads FLAT node fields
//! (`d.data.<metric_id>`, `d.data.count`, `d.data._others`, `d.data.level`),
//! not the engine's nested `measures{}`. This is a direct port of the Python
//! `adapter.py::_to_node`, and it is the contract the treemap view renders.
//!
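//! (The boot manifest — `make_spec`/`manifest_dict` — for a declarative dataset
//! is built by [`boot_manifest`] in this module. NOTE(review): this comment
//! used to place it in the server layer (M2), where the full view/metric config
//! lives — confirm which layer owns the final manifest.)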

use serde_json::{json, Map, Value as Json};

use crate::manifest::FrameDataset;
use crate::output::TreeNode;

/// Context for flattening: the axis levels (for per-depth `level` names), the
/// metric ids to surface as flat fields, the count-metric id (if any), and the
/// entity noun used as the leaf level label.
///
/// The struct only holds shared borrows and plain flags, so it is cheap to copy
/// and safe to print for diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct NodeCtx<'a> {
    /// Level names of a fixed axis; a node at depth `d >= 1` is labelled
    /// `levels[d - 1]` when that index exists.
    pub levels: &'a [String],
    /// Metric ids copied from the node's measures into flat node fields.
    pub metric_ids: &'a [String],
    /// Id of the measure that feeds the flat `count` field, if there is one.
    pub count_id: Option<&'a str>,
    /// Level label used for nodes deeper than `levels` on a fixed axis.
    pub entity_noun: &'a str,
    /// Path axis: `levels` is variable-depth, so per-depth names are synthesized
    /// (`__lvl{depth-1}`) rather than read from a fixed list.
    pub is_path: bool,
}

/// `TreeNode` → frontend node JSON. `depth` is 0 at the root.
///
/// The emitted object carries, in insertion order: `name`, `level`, `count`,
/// one flat field per id in `ctx.metric_ids` (`null` when the node has no such
/// measure), `_others` (only for an "other" bucket), `has_more_children` (only
/// when the node is flagged `has_more`), and `children` (flattened recursively
/// at `depth + 1`).
pub fn frontend_node(t: &TreeNode, ctx: &NodeCtx, depth: usize) -> Json {
    // Level label: "root" at the top; on a path axis a generic per-depth label
    // (the frontend only uses `level` as a grouping label); on a fixed axis the
    // declared level name, falling back to the entity noun past the last level.
    let level = match depth {
        0 => "root".to_string(),
        d if ctx.is_path => crate::path::level_col(d - 1),
        d => match ctx.levels.get(d - 1) {
            Some(name) => name.clone(),
            None => ctx.entity_noun.to_string(),
        },
    };

    // `count` comes from the count metric's measure, read as f64 and cast to
    // i64; it is 0 when there is no count metric, no measure, or no number.
    let count_measure = ctx.count_id.and_then(|cid| t.measures.get(cid));
    let count = count_measure
        .and_then(|v| v.as_f64())
        .map_or(0, |f| f as i64);

    let mut out = Map::new();
    out.insert("name".into(), json!(t.name));
    out.insert("level".into(), Json::String(level));
    out.insert("count".into(), Json::from(count));

    for mid in ctx.metric_ids.iter() {
        let value = match t.measures.get(mid) {
            Some(v) => v.clone(),
            None => Json::Null,
        };
        out.insert(mid.clone(), value);
    }

    if t.is_other {
        // An "other" bucket always reports at least one folded entry.
        out.insert("_others".into(), json!(t.n_folded.max(1)));
    }
    if t.has_more {
        // At the fetched-depth boundary: zoomable even though its children
        // weren't materialized (a windowed frontend re-fetches on zoom).
        out.insert("has_more_children".into(), Json::Bool(true));
    }

    let children: Vec<Json> = t
        .children
        .iter()
        .map(|child| frontend_node(child, ctx, depth + 1))
        .collect();
    out.insert("children".into(), Json::Array(children));

    Json::Object(out)
}

/// The frontend boot manifest (`/api/manifest`) for a declarative dataset.
/// Reproduces the Python `make_spec(ds).manifest_dict()` shape so the unchanged
/// D3 frontend boots from it. A view appears only when it's meaningful (series
/// only with a timestamp).
///
/// The `treemap`, `scatter` and `detail` views are always emitted; `series` is
/// emitted only when the dataset declares a `series_source` or a
/// `timestamp_column`.
///
/// Degenerate datasets do not index out of bounds: with no axes and no explicit
/// `default_axis`, or with no metrics, the affected defaults (`default_axis`,
/// `default_size_by`, `default_x`, `default_y`) serialize as JSON `null`. For a
/// dataset with at least one axis and one metric the output is unchanged.
pub fn boot_manifest(ds: &FrameDataset) -> Json {
    let metric_ids: Vec<String> = ds.metrics.iter().map(|m| m.id.clone()).collect();
    let extensive = |agg: &str| matches!(agg, "sum" | "count");

    let metrics: Vec<Json> = ds
        .metrics
        .iter()
        .map(|m| {
            let is_extensive = extensive(&m.agg);
            json!({
                "id": m.id,
                "label": m.label.clone().unwrap_or_else(|| m.id.clone()),
                "unit": m.unit,
                "default_agg": if is_extensive { "sum" } else { "median" },
                "kind": if is_extensive { "extensive" } else { "intensive" },
                "cadence": "daily",
                "timeseries": true,
                "notice": Json::Null,
            })
        })
        .collect();

    let axes: Vec<Json> = ds
        .axes
        .iter()
        .map(|a| {
            let mut axis = json!({
                "id": a.id,
                "label": a.label.clone().unwrap_or_else(|| a.id.clone()),
                "levels": a.levels,
                "descr": "",
                "row": 0,
            });
            if let Some(obj) = axis.as_object_mut() {
                // Variable-depth path axis: a hint so the frontend treats levels
                // as derived/unbounded. Omitted for fixed axes (manifest stays
                // identical).
                if a.path.is_some() {
                    obj.insert("path".into(), json!(true));
                }
                if let Some(dsb) = &a.default_size_by {
                    obj.insert("default_size_by".into(), json!(dsb));
                }
                if let Some(sb) = &a.size_by {
                    obj.insert("size_by".into(), json!(sb));
                }
            }
            axis
        })
        .collect();

    let filters: Vec<Json> = ds
        .filters
        .iter()
        .map(|f| {
            let is_range = f.r#type == "range";
            // Honor an explicit `control` override (select | multiselect | range);
            // any other value is ignored and the control defaults by type
            // (range -> "range", else "multiselect").
            let fallback = if is_range { "range" } else { "multiselect" };
            let control = f
                .control
                .as_deref()
                .filter(|c| matches!(*c, "select" | "multiselect" | "range"))
                .unwrap_or(fallback);
            json!({
                "id": f.id,
                "label": f.label.clone().unwrap_or_else(|| f.id.clone()),
                "control": control,
                "options": Json::Null,
                "options_provider": !is_range,
                "range_min": Json::Null,
                "range_max": Json::Null,
                "default": f.default.clone().unwrap_or(Json::Null),
            })
        })
        .collect();

    // Defaults fall back to the first axis / first metric. `first()`/`get()`
    // keep an empty axis or metric list from panicking; a missing default is
    // emitted as `null`.
    let default_axis = ds
        .default_axis
        .clone()
        .or_else(|| ds.axes.first().map(|a| a.id.clone()));
    let default_size = ds
        .default_size_by
        .clone()
        .or_else(|| metric_ids.first().cloned());
    let default_x = metric_ids.first();
    // Second metric when there is one, else the same metric as the x axis.
    let default_y = metric_ids.get(1).or(default_x);

    // Clamp the loading knobs to sane ranges — a negative lookahead would make the
    // frontend's window never cover any focus (constant re-fetch); caps/levels < 1
    // are meaningless.
    let lookahead = ds.lookahead.map(|n| n.max(0));
    let branch_cap = ds.branch_cap.max(1);
    let leaf_cap = ds.leaf_cap.max(1);
    // Levels slider max = the deepest fixed axis (so a 5-level axis is fully
    // reachable), with a floor of 4 to preserve prior behavior and cover path axes
    // (whose `levels` are derived/empty). Engine clamps depth per-axis anyway.
    let max_levels = ds
        .axes
        .iter()
        .map(|a| a.levels.len() as i64)
        .max()
        .unwrap_or(4)
        .max(4);
    let levels_default = ds.default_levels.clamp(1, max_levels);

    let treemap = json!({
        "axes": axes,
        "size_by": metric_ids,
        "default_axis": default_axis,
        "default_size_by": default_size,
        "filters": filters,
        "levels": {"min": 1, "max": max_levels, "default": levels_default},
        "branch_cap": branch_cap,
        "leaf_cap": leaf_cap,
        // null → full-load mode; n → prefetch a window n levels beyond the display.
        "lookahead": lookahead,
        "entity_level_label": ds.entity_noun,
    });
    let scatter = json!({
        "metrics": metric_ids,
        "default_x": default_x,
        "default_y": default_y,
        "color_label": "",
        "log_x": true,
        "log_y": true,
    });

    // A timestamped series exists when there is either a dedicated
    // `series_source` or a `timestamp_column` on the main source. Both the Line
    // tab and the detail page's time chart key off this, so it is computed once.
    let has_series = ds.series_source.is_some() || ds.timestamp_column.is_some();
    // Time windows shared by the Line tab and the detail chart.
    let windows = json!([
        {"id": "1m", "days": 31}, {"id": "1y", "days": 365},
        {"id": "5y", "days": 1825}, {"id": "max", "days": 36500}
    ]);
    // Resolutions are creator-configurable: a producer with a weekly-pre-bucketed
    // source can restrict to `["w"]` so the engine never sums a stock metric up
    // to a coarser cadence. Absent → `["d","w","m"]`.
    let resolutions = ds
        .series_resolutions
        .clone()
        .unwrap_or_else(|| vec!["d".into(), "w".into(), "m".into()]);
    // The DEFAULT resolution: the producer's declared default, else the first
    // ALLOWED resolution (NOT a hardcoded "d"). For a `["w","m","y"]` frame this
    // is "w", so the first fetch sends a VALID resolution instead of an invalid
    // global default. "d" is only the last resort for an empty resolution list.
    let default_resolution = ds
        .series_default_resolution
        .clone()
        .or_else(|| resolutions.first().cloned())
        .unwrap_or_else(|| "d".into());
    // The series metric list: the configured `series_metrics` subset (the series
    // table's columns) when set, else all metrics; empty without a series.
    let series_metrics: Vec<String> = if has_series {
        ds.series_metrics
            .clone()
            .unwrap_or_else(|| metric_ids.clone())
    } else {
        Vec::new()
    };

    // The detail page's time chart offers the same series metrics + windows the
    // Line tab does (when a series source / timestamp exists). Without these the
    // frontend's metric selector is empty, no metric is selected, and the chart
    // never fetches — leaving an empty chart area. OHLC is unsupported (no price
    // source), so the page degrades to the metric line.
    let detail_windows = if has_series {
        windows.clone()
    } else {
        json!([])
    };
    let detail = json!({
        "facts_layout": [], "headline_metrics": [],
        "series_metrics": series_metrics, "windows": detail_windows,
        "resolutions": resolutions,
        "default_resolution": default_resolution,
        "chart_styles": ["line", "area"],
        "second_metric": true,
        "supports_ohlc": false,
    });

    let mut views = Map::new();
    views.insert("treemap".into(), treemap);
    // The Line tab is only meaningful when there's a timestamped series.
    if has_series {
        views.insert(
            "series".into(),
            json!({
                "windows": windows,
                "resolutions": resolutions,
                "metrics": series_metrics,
                "aggs": ["sum", "mean", "median"],
                "default_window": "1y",
                "default_resolution": default_resolution,
                "log_scale": true,
                "follows_treemap": true,
            }),
        );
    }
    views.insert("scatter".into(), scatter);
    views.insert("detail".into(), detail);

    json!({
        "dataset_id": ds.source,
        "title": ds.title,
        "entity_noun": ds.entity_noun,
        "entity_noun_plural": ds.entity_noun_plural,
        "metrics": metrics,
        "views": Json::Object(views),
        "theme": {},
        "fonts": Json::Null,
        "warm_views": [],
    })
}

/// Convenience: flatten a whole tree for a dataset + axis.
///
/// Returns `None` when `ds.axis(axis)` yields nothing. Otherwise builds the
/// [`NodeCtx`] from the dataset (all metric ids, the first metric whose `agg`
/// is `"count"` as the count source, the entity noun, and whether the axis is a
/// path axis) and flattens `tree` starting at depth 0.
pub fn frontend_tree(ds: &FrameDataset, axis: &str, tree: &TreeNode) -> Option<Json> {
    let axis_def = ds.axis(axis)?;

    let metric_ids: Vec<String> = ds
        .metrics
        .iter()
        .map(|metric| metric.id.clone())
        .collect();
    // The first count-aggregated metric (if any) feeds each node's `count`.
    let count_metric = ds.metrics.iter().find(|metric| metric.agg == "count");

    let ctx = NodeCtx {
        levels: &axis_def.levels,
        metric_ids: &metric_ids,
        count_id: count_metric.map(|metric| metric.id.as_str()),
        entity_noun: &ds.entity_noun,
        is_path: axis_def.path.is_some(),
    };
    Some(frontend_node(tree, &ctx, 0))
}