taxa-core 0.1.0

//! Bounded per-branch time series. Port of `sql.py`'s `series()`.
//!
//! Polars does the heavy lift — `group_by([branch, time_bucket]).agg(...)` over
//! the (possibly huge) source. The result is small (branches × buckets), so the
//! grid reshape — forward-fill each branch across the common date grid, keep
//! top-K by the treemap's `size_by` at the as-of (latest) bucket (so the Line
//! tab and treemap show the same *current* branch set), fold the tail into
//! "Other" — is done directly. The window
//! is anchored to the series frame's max timestamp (`as_of`), not the wall clock.
//! (Per-entity ASOF forward-fill for sparse sources is the documented future
//! upgrade.)

use std::collections::{BTreeSet, HashMap};

use polars::prelude::*;
use serde_json::{json, Map, Value as Json};

use crate::error::{Error, Result};
use crate::filters::{axis_row_filter, filter_exprs, focus_exprs};
use crate::manifest::FrameDataset;
use crate::metric::{metric_plan, value_expr};
use crate::output::av_to_label;
use crate::source::Source;

/// Branch cap — kept in lockstep with the treemap's `MAX_TOP_K` (`treemap.rs`)
/// so the Line tab and treemap bound the branch set identically.
///
/// `series()` clamps the requested `top_k` to this value on its ranked path, so
/// an oversized request cannot widen the plotted branch set past the cap.
pub const MAX_TOP_K: usize = 50;

/// Translates a one-letter resolution code into the `every` string handed to
/// Polars' `dt().truncate`: `w` → `1w`, `m` → `1mo`, `q` → `1q`, `y` → `1y`.
/// Any other input (including `d`) maps to the daily bucket `1d`. Both the
/// group series and `entity_series` go through this one table, so they accept
/// the same set of codes.
fn resolution_every(resolution: &str) -> &'static str {
    // (code, truncate-every) pairs; anything not listed falls back to daily.
    const EVERY_BY_CODE: [(&str, &str); 4] =
        [("w", "1w"), ("m", "1mo"), ("q", "1q"), ("y", "1y")];
    EVERY_BY_CODE
        .iter()
        .find(|(code, _)| *code == resolution)
        .map_or("1d", |&(_, every)| every)
}

/// Request parameters for [`series`]: which axis/metric to plot, how to bucket
/// and window time, and how the plotted branch set is chosen.
pub struct SeriesArgs {
    /// Axis id, looked up on the dataset; an unknown id is a schema error.
    pub axis: String,
    /// Metric id whose values are plotted; an unknown id is a schema error.
    pub metric: String,
    /// Focus path into the axis. Its length selects the branch level (clamped
    /// to the deepest level) and its entries filter the rows.
    pub focus: Vec<Json>,
    /// Base row filters, applied before the per-axis row filter and grouping.
    pub filters: Map<String, Json>,
    /// Cross-sectional aggregation override (stage 2 only). `None` → the
    /// metric's own `cross_agg()`.
    pub agg: Option<String>,
    /// Bucket resolution code: "d" | "w" | "m" | "q" | "y". Any other value is
    /// treated as daily.
    pub resolution: String,
    /// Window length in days, measured back from the frame's max timestamp.
    /// `None` → no windowing.
    pub window_days: Option<i64>,
    /// Number of branches kept on the ranked path (clamped to `MAX_TOP_K`).
    pub top_k: usize,
    /// Display-label prefix for the folded tail line (rendered as
    /// `"<other_label> (n=<count>)"`).
    pub other_label: String,
    /// Metric the branch set is ranked by — the SAME `size_by` the treemap uses,
    /// so the Line tab and treemap show the same branches. `None` → falls back to
    /// the dataset's `resolve_size_by` (default_size_by / first metric).
    pub size_by: Option<String>,
    /// `branch_set: "treemap"` override: an explicit, pre-resolved set of branch
    /// keys (the treemap's kept top-K at this axis/focus/size_by). When `Some`,
    /// `series()` plots EXACTLY these branches instead of running its own ranking
    /// — fixing rank-by-history and size_by-not-in-series-metrics at the source.
    /// The order is the treemap's order; rows whose branch ∉ this set fold into
    /// "Other" when `include_other` is true.
    pub branches: Option<Vec<String>>,
    /// Whether the tail (rows outside `branches`) folds into an "Other" line.
    /// Mirrors whether the treemap rollup produced an "Other" node.
    pub include_other: bool,
}

impl SeriesArgs {
    /// Builds arguments for `axis` / `metric` with every other knob at its
    /// default: no focus, no filters, the metric's own aggregation, daily
    /// resolution, no window, top 12 branches, an "Other" tail line, and the
    /// dataset's default `size_by` ranking (no pre-resolved branch set).
    pub fn new(axis: impl Into<String>, metric: impl Into<String>) -> Self {
        let (axis, metric): (String, String) = (axis.into(), metric.into());
        Self {
            axis,
            metric,
            focus: Vec::new(),
            filters: Map::new(),
            agg: None,
            resolution: String::from("d"),
            window_days: None,
            top_k: 12,
            other_label: String::from("Other"),
            size_by: None,
            branches: None,
            include_other: true,
        }
    }
}

/// Arithmetic mean spelled as `sum / count` instead of `Expr::mean()`.
///
/// Polars evaluates `mean()` through a partitioned aggregation path that hits
/// `unimplemented!()` on high-cardinality group_bys (the per-entity series stage
/// is one), which panics the worker threads. `sum` and `count` are implemented
/// on that path, and because `count` only counts non-null values the quotient
/// has the same non-null semantics as a mean.
fn mean_expr(e: Expr) -> Expr {
    let total = e.clone().sum();
    let non_null = e.count();
    total / non_null
}

/// Builds the bounded per-branch time series for one axis/metric (the Line tab).
///
/// Steps: apply the base filters, the per-axis row filter and the focus path;
/// optionally window the rows back `window_days` from the frame's own max
/// timestamp; bucket the timestamp at `args.resolution`; aggregate in two stages
/// (per-entity time fold, then cross-sectional `agg` per branch and bucket);
/// forward-fill each branch across the common bucket grid; keep either the
/// pre-resolved `args.branches` or the top-K branches ranked by `size_by` at the
/// latest bucket; and, when wanted, fold the remaining branches into one
/// `"__other__"` line.
///
/// Returns `{"series": [{key, label, dates, values, is_other}, …],
/// "meta": {cadence, agg, unit, metric_id, branch_keys, notice}}`.
///
/// # Errors
///
/// `Error::Schema` when the axis or metric is unknown, the dataset has no
/// timestamp column, the axis resolves to no levels, or no `size_by` metric can
/// be resolved on the ranked path. Failures from the source, the helper
/// modules, and Polars are propagated.
pub fn series(ds: &FrameDataset, source: &dyn Source, args: &SeriesArgs) -> Result<Json> {
    let columns = source.columns()?;
    let axis = ds
        .axis(&args.axis)
        .ok_or_else(|| Error::Schema(format!("unknown axis {:?}", args.axis)))?;
    let m = ds
        .metric(&args.metric)
        .ok_or_else(|| Error::Schema(format!("unknown metric {:?}", args.metric)))?;
    let ts = ds
        .timestamp_column
        .as_ref()
        .ok_or_else(|| Error::Schema("dataset has no timestamp_column".into()))?;
    let agg = args
        .agg
        .clone()
        .unwrap_or_else(|| m.cross_agg().to_string());
    let every = resolution_every(&args.resolution);

    // ── two-stage time/cross-section aggregation ────────────────────────
    // The source is one row per entity per (fine) source bucket. At a coarser
    // resolution a coarse bucket holds several source rows PER entity. Summing
    // them all (single-stage) is correct for a *flow* metric but WRONG for a
    // *stock* (e.g. market cap sums ~4 weekly snapshots into a month). So:
    //   stage 1 — per (entity, branch, coarse_bucket): fold the entity's rows
    //             with the metric's `time_agg` (e.g. `last` → its latest value);
    //   stage 2 — per (branch, coarse_bucket): apply the cross-sectional `agg`
    //             (e.g. `sum`) over the stage-1 per-entity values.
    // When the source cadence == the requested resolution there is one source row
    // per (entity, bucket), so stage 1 is the identity and the result is unchanged
    // from the old single-stage path — preserving existing behavior.
    //
    // `count` is a pure cross-section (no per-row value), so it skips stage 1 and
    // counts the distinct entities present in each (branch, bucket) directly.
    let is_count = agg == "count";
    // Stage-1 per-entity fold expr over the raw value column → aliased "v".
    let time_fold_expr = if is_count {
        None
    } else {
        let v = value_expr(m, &columns)?;
        // Stage-1 (within-entity time fold) uses the metric's OWN time semantics
        // ONLY: `time_agg` if declared, else the metric's `agg`. The request's
        // `args.agg` must NOT leak into stage 1 — it drives ONLY the cross-
        // sectional stage-2 agg. Otherwise a Line-tab AGGREGATE=mean/median would
        // override a stock metric's `time_agg:"last"`, turning monthly mcap into
        // "mean of each entity's monthly mean" instead of each entity's last value.
        let ta = m.time_agg.as_deref().unwrap_or(&m.agg);
        Some(
            match ta {
                // `last`/`first` are ORDER-dependent: the source SQL has no ORDER
                // BY, so Postgres row order within a (entity, branch, bucket) group
                // is arbitrary. Sort by the timestamp column inside the fold so a
                // monthly/yearly stock bucket deterministically picks the latest
                // (resp. earliest) row by ts. mean/median/sum/min/max are order-
                // independent, so they fold the raw column directly.
                "last" => v
                    .sort_by([col(ts.as_str())], SortMultipleOptions::default())
                    .last(),
                "first" => v
                    .sort_by([col(ts.as_str())], SortMultipleOptions::default())
                    .first(),
                "mean" => mean_expr(v),
                "median" => v.median(),
                "min" => v.min(),
                "max" => v.max(),
                _ => v.sum(),
            }
            .alias("v"),
        )
    };
    // Stage-2 cross-sectional agg expr over the stage-1 "v" column → aliased "v".
    // (For `count`, a single-stage distinct-entity count over the bucketed rows.)
    let cross_expr = if is_count {
        // Count DISTINCT entities per (branch, bucket), not source rows: a weekly
        // source at monthly resolution has ~4 rows per entity per bucket, so a raw
        // `len()` would count ticker-weeks, not entities present in that month.
        col(ds.id_column.as_str()).n_unique().alias("v")
    } else {
        let c = col("v");
        match agg.as_str() {
            "mean" => mean_expr(c),
            "median" => c.median(),
            "max" => c.max(),
            "min" => c.min(),
            _ => c.sum(),
        }
        .alias("v")
    };

    let mut lf = source.frame()?;
    for e in filter_exprs(ds, &args.filters) {
        lf = lf.filter(e);
    }
    // Per-axis row universe: applied ONLY for the selected axis, after the base
    // filters and before grouping (mirrors `treemap()`), so the Line tab's
    // rollup totals match the treemap's for that axis.
    if let Some(pred) = axis_row_filter(axis, &columns)? {
        lf = lf.filter(pred);
    }
    // Entity-grain metrics: add the masked `<id>__eff` columns so a `size_by`
    // ranked via `metric_plan` (below) can roll them up. Display uses the metric's
    // own `time_agg` path, so this only feeds the branch ranking.
    let masks = crate::metric::entity_mask_exprs(&ds.metrics, &ds.id_column, &columns)?;
    let lf = if masks.is_empty() {
        lf
    } else {
        lf.with_columns(masks)
    };
    // Resolve levels (derive `__lvl*` for a path axis, identity for a fixed one);
    // the branch column is the level at the current focus depth.
    let f = args.focus.len();
    let (levels, lf) = crate::path::resolved_levels(axis, lf)?;
    let levels = &levels;
    let branch_col = levels
        .get(f)
        .or_else(|| levels.last())
        .cloned()
        .ok_or_else(|| Error::Schema("axis has no levels".into()))?;
    let mut lf = lf;
    for e in focus_exprs(levels, &args.focus) {
        lf = lf.filter(e);
    }
    // `as_of` anchoring: window measured back from the series frame's MAX
    // timestamp (the data's own "now"), not the wall clock — so a static or
    // periodically-refreshed series doesn't clip as real time passes. Computed
    // once over the (filtered, focused) frame.
    if let Some(days) = args.window_days {
        let max_df = lf
            .clone()
            .select([col(ts.as_str())
                .cast(DataType::Date)
                .max()
                .alias("__max_ts")])
            .collect()?;
        let as_of: Option<i32> = max_df
            .column("__max_ts")
            .ok()
            .and_then(|c| c.get(0).ok())
            .and_then(|av| match av {
                AnyValue::Date(d) => Some(d),
                _ => None,
            });
        if let Some(as_of) = as_of {
            // Do the subtraction in i64 and clamp into the Date's i32 day range.
            // A bare `days as i32` silently truncates a large `window_days`, and
            // the i32 subtraction can overflow (panic in debug, wrap in release).
            // After the clamp the narrowing cast is lossless.
            let cutoff = i64::from(as_of)
                .saturating_sub(days)
                .clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32;
            lf = lf.filter(
                col(ts.as_str())
                    .cast(DataType::Date)
                    .gt_eq(lit(cutoff).cast(DataType::Date)),
            );
        }
    }

    let bucket = col(ts.as_str())
        .cast(DataType::Date)
        .dt()
        .truncate(lit(every))
        .cast(DataType::Date)
        .alias("t");
    // `bucketed` carries the raw source rows + their coarse bucket `t`; the tail
    // re-aggregation and the as-of ranking read it directly.
    let bucketed = lf.with_column(bucket);
    // `staged` is what the cross-sectional groupings consume. For a value metric
    // it is the STAGE-1 output: one row per (id, branch, t) holding the entity's
    // time-folded value in column "v". For `count` there is no value to fold, so
    // it is `bucketed` itself (the second stage counts distinct ids directly).
    let staged = match &time_fold_expr {
        Some(fold) => bucketed
            .clone()
            .group_by([
                col(ds.id_column.as_str()),
                col(branch_col.as_str()),
                col("t"),
            ])
            .agg([fold.clone()]),
        None => bucketed.clone(),
    };
    let grouped = staged
        .clone()
        .group_by([col(branch_col.as_str()).alias("branch"), col("t")])
        .agg([cross_expr.clone()])
        .collect()?;

    // ── grid reshape (bounded) ──────────────────────────────────────
    let branch_s = grouped.column("branch")?;
    let t_s = grouped.column("t")?;
    let v_s = grouped.column("v")?;
    let h = grouped.height();

    let mut by_branch: HashMap<String, HashMap<String, f64>> = HashMap::new();
    let mut all_t: BTreeSet<String> = BTreeSet::new();
    for i in 0..h {
        // Skip rows with a null branch or bucket before doing any label work.
        let (b_av, t_av) = (branch_s.get(i)?, t_s.get(i)?);
        if b_av.is_null() || t_av.is_null() {
            continue;
        }
        let b = av_to_label(&b_av.into_static());
        let t = av_to_label(&t_av.into_static());
        let v = crate::output::av_to_f64(&v_s.get(i)?.into_static());
        if let Some(v) = v {
            by_branch.entry(b).or_default().insert(t.clone(), v);
        }
        // The bucket joins the common grid even when this row's value is null.
        all_t.insert(t);
    }
    let grid: Vec<String> = all_t.into_iter().collect();

    // Forward-fill one branch across the common grid: each bucket takes the
    // branch's own value if present, else its most recent earlier value (`None`
    // until the branch's first observation).
    let ffill = |d: &HashMap<String, f64>| -> Vec<Option<f64>> {
        let mut out = Vec::with_capacity(grid.len());
        let mut last: Option<f64> = None;
        for t in &grid {
            if let Some(v) = d.get(t) {
                last = Some(*v);
            }
            out.push(last);
        }
        out
    };
    let filled: HashMap<String, Vec<Option<f64>>> = by_branch
        .iter()
        .map(|(b, d)| (b.clone(), ffill(d)))
        .collect();

    // ── branch selection ────────────────────────────────────────────
    // Two paths:
    //  (a) `branch_set: "treemap"` → `args.branches` carries a PRE-RESOLVED set
    //      of branch keys (the treemap's kept top-K at this axis/focus/size_by).
    //      Plot exactly those, in the treemap's order; everything else folds into
    //      "Other" iff `include_other`. This replaces series's own ranking — so
    //      the Line tab matches the treemap even when ranking by a metric the
    //      series frame lacks, or by current (not summed-history) state.
    //  (b) otherwise → rank branches by `size_by` AT THE AS-OF (latest/max)
    //      bucket (the same current top-K the treemap shows) and take top_k.
    let (top, rest): (Vec<&String>, Vec<&String>) = if let Some(keep) = &args.branches {
        // The treemap may include an "__other__" branch key; that's not a real
        // branch label, so we keep only the real ones (the fold is governed by
        // `include_other`).
        let keep_set: std::collections::HashSet<&str> = keep
            .iter()
            .map(|s| s.as_str())
            .filter(|s| *s != "__other__")
            .collect();
        // `top` follows the treemap's authored order, restricted to branches that
        // actually have series data; `rest` is every other present branch.
        let present: std::collections::HashSet<&String> = filled.keys().collect();
        // A key repeated in `keep` must not emit the same series twice, so only
        // its first occurrence is kept.
        let mut seen: std::collections::HashSet<&String> = std::collections::HashSet::new();
        let top: Vec<&String> = keep
            .iter()
            .filter(|k| k.as_str() != "__other__")
            .filter_map(|k| present.get(k).copied())
            .filter(|b| seen.insert(*b))
            .collect();
        let rest: Vec<&String> = filled
            .keys()
            .filter(|b| !keep_set.contains(b.as_str()))
            .collect();
        (top, rest)
    } else {
        // Rank branches by the `size_by` metric AT THE AS-OF (latest/max) bucket —
        // the SAME current top-K the treemap shows. Ranking over the whole windowed
        // history (a sum across all buckets) would let a historically-big-but-now-
        // small branch outrank the treemap's current top-K; the Line tab must match
        // the treemap's *current* view, so we rank at the data's own "now".
        let size_by = ds
            .resolve_size_by(args.size_by.as_deref())
            .ok_or_else(|| Error::Schema("no size_by / metrics".into()))?;
        let size_m = ds
            .metric(&size_by)
            .ok_or_else(|| Error::Schema(format!("size_by {size_by:?} not a metric")))?;
        // For a `count` size_by, rank by DISTINCT ENTITIES at the as-of bucket —
        // consistent with what the series DISPLAYS (`n_unique(id)` in `cross_expr`).
        // `metric_plan`'s count stat is `len()` (raw row count), so ranking by it
        // would let a 1-entity × 4-weekly-row branch outrank a 2-entity branch at a
        // monthly resolution. Use `n_unique(id)` for the rank too so rank == display.
        let count_rank = size_m.cross_agg() == "count";
        let size_plan = metric_plan(size_m, &columns)?;
        let rank_stat: Expr = if count_rank {
            col(ds.id_column.as_str()).n_unique().alias("_rankval")
        } else {
            size_plan.rank_expr.clone().alias("_rankval")
        };
        // The as-of bucket: the max time bucket present after windowing/focus. Rank
        // only the rows in that bucket so the ranking reflects the current state.
        let max_bucket: Option<i32> = bucketed
            .clone()
            .select([col("t").cast(DataType::Date).max().alias("__max_b")])
            .collect()?
            .column("__max_b")
            .ok()
            .and_then(|c| c.get(0).ok())
            .and_then(|av| match av {
                AnyValue::Date(d) => Some(d),
                _ => None,
            });
        let rank_frame = match max_bucket {
            Some(b) => bucketed.clone().filter(
                col("t")
                    .cast(DataType::Date)
                    .eq(lit(b).cast(DataType::Date)),
            ),
            // Empty windowed frame → no as-of bucket; rank over the (empty) frame.
            None => bucketed.clone(),
        };
        let size_rank = if count_rank {
            // Count ranks directly on distinct entities — one agg, no stat columns.
            rank_frame
                .group_by([col(branch_col.as_str()).alias("branch")])
                .agg([rank_stat.clone()])
                .select([col("branch"), col("_rankval")])
                .collect()?
        } else {
            rank_frame
                .group_by([col(branch_col.as_str()).alias("branch")])
                .agg(
                    size_plan
                        .stats
                        .iter()
                        .map(|(_, e)| e.clone())
                        .collect::<Vec<_>>(),
                )
                .with_column(rank_stat.clone())
                .select([col("branch"), col("_rankval")])
                .collect()?
        };
        // branch label → rank value (descending sort key; missing/null/NaN = -inf).
        let mut rankval: HashMap<String, f64> = HashMap::new();
        {
            let (bs, rs) = (size_rank.column("branch")?, size_rank.column("_rankval")?);
            for i in 0..size_rank.height() {
                let b_av = bs.get(i)?;
                if b_av.is_null() {
                    continue;
                }
                let b = av_to_label(&b_av.into_static());
                // A NaN rank (e.g. a `sum/count` mean over an all-null group) would
                // make the comparator below a non-total order, which `sort_by` may
                // answer with an arbitrary order or a panic. Treat it as missing.
                let r = crate::output::av_to_f64(&rs.get(i)?.into_static())
                    .filter(|r| !r.is_nan())
                    .unwrap_or(f64::NEG_INFINITY);
                rankval.insert(b, r);
            }
        }

        let mut ordered: Vec<&String> = filled.keys().collect();
        ordered.sort_by(|a, b| {
            let ra = rankval.get(*a).copied().unwrap_or(f64::NEG_INFINITY);
            let rb = rankval.get(*b).copied().unwrap_or(f64::NEG_INFINITY);
            // NaN was filtered out above, so `partial_cmp` is always `Some` here.
            // Tie-break on branch label for determinism (matches a stable top-K).
            rb.partial_cmp(&ra)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| a.cmp(b))
        });

        // Clamp top_k to the treemap's bound (MAX_TOP_K) so an out-of-range request
        // can't produce an unbounded branch set.
        let top_k = args.top_k.min(MAX_TOP_K);
        let top: Vec<&String> = ordered.iter().take(top_k).copied().collect();
        let rest: Vec<&String> = ordered.iter().skip(top_k).copied().collect();
        (top, rest)
    };

    // Non-finite or missing values serialize as JSON null.
    let to_values = |vs: &[Option<f64>]| -> Vec<Json> {
        vs.iter()
            .map(|v| {
                v.and_then(|x| serde_json::Number::from_f64(x).map(Json::Number))
                    .unwrap_or(Json::Null)
            })
            .collect()
    };

    let mut out_series: Vec<Json> = top
        .iter()
        .map(|b| {
            json!({"key": b, "label": b, "dates": grid, "values": to_values(&filled[*b]), "is_other": false})
        })
        .collect();
    let mut branch_keys: Vec<Json> = top.iter().map(|b| json!(b)).collect();
    // Fold the tail into "Other" when there IS a tail and an "Other" is wanted.
    // `include_other` defaults to true; for the treemap branch-set path it
    // mirrors whether the treemap produced an "Other".
    if !rest.is_empty() && args.include_other {
        // Re-aggregate the tail's STAGE-1 (per-entity, per-bucket) values per
        // bucket — summing the already-rolled-up branch values is only valid for
        // sum/count; for mean/median/min/max it is meaningless. Re-aggregating
        // from `staged` keeps the two-stage semantics: e.g. a monthly stock
        // metric's Other = sum over tail entities of each entity's last weekly
        // value. is_in over the branch labels (cast to string) selects the tail.
        let rest_labels: Vec<String> = rest.iter().map(|s| s.to_string()).collect();
        let mask = col(branch_col.as_str())
            .cast(DataType::String)
            .is_in(lit(Series::new("".into(), rest_labels)).implode(), false);
        let other_df = staged
            .clone()
            .filter(mask)
            .group_by([col("t")])
            .agg([cross_expr])
            .collect()?;
        let mut other_map: HashMap<String, f64> = HashMap::new();
        let (ot, ov) = (other_df.column("t")?, other_df.column("v")?);
        for i in 0..other_df.height() {
            if let (false, Some(v)) = (
                ot.get(i)?.is_null(),
                crate::output::av_to_f64(&ov.get(i)?.into_static()),
            ) {
                other_map.insert(av_to_label(&ot.get(i)?.into_static()), v);
            }
        }
        out_series.push(json!({
            "key": "__other__", "label": format!("{} (n={})", args.other_label, rest.len()),
            "dates": grid, "values": to_values(&ffill(&other_map)), "is_other": true,
        }));
        branch_keys.push(json!("__other__"));
    }

    // Cadence labels are consumed by the frontend's stepped-render check
    // (`web/viz/timeseries.js`), which treats `"quarterly"`/`"annual"` as filing
    // cadences that hold their value until the next point (step line). Emit those
    // exact labels so q/y series render stepped (NOT `"yearly"`, which the
    // frontend doesn't recognize → it would fall back to a straight line).
    let cadence = match args.resolution.as_str() {
        "w" => "weekly",
        "m" => "monthly",
        "q" => "quarterly",
        "y" => "annual",
        _ => "daily",
    };
    Ok(json!({
        "series": out_series,
        "meta": {"cadence": cadence, "agg": agg, "unit": m.unit, "metric_id": args.metric,
                 "branch_keys": branch_keys, "notice": Json::Null},
    }))
}

/// One entity's metric over time — the detail-page time chart.
///
/// Filters the series frame to `id_column == id`, buckets the timestamp at the
/// requested resolution, and folds the entity's rows inside each bucket with the
/// metric's `time_agg` (defaulting to `last`, ordered by timestamp). Buckets
/// whose bucket key or folded value is null are dropped; there is NO
/// cross-bucket forward-fill. The result is then windowed to the entity's own
/// `as_of`: only buckets dated on or after (its max bucket − `window_days`) are
/// kept. An unrecognized `resolution` code buckets daily.
///
/// Returns the FLAT shape `entity.js::buildDataset` reads for a `/series`
/// payload: `{dates, values, unit, label, metric_id}`.
///
/// # Errors
///
/// `Error::Schema` when `metric` is unknown or the dataset has no timestamp
/// column. Failures from the source, `value_expr`, and Polars are propagated.
pub fn entity_series(
    ds: &FrameDataset,
    source: &dyn Source,
    id: &str,
    metric: &str,
    window_days: Option<i64>,
    resolution: &str,
) -> Result<Json> {
    let columns = source.columns()?;
    let m = ds
        .metric(metric)
        .ok_or_else(|| Error::Schema(format!("unknown metric {metric:?}")))?;
    let ts = ds
        .timestamp_column
        .as_ref()
        .ok_or_else(|| Error::Schema("dataset has no timestamp_column".into()))?;
    let every = resolution_every(resolution);

    // One entity may have several source rows inside a coarse bucket (e.g. weekly
    // rows in a monthly bucket). Fold them with the metric's WITHIN-ENTITY time
    // semantics — `time_agg` when declared, else `last` (a single entity's coarse
    // bucket is a point-in-time snapshot: its latest value, NOT a sum of its own
    // weekly rows). This is independent of any request `agg`, which only ever
    // drove the group series's cross-section. `last`/`first` are ORDER-dependent
    // and the source SQL has no ORDER BY, so sort by the timestamp column inside
    // the fold to pick deterministically (not an arbitrary row).
    let v = value_expr(m, &columns)?;
    let ta = m.time_agg.as_deref().unwrap_or("last");
    let agg_expr = match ta {
        "last" => v
            .sort_by([col(ts.as_str())], SortMultipleOptions::default())
            .last(),
        "first" => v
            .sort_by([col(ts.as_str())], SortMultipleOptions::default())
            .first(),
        "mean" => mean_expr(v),
        "median" => v.median(),
        "min" => v.min(),
        "max" => v.max(),
        _ => v.sum(),
    }
    .alias("v");

    let lf = source
        .frame()?
        .filter(col(ds.id_column.as_str()).eq(lit(id)));
    let bucket = col(ts.as_str())
        .cast(DataType::Date)
        .dt()
        .truncate(lit(every))
        .cast(DataType::Date)
        .alias("t");
    let grouped = lf
        .with_column(bucket)
        .group_by([col("t")])
        .agg([agg_expr])
        .sort(["t"], SortMultipleOptions::default())
        .collect()?;

    // Collect (bucket -> value), dropping null buckets/values.
    let t_s = grouped.column("t")?;
    let v_s = grouped.column("v")?;
    let mut points: Vec<(String, f64)> = Vec::with_capacity(grouped.height());
    for i in 0..grouped.height() {
        let t_av = t_s.get(i)?;
        if t_av.is_null() {
            continue;
        }
        let t = av_to_label(&t_av.into_static());
        if let Some(val) = crate::output::av_to_f64(&v_s.get(i)?.into_static()) {
            points.push((t, val));
        }
    }

    // `as_of` window: measured back from this entity's OWN max bucket (the data's
    // "now" for this entity), not the wall clock. The frame was sorted by `t`, so
    // the max is the last point. If that label cannot be parsed, no windowing is
    // applied.
    if let (Some(days), Some(as_of)) = (
        window_days,
        points.last().and_then(|(t, _)| chrono_days(t).ok()),
    ) {
        // Saturating: an extreme `window_days` must not overflow the i64 day math.
        let cutoff = as_of.saturating_sub(days);
        // A point whose label cannot be parsed is kept rather than dropped.
        points.retain(|(t, _)| chrono_days(t).map_or(true, |d| d >= cutoff));
    }

    // Emit one point per observed bucket. The source is already per-ticker
    // forward-filled onto a dense weekly grid, so each kept bucket carries a
    // real value; we keep only buckets that have a value (drop nulls) — no
    // cross-bucket carry is needed for a single dense entity.
    let mut dates: Vec<Json> = Vec::with_capacity(points.len());
    let mut values: Vec<Json> = Vec::with_capacity(points.len());
    for (t, val) in &points {
        dates.push(json!(t));
        // Non-finite values cannot be JSON numbers; they serialize as null.
        values.push(
            serde_json::Number::from_f64(*val)
                .map(Json::Number)
                .unwrap_or(Json::Null),
        );
    }

    let label = m.label.clone().unwrap_or_else(|| m.id.clone());
    Ok(json!({
        "dates": dates,
        "values": values,
        "unit": m.unit,
        "label": label,
        "metric_id": metric,
    }))
}

/// Days since the Unix epoch for an ISO `YYYY-MM-DD` date string (the bucket
/// labels emitted by `av_to_label` for a Date column). Used for window math
/// without re-collecting the frame.
///
/// Returns `Err(())` unless the input is exactly three `-`-separated numeric
/// components naming a real proleptic-Gregorian calendar date: month in
/// `1..=12` and day within that month's length (leap years honored). Because
/// the string is split on `-`, negative (BCE) years are not representable.
fn chrono_days(iso: &str) -> std::result::Result<i64, ()> {
    let mut it = iso.split('-');
    let y: i32 = it.next().and_then(|s| s.parse().ok()).ok_or(())?;
    let mo: u32 = it.next().and_then(|s| s.parse().ok()).ok_or(())?;
    let d: u32 = it.next().and_then(|s| s.parse().ok()).ok_or(())?;
    // Anything after the day component means this is not a plain ISO date.
    if it.next().is_some() {
        return Err(());
    }

    // Reject impossible dates up front: the day-count formula below happily
    // produces a (meaningless) number for month 13 or day 0.
    let leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
    let month_len = match mo {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if leap => 29,
        2 => 28,
        _ => return Err(()),
    };
    if d == 0 || d > month_len {
        return Err(());
    }

    // Days from civil date (Howard Hinnant's algorithm), done in i64 so no
    // intermediate can overflow. The year is shifted to start on March 1 so the
    // leap day falls at the end of the shifted year.
    let (y, mo, d) = (i64::from(y), i64::from(mo), i64::from(d));
    let y = if mo <= 2 { y - 1 } else { y };
    let era = y.div_euclid(400); // 400-year cycle index (floor division)
    let yoe = y.rem_euclid(400); // year within the cycle, 0..=399
    let mp = (mo + 9) % 12; // month index with March = 0
    let doy = (153 * mp + 2) / 5 + d - 1; // day within the shifted year
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // day within the cycle
    Ok(era * 146_097 + doe - 719_468)
}