// taxa-core 0.1.0
//
// taxa engine core: manifest model, formula AST→Polars Expr, bounded query generators over Polars.
//! `Backend` — the single seam the hosting layer (server / CLI / bindings) talks
//! to for *all* data access. It speaks taxa's bounded view operations (treemap /
//! series / scatter / geo / detail / search / filter_options) rather than handing
//! back raw frames, so that a future store (a live Postgres, a DuckDB-over-Parquet
//! out-of-core engine) can implement the *same* operations by pushing them down,
//! while the in-memory `FrameBackend` implements them with the Polars engine.
//!
//! `FrameBackend` is impl #1: it wraps a `Source` (a Polars `LazyFrame` provider)
//! and delegates each operation to the existing engine functions — so this seam is
//! a faithful refactor (identical behavior), not a rewrite. New backends slot in
//! behind the same trait; see `docs/BACKENDS.md` / `docs/PROVIDERS.md`.

use std::collections::HashSet;
use std::sync::Arc;

use serde_json::{Map, Value};

use crate::series::SeriesArgs;
use crate::source::{file_source, Source};
use crate::treemap::TreemapArgs;
use crate::{FrameDataset, Result, TreeNode};

/// Everything the hosting layer needs from a data store, as bounded view
/// operations. The `FrameDataset` (manifest) is supplied per call — it's the schema/
/// binding the operation is interpreted against, independent of the store.
///
/// The trait requires `Send + Sync`, so one backend value may be shared across
/// threads. Every method is fallible and reports failure through the crate-level
/// `Result` alias; which errors can occur depends on the implementing store.
pub trait Backend: Send + Sync {
    /// Column names available to bind axes/metrics/filters against.
    ///
    /// Returned as a set, so callers get membership tests but no ordering.
    fn columns(&self) -> Result<HashSet<String>>;
    /// `(name, dtype)` pairs (for inference / introspection).
    ///
    /// Both halves are plain strings; the dtype spelling is whatever the store
    /// reports (NOTE(review): format not visible from this file — confirm
    /// against the `Source` implementation).
    fn schema(&self) -> Result<Vec<(String, String)>>;

    /// Bounded rollup subtree (top-K + "Other" per branch), as core `TreeNode`s;
    /// the hosting layer adapts to the frontend shape via `frontend_tree`.
    ///
    /// `ds` is the manifest the request is interpreted against and `args`
    /// carries the request parameters.
    fn treemap(&self, ds: &FrameDataset, args: &TreemapArgs) -> Result<TreeNode>;
    /// The treemap's kept branch set one level below focus (for `branch_set:
    /// "treemap"` — the series Line tab follows the snapshot ranking).
    ///
    /// Takes the same `TreemapArgs` as `treemap`, so both can be driven from
    /// one request.
    fn branch_set(
        &self,
        ds: &FrameDataset,
        args: &TreemapArgs,
    ) -> Result<crate::treemap::BranchSet>;
    /// Per-branch time series (forward-filled grid), frontend JSON.
    ///
    /// The result is an untyped `serde_json::Value` already in the shape the
    /// frontend consumes.
    fn series(&self, ds: &FrameDataset, args: &SeriesArgs) -> Result<Value>;
    /// One entity's metric over time (the detail-page time chart), frontend JSON.
    ///
    /// `id` selects the entity and `metric` the value to chart. `window_days`
    /// and `resolution` control the time range and bucketing
    /// (NOTE(review): presumably `None` means "no window limit", and the
    /// accepted `resolution` strings are defined by the series engine — confirm).
    fn entity_series(
        &self,
        ds: &FrameDataset,
        id: &str,
        metric: &str,
        window_days: Option<i64>,
        resolution: &str,
    ) -> Result<Value>;
    /// Two-metric scatter points (optionally colored / capped).
    ///
    /// `x` and `y` name the two metrics, `filters` is a JSON object of filter
    /// selections, `color` optionally names what to color points by, and
    /// `limit` optionally caps the number of points returned.
    fn scatter(
        &self,
        ds: &FrameDataset,
        x: &str,
        y: &str,
        filters: &Map<String, Value>,
        color: Option<&str>,
        limit: Option<u32>,
    ) -> Result<Value>;
    /// Choropleth aggregate: a metric summed per region key (no top-K fold).
    ///
    /// `key_column` names the column holding the region key, `metric` the value
    /// to sum, and `filters` is a JSON object of filter selections.
    fn geo(
        &self,
        ds: &FrameDataset,
        key_column: &str,
        metric: &str,
        filters: &Map<String, Value>,
    ) -> Result<Value>;
    /// One entity's record (`None` if unknown).
    ///
    /// An unknown `eid` is `Ok(None)` rather than an error; `Err` is reserved
    /// for failures of the lookup itself.
    fn detail(&self, ds: &FrameDataset, eid: &str) -> Result<Option<Value>>;
    /// Tree-node typeahead (searches every level of `axis`, else the first axis).
    ///
    /// `q` is the text typed so far and `limit` bounds the number of matches.
    fn search(&self, ds: &FrameDataset, q: &str, axis: Option<&str>, limit: u32) -> Result<Value>;
    /// Distinct values for a filter facet (bounded by `q`/`limit`).
    ///
    /// `facet` names the facet to enumerate; `q`, when present, narrows the
    /// values and `limit` caps how many are returned.
    fn filter_options(
        &self,
        ds: &FrameDataset,
        facet: &str,
        q: Option<&str>,
        limit: u32,
    ) -> Result<Vec<Value>>;
}

/// Impl #1 — in-memory / file: a `Source` (Polars `LazyFrame`) bounded by taxa's
/// own engine. Each method delegates to the engine free functions.
///
/// The only state is the shared source handle; the manifest (`FrameDataset`)
/// is passed in per call and is not stored here.
pub struct FrameBackend {
    /// Reference-counted handle to the frame provider that every operation
    /// reads from. Kept private; construct via `FrameBackend::new`.
    source: Arc<dyn Source>,
}

impl FrameBackend {
    pub fn new(source: Arc<dyn Source>) -> Self {
        Self { source }
    }
}

/// `Backend` for the in-memory engine. Each view operation is forwarded, with
/// its arguments unchanged, to the matching free function in `crate::treemap`,
/// `crate::series`, or `crate::query`, reading from the wrapped `Source`.
impl Backend for FrameBackend {
    // Introspection is answered by the source itself rather than the engine.
    fn columns(&self) -> Result<HashSet<String>> {
        self.source.columns()
    }

    fn schema(&self) -> Result<Vec<(String, String)>> {
        self.source.schema()
    }

    // Treemap-family operations: `crate::treemap`.
    fn treemap(&self, ds: &FrameDataset, args: &TreemapArgs) -> Result<TreeNode> {
        // Borrow the source out of the `Arc` for the duration of the call.
        let src = &*self.source;
        crate::treemap::treemap(ds, src, args)
    }

    fn branch_set(
        &self,
        ds: &FrameDataset,
        args: &TreemapArgs,
    ) -> Result<crate::treemap::BranchSet> {
        let src = &*self.source;
        crate::treemap::branch_set(ds, src, args)
    }

    // Time-series operations: `crate::series`.
    fn series(&self, ds: &FrameDataset, args: &SeriesArgs) -> Result<Value> {
        let src = &*self.source;
        crate::series::series(ds, src, args)
    }

    fn entity_series(
        &self,
        ds: &FrameDataset,
        id: &str,
        metric: &str,
        window_days: Option<i64>,
        resolution: &str,
    ) -> Result<Value> {
        let src = &*self.source;
        crate::series::entity_series(ds, src, id, metric, window_days, resolution)
    }

    // Point / record / lookup operations: `crate::query`.
    fn scatter(
        &self,
        ds: &FrameDataset,
        x: &str,
        y: &str,
        filters: &Map<String, Value>,
        color: Option<&str>,
        limit: Option<u32>,
    ) -> Result<Value> {
        let src = &*self.source;
        crate::query::scatter(ds, src, x, y, filters, color, limit)
    }

    fn geo(
        &self,
        ds: &FrameDataset,
        key_column: &str,
        metric: &str,
        filters: &Map<String, Value>,
    ) -> Result<Value> {
        let src = &*self.source;
        crate::query::geo(ds, src, key_column, metric, filters)
    }

    fn detail(&self, ds: &FrameDataset, eid: &str) -> Result<Option<Value>> {
        let src = &*self.source;
        crate::query::detail(ds, src, eid)
    }

    fn search(&self, ds: &FrameDataset, q: &str, axis: Option<&str>, limit: u32) -> Result<Value> {
        let src = &*self.source;
        crate::query::search(ds, src, q, axis, limit)
    }

    fn filter_options(
        &self,
        ds: &FrameDataset,
        facet: &str,
        q: Option<&str>,
        limit: u32,
    ) -> Result<Vec<Value>> {
        let src = &*self.source;
        crate::query::filter_options(ds, src, facet, q, limit)
    }
}

/// Convenience constructor: turn a data-file path (Parquet/CSV) into a boxed
/// `Backend` backed by a `FrameBackend` — the shorthand the CLI and bindings use.
///
/// `path` is handed to `file_source` as-is. This function is infallible, so
/// any problem with the path can only surface from later backend calls.
pub fn file_backend(path: &str) -> Box<dyn Backend> {
    // Move the source returned by `file_source` into an `Arc`, then wrap it.
    let backend = FrameBackend::new(Arc::from(file_source(path)));
    Box::new(backend)
}