taxa-core 0.1.0

taxa engine core: manifest model, formula AST→Polars Expr, bounded query generators over Polars.
//! Variable-depth path axes.
//!
//! A path axis is defined by a single column holding a delimited path
//! (`a/b/c.txt`). At query time we DERIVE fixed level columns from it so the
//! engine's existing fixed-depth level-walk can be reused unchanged:
//!
//! 1. compute the max component count `D` over the (filtered) frame,
//! 2. add `__lvl0..__lvl{D-1}` columns via `split → list.get(i, null_on_oob)`,
//!    so a shorter path is null-padded at the deeper levels.
//!
//! The two variable-depth corrections (null-drop at the new level, per-node
//! `has_more`) live in `treemap.rs`, guarded on `axis.path.is_some()`.

use polars::prelude::*;

use crate::error::Result;
use crate::manifest::{Axis, PathSpec};

/// Name of the derived column holding path component `i`.
///
/// The name is the fixed prefix `__lvl` followed by the decimal index, so
/// index 0 maps to `__lvl0`, index 1 to `__lvl1`, and so on.
pub fn level_col(i: usize) -> String {
    let mut name = String::from("__lvl");
    name.push_str(&i.to_string());
    name
}

/// Build the expression that splits the path column into its components.
///
/// Reads the column named by `spec.column` and splits each string on
/// `spec.sep`, producing a list-of-components expression.
fn split_expr(spec: &PathSpec) -> Expr {
    let path_column = col(spec.column.as_str());
    let separator = lit(spec.sep.clone());
    path_column.str().split(separator)
}

/// Depth `D` of the path axis: the largest component count found in any row.
///
/// This clones `base` and collects a one-column aggregate (`_d`), so calling
/// it executes a query. The result is never below 1: when the aggregate cannot
/// be read as a number (e.g. an all-empty/all-null column) the depth falls
/// back to a single level.
///
/// # Errors
///
/// Returns an error if collecting the aggregate frame fails.
pub fn path_depth(spec: &PathSpec, base: &LazyFrame) -> Result<usize> {
    let max_len = split_expr(spec).list().len().max().alias("_d");
    let collected = base.clone().select([max_len]).collect()?;

    // Best-effort read of the single aggregate cell; any failure along the
    // way leaves the depth at 0, which is then clamped up to 1 below.
    let mut depth = 0usize;
    if let Ok(column) = collected.column("_d") {
        if let Ok(value) = column.get(0) {
            if let Some(as_float) = crate::output::av_to_f64(&value.into_static()) {
                depth = as_float as usize;
            }
        }
    }

    Ok(depth.max(1))
}

/// Derived level column names `__lvl0..__lvl{depth-1}`.
pub fn derived_levels(depth: usize) -> Vec<String> {
    (0..depth).map(level_col).collect()
}

/// Attach the derived `__lvl*` columns to `lf`.
///
/// For each index in `0..depth` the path column is split and the component at
/// that index is taken with `list.get(i, true)`; the result is aliased to the
/// matching `level_col` name. The `true` flag is the null-on-out-of-bounds
/// switch, so paths with fewer components are null at the deeper levels.
pub fn with_level_cols(spec: &PathSpec, lf: LazyFrame, depth: usize) -> LazyFrame {
    let components = split_expr(spec);
    let mut level_exprs: Vec<Expr> = Vec::with_capacity(depth);
    for idx in 0..depth {
        let name = level_col(idx);
        let picked = components.clone().list().get(lit(idx as i64), true);
        level_exprs.push(picked.alias(&name));
    }
    lf.with_columns(level_exprs)
}

/// Resolve an axis into its level column names plus a frame that carries them.
///
/// * Path axis (`axis.path` is set): the depth is measured on `lf`, the
///   `__lvl*` names are generated for that depth, and the returned frame has
///   those columns added.
/// * Fixed axis: the authored `axis.levels` are returned together with `lf`
///   unchanged.
///
/// This is the single seam every engine path goes through.
///
/// # Errors
///
/// Propagates any error from measuring the path depth.
pub fn resolved_levels(axis: &Axis, lf: LazyFrame) -> Result<(Vec<String>, LazyFrame)> {
    if let Some(spec) = &axis.path {
        let depth = path_depth(spec, &lf)?;
        let names = derived_levels(depth);
        let framed = with_level_cols(spec, lf, depth);
        return Ok((names, framed));
    }
    Ok((axis.levels.clone(), lf))
}