use std::rc::Rc;
use std::any::Any;
use cjc_runtime::value::Value;
use crate::{
ArrangeKey, Column, CsvConfig, CsvReader, DExpr, DBinOp, DataFrame, GroupedTidyView,
TidyAgg, TidyView,
};
pub fn dispatch_tidy_method(
inner: &Rc<dyn Any>,
method: &str,
args: &[Value],
) -> Result<Option<Value>, String> {
let view = downcast_view(inner)?;
match method {
"nrows" => Ok(Some(Value::Int(view.nrows() as i64))),
"ncols" => Ok(Some(Value::Int(view.ncols() as i64))),
"column_names" => {
let names: Vec<Value> = view
.column_names()
.into_iter()
.map(|s| Value::String(Rc::new(s.to_string())))
.collect();
Ok(Some(Value::Array(Rc::new(names))))
}
"filter" => {
if args.len() != 1 {
return Err("TidyView.filter requires 1 argument: predicate DExpr".into());
}
let predicate = value_to_dexpr(&args[0])?;
let new_view = view.filter(&predicate).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(new_view)))
}
"select" => {
if args.len() != 1 {
return Err("TidyView.select requires 1 argument: column names array".into());
}
let cols = value_to_str_vec(&args[0])?;
let col_refs: Vec<&str> = cols.iter().map(|s| s.as_str()).collect();
let new_view = view.select(&col_refs).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(new_view)))
}
"mutate" => {
if args.len() != 2 {
return Err("TidyView.mutate requires 2 arguments: column_name and expression".into());
}
let col_name = value_to_string(&args[0])?;
let expr = value_to_dexpr(&args[1])?;
let frame = view.mutate(&[(&col_name, expr)]).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(frame.view())))
}
"group_by" => {
if args.len() != 1 {
return Err("TidyView.group_by requires 1 argument: key columns array".into());
}
let keys = value_to_str_vec(&args[0])?;
let key_refs: Vec<&str> = keys.iter().map(|s| s.as_str()).collect();
let grouped = view.group_by(&key_refs).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_grouped(grouped)))
}
"arrange" => {
if args.len() != 1 {
return Err("TidyView.arrange requires 1 argument: sort keys array".into());
}
let keys = value_to_arrange_keys(&args[0])?;
let new_view = view.arrange(&keys).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(new_view)))
}
"distinct" => {
let cols = if args.is_empty() {
view.column_names().iter().map(|s| s.to_string()).collect::<Vec<_>>()
} else {
value_to_str_vec(&args[0])?
};
let col_refs: Vec<&str> = cols.iter().map(|s| s.as_str()).collect();
let new_view = view.distinct(&col_refs).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(new_view)))
}
"slice" => {
if args.len() != 2 {
return Err("TidyView.slice requires 2 arguments: start, end".into());
}
let start = value_to_usize(&args[0])?;
let end = value_to_usize(&args[1])?;
Ok(Some(wrap_view(view.slice(start, end))))
}
"slice_head" => {
if args.len() != 1 {
return Err("TidyView.slice_head requires 1 argument: n".into());
}
let n = value_to_usize(&args[0])?;
Ok(Some(wrap_view(view.slice_head(n))))
}
"slice_tail" => {
if args.len() != 1 {
return Err("TidyView.slice_tail requires 1 argument: n".into());
}
let n = value_to_usize(&args[0])?;
Ok(Some(wrap_view(view.slice_tail(n))))
}
"slice_sample" => {
if args.len() != 2 {
return Err("TidyView.slice_sample requires 2 arguments: n, seed".into());
}
let n = value_to_usize(&args[0])?;
let seed = match &args[1] {
Value::Int(i) => *i as u64,
_ => return Err("slice_sample seed must be Int".into()),
};
Ok(Some(wrap_view(view.slice_sample(n, seed))))
}
"inner_join" | "left_join" | "semi_join" | "anti_join" | "full_join" => {
dispatch_join(view, args, method)
}
"pivot_longer" => {
if args.len() < 2 || args.len() > 3 {
return Err(
"TidyView.pivot_longer requires 2-3 args: cols, names_to, [values_to]".into(),
);
}
let cols = value_to_str_vec(&args[0])?;
let col_refs: Vec<&str> = cols.iter().map(|s| s.as_str()).collect();
let names_to = value_to_string(&args[1])?;
let values_to = if args.len() == 3 {
value_to_string(&args[2])?
} else {
"value".to_string()
};
let frame = view
.pivot_longer(&col_refs, &names_to, &values_to)
.map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(frame.view())))
}
"pivot_wider" => {
if args.len() != 3 {
return Err(
"TidyView.pivot_wider requires 3 args: id_cols, names_from, values_from"
.into(),
);
}
let id_cols = value_to_str_vec(&args[0])?;
let id_refs: Vec<&str> = id_cols.iter().map(|s| s.as_str()).collect();
let names_from = value_to_string(&args[1])?;
let values_from = value_to_string(&args[2])?;
let nullable_frame = view
.pivot_wider(&id_refs, &names_from, &values_from)
.map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(nullable_frame.to_tidy_view_filled())))
}
"rename" => {
if args.len() != 1 {
return Err("TidyView.rename requires 1 argument: array of [old, new] pairs".into());
}
let pairs = value_to_rename_pairs(&args[0])?;
let pair_refs: Vec<(&str, &str)> =
pairs.iter().map(|(a, b)| (a.as_str(), b.as_str())).collect();
let new_view = view.rename(&pair_refs).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(new_view)))
}
"drop_cols" => {
if args.len() != 1 {
return Err("TidyView.drop_cols requires 1 argument: column names array".into());
}
let cols = value_to_str_vec(&args[0])?;
let col_refs: Vec<&str> = cols.iter().map(|s| s.as_str()).collect();
let new_view = view.drop_cols(&col_refs).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(new_view)))
}
"bind_rows" => {
if args.len() != 1 {
return Err("TidyView.bind_rows requires 1 argument: other TidyView".into());
}
let other_rc = match &args[0] {
Value::TidyView(rc) => rc,
_ => return Err("bind_rows argument must be a TidyView".into()),
};
let other = downcast_view(other_rc)?;
let frame = view.bind_rows(other).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(frame.view())))
}
"bind_cols" => {
if args.len() != 1 {
return Err("TidyView.bind_cols requires 1 argument: other TidyView".into());
}
let other_rc = match &args[0] {
Value::TidyView(rc) => rc,
_ => return Err("bind_cols argument must be a TidyView".into()),
};
let other = downcast_view(other_rc)?;
let frame = view.bind_cols(other).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(frame.view())))
}
"column" => {
if args.len() != 1 {
return Err("TidyView.column requires 1 argument: column_name".into());
}
let name = value_to_string(&args[0])?;
let df = view.materialize().map_err(|e| format!("{e}"))?;
let col = df
.get_column(&name)
.ok_or_else(|| format!("column '{}' not found", name))?;
Ok(Some(column_to_value(col)))
}
"to_tensor" => {
if args.len() != 1 {
return Err("TidyView.to_tensor requires 1 argument: column_names array".into());
}
let cols = value_to_str_vec(&args[0])?;
let col_refs: Vec<&str> = cols.iter().map(|s| s.as_str()).collect();
let t = view.to_tensor(&col_refs).map_err(|e| format!("{e}"))?;
Ok(Some(Value::Tensor(t)))
}
"collect" => {
let df = view.materialize().map_err(|e| format!("{e}"))?;
Ok(Some(dataframe_to_value(df)))
}
"print" => {
let df = view.materialize().map_err(|e| format!("{e}"))?;
let s = format_dataframe(&df);
Ok(Some(Value::String(Rc::new(s))))
}
"head" => {
let n = if args.is_empty() { 10 } else {
match &args[0] { Value::Int(n) => *n as usize, _ => return Err("head: argument must be Int".into()) }
};
let sliced = view.slice_head(n);
let df = sliced.materialize().map_err(|e| format!("{e}"))?;
let s = format_dataframe(&df);
Ok(Some(Value::String(Rc::new(s))))
}
"tail" => {
let n = if args.is_empty() { 10 } else {
match &args[0] { Value::Int(n) => *n as usize, _ => return Err("tail: argument must be Int".into()) }
};
let sliced = view.slice_tail(n);
let df = sliced.materialize().map_err(|e| format!("{e}"))?;
let s = format_dataframe(&df);
Ok(Some(Value::String(Rc::new(s))))
}
"shape" => {
let result = Value::Tuple(Rc::new(vec![
Value::Int(view.nrows() as i64),
Value::Int(view.ncols() as i64),
]));
Ok(Some(result))
}
"columns" => {
let names: Vec<Value> = view
.column_names()
.into_iter()
.map(|s| Value::String(Rc::new(s.to_string())))
.collect();
Ok(Some(Value::Array(Rc::new(names))))
}
"dtypes" => {
let df = view.materialize().map_err(|e| format!("{e}"))?;
let mut fields = std::collections::BTreeMap::new();
for (name, col) in &df.columns {
fields.insert(name.clone(), Value::String(Rc::new(col.type_name().to_string())));
}
Ok(Some(Value::Struct { name: "Dtypes".to_string(), fields }))
}
"describe" => {
let df = view.materialize().map_err(|e| format!("{e}"))?;
let s = format_describe(&df);
Ok(Some(Value::String(Rc::new(s))))
}
"glimpse" => {
let df = view.materialize().map_err(|e| format!("{e}"))?;
let s = format_glimpse(&df);
Ok(Some(Value::String(Rc::new(s))))
}
_ => Ok(None), }
}
/// Dispatches a method call made on a `GroupedTidyView` value.
///
/// Handles `ngroups`, `summarise`/`summarize` and `ungroup`. Any other method
/// name yields `Ok(None)` so the caller can try a different dispatcher.
///
/// # Errors
/// Fails when `inner` is not a [`GroupedTidyView`], when `summarise` is not
/// given a non-empty, even-length list of `(name, agg)` arguments, when an
/// argument has the wrong type, or when the summarise operation itself fails.
pub fn dispatch_grouped_method(
    inner: &Rc<dyn Any>,
    method: &str,
    args: &[Value],
) -> Result<Option<Value>, String> {
    let grouped = downcast_grouped(inner)?;
    let outcome = match method {
        "ngroups" => Value::Int(grouped.ngroups() as i64),
        "summarise" | "summarize" => {
            if args.is_empty() || args.len() % 2 == 1 {
                return Err("summarise requires pairs of (name, agg) arguments".into());
            }
            // Arguments arrive flattened: name0, agg0, name1, agg1, ...
            let named_aggs = args
                .chunks(2)
                .map(|pair| -> Result<(String, TidyAgg), String> {
                    let name = value_to_string(&pair[0])?;
                    let agg = value_to_tidy_agg(&pair[1])?;
                    Ok((name, agg))
                })
                .collect::<Result<Vec<_>, String>>()?;
            let borrowed: Vec<(&str, TidyAgg)> = named_aggs
                .iter()
                .map(|(name, agg)| (name.as_str(), agg.clone()))
                .collect();
            let frame = grouped.summarise(&borrowed).map_err(|e| format!("{e}"))?;
            wrap_view(frame.view())
        }
        "ungroup" => wrap_view(grouped.clone().ungroup()),
        _ => return Ok(None),
    };
    Ok(Some(outcome))
}
/// Recovers the concrete [`TidyView`] behind a type-erased `Rc<dyn Any>`.
///
/// Returns an "internal error" message when the payload is some other type.
fn downcast_view(inner: &Rc<dyn Any>) -> Result<&TidyView, String> {
    match inner.downcast_ref::<TidyView>() {
        Some(view) => Ok(view),
        None => Err("internal error: TidyView downcast failed".to_string()),
    }
}
/// Recovers the concrete [`GroupedTidyView`] behind a type-erased `Rc<dyn Any>`.
///
/// Returns an "internal error" message when the payload is some other type.
fn downcast_grouped(inner: &Rc<dyn Any>) -> Result<&GroupedTidyView, String> {
    match inner.downcast_ref::<GroupedTidyView>() {
        Some(grouped) => Ok(grouped),
        None => Err("internal error: GroupedTidyView downcast failed".to_string()),
    }
}
/// Boxes a [`TidyView`] into a reference-counted, type-erased `Value::TidyView`.
pub fn wrap_view(view: TidyView) -> Value {
    let erased: Rc<dyn Any> = Rc::new(view);
    Value::TidyView(erased)
}
/// Boxes a [`GroupedTidyView`] into a reference-counted, type-erased
/// `Value::GroupedTidyView`.
pub fn wrap_grouped(grouped: GroupedTidyView) -> Value {
    let erased: Rc<dyn Any> = Rc::new(grouped);
    Value::GroupedTidyView(erased)
}
/// Extracts an owned `String` from a `Value::String`.
///
/// Any other variant produces an error naming the type that was received.
fn value_to_string(v: &Value) -> Result<String, String> {
    if let Value::String(text) = v {
        Ok(String::clone(text))
    } else {
        Err(format!("expected String, got {}", v.type_name()))
    }
}
/// Converts a `Value::Int` into a `usize` index or count.
///
/// # Errors
/// * negative Ints are rejected with the offending value in the message;
/// * Ints that do not fit in `usize` are rejected instead of being silently
///   truncated by an `as` cast (relevant where `usize` is narrower than 64 bits);
/// * non-Int values are rejected with the received type name.
fn value_to_usize(v: &Value) -> Result<usize, String> {
    match v {
        Value::Int(i) if *i < 0 => Err(format!("expected non-negative Int, got {i}")),
        // Fully-qualified so the call does not depend on the crate edition's prelude.
        Value::Int(i) => <usize as std::convert::TryFrom<i64>>::try_from(*i)
            .map_err(|_| format!("Int {i} is too large for this platform's usize")),
        _ => Err(format!("expected Int, got {}", v.type_name())),
    }
}
/// Converts a `Value::Array` whose elements are all `Value::String` into a
/// `Vec<String>`.
///
/// Fails on the first non-string element, or when `v` is not an array.
fn value_to_str_vec(v: &Value) -> Result<Vec<String>, String> {
    let items = match v {
        Value::Array(items) => items,
        other => return Err(format!("expected Array, got {}", other.type_name())),
    };
    let mut strings = Vec::with_capacity(items.len());
    for item in items.iter() {
        match item {
            Value::String(s) => strings.push(s.as_ref().clone()),
            other => {
                return Err(format!("expected String in array, got {}", other.type_name()));
            }
        }
    }
    Ok(strings)
}
/// Converts an interpreter `Value` into a [`DExpr`].
///
/// Scalars map to literals (`Int`, `Float`, `Bool`); a bare `String` is
/// treated as a column reference. A struct named `"DExpr"` is decoded by its
/// `kind` field: `col`, `lit_int`, `lit_float`, `lit_bool`, `lit_str`,
/// `binop` (decoded recursively from `op`, `left`, `right`) or `count`.
///
/// # Errors
/// Returns a message when a required field is missing or has the wrong type,
/// when `kind` or `op` is unknown, or when `v` is none of the accepted shapes.
pub fn value_to_dexpr(v: &Value) -> Result<DExpr, String> {
    // Handle the scalar shorthands first; only DExpr structs continue below.
    let fields = match v {
        Value::Int(i) => return Ok(DExpr::LitInt(*i)),
        Value::Float(f) => return Ok(DExpr::LitFloat(*f)),
        Value::Bool(b) => return Ok(DExpr::LitBool(*b)),
        Value::String(s) => return Ok(DExpr::Col(s.as_ref().clone())),
        Value::Struct { name, fields } if name == "DExpr" => fields,
        _ => {
            return Err(format!(
                "cannot convert {} to DExpr (expected DExpr struct, Int, Float, Bool, or String)",
                v.type_name()
            ));
        }
    };

    // Looks up a field and returns it only if it holds a string.
    let text = |key: &str| match fields.get(key) {
        Some(Value::String(s)) => Some(s.as_str()),
        _ => None,
    };

    let kind = text("kind").ok_or("DExpr struct missing 'kind' string field")?;
    match kind {
        "col" => {
            let column = text("value").ok_or("DExpr col missing 'value' string field")?;
            Ok(DExpr::Col(column.to_owned()))
        }
        "lit_int" => match fields.get("value") {
            Some(Value::Int(i)) => Ok(DExpr::LitInt(*i)),
            _ => Err("DExpr lit_int missing 'value' int field".into()),
        },
        "lit_float" => match fields.get("value") {
            Some(Value::Float(f)) => Ok(DExpr::LitFloat(*f)),
            _ => Err("DExpr lit_float missing 'value' float field".into()),
        },
        "lit_bool" => match fields.get("value") {
            Some(Value::Bool(b)) => Ok(DExpr::LitBool(*b)),
            _ => Err("DExpr lit_bool missing 'value' bool field".into()),
        },
        "lit_str" => {
            let literal = text("value").ok_or("DExpr lit_str missing 'value' string field")?;
            Ok(DExpr::LitStr(literal.to_owned()))
        }
        "binop" => {
            let op_name = text("op").ok_or("DExpr binop missing 'op' field")?;
            let op = parse_binop(op_name)?;
            let lhs = fields.get("left").ok_or("DExpr binop missing 'left'")?;
            let rhs = fields.get("right").ok_or("DExpr binop missing 'right'")?;
            // Operands are converted left first, then right.
            let left = Box::new(value_to_dexpr(lhs)?);
            let right = Box::new(value_to_dexpr(rhs)?);
            Ok(DExpr::BinOp { op, left, right })
        }
        "count" => Ok(DExpr::Count),
        other => Err(format!("unknown DExpr kind: {other}")),
    }
}
/// Maps an operator spelling to its [`DBinOp`].
///
/// Both the symbolic form (`"+"`, `">="`, `"&&"`, ...) and the word form
/// (`"add"`, `"ge"`, `"and"`, ...) are accepted; anything else is an error.
fn parse_binop(s: &str) -> Result<DBinOp, String> {
    let op = match s {
        // Arithmetic
        "+" | "add" => DBinOp::Add,
        "-" | "sub" => DBinOp::Sub,
        "*" | "mul" => DBinOp::Mul,
        "/" | "div" => DBinOp::Div,
        // Comparison
        ">" | "gt" => DBinOp::Gt,
        "<" | "lt" => DBinOp::Lt,
        ">=" | "ge" => DBinOp::Ge,
        "<=" | "le" => DBinOp::Le,
        "==" | "eq" => DBinOp::Eq,
        "!=" | "ne" => DBinOp::Ne,
        // Logical
        "&&" | "and" => DBinOp::And,
        "||" | "or" => DBinOp::Or,
        other => return Err(format!("unknown binop: {other}")),
    };
    Ok(op)
}
/// Decodes a struct named `"TidyAgg"` into a [`TidyAgg`].
///
/// The `kind` field selects the aggregation. Every kind except `count` also
/// needs a string `col` field; `quantile` additionally needs a numeric `p`
/// field (Float, or Int converted to float).
///
/// # Errors
/// Returns a message when `v` is not a `TidyAgg` struct, when a required
/// field is missing or mistyped, or when `kind` is unknown.
fn value_to_tidy_agg(v: &Value) -> Result<TidyAgg, String> {
    let fields = match v {
        Value::Struct { name, fields } if name == "TidyAgg" => fields,
        _ => return Err(format!("expected TidyAgg struct, got {}", v.type_name())),
    };
    let kind = match fields.get("kind") {
        Some(Value::String(s)) => s.as_str(),
        _ => return Err("TidyAgg struct missing 'kind' string".into()),
    };

    // Fetched lazily so that `count` and unknown kinds never demand a column.
    let column = || -> Result<String, String> {
        match fields.get("col") {
            Some(Value::String(s)) => Ok(s.as_ref().clone()),
            _ => Err(format!("TidyAgg {kind} missing 'col' string")),
        }
    };

    match kind {
        "count" => Ok(TidyAgg::Count),
        "sum" => column().map(TidyAgg::Sum),
        "mean" => column().map(TidyAgg::Mean),
        "min" => column().map(TidyAgg::Min),
        "max" => column().map(TidyAgg::Max),
        "first" => column().map(TidyAgg::First),
        "last" => column().map(TidyAgg::Last),
        "median" => column().map(TidyAgg::Median),
        "sd" => column().map(TidyAgg::Sd),
        "var" => column().map(TidyAgg::Var),
        "n_distinct" => column().map(TidyAgg::NDistinct),
        "iqr" => column().map(TidyAgg::Iqr),
        "quantile" => {
            let col = column()?;
            let p = match fields.get("p") {
                Some(Value::Float(f)) => *f,
                Some(Value::Int(i)) => *i as f64,
                _ => return Err("TidyAgg quantile missing 'p' float".into()),
            };
            Ok(TidyAgg::Quantile(col, p))
        }
        other => Err(format!("unknown TidyAgg kind: {other}")),
    }
}
/// Converts an array of sort keys into `ArrangeKey`s.
///
/// Each element is either a plain string (ascending on that column) or a
/// struct named `"ArrangeKey"` with a string `col` field and an optional
/// boolean `desc` field; a missing or non-boolean `desc` means ascending.
///
/// # Errors
/// Fails when `v` is not an array, when an element has another shape, or
/// when an `ArrangeKey` struct lacks a string `col`.
fn value_to_arrange_keys(v: &Value) -> Result<Vec<ArrangeKey>, String> {
    let items = match v {
        Value::Array(items) => items,
        _ => return Err(format!("arrange requires Array of keys, got {}", v.type_name())),
    };
    items
        .iter()
        .map(|item| -> Result<ArrangeKey, String> {
            match item {
                Value::String(s) => Ok(ArrangeKey::asc(s)),
                Value::Struct { name, fields } if name == "ArrangeKey" => {
                    let col = match fields.get("col") {
                        Some(Value::String(s)) => s.as_ref().as_str(),
                        _ => return Err("ArrangeKey missing 'col'".into()),
                    };
                    let descending = matches!(fields.get("desc"), Some(Value::Bool(true)));
                    Ok(if descending {
                        ArrangeKey::desc(col)
                    } else {
                        ArrangeKey::asc(col)
                    })
                }
                other => Err(format!(
                    "arrange key must be String or ArrangeKey struct, got {}",
                    other.type_name()
                )),
            }
        })
        .collect()
}
/// Converts an array of two-element `[old, new]` string arrays into a list of
/// `(old, new)` pairs.
///
/// # Errors
/// Fails when `v` is not an array, when an element is not a two-element
/// array, or when either member of a pair is not a string.
fn value_to_rename_pairs(v: &Value) -> Result<Vec<(String, String)>, String> {
    let entries = match v {
        Value::Array(entries) => entries,
        _ => return Err(format!("rename requires Array of pairs, got {}", v.type_name())),
    };
    let mut renames = Vec::with_capacity(entries.len());
    for entry in entries.iter() {
        let pair = match entry {
            Value::Array(pair) if pair.len() == 2 => pair,
            _ => return Err("rename pairs must be arrays of [old, new] strings".into()),
        };
        let from = value_to_string(&pair[0])?;
        let to = value_to_string(&pair[1])?;
        renames.push((from, to));
    }
    Ok(renames)
}
/// Runs one of the join methods (`inner_join`, `left_join`, `semi_join`,
/// `anti_join`, `full_join`) of `view` against another view.
///
/// `args` must be `[other_view, left_on, right_on]`: the right-hand
/// `TidyView` followed by one key column name per side. `kind` is the method
/// name and is echoed in error messages. An unrecognised `kind` yields
/// `Ok(None)` once the arguments have been validated.
fn dispatch_join(
    view: &TidyView,
    args: &[Value],
    kind: &str,
) -> Result<Option<Value>, String> {
    let (other_arg, left_arg, right_arg) = match args {
        [other, left, right] => (other, left, right),
        _ => {
            return Err(format!(
                "TidyView.{kind} requires 3 args: other_view, left_on, right_on"
            ));
        }
    };
    let other = match other_arg {
        Value::TidyView(rc) => downcast_view(rc)?,
        _ => return Err(format!("{kind}: first arg must be a TidyView")),
    };
    let left_on = value_to_string(left_arg)?;
    let right_on = value_to_string(right_arg)?;
    let on_pairs: Vec<(&str, &str)> = vec![(left_on.as_str(), right_on.as_str())];

    // Each arm produces the resulting view; it is wrapped once at the end.
    let joined = match kind {
        "inner_join" => {
            let frame = view.inner_join(other, &on_pairs).map_err(|e| format!("{e}"))?;
            frame.view()
        }
        "left_join" => {
            let frame = view.left_join(other, &on_pairs).map_err(|e| format!("{e}"))?;
            frame.view()
        }
        "semi_join" => view.semi_join(other, &on_pairs).map_err(|e| format!("{e}"))?,
        "anti_join" => view.anti_join(other, &on_pairs).map_err(|e| format!("{e}"))?,
        "full_join" => {
            let suffix = crate::JoinSuffix::default();
            let nullable_frame = view
                .full_join(other, &on_pairs, &suffix)
                .map_err(|e| format!("{e}"))?;
            nullable_frame.to_tidy_view_filled()
        }
        _ => return Ok(None),
    };
    Ok(Some(wrap_view(joined)))
}
/// Converts a [`Column`] into a `Value::Array` with one element per row.
///
/// Int and DateTime cells become `Value::Int`, Float cells `Value::Float`,
/// Bool cells `Value::Bool`, and Str cells `Value::String`. Categorical
/// codes are resolved to their level strings. An adaptive categorical column
/// is first converted with `to_legacy_categorical` and then processed again.
fn column_to_value(col: &Column) -> Value {
    let cells: Vec<Value> = match col {
        Column::CategoricalAdaptive(_) => {
            return column_to_value(&col.to_legacy_categorical());
        }
        Column::Int(ints) => ints.iter().copied().map(Value::Int).collect(),
        Column::Float(floats) => floats.iter().copied().map(Value::Float).collect(),
        Column::Str(strings) => strings
            .iter()
            .map(|text| Value::String(Rc::new(text.clone())))
            .collect(),
        Column::Bool(flags) => flags.iter().copied().map(Value::Bool).collect(),
        Column::Categorical { levels, codes } => codes
            .iter()
            .map(|&code| Value::String(Rc::new(levels[code as usize].clone())))
            .collect(),
        Column::DateTime(stamps) => stamps.iter().copied().map(Value::Int).collect(),
    };
    Value::Array(Rc::new(cells))
}
/// Converts a [`DataFrame`] into a `Value::Struct` named `"DataFrame"`.
///
/// Each column becomes a field holding its values as an array. Two extra
/// fields are added afterwards: `__columns` (column names in frame order)
/// and `__nrows` (row count).
pub fn dataframe_to_value(df: DataFrame) -> Value {
    let row_count = df.nrows();
    let ordered_names: Vec<Value> = df
        .columns
        .iter()
        .map(|(name, _)| Value::String(Rc::new(name.clone())))
        .collect();
    let mut fields: std::collections::BTreeMap<String, Value> = df
        .columns
        .iter()
        .map(|(name, col)| (name.clone(), column_to_value(col)))
        .collect();
    // Metadata goes in last, so it wins over a column that happens to share the key.
    fields.insert("__columns".to_string(), Value::Array(Rc::new(ordered_names)));
    fields.insert("__nrows".to_string(), Value::Int(row_count as i64));
    Value::Struct {
        name: "DataFrame".to_string(),
        fields,
    }
}
/// Renders a [`DataFrame`] as a right-aligned text table.
///
/// At most the first 20 rows are shown; when rows are omitted a trailing
/// `... (N more rows)` line reports how many. A frame with no columns is
/// rendered as `DataFrame(0x0)`. Column widths are measured in bytes
/// (`str::len`) over the header and the displayed cells.
fn format_dataframe(df: &DataFrame) -> String {
    /// Appends one table line, padding every cell to its column width.
    fn push_line<'a>(out: &mut String, cells: impl Iterator<Item = &'a str>, widths: &[usize]) {
        for (ci, cell) in cells.enumerate() {
            if ci > 0 {
                out.push_str(" ");
            }
            out.push_str(&format!("{:>width$}", cell, width = widths[ci]));
        }
        out.push('\n');
    }

    let column_count = df.ncols();
    if column_count == 0 {
        return "DataFrame(0x0)".to_string();
    }
    let total_rows = df.nrows();
    let shown_rows = total_rows.min(20);

    // Start from the header widths, then widen to fit the displayed cells.
    let mut widths: Vec<usize> = df.columns.iter().map(|(name, _)| name.len()).collect();
    let mut grid: Vec<Vec<String>> = Vec::with_capacity(shown_rows);
    for r in 0..shown_rows {
        let mut line = Vec::with_capacity(column_count);
        for (width, (_, col)) in widths.iter_mut().zip(df.columns.iter()) {
            let cell = col.get_display(r);
            *width = (*width).max(cell.len());
            line.push(cell);
        }
        grid.push(line);
    }

    let mut out = String::new();
    push_line(&mut out, df.columns.iter().map(|(name, _)| name.as_str()), &widths);
    for line in &grid {
        push_line(&mut out, line.iter().map(String::as_str), &widths);
    }
    if total_rows > shown_rows {
        out.push_str(&format!("... ({} more rows)\n", total_rows - shown_rows));
    }
    out
}
/// Builds a per-column summary of a [`DataFrame`] as text.
///
/// The report starts with the frame dimensions, followed by one section per
/// column whose content depends on the column type:
/// * Int / Float: count, mean, sample standard deviation (0 for a single
///   row), min, the values at sorted positions `n/4`, `n/2`, `3n/4`, and max;
/// * Str: count, number of unique values, most frequent value;
/// * Bool: count of rows, trues and falses;
/// * Categorical: count, number of levels, most frequent level;
/// * DateTime: count, min and max;
/// * adaptive categorical: count, number of levels and code width.
///
/// Empty Int, Float and DateTime columns are reported as `(empty)`.
fn format_describe(df: &DataFrame) -> String {
    use cjc_repro::KahanAccumulatorF64;

    let nrows = df.nrows();
    let mut out = format!("DataFrame: {} rows x {} columns\n\n", nrows, df.ncols());
    for (name, col) in &df.columns {
        out += &format!("── {} ({}) ──\n", name, col.type_name());
        match col {
            Column::Int(values) => {
                if values.is_empty() {
                    out += " (empty)\n";
                    continue;
                }
                let mut ordered = values.clone();
                ordered.sort();
                // Compensated summation for the mean, then for the squared deviations.
                let mut total = KahanAccumulatorF64::new();
                for &x in values {
                    total.add(x as f64);
                }
                let mean = total.finalize() / nrows as f64;
                let mut squares = KahanAccumulatorF64::new();
                for &x in values {
                    let delta = x as f64 - mean;
                    squares.add(delta * delta);
                }
                let std = match nrows {
                    0 | 1 => 0.0,
                    _ => (squares.finalize() / (nrows - 1) as f64).sqrt(),
                };
                out += &format!(" count: {}\n", nrows);
                out += &format!(" mean: {:.4}\n", mean);
                out += &format!(" std: {:.4}\n", std);
                out += &format!(" min: {}\n", ordered[0]);
                out += &format!(" 25%: {}\n", ordered[nrows / 4]);
                out += &format!(" 50%: {}\n", ordered[nrows / 2]);
                out += &format!(" 75%: {}\n", ordered[3 * nrows / 4]);
                out += &format!(" max: {}\n", ordered[nrows - 1]);
            }
            Column::Float(values) => {
                if values.is_empty() {
                    out += " (empty)\n";
                    continue;
                }
                let mut ordered = values.clone();
                ordered.sort_by(|lhs, rhs| lhs.total_cmp(rhs));
                let mut total = KahanAccumulatorF64::new();
                for &x in values {
                    total.add(x);
                }
                let mean = total.finalize() / nrows as f64;
                let mut squares = KahanAccumulatorF64::new();
                for &x in values {
                    let delta = x - mean;
                    squares.add(delta * delta);
                }
                let std = match nrows {
                    0 | 1 => 0.0,
                    _ => (squares.finalize() / (nrows - 1) as f64).sqrt(),
                };
                out += &format!(" count: {}\n", nrows);
                out += &format!(" mean: {:.4}\n", mean);
                out += &format!(" std: {:.4}\n", std);
                out += &format!(" min: {:.4}\n", ordered[0]);
                out += &format!(" 25%: {:.4}\n", ordered[nrows / 4]);
                out += &format!(" 50%: {:.4}\n", ordered[nrows / 2]);
                out += &format!(" 75%: {:.4}\n", ordered[3 * nrows / 4]);
                out += &format!(" max: {:.4}\n", ordered[nrows - 1]);
            }
            Column::Str(values) => {
                let mut freq = std::collections::BTreeMap::new();
                for s in values {
                    *freq.entry(s.as_str()).or_insert(0usize) += 1;
                }
                let unique = freq.len();
                let top = match freq.iter().max_by_key(|(_, &count)| count) {
                    Some((s, _)) => *s,
                    None => "",
                };
                out += &format!(" count: {}\n", nrows);
                out += &format!(" unique: {}\n", unique);
                out += &format!(" top: {}\n", top);
            }
            Column::Bool(values) => {
                let trues = values.iter().filter(|&&flag| flag).count();
                out += &format!(" count: {}\n", nrows);
                out += &format!(" true: {}\n", trues);
                out += &format!(" false: {}\n", nrows - trues);
            }
            Column::Categorical { levels, codes } => {
                let n_levels = levels.len();
                let mut freq = std::collections::BTreeMap::new();
                for &c in codes {
                    *freq.entry(c).or_insert(0usize) += 1;
                }
                let top_code = match freq.iter().max_by_key(|(_, &count)| count) {
                    Some((&code, _)) => code,
                    None => 0,
                };
                // A code outside the level table is shown as "?".
                let top = if (top_code as usize) < levels.len() { &levels[top_code as usize] } else { "?" };
                out += &format!(" count: {}\n", nrows);
                out += &format!(" levels: {}\n", n_levels);
                out += &format!(" top: {}\n", top);
            }
            Column::DateTime(values) => {
                if values.is_empty() {
                    out += " (empty)\n";
                    continue;
                }
                let mut ordered = values.clone();
                ordered.sort();
                out += &format!(" count: {}\n", nrows);
                out += &format!(" min: {} (epoch ms)\n", ordered[0]);
                out += &format!(" max: {} (epoch ms)\n", ordered[nrows - 1]);
            }
            Column::CategoricalAdaptive(cc) => {
                let n_levels = cc.dictionary().len();
                out += &format!(" count: {}\n", nrows);
                out += &format!(
                    " levels: {} (adaptive, {}-byte codes)\n",
                    n_levels,
                    cc.codes().width_bytes()
                );
            }
        }
    }
    out
}
/// Renders a compact, one-line-per-column overview of a [`DataFrame`].
///
/// The output begins with the row and column counts. Each following line
/// shows a column's name and type (padded so all lines align) and the
/// display form of up to its first 8 values, with `, ...` appended when the
/// frame has more rows than were previewed.
fn format_glimpse(df: &DataFrame) -> String {
    let nrows = df.nrows();
    let mut out = format!("Rows: {}\nColumns: {}\n", nrows, df.ncols());

    let name_width = df.columns.iter().map(|(name, _)| name.len()).max().unwrap_or(0);
    let type_width = df
        .columns
        .iter()
        .map(|(_, col)| col.type_name().len())
        .max()
        .unwrap_or(0);
    let shown = nrows.min(8);
    let truncated = nrows > shown;

    for (name, col) in &df.columns {
        out.push_str(&format!("$ {:width_n$} <{:width_t$}> ",
            name, col.type_name(),
            width_n = name_width, width_t = type_width));
        let preview: Vec<String> = (0..shown).map(|i| col.get_display(i)).collect();
        out.push_str(&preview.join(", "));
        if truncated {
            out.push_str(", ...");
        }
        out.push('\n');
    }
    out
}
/// Builds the `DExpr` struct value for a column reference:
/// `kind = "col"`, `value = name`.
pub fn build_col_expr(name: &str) -> Value {
    let text = |s: &str| Value::String(Rc::new(s.to_string()));
    let fields = std::collections::BTreeMap::from([
        ("kind".to_string(), text("col")),
        ("value".to_string(), text(name)),
    ]);
    Value::Struct { name: "DExpr".to_string(), fields }
}
/// Builds the `DExpr` struct value for a binary operation:
/// `kind = "binop"`, plus the `op` spelling and the `left` / `right` operands
/// stored as given.
pub fn build_binop_expr(op: &str, left: Value, right: Value) -> Value {
    let text = |s: &str| Value::String(Rc::new(s.to_string()));
    let fields = std::collections::BTreeMap::from([
        ("kind".to_string(), text("binop")),
        ("op".to_string(), text(op)),
        ("left".to_string(), left),
        ("right".to_string(), right),
    ]);
    Value::Struct { name: "DExpr".to_string(), fields }
}
/// Builds a `TidyAgg` struct value with the given `kind` and, when supplied,
/// a `col` field naming the column to aggregate.
pub fn build_tidy_agg(kind: &str, col: Option<&str>) -> Value {
    let text = |s: &str| Value::String(Rc::new(s.to_string()));
    let mut fields =
        std::collections::BTreeMap::from([("kind".to_string(), text(kind))]);
    // `col` is omitted entirely for column-less aggregations.
    fields.extend(col.map(|c| ("col".to_string(), text(c))));
    Value::Struct { name: "TidyAgg".to_string(), fields }
}
/// Builds an `ArrangeKey` struct value: the column name in `col` and the
/// sort direction in the boolean `desc` field.
pub fn build_arrange_key(col: &str, descending: bool) -> Value {
    let fields = std::collections::BTreeMap::from([
        ("col".to_string(), Value::String(Rc::new(col.to_string()))),
        ("desc".to_string(), Value::Bool(descending)),
    ]);
    Value::Struct { name: "ArrangeKey".to_string(), fields }
}
pub fn dispatch_tidy_builtin(name: &str, args: &[Value]) -> Result<Option<Value>, String> {
match name {
"col" => {
if args.len() != 1 {
return Err("col() requires 1 argument: column name".into());
}
let name = value_to_string(&args[0])?;
Ok(Some(build_col_expr(&name)))
}
"desc" => {
if args.len() != 1 {
return Err("desc() requires 1 argument: column name".into());
}
let name = value_to_string(&args[0])?;
Ok(Some(build_arrange_key(&name, true)))
}
"asc" => {
if args.len() != 1 {
return Err("asc() requires 1 argument: column name".into());
}
let name = value_to_string(&args[0])?;
Ok(Some(build_arrange_key(&name, false)))
}
"dexpr_binop" => {
if args.len() != 3 {
return Err("dexpr_binop() requires 3 args: op, left, right".into());
}
let op = value_to_string(&args[0])?;
Ok(Some(build_binop_expr(&op, args[1].clone(), args[2].clone())))
}
"tidy_count" => Ok(Some(build_tidy_agg("count", None))),
"tidy_sum" => {
if args.len() != 1 { return Err("tidy_sum() requires 1 argument: column name".into()); }
let col = value_to_string(&args[0])?;
Ok(Some(build_tidy_agg("sum", Some(&col))))
}
"tidy_mean" => {
if args.len() != 1 { return Err("tidy_mean() requires 1 argument: column name".into()); }
let col = value_to_string(&args[0])?;
Ok(Some(build_tidy_agg("mean", Some(&col))))
}
"tidy_min" => {
if args.len() != 1 { return Err("tidy_min() requires 1 argument: column name".into()); }
let col = value_to_string(&args[0])?;
Ok(Some(build_tidy_agg("min", Some(&col))))
}
"tidy_max" => {
if args.len() != 1 { return Err("tidy_max() requires 1 argument: column name".into()); }
let col = value_to_string(&args[0])?;
Ok(Some(build_tidy_agg("max", Some(&col))))
}
"tidy_first" => {
if args.len() != 1 { return Err("tidy_first() requires 1 argument: column name".into()); }
let col = value_to_string(&args[0])?;
Ok(Some(build_tidy_agg("first", Some(&col))))
}
"tidy_last" => {
if args.len() != 1 { return Err("tidy_last() requires 1 argument: column name".into()); }
let col = value_to_string(&args[0])?;
Ok(Some(build_tidy_agg("last", Some(&col))))
}
"str_detect" => {
if args.len() != 2 { return Err("str_detect requires 2 args: string, pattern".into()); }
let hay = value_to_string(&args[0])?;
let pat = value_to_string(&args[1])?;
let matched = cjc_regex::is_match(&pat, "", hay.as_bytes());
Ok(Some(Value::Bool(matched)))
}
"str_extract" => {
if args.len() != 2 { return Err("str_extract requires 2 args: string, pattern".into()); }
let hay = value_to_string(&args[0])?;
let pat = value_to_string(&args[1])?;
match cjc_regex::find(&pat, "", hay.as_bytes()) {
Some((start, end)) => {
let slice = &hay.as_bytes()[start..end];
let s = String::from_utf8_lossy(slice).to_string();
Ok(Some(Value::String(Rc::new(s))))
}
None => Ok(Some(Value::String(Rc::new(String::new())))),
}
}
"str_extract_all" => {
if args.len() != 2 { return Err("str_extract_all requires 2 args: string, pattern".into()); }
let hay = value_to_string(&args[0])?;
let pat = value_to_string(&args[1])?;
let matches = cjc_regex::find_all(&pat, "", hay.as_bytes());
let vals: Vec<Value> = matches
.iter()
.map(|&(start, end)| {
let slice = &hay.as_bytes()[start..end];
Value::String(Rc::new(String::from_utf8_lossy(slice).to_string()))
})
.collect();
Ok(Some(Value::Array(Rc::new(vals))))
}
"str_replace" => {
if args.len() != 3 { return Err("str_replace requires 3 args: string, pattern, replacement".into()); }
let hay = value_to_string(&args[0])?;
let pat = value_to_string(&args[1])?;
let rep = value_to_string(&args[2])?;
match cjc_regex::find(&pat, "", hay.as_bytes()) {
Some((start, end)) => {
let mut result = String::with_capacity(hay.len());
result.push_str(&hay[..start]);
result.push_str(&rep);
result.push_str(&hay[end..]);
Ok(Some(Value::String(Rc::new(result))))
}
None => Ok(Some(Value::String(Rc::new(hay)))),
}
}
"str_replace_all" => {
if args.len() != 3 { return Err("str_replace_all requires 3 args: string, pattern, replacement".into()); }
let hay = value_to_string(&args[0])?;
let pat = value_to_string(&args[1])?;
let rep = value_to_string(&args[2])?;
let matches = cjc_regex::find_all(&pat, "", hay.as_bytes());
if matches.is_empty() {
return Ok(Some(Value::String(Rc::new(hay))));
}
let mut result = String::with_capacity(hay.len());
let mut last_end = 0;
for &(start, end) in &matches {
result.push_str(&hay[last_end..start]);
result.push_str(&rep);
last_end = end;
}
result.push_str(&hay[last_end..]);
Ok(Some(Value::String(Rc::new(result))))
}
"str_split" => {
if args.len() != 2 { return Err("str_split requires 2 args: string, pattern".into()); }
let hay = value_to_string(&args[0])?;
let pat = value_to_string(&args[1])?;
let spans = cjc_regex::split(&pat, "", hay.as_bytes());
let vals: Vec<Value> = spans
.iter()
.map(|&(start, end)| {
Value::String(Rc::new(
String::from_utf8_lossy(&hay.as_bytes()[start..end]).to_string(),
))
})
.collect();
Ok(Some(Value::Array(Rc::new(vals))))
}
"str_count" => {
if args.len() != 2 { return Err("str_count requires 2 args: string, pattern".into()); }
let hay = value_to_string(&args[0])?;
let pat = value_to_string(&args[1])?;
let count = cjc_regex::find_all(&pat, "", hay.as_bytes()).len();
Ok(Some(Value::Int(count as i64)))
}
"str_trim" => {
if args.len() != 1 { return Err("str_trim requires 1 arg: string".into()); }
let s = value_to_string(&args[0])?;
Ok(Some(Value::String(Rc::new(s.trim().to_string()))))
}
"str_to_upper" => {
if args.len() != 1 { return Err("str_to_upper requires 1 arg: string".into()); }
let s = value_to_string(&args[0])?;
Ok(Some(Value::String(Rc::new(s.to_uppercase()))))
}
"str_to_lower" => {
if args.len() != 1 { return Err("str_to_lower requires 1 arg: string".into()); }
let s = value_to_string(&args[0])?;
Ok(Some(Value::String(Rc::new(s.to_lowercase()))))
}
"str_starts" => {
if args.len() != 2 { return Err("str_starts requires 2 args: string, prefix".into()); }
let s = value_to_string(&args[0])?;
let prefix = value_to_string(&args[1])?;
Ok(Some(Value::Bool(s.starts_with(&prefix))))
}
"str_ends" => {
if args.len() != 2 { return Err("str_ends requires 2 args: string, suffix".into()); }
let s = value_to_string(&args[0])?;
let suffix = value_to_string(&args[1])?;
Ok(Some(Value::Bool(s.ends_with(&suffix))))
}
"str_sub" => {
if args.len() != 3 { return Err("str_sub requires 3 args: string, start, end".into()); }
let s = value_to_string(&args[0])?;
let start = value_to_usize(&args[1])?.min(s.len());
let end = value_to_usize(&args[2])?.min(s.len());
if start > end {
Ok(Some(Value::String(Rc::new(String::new()))))
} else {
let actual_start = clamp_to_char_boundary(&s, start);
let actual_end = clamp_to_char_boundary(&s, end);
Ok(Some(Value::String(Rc::new(s[actual_start..actual_end].to_string()))))
}
}
"str_len" => {
if args.len() != 1 { return Err("str_len requires 1 arg: string".into()); }
let s = value_to_string(&args[0])?;
Ok(Some(Value::Int(s.len() as i64)))
}
"median" => {
if args.len() != 1 { return Err("median requires 1 arg: numeric array".into()); }
let nums = value_to_f64_vec(&args[0])?;
if nums.is_empty() {
return Ok(Some(Value::Float(f64::NAN)));
}
let mut sorted = nums;
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let mid = sorted.len() / 2;
let med = if sorted.len() % 2 == 0 {
(sorted[mid - 1] + sorted[mid]) / 2.0
} else {
sorted[mid]
};
Ok(Some(Value::Float(med)))
}
"sd" => {
if args.len() != 1 { return Err("sd requires 1 arg: numeric array".into()); }
let nums = value_to_f64_vec(&args[0])?;
if nums.len() < 2 {
return Ok(Some(Value::Float(f64::NAN)));
}
let mean = nums.iter().sum::<f64>() / nums.len() as f64;
let var = nums.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>()
/ (nums.len() - 1) as f64;
Ok(Some(Value::Float(var.sqrt())))
}
"variance" => {
if args.len() != 1 { return Err("variance requires 1 arg: numeric array".into()); }
let nums = value_to_f64_vec(&args[0])?;
if nums.len() < 2 {
return Ok(Some(Value::Float(f64::NAN)));
}
let mean = nums.iter().sum::<f64>() / nums.len() as f64;
let var = nums.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>()
/ (nums.len() - 1) as f64;
Ok(Some(Value::Float(var)))
}
"n_distinct" => {
if args.len() != 1 { return Err("n_distinct requires 1 arg: array".into()); }
match &args[0] {
Value::Array(arr) => {
let mut seen = std::collections::BTreeSet::new();
for v in arr.iter() {
seen.insert(format!("{v}"));
}
Ok(Some(Value::Int(seen.len() as i64)))
}
_ => Err(format!("n_distinct expects Array, got {}", args[0].type_name())),
}
}
"df_read_csv" => {
if args.len() < 1 || args.len() > 2 {
return Err("df_read_csv requires 1-2 arguments (path[, delimiter])".into());
}
let path = match &args[0] {
Value::String(s) => s.as_ref().clone(),
_ => return Err(format!("df_read_csv: path must be String, got {}", args[0].type_name())),
};
let delim: u8 = if args.len() == 2 {
match &args[1] {
Value::String(s) if !s.is_empty() => s.as_bytes()[0],
_ => return Err("df_read_csv: delimiter must be a non-empty String".into()),
}
} else {
b','
};
let bytes = std::fs::read(&path)
.map_err(|e| format!("df_read_csv: {}", e))?;
let config = CsvConfig { delimiter: delim, ..CsvConfig::default() };
let df = CsvReader::new(config)
.parse(&bytes)
.map_err(|e| format!("df_read_csv: {}", e))?;
Ok(Some(wrap_view(TidyView::from_df(df))))
}
"pivot_wider" => {
if args.len() != 4 {
return Err(
"pivot_wider requires 4 arguments (df, id_cols, names_from, values_from)".into(),
);
}
let view = value_to_tidy_view(&args[0])?;
let id_cols = value_to_str_vec(&args[1])?;
let id_refs: Vec<&str> = id_cols.iter().map(|s| s.as_str()).collect();
let names_from = value_to_string(&args[2])?;
let values_from = value_to_string(&args[3])?;
let nullable_frame = view
.pivot_wider(&id_refs, &names_from, &values_from)
.map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(nullable_frame.to_tidy_view_filled())))
}
"pivot_longer" => {
if args.len() < 3 || args.len() > 4 {
return Err(
"pivot_longer requires 3-4 arguments (df, cols, names_to[, values_to])".into(),
);
}
let view = value_to_tidy_view(&args[0])?;
let cols = value_to_str_vec(&args[1])?;
let col_refs: Vec<&str> = cols.iter().map(|s| s.as_str()).collect();
let names_to = value_to_string(&args[2])?;
let values_to = if args.len() == 4 {
value_to_string(&args[3])?
} else {
"value".to_string()
};
let frame = view
.pivot_longer(&col_refs, &names_to, &values_to)
.map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(frame.view())))
}
"df_distinct" => {
if args.is_empty() || args.len() > 2 {
return Err("df_distinct requires 1-2 arguments (df[, cols])".into());
}
let view = value_to_tidy_view(&args[0])?;
let cols = if args.len() == 2 {
value_to_str_vec(&args[1])?
} else {
view.column_names().iter().map(|s| s.to_string()).collect()
};
let col_refs: Vec<&str> = cols.iter().map(|s| s.as_str()).collect();
let new_view = view.distinct(&col_refs).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(new_view)))
}
"df_rename" => {
if args.len() != 3 {
return Err("df_rename requires 3 arguments (df, old_name, new_name)".into());
}
let view = value_to_tidy_view(&args[0])?;
let old = value_to_string(&args[1])?;
let new = value_to_string(&args[2])?;
let pair_refs: Vec<(&str, &str)> = vec![(&old, &new)];
let new_view = view.rename(&pair_refs).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(new_view)))
}
"df_anti_join" | "df_semi_join" | "df_full_join" => {
if args.len() != 3 {
return Err(format!(
"{name} requires 3 arguments (df1, df2, on)"
));
}
let left = value_to_tidy_view(&args[0])?;
let right_rc = match &args[1] {
Value::TidyView(rc) => rc,
_ => return Err(format!("{name}: second argument must be a TidyView")),
};
let right_inner: &Rc<dyn std::any::Any> = right_rc;
let right = right_inner
.downcast_ref::<TidyView>()
.ok_or_else(|| "internal: TidyView downcast failed".to_string())?;
let on_keys: Vec<String> = match &args[2] {
Value::String(s) => vec![s.as_ref().clone()],
Value::Array(arr) => arr
.iter()
.map(|v| match v {
Value::String(s) => Ok(s.as_ref().clone()),
_ => Err(format!("on: expected String keys, got {}", v.type_name())),
})
.collect::<Result<Vec<_>, _>>()?,
_ => return Err(format!("{name}: `on` must be String or Array of Strings")),
};
let on_pairs: Vec<(&str, &str)> = on_keys.iter().map(|k| (k.as_str(), k.as_str())).collect();
match name {
"df_anti_join" => {
let new_view = left.anti_join(right, &on_pairs).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(new_view)))
}
"df_semi_join" => {
let new_view = left.semi_join(right, &on_pairs).map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(new_view)))
}
"df_full_join" => {
let suffix = crate::JoinSuffix::default();
let nullable_frame = left.full_join(right, &on_pairs, &suffix)
.map_err(|e| format!("{e}"))?;
Ok(Some(wrap_view(nullable_frame.to_tidy_view_filled())))
}
_ => Ok(None),
}
}
"df_fill_na" => {
if args.len() != 3 {
return Err("df_fill_na requires 3 arguments (df, col_name, fill_val)".into());
}
let view = value_to_tidy_view(&args[0])?;
let col_name = value_to_string(&args[1])?;
let fill_val = &args[2];
let mut df = view.materialize().map_err(|e| format!("{e}"))?;
let col_idx = df.columns.iter().position(|(n, _)| n == &col_name)
.ok_or_else(|| format!("df_fill_na: column '{}' not found", col_name))?;
let filled_col = match &df.columns[col_idx].1 {
Column::Int(v) => {
let _fill = match fill_val {
Value::Int(i) => *i,
Value::Float(f) => *f as i64,
_ => return Err("df_fill_na: fill value must be numeric for Int column".into()),
};
Column::Int(v.clone())
}
Column::Float(v) => {
let fill = match fill_val {
Value::Float(f) => *f,
Value::Int(i) => *i as f64,
_ => return Err("df_fill_na: fill value must be numeric for Float column".into()),
};
Column::Float(v.iter().map(|&x| if x.is_nan() { fill } else { x }).collect())
}
Column::Str(v) => {
let fill = match fill_val {
Value::String(s) => s.as_ref().clone(),
other => format!("{other}"),
};
Column::Str(v.iter().map(|s| {
if s == "NA" || s.is_empty() { fill.clone() } else { s.clone() }
}).collect())
}
Column::Bool(v) => Column::Bool(v.clone()),
Column::Categorical { levels, codes } => Column::Categorical { levels: levels.clone(), codes: codes.clone() },
Column::CategoricalAdaptive(_) => df.columns[col_idx].1.to_legacy_categorical(),
Column::DateTime(v) => Column::DateTime(v.clone()),
};
df.columns[col_idx].1 = filled_col;
Ok(Some(wrap_view(TidyView::from_df(df))))
}
"df_drop_na" => {
if args.is_empty() || args.len() > 2 {
return Err("df_drop_na requires 1-2 arguments (df[, cols])".into());
}
let view = value_to_tidy_view(&args[0])?;
let target_cols: Vec<String> = if args.len() == 2 {
value_to_str_vec(&args[1])?
} else {
view.column_names().iter().map(|s| s.to_string()).collect()
};
let df = view.materialize().map_err(|e| format!("{e}"))?;
let nrows = df.nrows();
let mut keep = vec![true; nrows];
for col_name in &target_cols {
if let Some(col) = df.get_column(col_name) {
for r in 0..nrows {
if !keep[r] { continue; }
let na = match col {
Column::Float(v) => v[r].is_nan(),
Column::Str(v) => v[r] == "NA" || v[r].is_empty(),
_ => false,
};
if na { keep[r] = false; }
}
} else {
return Err(format!("df_drop_na: column '{}' not found", col_name));
}
}
let mut new_cols: Vec<(String, Column)> = Vec::with_capacity(df.columns.len());
for (name, col) in &df.columns {
let legacy_owned;
let col_ref: &Column = if matches!(col, Column::CategoricalAdaptive(_)) {
legacy_owned = col.to_legacy_categorical();
&legacy_owned
} else {
col
};
let new_col = match col_ref {
Column::Int(v) => Column::Int(v.iter().enumerate().filter(|(r, _)| keep[*r]).map(|(_, x)| *x).collect()),
Column::Float(v) => Column::Float(v.iter().enumerate().filter(|(r, _)| keep[*r]).map(|(_, x)| *x).collect()),
Column::Str(v) => Column::Str(v.iter().enumerate().filter(|(r, _)| keep[*r]).map(|(_, x)| x.clone()).collect()),
Column::Bool(v) => Column::Bool(v.iter().enumerate().filter(|(r, _)| keep[*r]).map(|(_, x)| *x).collect()),
Column::DateTime(v) => Column::DateTime(v.iter().enumerate().filter(|(r, _)| keep[*r]).map(|(_, x)| *x).collect()),
Column::Categorical { levels, codes } => Column::Categorical {
levels: levels.clone(),
codes: codes.iter().enumerate().filter(|(r, _)| keep[*r]).map(|(_, x)| *x).collect(),
},
Column::CategoricalAdaptive(_) => unreachable!("converted via legacy_owned"),
};
new_cols.push((name.clone(), new_col));
}
let new_df = DataFrame::from_columns(new_cols)
.map_err(|e| format!("df_drop_na: {e}"))?;
Ok(Some(wrap_view(TidyView::from_df(new_df))))
}
_ => Ok(None),
}
}
/// Borrows the `TidyView` carried by a `Value::TidyView`.
///
/// # Errors
///
/// Returns an error string when `v` is any other variant (the message
/// includes the value's type name), or when the payload stored in the
/// `TidyView` variant does not downcast to `TidyView`.
fn value_to_tidy_view(v: &Value) -> Result<&TidyView, String> {
    if let Value::TidyView(handle) = v {
        // The variant stores a type-erased payload; recover the concrete type.
        return match handle.downcast_ref::<TidyView>() {
            Some(view) => Ok(view),
            None => Err(String::from("internal: TidyView downcast failed")),
        };
    }
    Err(format!(
        "expected TidyView (use df.view() to convert a DataFrame), got {}",
        v.type_name()
    ))
}
/// Rounds a byte offset down to the nearest UTF-8 character boundary of `s`.
///
/// Offsets at or beyond the end of the string yield `s.len()`. The result is
/// always a valid slicing index for `s`.
fn clamp_to_char_boundary(s: &str, idx: usize) -> usize {
    let capped = idx.min(s.len());
    // Walk backwards from the capped offset; offset 0 is always a boundary,
    // so the search cannot come up empty.
    (0..=capped)
        .rev()
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(0)
}
/// Converts a `Value::Array` of numeric values into a `Vec<f64>`.
///
/// `Value::Float` elements are copied as-is and `Value::Int` elements are
/// cast to `f64`.
///
/// # Errors
///
/// Returns an error string when `v` is not an array, or at the first element
/// that is neither `Float` nor `Int`; both messages include the offending
/// value's type name.
fn value_to_f64_vec(v: &Value) -> Result<Vec<f64>, String> {
    let items = match v {
        Value::Array(items) => items,
        other => return Err(format!("expected Array, got {}", other.type_name())),
    };

    let mut numbers = Vec::with_capacity(items.len());
    for item in items.iter() {
        let number = match item {
            Value::Float(f) => *f,
            Value::Int(i) => *i as f64,
            other => {
                return Err(format!(
                    "expected numeric value in array, got {}",
                    other.type_name()
                ))
            }
        };
        numbers.push(number);
    }
    Ok(numbers)
}