use crate::data::datatable::DataValue;
use crate::sql::functions::{ArgCount, FunctionCategory, FunctionSignature, SqlFunction};
use anyhow::{anyhow, Result};
pub struct MedianFunction;
impl SqlFunction for MedianFunction {
fn signature(&self) -> FunctionSignature {
FunctionSignature {
name: "MEDIAN",
category: FunctionCategory::Statistical,
arg_count: ArgCount::Fixed(1),
description:
"Returns the median (middle value) of a numeric column (aggregate function)",
returns: "Numeric value representing the median",
examples: vec![
"SELECT MEDIAN(salary) FROM employees",
"SELECT department, MEDIAN(age) FROM users GROUP BY department",
],
}
}
fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
if args.len() != 1 {
return Err(anyhow!("MEDIAN requires exactly 1 argument"));
}
match &args[0] {
DataValue::Integer(i) => Ok(DataValue::Float(*i as f64)),
DataValue::Float(f) => Ok(DataValue::Float(*f)),
_ => Ok(DataValue::Null),
}
}
}
pub struct PercentileFunction;
impl SqlFunction for PercentileFunction {
fn signature(&self) -> FunctionSignature {
FunctionSignature {
name: "PERCENTILE",
category: FunctionCategory::Statistical,
arg_count: ArgCount::Fixed(2),
description: "Returns the nth percentile of values (0-100) (aggregate function)",
returns: "Numeric value at the specified percentile",
examples: vec![
"SELECT PERCENTILE(score, 75) FROM tests", "SELECT PERCENTILE(income, 50) FROM users", "SELECT PERCENTILE(response_time, 95) FROM requests", ],
}
}
fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
if args.len() != 2 {
return Err(anyhow!(
"PERCENTILE requires exactly 2 arguments: (value, percentile)"
));
}
let percentile = match &args[1] {
DataValue::Integer(i) => *i as f64,
DataValue::Float(f) => *f,
_ => return Err(anyhow!("Percentile must be a number between 0 and 100")),
};
if percentile < 0.0 || percentile > 100.0 {
return Err(anyhow!("Percentile must be between 0 and 100"));
}
match &args[0] {
DataValue::Integer(i) => Ok(DataValue::Float(*i as f64)),
DataValue::Float(f) => Ok(DataValue::Float(*f)),
_ => Ok(DataValue::Null),
}
}
}
pub struct ModeFunction;
impl SqlFunction for ModeFunction {
fn signature(&self) -> FunctionSignature {
FunctionSignature {
name: "MODE",
category: FunctionCategory::Statistical,
arg_count: ArgCount::Fixed(1),
description: "Returns the most frequently occurring value (aggregate function)",
returns: "The mode value (most frequent)",
examples: vec![
"SELECT MODE(category) FROM products",
"SELECT MODE(rating) FROM reviews",
"SELECT department, MODE(job_level) FROM employees GROUP BY department",
],
}
}
fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
if args.len() != 1 {
return Err(anyhow!("MODE requires exactly 1 argument"));
}
Ok(args[0].clone())
}
}
pub struct VarianceFunction;
impl SqlFunction for VarianceFunction {
fn signature(&self) -> FunctionSignature {
FunctionSignature {
name: "VARIANCE",
category: FunctionCategory::Statistical,
arg_count: ArgCount::Fixed(1),
description:
"Calculates the population variance of numeric values (aggregate function)",
returns: "Numeric value representing the variance",
examples: vec![
"SELECT VARIANCE(price) FROM products",
"SELECT category, VARIANCE(quantity) FROM inventory GROUP BY category",
],
}
}
fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
if args.len() != 1 {
return Err(anyhow!("VARIANCE requires exactly 1 argument"));
}
match &args[0] {
DataValue::Integer(_) | DataValue::Float(_) => Ok(DataValue::Float(0.0)),
_ => Ok(DataValue::Null),
}
}
}
pub struct VarSampFunction;
impl SqlFunction for VarSampFunction {
fn signature(&self) -> FunctionSignature {
FunctionSignature {
name: "VAR_SAMP",
category: FunctionCategory::Statistical,
arg_count: ArgCount::Fixed(1),
description: "Calculates the sample variance of numeric values (aggregate function)",
returns: "Numeric value representing the sample variance",
examples: vec![
"SELECT VAR_SAMP(score) FROM test_results",
"SELECT class, VAR_SAMP(height) FROM students GROUP BY class",
],
}
}
fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
if args.len() != 1 {
return Err(anyhow!("VAR_SAMP requires exactly 1 argument"));
}
match &args[0] {
DataValue::Integer(_) | DataValue::Float(_) => Ok(DataValue::Float(0.0)),
_ => Ok(DataValue::Null),
}
}
}
pub struct VarPopFunction;
impl SqlFunction for VarPopFunction {
fn signature(&self) -> FunctionSignature {
FunctionSignature {
name: "VAR_POP",
category: FunctionCategory::Statistical,
arg_count: ArgCount::Fixed(1),
description:
"Calculates the population variance (same as VARIANCE) (aggregate function)",
returns: "Numeric value representing the population variance",
examples: vec!["SELECT VAR_POP(amount) FROM transactions"],
}
}
fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
VarianceFunction.evaluate(args)
}
}
pub struct CorrelationFunction;
impl SqlFunction for CorrelationFunction {
fn signature(&self) -> FunctionSignature {
FunctionSignature {
name: "CORR",
category: FunctionCategory::Statistical,
arg_count: ArgCount::Fixed(2),
description: "Calculates the Pearson correlation coefficient between two columns (aggregate function)",
returns: "Numeric value between -1 and 1",
examples: vec![
"SELECT CORR(height, weight) FROM people",
"SELECT CORR(temperature, ice_cream_sales) FROM daily_data",
"SELECT month, CORR(advertising_spend, revenue) FROM sales GROUP BY month",
],
}
}
fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
if args.len() != 2 {
return Err(anyhow!("CORR requires exactly 2 arguments"));
}
match (&args[0], &args[1]) {
(DataValue::Integer(_), DataValue::Integer(_))
| (DataValue::Float(_), DataValue::Float(_))
| (DataValue::Integer(_), DataValue::Float(_))
| (DataValue::Float(_), DataValue::Integer(_)) => Ok(DataValue::Float(0.0)),
_ => Ok(DataValue::Null),
}
}
}
pub struct SkewFunction;
impl SqlFunction for SkewFunction {
fn signature(&self) -> FunctionSignature {
FunctionSignature {
name: "SKEW",
category: FunctionCategory::Statistical,
arg_count: ArgCount::Fixed(3),
description: "Calculate skewness contribution: ((x - mean) / stddev)^3",
returns: "Skewness contribution of a single value",
examples: vec![
"SELECT SKEW(value, mean, stddev) FROM measurements",
"SELECT SKEW(100, 85, 15) -- returns ~1.0 for value one std dev above mean",
],
}
}
fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
if args.len() != 3 {
return Err(anyhow!("SKEW requires 3 arguments: value, mean, stddev"));
}
let value = match &args[0] {
DataValue::Integer(i) => *i as f64,
DataValue::Float(f) => *f,
_ => return Ok(DataValue::Null),
};
let mean = match &args[1] {
DataValue::Integer(i) => *i as f64,
DataValue::Float(f) => *f,
_ => return Ok(DataValue::Null),
};
let stddev = match &args[2] {
DataValue::Integer(i) => *i as f64,
DataValue::Float(f) => *f,
_ => return Ok(DataValue::Null),
};
if stddev == 0.0 {
return Ok(DataValue::Null);
}
let z_score = (value - mean) / stddev;
Ok(DataValue::Float(z_score.powi(3)))
}
}
pub struct KurtosisFunction;
impl SqlFunction for KurtosisFunction {
fn signature(&self) -> FunctionSignature {
FunctionSignature {
name: "KURTOSIS",
category: FunctionCategory::Statistical,
arg_count: ArgCount::Fixed(3),
description: "Calculate kurtosis contribution: ((x - mean) / stddev)^4",
returns: "Kurtosis contribution of a single value",
examples: vec![
"SELECT KURTOSIS(value, mean, stddev) FROM measurements",
"SELECT KURTOSIS(100, 85, 15) -- returns 1.0 for value one std dev above mean",
],
}
}
fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
if args.len() != 3 {
return Err(anyhow!(
"KURTOSIS requires 3 arguments: value, mean, stddev"
));
}
let value = match &args[0] {
DataValue::Integer(i) => *i as f64,
DataValue::Float(f) => *f,
_ => return Ok(DataValue::Null),
};
let mean = match &args[1] {
DataValue::Integer(i) => *i as f64,
DataValue::Float(f) => *f,
_ => return Ok(DataValue::Null),
};
let stddev = match &args[2] {
DataValue::Integer(i) => *i as f64,
DataValue::Float(f) => *f,
_ => return Ok(DataValue::Null),
};
if stddev == 0.0 {
return Ok(DataValue::Null);
}
let z_score = (value - mean) / stddev;
Ok(DataValue::Float(z_score.powi(4)))
}
}