sql-cli 1.67.1

SQL query tool for CSV/JSON with both interactive TUI and non-interactive CLI modes - perfect for exploration and automation
Documentation
use crate::data::datatable::DataValue;
use crate::sql::functions::{ArgCount, FunctionCategory, FunctionSignature, SqlFunction};
use anyhow::{anyhow, Result};

// Note: These are placeholder implementations for statistical functions.
// In a full SQL engine, these would be handled as aggregate functions
// that accumulate values row by row during query execution.
// For now, these implementations work with single values or
// would need to be integrated with the aggregate system.

/// MEDIAN() - Returns the middle value (placeholder)
pub struct MedianFunction;

impl SqlFunction for MedianFunction {
    fn signature(&self) -> FunctionSignature {
        FunctionSignature {
            name: "MEDIAN",
            category: FunctionCategory::Statistical,
            arg_count: ArgCount::Fixed(1),
            description:
                "Returns the median (middle value) of a numeric column (aggregate function)",
            returns: "Numeric value representing the median",
            examples: vec![
                "SELECT MEDIAN(salary) FROM employees",
                "SELECT department, MEDIAN(age) FROM users GROUP BY department",
            ],
        }
    }

    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
        if args.len() != 1 {
            return Err(anyhow!("MEDIAN requires exactly 1 argument"));
        }

        // For now, just return the input as this needs aggregate handling
        match &args[0] {
            DataValue::Integer(i) => Ok(DataValue::Float(*i as f64)),
            DataValue::Float(f) => Ok(DataValue::Float(*f)),
            _ => Ok(DataValue::Null),
        }
    }
}

/// PERCENTILE() - Returns the nth percentile value (placeholder)
pub struct PercentileFunction;

impl SqlFunction for PercentileFunction {
    fn signature(&self) -> FunctionSignature {
        FunctionSignature {
            name: "PERCENTILE",
            category: FunctionCategory::Statistical,
            arg_count: ArgCount::Fixed(2),
            description: "Returns the nth percentile of values (0-100) (aggregate function)",
            returns: "Numeric value at the specified percentile",
            examples: vec![
                "SELECT PERCENTILE(score, 75) FROM tests", // 75th percentile
                "SELECT PERCENTILE(income, 50) FROM users", // 50th percentile (median)
                "SELECT PERCENTILE(response_time, 95) FROM requests", // 95th percentile
            ],
        }
    }

    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
        if args.len() != 2 {
            return Err(anyhow!(
                "PERCENTILE requires exactly 2 arguments: (value, percentile)"
            ));
        }

        // Extract the percentile value (0-100)
        let percentile = match &args[1] {
            DataValue::Integer(i) => *i as f64,
            DataValue::Float(f) => *f,
            _ => return Err(anyhow!("Percentile must be a number between 0 and 100")),
        };

        if percentile < 0.0 || percentile > 100.0 {
            return Err(anyhow!("Percentile must be between 0 and 100"));
        }

        // For now, just return the input as this needs aggregate handling
        match &args[0] {
            DataValue::Integer(i) => Ok(DataValue::Float(*i as f64)),
            DataValue::Float(f) => Ok(DataValue::Float(*f)),
            _ => Ok(DataValue::Null),
        }
    }
}

/// MODE() - Returns the most frequent value (placeholder)
pub struct ModeFunction;

impl SqlFunction for ModeFunction {
    fn signature(&self) -> FunctionSignature {
        FunctionSignature {
            name: "MODE",
            category: FunctionCategory::Statistical,
            arg_count: ArgCount::Fixed(1),
            description: "Returns the most frequently occurring value (aggregate function)",
            returns: "The mode value (most frequent)",
            examples: vec![
                "SELECT MODE(category) FROM products",
                "SELECT MODE(rating) FROM reviews",
                "SELECT department, MODE(job_level) FROM employees GROUP BY department",
            ],
        }
    }

    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
        if args.len() != 1 {
            return Err(anyhow!("MODE requires exactly 1 argument"));
        }

        // For now, just return the input as this needs aggregate handling
        Ok(args[0].clone())
    }
}

/// VARIANCE() - Population variance (placeholder)
pub struct VarianceFunction;

impl SqlFunction for VarianceFunction {
    fn signature(&self) -> FunctionSignature {
        FunctionSignature {
            name: "VARIANCE",
            category: FunctionCategory::Statistical,
            arg_count: ArgCount::Fixed(1),
            description:
                "Calculates the population variance of numeric values (aggregate function)",
            returns: "Numeric value representing the variance",
            examples: vec![
                "SELECT VARIANCE(price) FROM products",
                "SELECT category, VARIANCE(quantity) FROM inventory GROUP BY category",
            ],
        }
    }

    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
        if args.len() != 1 {
            return Err(anyhow!("VARIANCE requires exactly 1 argument"));
        }

        // For now, just return 0 as this needs aggregate handling
        match &args[0] {
            DataValue::Integer(_) | DataValue::Float(_) => Ok(DataValue::Float(0.0)),
            _ => Ok(DataValue::Null),
        }
    }
}

/// VAR_SAMP() - Sample variance (placeholder)
pub struct VarSampFunction;

impl SqlFunction for VarSampFunction {
    fn signature(&self) -> FunctionSignature {
        FunctionSignature {
            name: "VAR_SAMP",
            category: FunctionCategory::Statistical,
            arg_count: ArgCount::Fixed(1),
            description: "Calculates the sample variance of numeric values (aggregate function)",
            returns: "Numeric value representing the sample variance",
            examples: vec![
                "SELECT VAR_SAMP(score) FROM test_results",
                "SELECT class, VAR_SAMP(height) FROM students GROUP BY class",
            ],
        }
    }

    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
        if args.len() != 1 {
            return Err(anyhow!("VAR_SAMP requires exactly 1 argument"));
        }

        // For now, just return 0 as this needs aggregate handling
        match &args[0] {
            DataValue::Integer(_) | DataValue::Float(_) => Ok(DataValue::Float(0.0)),
            _ => Ok(DataValue::Null),
        }
    }
}

/// VAR_POP() - Population variance (alias for VARIANCE) (placeholder)
pub struct VarPopFunction;

impl SqlFunction for VarPopFunction {
    fn signature(&self) -> FunctionSignature {
        FunctionSignature {
            name: "VAR_POP",
            category: FunctionCategory::Statistical,
            arg_count: ArgCount::Fixed(1),
            description:
                "Calculates the population variance (same as VARIANCE) (aggregate function)",
            returns: "Numeric value representing the population variance",
            examples: vec!["SELECT VAR_POP(amount) FROM transactions"],
        }
    }

    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
        // Delegate to VARIANCE function
        VarianceFunction.evaluate(args)
    }
}

/// CORR() - Correlation coefficient (placeholder)
pub struct CorrelationFunction;

impl SqlFunction for CorrelationFunction {
    fn signature(&self) -> FunctionSignature {
        FunctionSignature {
            name: "CORR",
            category: FunctionCategory::Statistical,
            arg_count: ArgCount::Fixed(2),
            description: "Calculates the Pearson correlation coefficient between two columns (aggregate function)",
            returns: "Numeric value between -1 and 1",
            examples: vec![
                "SELECT CORR(height, weight) FROM people",
                "SELECT CORR(temperature, ice_cream_sales) FROM daily_data",
                "SELECT month, CORR(advertising_spend, revenue) FROM sales GROUP BY month",
            ],
        }
    }

    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
        if args.len() != 2 {
            return Err(anyhow!("CORR requires exactly 2 arguments"));
        }

        // For now, just return 0 as this needs aggregate handling
        match (&args[0], &args[1]) {
            (DataValue::Integer(_), DataValue::Integer(_))
            | (DataValue::Float(_), DataValue::Float(_))
            | (DataValue::Integer(_), DataValue::Float(_))
            | (DataValue::Float(_), DataValue::Integer(_)) => Ok(DataValue::Float(0.0)),
            _ => Ok(DataValue::Null),
        }
    }
}

// Additional useful statistical functions that work on single values

/// SKEW() - Calculate skewness of a single value from mean
pub struct SkewFunction;

impl SqlFunction for SkewFunction {
    fn signature(&self) -> FunctionSignature {
        FunctionSignature {
            name: "SKEW",
            category: FunctionCategory::Statistical,
            arg_count: ArgCount::Fixed(3),
            description: "Calculate skewness contribution: ((x - mean) / stddev)^3",
            returns: "Skewness contribution of a single value",
            examples: vec![
                "SELECT SKEW(value, mean, stddev) FROM measurements",
                "SELECT SKEW(100, 85, 15) -- returns ~1.0 for value one std dev above mean",
            ],
        }
    }

    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
        if args.len() != 3 {
            return Err(anyhow!("SKEW requires 3 arguments: value, mean, stddev"));
        }

        let value = match &args[0] {
            DataValue::Integer(i) => *i as f64,
            DataValue::Float(f) => *f,
            _ => return Ok(DataValue::Null),
        };

        let mean = match &args[1] {
            DataValue::Integer(i) => *i as f64,
            DataValue::Float(f) => *f,
            _ => return Ok(DataValue::Null),
        };

        let stddev = match &args[2] {
            DataValue::Integer(i) => *i as f64,
            DataValue::Float(f) => *f,
            _ => return Ok(DataValue::Null),
        };

        if stddev == 0.0 {
            return Ok(DataValue::Null);
        }

        let z_score = (value - mean) / stddev;
        Ok(DataValue::Float(z_score.powi(3)))
    }
}

/// KURTOSIS() - Calculate kurtosis of a single value from mean
pub struct KurtosisFunction;

impl SqlFunction for KurtosisFunction {
    fn signature(&self) -> FunctionSignature {
        FunctionSignature {
            name: "KURTOSIS",
            category: FunctionCategory::Statistical,
            arg_count: ArgCount::Fixed(3),
            description: "Calculate kurtosis contribution: ((x - mean) / stddev)^4",
            returns: "Kurtosis contribution of a single value",
            examples: vec![
                "SELECT KURTOSIS(value, mean, stddev) FROM measurements",
                "SELECT KURTOSIS(100, 85, 15) -- returns 1.0 for value one std dev above mean",
            ],
        }
    }

    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
        if args.len() != 3 {
            return Err(anyhow!(
                "KURTOSIS requires 3 arguments: value, mean, stddev"
            ));
        }

        let value = match &args[0] {
            DataValue::Integer(i) => *i as f64,
            DataValue::Float(f) => *f,
            _ => return Ok(DataValue::Null),
        };

        let mean = match &args[1] {
            DataValue::Integer(i) => *i as f64,
            DataValue::Float(f) => *f,
            _ => return Ok(DataValue::Null),
        };

        let stddev = match &args[2] {
            DataValue::Integer(i) => *i as f64,
            DataValue::Float(f) => *f,
            _ => return Ok(DataValue::Null),
        };

        if stddev == 0.0 {
            return Ok(DataValue::Null);
        }

        let z_score = (value - mean) / stddev;
        Ok(DataValue::Float(z_score.powi(4)))
    }
}