1use crate::data::datatable::DataValue;
2use crate::sql::functions::{ArgCount, FunctionCategory, FunctionSignature, SqlFunction};
3use anyhow::{anyhow, Result};
4
5pub struct MedianFunction;
13
14impl SqlFunction for MedianFunction {
15 fn signature(&self) -> FunctionSignature {
16 FunctionSignature {
17 name: "MEDIAN",
18 category: FunctionCategory::Statistical,
19 arg_count: ArgCount::Fixed(1),
20 description:
21 "Returns the median (middle value) of a numeric column (aggregate function)",
22 returns: "Numeric value representing the median",
23 examples: vec![
24 "SELECT MEDIAN(salary) FROM employees",
25 "SELECT department, MEDIAN(age) FROM users GROUP BY department",
26 ],
27 }
28 }
29
30 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
31 if args.len() != 1 {
32 return Err(anyhow!("MEDIAN requires exactly 1 argument"));
33 }
34
35 match &args[0] {
37 DataValue::Integer(i) => Ok(DataValue::Float(*i as f64)),
38 DataValue::Float(f) => Ok(DataValue::Float(*f)),
39 _ => Ok(DataValue::Null),
40 }
41 }
42}
43
44pub struct PercentileFunction;
46
47impl SqlFunction for PercentileFunction {
48 fn signature(&self) -> FunctionSignature {
49 FunctionSignature {
50 name: "PERCENTILE",
51 category: FunctionCategory::Statistical,
52 arg_count: ArgCount::Fixed(2),
53 description: "Returns the nth percentile of values (0-100) (aggregate function)",
54 returns: "Numeric value at the specified percentile",
55 examples: vec![
56 "SELECT PERCENTILE(score, 75) FROM tests", "SELECT PERCENTILE(income, 50) FROM users", "SELECT PERCENTILE(response_time, 95) FROM requests", ],
60 }
61 }
62
63 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
64 if args.len() != 2 {
65 return Err(anyhow!(
66 "PERCENTILE requires exactly 2 arguments: (value, percentile)"
67 ));
68 }
69
70 let percentile = match &args[1] {
72 DataValue::Integer(i) => *i as f64,
73 DataValue::Float(f) => *f,
74 _ => return Err(anyhow!("Percentile must be a number between 0 and 100")),
75 };
76
77 if percentile < 0.0 || percentile > 100.0 {
78 return Err(anyhow!("Percentile must be between 0 and 100"));
79 }
80
81 match &args[0] {
83 DataValue::Integer(i) => Ok(DataValue::Float(*i as f64)),
84 DataValue::Float(f) => Ok(DataValue::Float(*f)),
85 _ => Ok(DataValue::Null),
86 }
87 }
88}
89
90pub struct ModeFunction;
92
93impl SqlFunction for ModeFunction {
94 fn signature(&self) -> FunctionSignature {
95 FunctionSignature {
96 name: "MODE",
97 category: FunctionCategory::Statistical,
98 arg_count: ArgCount::Fixed(1),
99 description: "Returns the most frequently occurring value (aggregate function)",
100 returns: "The mode value (most frequent)",
101 examples: vec![
102 "SELECT MODE(category) FROM products",
103 "SELECT MODE(rating) FROM reviews",
104 "SELECT department, MODE(job_level) FROM employees GROUP BY department",
105 ],
106 }
107 }
108
109 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
110 if args.len() != 1 {
111 return Err(anyhow!("MODE requires exactly 1 argument"));
112 }
113
114 Ok(args[0].clone())
116 }
117}
118
119pub struct VarianceFunction;
121
122impl SqlFunction for VarianceFunction {
123 fn signature(&self) -> FunctionSignature {
124 FunctionSignature {
125 name: "VARIANCE",
126 category: FunctionCategory::Statistical,
127 arg_count: ArgCount::Fixed(1),
128 description:
129 "Calculates the population variance of numeric values (aggregate function)",
130 returns: "Numeric value representing the variance",
131 examples: vec![
132 "SELECT VARIANCE(price) FROM products",
133 "SELECT category, VARIANCE(quantity) FROM inventory GROUP BY category",
134 ],
135 }
136 }
137
138 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
139 if args.len() != 1 {
140 return Err(anyhow!("VARIANCE requires exactly 1 argument"));
141 }
142
143 match &args[0] {
145 DataValue::Integer(_) | DataValue::Float(_) => Ok(DataValue::Float(0.0)),
146 _ => Ok(DataValue::Null),
147 }
148 }
149}
150
151pub struct VarSampFunction;
153
154impl SqlFunction for VarSampFunction {
155 fn signature(&self) -> FunctionSignature {
156 FunctionSignature {
157 name: "VAR_SAMP",
158 category: FunctionCategory::Statistical,
159 arg_count: ArgCount::Fixed(1),
160 description: "Calculates the sample variance of numeric values (aggregate function)",
161 returns: "Numeric value representing the sample variance",
162 examples: vec![
163 "SELECT VAR_SAMP(score) FROM test_results",
164 "SELECT class, VAR_SAMP(height) FROM students GROUP BY class",
165 ],
166 }
167 }
168
169 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
170 if args.len() != 1 {
171 return Err(anyhow!("VAR_SAMP requires exactly 1 argument"));
172 }
173
174 match &args[0] {
176 DataValue::Integer(_) | DataValue::Float(_) => Ok(DataValue::Float(0.0)),
177 _ => Ok(DataValue::Null),
178 }
179 }
180}
181
182pub struct VarPopFunction;
184
185impl SqlFunction for VarPopFunction {
186 fn signature(&self) -> FunctionSignature {
187 FunctionSignature {
188 name: "VAR_POP",
189 category: FunctionCategory::Statistical,
190 arg_count: ArgCount::Fixed(1),
191 description:
192 "Calculates the population variance (same as VARIANCE) (aggregate function)",
193 returns: "Numeric value representing the population variance",
194 examples: vec!["SELECT VAR_POP(amount) FROM transactions"],
195 }
196 }
197
198 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
199 VarianceFunction.evaluate(args)
201 }
202}
203
204pub struct CorrelationFunction;
206
207impl SqlFunction for CorrelationFunction {
208 fn signature(&self) -> FunctionSignature {
209 FunctionSignature {
210 name: "CORR",
211 category: FunctionCategory::Statistical,
212 arg_count: ArgCount::Fixed(2),
213 description: "Calculates the Pearson correlation coefficient between two columns (aggregate function)",
214 returns: "Numeric value between -1 and 1",
215 examples: vec![
216 "SELECT CORR(height, weight) FROM people",
217 "SELECT CORR(temperature, ice_cream_sales) FROM daily_data",
218 "SELECT month, CORR(advertising_spend, revenue) FROM sales GROUP BY month",
219 ],
220 }
221 }
222
223 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
224 if args.len() != 2 {
225 return Err(anyhow!("CORR requires exactly 2 arguments"));
226 }
227
228 match (&args[0], &args[1]) {
230 (DataValue::Integer(_), DataValue::Integer(_))
231 | (DataValue::Float(_), DataValue::Float(_))
232 | (DataValue::Integer(_), DataValue::Float(_))
233 | (DataValue::Float(_), DataValue::Integer(_)) => Ok(DataValue::Float(0.0)),
234 _ => Ok(DataValue::Null),
235 }
236 }
237}
238
239pub struct SkewFunction;
243
244impl SqlFunction for SkewFunction {
245 fn signature(&self) -> FunctionSignature {
246 FunctionSignature {
247 name: "SKEW",
248 category: FunctionCategory::Statistical,
249 arg_count: ArgCount::Fixed(3),
250 description: "Calculate skewness contribution: ((x - mean) / stddev)^3",
251 returns: "Skewness contribution of a single value",
252 examples: vec![
253 "SELECT SKEW(value, mean, stddev) FROM measurements",
254 "SELECT SKEW(100, 85, 15) -- returns ~1.0 for value one std dev above mean",
255 ],
256 }
257 }
258
259 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
260 if args.len() != 3 {
261 return Err(anyhow!("SKEW requires 3 arguments: value, mean, stddev"));
262 }
263
264 let value = match &args[0] {
265 DataValue::Integer(i) => *i as f64,
266 DataValue::Float(f) => *f,
267 _ => return Ok(DataValue::Null),
268 };
269
270 let mean = match &args[1] {
271 DataValue::Integer(i) => *i as f64,
272 DataValue::Float(f) => *f,
273 _ => return Ok(DataValue::Null),
274 };
275
276 let stddev = match &args[2] {
277 DataValue::Integer(i) => *i as f64,
278 DataValue::Float(f) => *f,
279 _ => return Ok(DataValue::Null),
280 };
281
282 if stddev == 0.0 {
283 return Ok(DataValue::Null);
284 }
285
286 let z_score = (value - mean) / stddev;
287 Ok(DataValue::Float(z_score.powi(3)))
288 }
289}
290
291pub struct KurtosisFunction;
293
294impl SqlFunction for KurtosisFunction {
295 fn signature(&self) -> FunctionSignature {
296 FunctionSignature {
297 name: "KURTOSIS",
298 category: FunctionCategory::Statistical,
299 arg_count: ArgCount::Fixed(3),
300 description: "Calculate kurtosis contribution: ((x - mean) / stddev)^4",
301 returns: "Kurtosis contribution of a single value",
302 examples: vec![
303 "SELECT KURTOSIS(value, mean, stddev) FROM measurements",
304 "SELECT KURTOSIS(100, 85, 15) -- returns 1.0 for value one std dev above mean",
305 ],
306 }
307 }
308
309 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
310 if args.len() != 3 {
311 return Err(anyhow!(
312 "KURTOSIS requires 3 arguments: value, mean, stddev"
313 ));
314 }
315
316 let value = match &args[0] {
317 DataValue::Integer(i) => *i as f64,
318 DataValue::Float(f) => *f,
319 _ => return Ok(DataValue::Null),
320 };
321
322 let mean = match &args[1] {
323 DataValue::Integer(i) => *i as f64,
324 DataValue::Float(f) => *f,
325 _ => return Ok(DataValue::Null),
326 };
327
328 let stddev = match &args[2] {
329 DataValue::Integer(i) => *i as f64,
330 DataValue::Float(f) => *f,
331 _ => return Ok(DataValue::Null),
332 };
333
334 if stddev == 0.0 {
335 return Ok(DataValue::Null);
336 }
337
338 let z_score = (value - mean) / stddev;
339 Ok(DataValue::Float(z_score.powi(4)))
340 }
341}