Skip to main content

truecalc_core/eval/functions/database/
mod.rs

1use super::super::{FunctionMeta, Registry};
2use crate::eval::functions::math::criterion::{matches_criterion, parse_criterion};
3use crate::types::{ErrorKind, Value};
4
5// ── Helpers ───────────────────────────────────────────────────────────────────
6
7/// Extract rows from a 2D Value::Array.
8/// The outer array contains rows (each row is a Value::Array).
9/// Returns None if the structure is not a valid 2D array.
10fn extract_rows(v: &Value) -> Option<Vec<&[Value]>> {
11    match v {
12        Value::Array(outer) => {
13            let rows: Option<Vec<&[Value]>> = outer
14                .iter()
15                .map(|row| match row {
16                    Value::Array(r) => Some(r.as_slice()),
17                    _ => None,
18                })
19                .collect();
20            rows
21        }
22        _ => None,
23    }
24}
25
26/// Extract criteria rows from a Value, handling both 2D arrays and flat 1D arrays.
27///
28/// A flat 1D array like `["Sales", ">100"]` is treated as alternating
29/// (header, criterion) pairs: `[["Sales"], [">100"]]`.
30fn extract_criteria_rows(v: &Value) -> Option<Vec<Vec<&Value>>> {
31    // Try 2D array first (normal case).
32    if let Value::Array(outer) = v {
33        // Check if it's a 2D array (each element is also an array).
34        let is_2d = outer.iter().all(|row| matches!(row, Value::Array(_)));
35        if is_2d {
36            let rows: Vec<Vec<&Value>> = outer
37                .iter()
38                .map(|row| match row {
39                    Value::Array(r) => r.iter().collect(),
40                    _ => unreachable!(),
41                })
42                .collect();
43            return Some(rows);
44        }
45
46        // Flat 1D array: treat pairs of elements as (header_col, criteria_val).
47        // E.g. ["Sales", ">100"] → header_row = ["Sales"], criteria_row = [">100"].
48        let n = outer.len();
49        if n % 2 != 0 || n == 0 {
50            return None;
51        }
52        let ncols = n / 2;
53        let header_row: Vec<&Value> = outer[..ncols].iter().collect();
54        let crit_row: Vec<&Value> = outer[ncols..].iter().collect();
55        return Some(vec![header_row, crit_row]);
56    }
57    None
58}
59
60/// Resolve field argument to a 0-based column index.
61/// field can be a 1-based number or a column name string matching headers.
62fn resolve_field(field: &Value, headers: &[Value]) -> Option<usize> {
63    match field {
64        Value::Number(n) => {
65            let idx = *n as usize;
66            if idx >= 1 && idx <= headers.len() {
67                Some(idx - 1)
68            } else {
69                None
70            }
71        }
72        Value::Text(name) => {
73            let name_lower = name.to_lowercase();
74            headers.iter().position(|h| match h {
75                Value::Text(s) => s.to_lowercase() == name_lower,
76                _ => false,
77            })
78        }
79        _ => None,
80    }
81}
82
83/// Check whether a data row matches all criteria.
84/// criteria_rows: [header_row, criteria_row] where each row is Vec<&Value>
85/// data_headers: the database headers
86/// data_row: the data row to check
87fn row_matches_criteria(
88    criteria_rows: &[Vec<&Value>],
89    data_headers: &[Value],
90    data_row: &[Value],
91) -> bool {
92    if criteria_rows.len() < 2 {
93        return false;
94    }
95    let crit_headers = &criteria_rows[0];
96    let crit_values = &criteria_rows[1];
97
98    for (crit_col, crit_val) in crit_headers.iter().zip(crit_values.iter()) {
99        // Skip empty criteria
100        match crit_val {
101            Value::Empty => continue,
102            Value::Text(s) if s.is_empty() => continue,
103            _ => {}
104        }
105
106        // Find the matching data column
107        let col_name = match crit_col {
108            Value::Text(s) => s.to_lowercase(),
109            _ => continue,
110        };
111        let col_idx = data_headers.iter().position(|h| match h {
112            Value::Text(s) => s.to_lowercase() == col_name,
113            _ => false,
114        });
115        let col_idx = match col_idx {
116            Some(i) => i,
117            None => return false, // criteria column not found in database
118        };
119
120        let cell_val = data_row.get(col_idx).unwrap_or(&Value::Empty);
121        let criterion = parse_criterion(crit_val);
122        if !matches_criterion(cell_val, &criterion) {
123            return false;
124        }
125    }
126    true
127}
128
129/// Parse the database and criteria arguments, collect the field values for
130/// matching rows.
131///
132/// Returns `Err(Value)` on structural errors (wrong types, field not found).
133fn collect_matching_values(args: &[Value]) -> Result<Vec<Value>, Value> {
134    if args.len() != 3 {
135        return Err(Value::Error(ErrorKind::NA));
136    }
137    let db_rows = extract_rows(&args[0])
138        .ok_or(Value::Error(ErrorKind::Value))?;
139
140    if db_rows.len() < 2 {
141        // Need at least header row + one data row
142        return Ok(vec![]);
143    }
144
145    let headers = db_rows[0];
146    let field_idx = resolve_field(&args[1], headers)
147        .ok_or(Value::Error(ErrorKind::Value))?;
148
149    let crit_rows = extract_criteria_rows(&args[2])
150        .ok_or(Value::Error(ErrorKind::Value))?;
151
152    if crit_rows.len() < 2 {
153        return Err(Value::Error(ErrorKind::Value));
154    }
155
156    let mut values = Vec::new();
157    for data_row in &db_rows[1..] {
158        if row_matches_criteria(&crit_rows, headers, data_row) {
159            let val = data_row.get(field_idx).cloned().unwrap_or(Value::Empty);
160            values.push(val);
161        }
162    }
163    Ok(values)
164}
165
166// ── D* functions ──────────────────────────────────────────────────────────────
167
168/// `DSUM(database, field, criteria)` — sum of field values for matching rows.
169pub fn dsum_fn(args: &[Value]) -> Value {
170    match collect_matching_values(args) {
171        Err(e) => e,
172        Ok(values) => {
173            let mut sum = 0.0_f64;
174            for v in &values {
175                if let Value::Number(n) = v {
176                    sum += n;
177                }
178            }
179            Value::Number(sum)
180        }
181    }
182}
183
184/// `DAVERAGE(database, field, criteria)` — average of field values for matching rows.
185pub fn daverage_fn(args: &[Value]) -> Value {
186    match collect_matching_values(args) {
187        Err(e) => e,
188        Ok(values) => {
189            let nums: Vec<f64> = values
190                .iter()
191                .filter_map(|v| if let Value::Number(n) = v { Some(*n) } else { None })
192                .collect();
193            if nums.is_empty() {
194                return Value::Error(ErrorKind::DivByZero);
195            }
196            Value::Number(nums.iter().sum::<f64>() / nums.len() as f64)
197        }
198    }
199}
200
201/// `DCOUNT(database, field, criteria)` — count of numeric field values for matching rows.
202pub fn dcount_fn(args: &[Value]) -> Value {
203    match collect_matching_values(args) {
204        Err(e) => e,
205        Ok(values) => {
206            let count = values
207                .iter()
208                .filter(|v| matches!(v, Value::Number(_)))
209                .count();
210            Value::Number(count as f64)
211        }
212    }
213}
214
215/// `DCOUNTA(database, field, criteria)` — count of non-empty field values for matching rows.
216pub fn dcounta_fn(args: &[Value]) -> Value {
217    match collect_matching_values(args) {
218        Err(e) => e,
219        Ok(values) => {
220            let count = values
221                .iter()
222                .filter(|v| !matches!(v, Value::Empty))
223                .count();
224            Value::Number(count as f64)
225        }
226    }
227}
228
229/// `DGET(database, field, criteria)` — returns the single matching value, or error.
230pub fn dget_fn(args: &[Value]) -> Value {
231    match collect_matching_values(args) {
232        Err(e) => e,
233        Ok(values) => {
234            if values.len() == 1 {
235                values.into_iter().next().unwrap()
236            } else if values.is_empty() {
237                Value::Error(ErrorKind::Value)
238            } else {
239                // Multiple matches
240                Value::Error(ErrorKind::Num)
241            }
242        }
243    }
244}
245
246/// `DMAX(database, field, criteria)` — max of field values for matching rows.
247pub fn dmax_fn(args: &[Value]) -> Value {
248    match collect_matching_values(args) {
249        Err(e) => e,
250        Ok(values) => {
251            let nums: Vec<f64> = values
252                .iter()
253                .filter_map(|v| if let Value::Number(n) = v { Some(*n) } else { None })
254                .collect();
255            if nums.is_empty() {
256                return Value::Number(0.0);
257            }
258            Value::Number(nums.iter().cloned().fold(f64::NEG_INFINITY, f64::max))
259        }
260    }
261}
262
263/// `DMIN(database, field, criteria)` — min of field values for matching rows.
264pub fn dmin_fn(args: &[Value]) -> Value {
265    match collect_matching_values(args) {
266        Err(e) => e,
267        Ok(values) => {
268            let nums: Vec<f64> = values
269                .iter()
270                .filter_map(|v| if let Value::Number(n) = v { Some(*n) } else { None })
271                .collect();
272            if nums.is_empty() {
273                return Value::Number(0.0);
274            }
275            Value::Number(nums.iter().cloned().fold(f64::INFINITY, f64::min))
276        }
277    }
278}
279
280/// `DPRODUCT(database, field, criteria)` — product of field values for matching rows.
281pub fn dproduct_fn(args: &[Value]) -> Value {
282    match collect_matching_values(args) {
283        Err(e) => e,
284        Ok(values) => {
285            let nums: Vec<f64> = values
286                .iter()
287                .filter_map(|v| if let Value::Number(n) = v { Some(*n) } else { None })
288                .collect();
289            if nums.is_empty() {
290                return Value::Number(0.0);
291            }
292            Value::Number(nums.iter().product())
293        }
294    }
295}
296
297/// `DSTDEV(database, field, criteria)` — sample standard deviation of matching numeric values.
298pub fn dstdev_fn(args: &[Value]) -> Value {
299    match collect_matching_values(args) {
300        Err(e) => e,
301        Ok(values) => {
302            let nums: Vec<f64> = values
303                .iter()
304                .filter_map(|v| if let Value::Number(n) = v { Some(*n) } else { None })
305                .collect();
306            let n = nums.len();
307            if n < 2 {
308                return Value::Error(ErrorKind::DivByZero);
309            }
310            let mean = nums.iter().sum::<f64>() / n as f64;
311            let var = nums.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1) as f64;
312            Value::Number(var.sqrt())
313        }
314    }
315}
316
317/// `DSTDEVP(database, field, criteria)` — population standard deviation of matching numeric values.
318pub fn dstdevp_fn(args: &[Value]) -> Value {
319    match collect_matching_values(args) {
320        Err(e) => e,
321        Ok(values) => {
322            let nums: Vec<f64> = values
323                .iter()
324                .filter_map(|v| if let Value::Number(n) = v { Some(*n) } else { None })
325                .collect();
326            let n = nums.len();
327            if n == 0 {
328                return Value::Error(ErrorKind::DivByZero);
329            }
330            let mean = nums.iter().sum::<f64>() / n as f64;
331            let var = nums.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n as f64;
332            Value::Number(var.sqrt())
333        }
334    }
335}
336
337/// `DVAR(database, field, criteria)` — sample variance of matching numeric values.
338pub fn dvar_fn(args: &[Value]) -> Value {
339    match collect_matching_values(args) {
340        Err(e) => e,
341        Ok(values) => {
342            let nums: Vec<f64> = values
343                .iter()
344                .filter_map(|v| if let Value::Number(n) = v { Some(*n) } else { None })
345                .collect();
346            let n = nums.len();
347            if n < 2 {
348                return Value::Error(ErrorKind::DivByZero);
349            }
350            let mean = nums.iter().sum::<f64>() / n as f64;
351            let var = nums.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1) as f64;
352            Value::Number(var)
353        }
354    }
355}
356
357/// `DVARP(database, field, criteria)` — population variance of matching numeric values.
358pub fn dvarp_fn(args: &[Value]) -> Value {
359    match collect_matching_values(args) {
360        Err(e) => e,
361        Ok(values) => {
362            let nums: Vec<f64> = values
363                .iter()
364                .filter_map(|v| if let Value::Number(n) = v { Some(*n) } else { None })
365                .collect();
366            let n = nums.len();
367            if n == 0 {
368                return Value::Error(ErrorKind::DivByZero);
369            }
370            let mean = nums.iter().sum::<f64>() / n as f64;
371            let var = nums.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n as f64;
372            Value::Number(var)
373        }
374    }
375}
376
377// ── Registration ──────────────────────────────────────────────────────────────
378
379pub fn register_database(registry: &mut Registry) {
380    registry.register_eager("DSUM",     dsum_fn,     FunctionMeta { category: "database", signature: "DSUM(database, field, criteria)",     description: "Sum of field values for rows matching criteria" });
381    registry.register_eager("DAVERAGE", daverage_fn, FunctionMeta { category: "database", signature: "DAVERAGE(database, field, criteria)", description: "Average of field values for rows matching criteria" });
382    registry.register_eager("DCOUNT",   dcount_fn,   FunctionMeta { category: "database", signature: "DCOUNT(database, field, criteria)",   description: "Count of numeric field values for rows matching criteria" });
383    registry.register_eager("DCOUNTA",  dcounta_fn,  FunctionMeta { category: "database", signature: "DCOUNTA(database, field, criteria)",  description: "Count of non-empty field values for rows matching criteria" });
384    registry.register_eager("DGET",     dget_fn,     FunctionMeta { category: "database", signature: "DGET(database, field, criteria)",     description: "Single field value for rows matching criteria" });
385    registry.register_eager("DMAX",     dmax_fn,     FunctionMeta { category: "database", signature: "DMAX(database, field, criteria)",     description: "Maximum field value for rows matching criteria" });
386    registry.register_eager("DMIN",     dmin_fn,     FunctionMeta { category: "database", signature: "DMIN(database, field, criteria)",     description: "Minimum field value for rows matching criteria" });
387    registry.register_eager("DPRODUCT", dproduct_fn, FunctionMeta { category: "database", signature: "DPRODUCT(database, field, criteria)", description: "Product of field values for rows matching criteria" });
388    registry.register_eager("DSTDEV",   dstdev_fn,   FunctionMeta { category: "database", signature: "DSTDEV(database, field, criteria)",   description: "Sample standard deviation of field values for rows matching criteria" });
389    registry.register_eager("DSTDEVP",  dstdevp_fn,  FunctionMeta { category: "database", signature: "DSTDEVP(database, field, criteria)",  description: "Population standard deviation of field values for rows matching criteria" });
390    registry.register_eager("DVAR",     dvar_fn,     FunctionMeta { category: "database", signature: "DVAR(database, field, criteria)",     description: "Sample variance of field values for rows matching criteria" });
391    registry.register_eager("DVARP",    dvarp_fn,    FunctionMeta { category: "database", signature: "DVARP(database, field, criteria)",    description: "Population variance of field values for rows matching criteria" });
392}
393
394#[cfg(test)]
395mod tests;