rustledger-query 0.16.2

Beancount query engine (BQL) with SQL-like syntax for ledger queries
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
//! Sorting and pivoting functions.

use std::collections::HashMap;

use crate::ast::{Expr, Literal, OrderSpec, SortDirection};
use crate::error::QueryError;

use super::Executor;
use super::types::{QueryResult, Row, Value, hash_single_value};

impl Executor<'_> {
    pub(super) fn sort_results(
        &self,
        result: &mut QueryResult,
        order_by: &[OrderSpec],
    ) -> Result<(), QueryError> {
        if order_by.is_empty() {
            return Ok(());
        }

        // Build a map from column names to indices
        let column_indices: std::collections::HashMap<&str, usize> = result
            .columns
            .iter()
            .enumerate()
            .map(|(i, name)| (name.as_str(), i))
            .collect();

        // Resolve ORDER BY expressions to column indices
        let mut sort_specs: Vec<(usize, bool)> = Vec::new();
        for spec in order_by {
            // Try to resolve the expression to a column index
            let idx = match &spec.expr {
                Expr::Column(name) => column_indices
                    .get(name.as_str())
                    .copied()
                    .ok_or_else(|| QueryError::UnknownColumn(name.clone()))?,
                Expr::Function(func) => {
                    // First try to find a column with the function name (e.g., "sum" for sum(amount))
                    // Then try the full expression string (e.g., "account_sortkey(account)")
                    let expr_str = spec.expr.to_string();
                    column_indices
                        .get(func.name.as_str())
                        .or_else(|| column_indices.get(expr_str.as_str()))
                        .copied()
                        .ok_or_else(|| {
                            QueryError::Evaluation(format!(
                                "ORDER BY expression not found in SELECT: {expr_str}"
                            ))
                        })?
                }
                _ => {
                    // For other expression kinds (binary ops, literals, etc.),
                    // look up by string representation (matches hidden column aliases).
                    let expr_str = spec.expr.to_string();
                    column_indices
                        .get(expr_str.as_str())
                        .copied()
                        .ok_or_else(|| {
                            QueryError::Evaluation(format!(
                                "ORDER BY expression not found in SELECT: {expr_str}"
                            ))
                        })?
                }
            };
            let ascending = spec.direction != SortDirection::Desc;
            sort_specs.push((idx, ascending));
        }

        // Sort the rows. Use `QueryResult::sort_by` (not `result.rows.sort_by`)
        // so the per-row `row_group_keys` sidecar stays in lockstep — without
        // this, the renderer would apply a row's currency hint to a different
        // row's content after sort.
        result.sort_by(|a, b| {
            for (idx, ascending) in &sort_specs {
                if *idx >= a.len() || *idx >= b.len() {
                    continue;
                }
                let ord = self.compare_values_for_sort(&a[*idx], &b[*idx]);
                if ord != std::cmp::Ordering::Equal {
                    return if *ascending { ord } else { ord.reverse() };
                }
            }
            std::cmp::Ordering::Equal
        });

        Ok(())
    }
    /// Apply the PIVOT BY transformation, matching bean-query semantics
    /// (issue #1034).
    ///
    /// **Syntax**: `PIVOT BY <pivot_value_col>, <group_by_col>` — exactly
    /// two columns. The first column's values become the new column
    /// headers; the second is the GROUP BY column to keep as the row key.
    /// All other columns become "value" cells, populated at the
    /// intersection of (`group_by_col` value, `pivot_value_col` value).
    ///
    /// **Validation** (rules 1–3 match `_compile_pivot_by` in
    /// `beanquery/compiler.py`; rule 4 is rledger-specific):
    /// - exactly two pivot columns (`PivotWrongArity` otherwise)
    /// - the two columns must differ (`PivotSameColumn` otherwise)
    /// - the second column must be a GROUP BY target
    ///   (`PivotSecondNotInGroupBy` otherwise)
    /// - PIVOT BY requires an explicit `GROUP BY` clause
    ///   (`PivotWithoutGroupBy` otherwise). Bean-query reaches the
    ///   same outcome through grammar — the second-must-be-in-GROUP-BY
    ///   rule is only checked when GROUP BY exists, but their parser
    ///   typically rejects PIVOT-without-GROUP-BY queries earlier.
    ///   rledger's parser is more permissive, so we surface the case
    ///   with a specific error rather than the misleading "second
    ///   column not in GROUP BY".
    ///
    /// **Input contract**: callers should provide `result` with at most
    /// one row per `(key_col, pivot_value_col)` pair — the typical
    /// guarantee from `GROUP BY <key>, <pivot_value>`. If the input has
    /// duplicate `(key, pivot_value)` rows, only the first one in row
    /// order contributes its value cells; later duplicates are silently
    /// ignored. Validator-rejected aggregate queries always satisfy
    /// this; non-aggregate queries with PIVOT would need the caller to
    /// enforce uniqueness.
    ///
    /// **Pipeline ordering**: this runs AFTER `sort_results` and the
    /// hidden-column strip (`execute_select`), so `result.columns`
    /// holds only the visible select targets in the user-requested
    /// sort order. That contract is what makes the strip+pivot
    /// interaction (item #4 of #1034) cleanly disappear.
    pub(super) fn apply_pivot(
        &self,
        result: &QueryResult,
        pivot_exprs: &[Expr],
        group_by: &Option<Vec<Expr>>,
    ) -> Result<QueryResult, QueryError> {
        // The parser uses `at_least(1)` for the PIVOT BY clause and
        // execute_select only calls this fn inside `if let Some(pivot_exprs)`,
        // so an empty slice is unreachable. Belt-and-suspenders.
        debug_assert!(
            !pivot_exprs.is_empty(),
            "apply_pivot called with empty pivot_exprs (parser invariant violated)"
        );

        // Validation #1: arity. Bean-query requires exactly two columns.
        if pivot_exprs.len() != 2 {
            return Err(QueryError::PivotWrongArity(pivot_exprs.len()));
        }

        // Validation #2: PIVOT BY requires an explicit GROUP BY clause.
        // Implicit grouping (aggregates without GROUP BY) produces a
        // single-row result whose key dimension is undefined — PIVOT
        // can't identify a row key from such a result. Reject early
        // so the user gets a specific error, not the misleading
        // "second column not in GROUP BY".
        let Some(gb) = group_by.as_ref() else {
            return Err(QueryError::PivotWithoutGroupBy);
        };

        let pivot_value_col_idx = self.find_pivot_column(result, &pivot_exprs[0])?;
        let key_col_idx = self.find_pivot_column(result, &pivot_exprs[1])?;

        // Validation #3: the two columns must differ.
        if pivot_value_col_idx == key_col_idx {
            return Err(QueryError::PivotSameColumn);
        }

        // Validation #4: the second column must be a GROUP BY target.
        // Resolve each GROUP BY expression to its result column index
        // and check membership.
        let key_in_group_by = gb
            .iter()
            .filter_map(|expr| self.find_pivot_column(result, expr).ok())
            .any(|idx| idx == key_col_idx);
        if !key_in_group_by {
            return Err(QueryError::PivotSecondNotInGroupBy);
        }

        // Collect unique pivot values, preserving the row-order they
        // appear in (which post-sort means the user's ORDER BY drives
        // the new column order). Linear-scan dedup via structural
        // PartialEq — pivot values are typically small (handful of
        // currencies), and `Value` doesn't implement `Hash` because
        // some inner types (Decimal, Inventory) don't, so a pure
        // hash-based dedup either risks 2⁻⁶⁴ collisions (false-merging
        // distinct values) or requires a wrapper type. Structural eq
        // sidesteps both.
        let mut pivot_values: Vec<Value> = Vec::new();
        for row in &result.rows {
            let v = row.get(pivot_value_col_idx).cloned().unwrap_or(Value::Null);
            if !pivot_values.contains(&v) {
                pivot_values.push(v);
            }
        }

        // The "value" cells to place at each (key, pivot_value)
        // intersection are EVERY OTHER column that's not the pivot and
        // not the key. Today the typical query has exactly one such
        // column (the aggregate), but the design generalizes.
        let value_col_idxs: Vec<usize> = (0..result.columns.len())
            .filter(|i| *i != pivot_value_col_idx && *i != key_col_idx)
            .collect();

        // Build new column names. Layout: [key_col, <value_col × pivot_value>...].
        //
        // Single-value-column case (the typical SUM(number) shape) gets
        // just the pivot value as the header — matches bean-query
        // exactly. Multi-value-column case qualifies with the value
        // column name (`<value_col> / <pivot_value>`) so the headers
        // stay unambiguous when there's more than one aggregate. The
        // asymmetry is deliberate: a single-aggregate header like
        // `USD` is what users expect from the typical pivot.
        let mut new_columns: Vec<String> =
            Vec::with_capacity(1 + value_col_idxs.len() * pivot_values.len());
        new_columns.push(result.columns[key_col_idx].clone());
        for pv in &pivot_values {
            let pv_str = Self::value_to_string(pv);
            if value_col_idxs.len() == 1 {
                new_columns.push(pv_str);
            } else {
                for &vci in &value_col_idxs {
                    new_columns.push(format!("{} / {pv_str}", result.columns[vci]));
                }
            }
        }

        let mut new_result = QueryResult::new(new_columns);

        // Group rows by their key-column value, preserving first-seen
        // order so the post-sort row order survives into the pivot.
        //
        // The hash bucket is a fast first-pass; structural `==` inside
        // the bucket guarantees we don't false-merge distinct keys
        // that share a u64 hash (probability ~2⁻⁶⁴, but pinned out
        // for correctness). Same pattern as `pivot_lookup` below.
        let mut groups: Vec<(Value, Vec<&Row>)> = Vec::new();
        let mut group_index: HashMap<u64, Vec<usize>> = HashMap::new();
        for row in &result.rows {
            let key = row.get(key_col_idx).cloned().unwrap_or(Value::Null);
            let h = hash_single_value(&key);
            let bucket = group_index.entry(h).or_default();
            let existing = bucket.iter().find(|&&idx| groups[idx].0 == key).copied();
            if let Some(idx) = existing {
                groups[idx].1.push(row);
            } else {
                bucket.push(groups.len());
                groups.push((key, vec![row]));
            }
        }

        // Build pivoted rows.
        for (key, group_rows) in groups {
            let mut new_row: Vec<Value> =
                Vec::with_capacity(1 + value_col_idxs.len() * pivot_values.len());
            new_row.push(key);

            // For each pivot value, find the input row in this group
            // whose pivot column equals it; pull the value-column cell.
            // Hash bucket + structural `==` for collision-safety, same
            // pattern as the group_index above.
            //
            // If multiple input rows share the same `(key, pivot_value)`
            // pair, only the first one wins — see "Input contract" in
            // the function docstring.
            let mut pivot_lookup: HashMap<u64, Vec<&Row>> = HashMap::new();
            for &row in &group_rows {
                let pv = row.get(pivot_value_col_idx).cloned().unwrap_or(Value::Null);
                pivot_lookup
                    .entry(hash_single_value(&pv))
                    .or_default()
                    .push(row);
            }

            for pv in &pivot_values {
                let pv_hash = hash_single_value(pv);
                // Find the row in this bucket whose pivot value equals
                // `pv` structurally — guards against the hash-only
                // collision case.
                let matched = pivot_lookup.get(&pv_hash).and_then(|bucket| {
                    bucket
                        .iter()
                        .find(|row| row.get(pivot_value_col_idx).is_some_and(|cell| cell == pv))
                });
                for &vci in &value_col_idxs {
                    let cell = matched
                        .and_then(|row| row.get(vci))
                        .cloned()
                        .unwrap_or(Value::Null);
                    new_row.push(cell);
                }
            }

            new_result.add_row(new_row);
        }

        Ok(new_result)
    }
    pub(super) fn find_pivot_column(
        &self,
        result: &QueryResult,
        pivot_expr: &Expr,
    ) -> Result<usize, QueryError> {
        match pivot_expr {
            Expr::Column(name) => {
                let upper_name = name.to_uppercase();
                result
                    .columns
                    .iter()
                    .position(|c| c.to_uppercase() == upper_name)
                    .ok_or_else(|| {
                        QueryError::Evaluation(format!(
                            "PIVOT BY column '{name}' not found in SELECT"
                        ))
                    })
            }
            Expr::Literal(Literal::Integer(n)) => {
                let idx = (*n as usize).saturating_sub(1);
                if idx < result.columns.len() {
                    Ok(idx)
                } else {
                    Err(QueryError::Evaluation(format!(
                        "PIVOT BY column index {n} out of range"
                    )))
                }
            }
            Expr::Literal(Literal::Number(n)) => {
                // Defensive: literal whole numbers parse as Integer (issue #938),
                // so this arm is only reachable for fractional literals like `1.0`.
                use rust_decimal::prelude::ToPrimitive;
                let idx = n.to_usize().unwrap_or(0).saturating_sub(1);
                if idx < result.columns.len() {
                    Ok(idx)
                } else {
                    Err(QueryError::Evaluation(format!(
                        "PIVOT BY column index {n} out of range"
                    )))
                }
            }
            Expr::Function(func) => {
                // Two-tier resolution matching `sort_results`'s ORDER BY
                // logic: try the bare function name first (e.g.
                // `YEAR(date)` → column `year`), then fall back to the
                // full expression string (e.g. column literally named
                // `YEAR(date)`). Same convention as bean-query's column
                // naming.
                //
                // Without this, `PIVOT BY YEAR(date), account` would
                // fail at find_pivot_column AND `GROUP BY YEAR(date)`
                // would silently be skipped from the membership check
                // — misreporting `PivotSecondNotInGroupBy` for valid
                // queries (Copilot review on PR #1037).
                let expr_str = pivot_expr.to_string();
                let upper_func = func.name.to_uppercase();
                result
                    .columns
                    .iter()
                    .position(|c| c.to_uppercase() == upper_func)
                    .or_else(|| result.columns.iter().position(|c| c == &expr_str))
                    .ok_or_else(|| {
                        QueryError::Evaluation(format!(
                            "PIVOT BY expression '{expr_str}' not found in SELECT"
                        ))
                    })
            }
            _ => {
                // For other expression kinds (binary ops, literals,
                // etc.) try the full expression string against column
                // names — matches the hidden-column alias convention
                // used by `find_hidden_order_by_targets`.
                let expr_str = pivot_expr.to_string();
                result
                    .columns
                    .iter()
                    .position(|c| c == &expr_str)
                    .ok_or_else(|| {
                        QueryError::Evaluation(format!(
                            "PIVOT BY expression '{expr_str}' not found in SELECT"
                        ))
                    })
            }
        }
    }

    /// Convert a value to string for display/grouping.
    pub(super) fn value_to_string(val: &Value) -> String {
        match val {
            Value::String(s) => s.clone(),
            Value::Number(n) => n.to_string(),
            Value::Integer(i) => i.to_string(),
            Value::Date(d) => d.to_string(),
            Value::Boolean(b) => b.to_string(),
            Value::Amount(a) => format!("{} {}", a.number, a.currency),
            Value::Position(p) => p.to_string(),
            Value::Inventory(inv) => inv.to_string(),
            Value::StringSet(ss) => ss.join(", "),
            Value::Set(values) => {
                // Format set elements as comma-separated values
                let strs: Vec<String> = values.iter().map(Self::value_to_string).collect();
                format!("({})", strs.join(", "))
            }
            Value::Metadata(meta) => {
                // Format metadata as key=value pairs
                let pairs: Vec<String> = meta.iter().map(|(k, v)| format!("{k}: {v:?}")).collect();
                format!("{{{}}}", pairs.join(", "))
            }
            Value::Interval(i) => format!("{} {:?}", i.count, i.unit),
            Value::Object(obj) => {
                // Format object as {key: value, ...}
                let pairs: Vec<String> = obj
                    .iter()
                    .map(|(k, v)| format!("{k}: {}", Self::value_to_string(v)))
                    .collect();
                format!("{{{}}}", pairs.join(", "))
            }
            Value::Null => "NULL".to_string(),
        }
    }
}