Skip to main content

rustledger_query/executor/
types.rs

1//! Types used by the BQL query executor.
2
3// ratchet: fxhash-only — hot path; use FxHashMap/FxHashSet, not std SipHash collections (#1237).
4use std::collections::BTreeMap;
5use std::hash::{Hash, Hasher};
6
7use rust_decimal::Decimal;
8use rustledger_core::{Amount, Inventory, Metadata, NaiveDate, Position, Transaction};
9
/// Where a directive came from in the source ledger.
#[derive(Debug, Clone)]
pub struct SourceLocation {
    /// Path of the file that contains the directive.
    pub filename: String,
    /// Line within that file, counted from 1.
    pub lineno: usize,
}
18
/// An interval unit for date arithmetic.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IntervalUnit {
    /// Days.
    Day,
    /// Weeks.
    Week,
    /// Months.
    Month,
    /// Quarters.
    Quarter,
    /// Years.
    Year,
}

impl IntervalUnit {
    /// Parse an interval unit from a string.
    ///
    /// Accepts the singular name, the plural name, or the one-letter
    /// abbreviation of each unit (`DAY` / `DAYS` / `D`, and so on), compared
    /// ASCII case-insensitively. Returns `None` for anything else, including
    /// input with surrounding whitespace.
    ///
    /// The comparison is done in place with `eq_ignore_ascii_case`, so no
    /// temporary upper-cased `String` is allocated, and non-ASCII characters
    /// that merely *uppercase* to an ASCII letter (e.g. `ſ` -> `S`) are not
    /// accepted as aliases.
    pub fn parse_unit(s: &str) -> Option<Self> {
        // `Self` is not usable inside a nested item, hence the explicit type name.
        const ALIASES: [(&str, IntervalUnit); 15] = [
            ("DAY", IntervalUnit::Day),
            ("DAYS", IntervalUnit::Day),
            ("D", IntervalUnit::Day),
            ("WEEK", IntervalUnit::Week),
            ("WEEKS", IntervalUnit::Week),
            ("W", IntervalUnit::Week),
            ("MONTH", IntervalUnit::Month),
            ("MONTHS", IntervalUnit::Month),
            ("M", IntervalUnit::Month),
            ("QUARTER", IntervalUnit::Quarter),
            ("QUARTERS", IntervalUnit::Quarter),
            ("Q", IntervalUnit::Quarter),
            ("YEAR", IntervalUnit::Year),
            ("YEARS", IntervalUnit::Year),
            ("Y", IntervalUnit::Year),
        ];

        ALIASES
            .iter()
            .find(|(alias, _)| s.eq_ignore_ascii_case(alias))
            .map(|&(_, unit)| unit)
    }
}
47
48/// An interval value for date arithmetic.
49#[derive(Debug, Clone, PartialEq, Eq, Hash)]
50pub struct Interval {
51    /// The count (can be negative).
52    pub count: i64,
53    /// The unit.
54    pub unit: IntervalUnit,
55}
56
57impl Interval {
58    /// Create a new interval.
59    pub const fn new(count: i64, unit: IntervalUnit) -> Self {
60        Self { count, unit }
61    }
62
63    /// Convert interval to an approximate number of days for comparison.
64    /// Uses: Day=1, Week=7, Month=30, Quarter=91, Year=365.
65    pub(crate) const fn to_approx_days(&self) -> i64 {
66        let days_per_unit = match self.unit {
67            IntervalUnit::Day => 1,
68            IntervalUnit::Week => 7,
69            IntervalUnit::Month => 30,
70            IntervalUnit::Quarter => 91,
71            IntervalUnit::Year => 365,
72        };
73        self.count.saturating_mul(days_per_unit)
74    }
75
76    /// Add this interval to a date.
77    pub fn add_to_date(&self, date: NaiveDate) -> Option<NaiveDate> {
78        use jiff::ToSpan;
79
80        let span = match self.unit {
81            IntervalUnit::Day => self.count.days(),
82            IntervalUnit::Week => self.count.weeks(),
83            IntervalUnit::Month => self.count.months(),
84            IntervalUnit::Quarter => (self.count * 3).months(),
85            IntervalUnit::Year => self.count.years(),
86        };
87        date.checked_add(span).ok()
88    }
89}
90
91/// A value that can result from evaluating a BQL expression.
92///
93/// Heavy variants (Inventory, Position, Metadata, Object) are boxed to reduce
94/// the size of the enum from 120 bytes to 32 bytes, improving cache efficiency
95/// when processing large result sets.
96#[derive(Debug, Clone, PartialEq, Eq)]
97pub enum Value {
98    /// String value.
99    String(String),
100    /// Numeric value.
101    Number(Decimal),
102    /// Integer value.
103    Integer(i64),
104    /// Date value.
105    Date(NaiveDate),
106    /// Boolean value.
107    Boolean(bool),
108    /// Amount (number + currency).
109    Amount(Amount),
110    /// Position (amount + optional cost). Boxed to reduce enum size.
111    Position(Box<Position>),
112    /// Inventory (aggregated positions). Boxed to reduce enum size.
113    Inventory(Box<Inventory>),
114    /// Set of strings (tags, links).
115    StringSet(Vec<String>),
116    /// Generic set of values for IN operator (supports mixed types).
117    Set(Vec<Self>),
118    /// Metadata dictionary. Boxed to reduce enum size.
119    Metadata(Box<Metadata>),
120    /// Interval for date arithmetic.
121    Interval(Interval),
122    /// Structured object (for entry, meta columns). Boxed to reduce enum size.
123    Object(Box<BTreeMap<String, Self>>),
124    /// NULL value.
125    Null,
126}
127
128impl Value {
129    /// Compute a hash for this value.
130    ///
131    /// Note: This is not the standard Hash trait because some contained types
132    /// (Decimal, Inventory) don't implement Hash. We use byte representations
133    /// for those types.
134    pub(crate) fn hash_value<H: Hasher>(&self, state: &mut H) {
135        std::mem::discriminant(self).hash(state);
136        match self {
137            Self::String(s) => s.hash(state),
138            Self::Number(d) => d.serialize().hash(state),
139            Self::Integer(i) => i.hash(state),
140            Self::Date(d) => {
141                d.year().hash(state);
142                d.month().hash(state);
143                d.day().hash(state);
144            }
145            Self::Boolean(b) => b.hash(state),
146            Self::Amount(a) => {
147                a.number.serialize().hash(state);
148                a.currency.as_str().hash(state);
149            }
150            Self::Position(p) => {
151                // Dereference boxed position
152                p.units.number.serialize().hash(state);
153                p.units.currency.as_str().hash(state);
154                if let Some(cost) = &p.cost {
155                    cost.number.serialize().hash(state);
156                    cost.currency.as_str().hash(state);
157                }
158            }
159            Self::Inventory(inv) => {
160                // Dereference boxed inventory
161                for pos in inv.positions() {
162                    pos.units.number.serialize().hash(state);
163                    pos.units.currency.as_str().hash(state);
164                    if let Some(cost) = &pos.cost {
165                        cost.number.serialize().hash(state);
166                        cost.currency.as_str().hash(state);
167                    }
168                }
169            }
170            Self::StringSet(ss) => {
171                // Hash StringSet in a canonical, order-independent way by sorting first.
172                let mut sorted = ss.clone();
173                sorted.sort();
174                for s in &sorted {
175                    s.hash(state);
176                }
177            }
178            Self::Set(values) => {
179                // Hash each value in order (sets from literals maintain order)
180                for v in values {
181                    v.hash_value(state);
182                }
183            }
184            Self::Metadata(meta) => {
185                // Hash metadata in canonical order by sorting keys (boxed)
186                let mut keys: Vec<_> = meta.keys().collect();
187                keys.sort();
188                for key in keys {
189                    key.hash(state);
190                    // Hash the debug representation of the value
191                    format!("{:?}", meta.get(key)).hash(state);
192                }
193            }
194            Self::Interval(interval) => {
195                interval.count.hash(state);
196                interval.unit.hash(state);
197            }
198            Self::Object(obj) => {
199                // BTreeMap is already sorted by key, so iteration order is deterministic (boxed)
200                for (k, v) in obj.as_ref() {
201                    k.hash(state);
202                    v.hash_value(state);
203                }
204            }
205            Self::Null => {}
206        }
207    }
208}
209
210/// A row of query results.
211pub type Row = Vec<Value>;
212
213/// Compute a hash for a row (for DISTINCT deduplication).
214///
215/// Uses `FxHasher` (the same non-cryptographic hash backing every
216/// `FxHashMap` in the workspace). DISTINCT / GROUP BY keys are internal
217/// dedup tokens — they need speed, not DoS-resistance.
218pub fn hash_row(row: &Row) -> u64 {
219    let mut hasher = rustc_hash::FxHasher::default();
220    for value in row {
221        value.hash_value(&mut hasher);
222    }
223    hasher.finish()
224}
225
226/// Compute a hash for a single value (for PIVOT lookups).
227pub fn hash_single_value(value: &Value) -> u64 {
228    let mut hasher = rustc_hash::FxHasher::default();
229    value.hash_value(&mut hasher);
230    hasher.finish()
231}
232
233/// Query result containing column names and rows.
234///
235/// **Invariant**: `rows.len() == row_group_keys.len()`. Always. Mutating
236/// either field directly will violate this; use the helper methods
237/// (`add_row`, `add_aggregate_row`, `truncate`, `sort_by`, etc.) that
238/// keep both vectors in lockstep. The invariant is enforced at runtime
239/// with `assert_eq!` inside `sort_by`.
240#[derive(Debug, Clone)]
241pub struct QueryResult {
242    /// Column names.
243    pub columns: Vec<String>,
244    /// Result rows.
245    pub rows: Vec<Row>,
246    /// Per-row GROUP BY key values, parallel to `rows`. `None` for rows
247    /// produced outside aggregation. Populated by the aggregate execution
248    /// path; used by the text renderer to recover the per-row currency
249    /// context for `Value::Number` cells emitted by `SUM` / `AVG` (issue
250    /// #988 — display-precision fix that stays lossless for JSON/CSV).
251    ///
252    /// `pub(crate)` so external consumers can't accidentally violate the
253    /// parallel-vector invariant; reach in directly only inside this crate
254    /// and only with extreme care. External access goes through
255    /// [`Self::group_key`].
256    pub(crate) row_group_keys: Vec<Option<Vec<Value>>>,
257}
258
259impl QueryResult {
260    /// Create a new empty result.
261    pub const fn new(columns: Vec<String>) -> Self {
262        Self {
263            columns,
264            rows: Vec::new(),
265            row_group_keys: Vec::new(),
266        }
267    }
268
269    /// Add a row to the result with no GROUP BY context (non-aggregate path).
270    /// The sidecar (`row_group_keys`) records `None` for this row, so the
271    /// text renderer applies no per-currency quantization (issue #988).
272    /// Aggregate paths must use [`Self::add_aggregate_row`] instead.
273    pub fn add_row(&mut self, row: Row) {
274        self.rows.push(row);
275        self.row_group_keys.push(None);
276    }
277
278    /// Add a row produced by aggregation, recording the GROUP BY key values
279    /// alongside it. The renderer consults the key to quantize numeric
280    /// aggregates against the per-currency display precision (issue #988).
281    ///
282    /// Multi-column GROUP BY note: when several columns are grouped (e.g.
283    /// `GROUP BY account, currency`), the entire key is preserved here.
284    /// The renderer's currency-hint extraction (`currency_hint_for_row`
285    /// in `rustledger/src/cmd/query/output.rs`) takes the *first*
286    /// currency-shaped string in iteration order — so put the currency
287    /// column first if both are currency-shaped, which is rare in
288    /// practice but possible.
289    pub fn add_aggregate_row(&mut self, row: Row, group_key: Vec<Value>) {
290        self.rows.push(row);
291        self.row_group_keys.push(if group_key.is_empty() {
292            None
293        } else {
294            Some(group_key)
295        });
296    }
297
298    /// Get the GROUP BY key for a given row, if it was produced by
299    /// aggregation. Returns `None` for non-aggregate rows or when the
300    /// row index is out of range. This is the public read-side of the
301    /// `row_group_keys` sidecar — prefer it over reaching into the
302    /// field directly.
303    ///
304    /// Returns `&[Value]` rather than `&Vec<Value>` so callers aren't
305    /// tied to the specific container type.
306    #[must_use]
307    pub fn group_key(&self, row_idx: usize) -> Option<&[Value]> {
308        self.row_group_keys.get(row_idx).and_then(|k| k.as_deref())
309    }
310
311    /// Whether any row in the result was produced by aggregation. Lets
312    /// downstream renderers short-circuit per-row hint lookups when
313    /// the cache would be all `None` anyway (issue #988 follow-up).
314    #[must_use]
315    pub fn has_aggregate_rows(&self) -> bool {
316        self.row_group_keys.iter().any(Option::is_some)
317    }
318
319    /// Truncate to the first `len` rows, keeping `row_group_keys` in
320    /// lockstep so the parallel-vector invariant survives LIMIT.
321    pub fn truncate(&mut self, len: usize) {
322        self.rows.truncate(len);
323        self.row_group_keys.truncate(len);
324    }
325
326    /// Sort rows by a comparator, keeping `row_group_keys` in lockstep.
327    /// Pair-sort prevents the sidecar from desynchronizing after ORDER BY
328    /// (otherwise text rendering would apply the wrong currency hint to
329    /// a row).
330    pub fn sort_by<F>(&mut self, mut compare: F)
331    where
332        F: FnMut(&Row, &Row) -> std::cmp::Ordering,
333    {
334        // Hard assert (not debug_assert!): the invariant is load-bearing
335        // for correctness; a release-mode mismatch would silently apply
336        // the wrong currency hint to rows after sort.
337        assert_eq!(
338            self.rows.len(),
339            self.row_group_keys.len(),
340            "QueryResult invariant violated: rows.len() must equal row_group_keys.len()"
341        );
342        let n = self.rows.len();
343        let mut paired: Vec<(Row, Option<Vec<Value>>)> = std::mem::take(&mut self.rows)
344            .into_iter()
345            .zip(std::mem::take(&mut self.row_group_keys))
346            .collect();
347        paired.sort_by(|(a, _), (b, _)| compare(a, b));
348        // Pre-allocate the now-empty Vecs back to known capacity to skip
349        // the incremental-grow allocations during push-back.
350        self.rows.reserve_exact(n);
351        self.row_group_keys.reserve_exact(n);
352        for (row, key) in paired {
353            self.rows.push(row);
354            self.row_group_keys.push(key);
355        }
356    }
357
358    /// Number of rows.
359    pub const fn len(&self) -> usize {
360        self.rows.len()
361    }
362
363    /// Whether the result is empty.
364    pub const fn is_empty(&self) -> bool {
365        self.rows.is_empty()
366    }
367}
368
369/// Context for a single posting being evaluated.
370#[derive(Debug)]
371pub struct PostingContext<'a> {
372    /// The transaction this posting belongs to.
373    pub transaction: &'a Transaction,
374    /// The posting index within the transaction.
375    pub posting_index: usize,
376    /// Cumulative running balance across all WHERE-filtered postings up to and
377    /// including this one, in iteration order. This is what bean-query exposes
378    /// as the `balance` column — a single Inventory that grows as the result
379    /// set is built, regardless of which account each posting belongs to.
380    pub balance: Option<Inventory>,
381    /// Per-account running balance for this posting's account. Exposed as the
382    /// `account_balance` column. Updated for every posting, independent of the
383    /// WHERE filter, so it always reflects the true ledger balance for the
384    /// account at this point in time.
385    pub account_balance: Option<Inventory>,
386    /// The directive index (for source location lookup).
387    pub directive_index: Option<usize>,
388}
389
/// Per-row values made available to window functions.
#[derive(Debug, Clone)]
pub struct WindowContext {
    /// 1-based position of the row within its partition.
    pub row_number: usize,
    /// 1-based rank within the partition; tied rows share a rank.
    pub rank: usize,
    /// 1-based dense rank within the partition; ties leave no gaps.
    pub dense_rank: usize,
}
400
401/// Account information cached from Open/Close directives.
402#[derive(Debug, Clone)]
403pub struct AccountInfo {
404    /// Date the account was opened.
405    pub open_date: Option<NaiveDate>,
406    /// Date the account was closed (if any).
407    pub close_date: Option<NaiveDate>,
408    /// Metadata from the Open directive.
409    pub open_meta: Metadata,
410}
411
412/// An in-memory table created by CREATE TABLE.
413#[derive(Debug, Clone)]
414pub struct Table {
415    /// Column names.
416    pub columns: Vec<String>,
417    /// Rows of data.
418    pub rows: Vec<Vec<Value>>,
419}
420
421impl Table {
422    /// Create a new empty table with the given column names.
423    #[allow(clippy::missing_const_for_fn)] // Vec::new() isn't const with owned columns
424    pub fn new(columns: Vec<String>) -> Self {
425        Self {
426            columns,
427            rows: Vec::new(),
428        }
429    }
430
431    /// Add a row to the table.
432    pub fn add_row(&mut self, row: Vec<Value>) {
433        self.rows.push(row);
434    }
435}
436
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `Value::String` cell.
    fn text(s: &str) -> Value {
        Value::String(s.to_owned())
    }

    /// Owned copy of the GROUP BY key recorded for row `idx`, if any.
    fn key_at(result: &QueryResult, idx: usize) -> Option<Vec<Value>> {
        result.group_key(idx).map(<[Value]>::to_vec)
    }

    /// Guards against `Value` growing back after its heavy variants were
    /// boxed: the enum must fit within 48 bytes.
    #[test]
    fn test_value_size() {
        let actual = std::mem::size_of::<Value>();
        assert!(actual <= 48, "Value enum too large: {} bytes", actual);
    }

    // ─── QueryResult parallel-vector invariant (issue #988) ───────────
    //
    // `row_group_keys` has to stay aligned with `rows` through every
    // mutation. The tests below pin that contract for each helper that
    // touches both vectors; a failure means the renderer could apply the
    // wrong currency hint to a row.

    /// Three aggregate rows keyed by currency, in the order USD, EUR, GBP.
    fn make_keyed_result() -> QueryResult {
        let mut result = QueryResult::new(vec!["currency".into(), "sum".into()]);
        for (currency, total) in [("USD", 100), ("EUR", 50), ("GBP", 75)] {
            result.add_aggregate_row(
                vec![text(currency), Value::Integer(total)],
                vec![text(currency)],
            );
        }
        result
    }

    /// `sort_by` reorders rows AND `row_group_keys` together.
    #[test]
    fn test_sort_by_keeps_row_group_keys_in_lockstep() {
        let mut result = make_keyed_result();
        let sum_of = |row: &Row| match &row[1] {
            Value::Integer(n) => Some(*n),
            _ => None,
        };
        // Ascending by the integer column: 50 (EUR), 75 (GBP), 100 (USD).
        result.sort_by(|a, b| match (sum_of(a), sum_of(b)) {
            (Some(x), Some(y)) => x.cmp(&y),
            _ => std::cmp::Ordering::Equal,
        });

        // The sidecar must have moved with the rows.
        assert_eq!(key_at(&result, 0), Some(vec![text("EUR")]));
        assert_eq!(key_at(&result, 1), Some(vec![text("GBP")]));
        assert_eq!(key_at(&result, 2), Some(vec![text("USD")]));
    }

    /// `truncate` drops the same suffix from rows AND `row_group_keys`.
    #[test]
    fn test_truncate_keeps_row_group_keys_in_lockstep() {
        let mut result = make_keyed_result();
        result.truncate(2);

        assert_eq!(result.rows.len(), 2);
        assert_eq!(result.row_group_keys.len(), 2);
        // The first two keys survive: USD, EUR.
        assert_eq!(key_at(&result, 0), Some(vec![text("USD")]));
        assert_eq!(key_at(&result, 1), Some(vec![text("EUR")]));
        // Past the end, the lookup yields None rather than panicking.
        assert_eq!(key_at(&result, 2), None);
    }

    /// Interleaving aggregate and non-aggregate rows: `add_row` records
    /// `None` in the sidecar, so the invariant holds when both paths are
    /// mixed (e.g. a synthetic explanatory row appended after an
    /// aggregate).
    #[test]
    fn test_add_row_and_add_aggregate_row_mixed() {
        let mut result = QueryResult::new(vec!["x".into()]);
        result.add_aggregate_row(vec![Value::Integer(1)], vec![text("USD")]);
        result.add_row(vec![Value::Integer(2)]);
        result.add_aggregate_row(vec![Value::Integer(3)], vec![text("EUR")]);

        assert_eq!(result.rows.len(), 3);
        assert_eq!(result.row_group_keys.len(), 3);
        assert_eq!(key_at(&result, 0), Some(vec![text("USD")]));
        assert_eq!(key_at(&result, 1), None);
        assert_eq!(key_at(&result, 2), Some(vec![text("EUR")]));
    }

    /// An empty `group_key` argument means "no GROUP BY context": the
    /// sidecar stores `None`, never a misleading `Some(vec![])`.
    #[test]
    fn test_add_aggregate_row_empty_key_records_none() {
        let mut result = QueryResult::new(vec!["count".into()]);
        // A pure aggregate (e.g. SELECT COUNT(*)) has no GROUP BY at all.
        result.add_aggregate_row(vec![Value::Integer(42)], Vec::new());

        assert_eq!(key_at(&result, 0), None);
    }

    /// `sort_by` enforces the lockstep invariant with an unconditional
    /// `assert_eq!`. This test corrupts the sidecar on purpose (a push to
    /// `rows` with no matching push to `row_group_keys`) and expects
    /// `sort_by` to panic, so the safety net cannot be removed unnoticed.
    #[test]
    #[should_panic(expected = "QueryResult invariant violated")]
    fn test_sort_by_panics_on_lockstep_violation() {
        let mut result = QueryResult::new(vec!["x".into()]);
        // Direct field access is the only way to reach the assert; the
        // helpers keep both vectors aligned. It works here because the
        // tests live in the same crate and `row_group_keys` is `pub(crate)`.
        result.rows.push(vec![Value::Integer(1)]);
        // No push to `row_group_keys` — that is the corruption.
        result.sort_by(|_, _| std::cmp::Ordering::Equal);
    }

    /// Standalone check of `add_row`: the non-aggregate path stores `None`
    /// in the sidecar and keeps the two vectors the same length. Also
    /// covered indirectly by `test_add_row_and_add_aggregate_row_mixed`;
    /// pinned separately so the contract is unambiguous.
    #[test]
    fn test_add_row_records_none_in_sidecar() {
        let mut result = QueryResult::new(vec!["x".into()]);
        result.add_row(vec![Value::Integer(1)]);

        assert_eq!(result.rows.len(), 1);
        assert_eq!(result.row_group_keys.len(), 1);
        assert_eq!(key_at(&result, 0), None);
    }
}