rustledger_query/executor/
types.rs

1//! Types used by the BQL query executor.
2
3use std::collections::BTreeMap;
4use std::hash::{Hash, Hasher};
5
6use rust_decimal::Decimal;
7use rustledger_core::{Amount, Inventory, Metadata, NaiveDate, Position, Transaction};
8
/// Source location information for a directive.
///
/// A plain (file, line) pair; the line number is 1-based.
#[derive(Debug, Clone)]
pub struct SourceLocation {
    /// File path.
    pub filename: String,
    /// Line number (1-based).
    pub lineno: usize,
}
17
/// An interval unit for date arithmetic.
///
/// Combined with a signed count in [`Interval`]. Textual spellings are mapped
/// to a unit by [`IntervalUnit::parse_unit`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IntervalUnit {
    /// Days.
    Day,
    /// Weeks.
    Week,
    /// Months.
    Month,
    /// Quarters.
    Quarter,
    /// Years.
    Year,
}
32
33impl IntervalUnit {
34    /// Parse an interval unit from a string.
35    pub fn parse_unit(s: &str) -> Option<Self> {
36        match s.to_uppercase().as_str() {
37            "DAY" | "DAYS" | "D" => Some(Self::Day),
38            "WEEK" | "WEEKS" | "W" => Some(Self::Week),
39            "MONTH" | "MONTHS" | "M" => Some(Self::Month),
40            "QUARTER" | "QUARTERS" | "Q" => Some(Self::Quarter),
41            "YEAR" | "YEARS" | "Y" => Some(Self::Year),
42            _ => None,
43        }
44    }
45}
46
/// An interval value for date arithmetic.
///
/// A signed count of [`IntervalUnit`]s, e.g. "-3 months".
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Interval {
    /// The count (can be negative).
    pub count: i64,
    /// The unit.
    pub unit: IntervalUnit,
}
55
56impl Interval {
57    /// Create a new interval.
58    pub const fn new(count: i64, unit: IntervalUnit) -> Self {
59        Self { count, unit }
60    }
61
62    /// Convert interval to an approximate number of days for comparison.
63    /// Uses: Day=1, Week=7, Month=30, Quarter=91, Year=365.
64    pub(crate) const fn to_approx_days(&self) -> i64 {
65        let days_per_unit = match self.unit {
66            IntervalUnit::Day => 1,
67            IntervalUnit::Week => 7,
68            IntervalUnit::Month => 30,
69            IntervalUnit::Quarter => 91,
70            IntervalUnit::Year => 365,
71        };
72        self.count.saturating_mul(days_per_unit)
73    }
74
75    /// Add this interval to a date.
76    pub fn add_to_date(&self, date: NaiveDate) -> Option<NaiveDate> {
77        use jiff::ToSpan;
78
79        let span = match self.unit {
80            IntervalUnit::Day => self.count.days(),
81            IntervalUnit::Week => self.count.weeks(),
82            IntervalUnit::Month => self.count.months(),
83            IntervalUnit::Quarter => (self.count * 3).months(),
84            IntervalUnit::Year => self.count.years(),
85        };
86        date.checked_add(span).ok()
87    }
88}
89
/// A value that can result from evaluating a BQL expression.
///
/// Heavy variants (Inventory, Position, Metadata, Object) are boxed to reduce
/// the size of the enum (from 120 bytes unboxed, per the original note),
/// improving cache efficiency when processing large result sets. The
/// `test_value_size` unit test in this module caps the size at 48 bytes.
///
/// Equality is the derived, structural `PartialEq`/`Eq`; in particular the
/// `Vec`-backed variants (`StringSet`, `Set`) compare element by element in
/// order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Value {
    /// String value.
    String(String),
    /// Numeric value.
    Number(Decimal),
    /// Integer value.
    Integer(i64),
    /// Date value.
    Date(NaiveDate),
    /// Boolean value.
    Boolean(bool),
    /// Amount (number + currency).
    Amount(Amount),
    /// Position (amount + optional cost). Boxed to reduce enum size.
    Position(Box<Position>),
    /// Inventory (aggregated positions). Boxed to reduce enum size.
    Inventory(Box<Inventory>),
    /// Set of strings (tags, links).
    StringSet(Vec<String>),
    /// Generic set of values for IN operator (supports mixed types).
    Set(Vec<Self>),
    /// Metadata dictionary. Boxed to reduce enum size.
    Metadata(Box<Metadata>),
    /// Interval for date arithmetic.
    Interval(Interval),
    /// Structured object (for entry, meta columns). Boxed to reduce enum size.
    Object(Box<BTreeMap<String, Self>>),
    /// NULL value.
    Null,
}
126
127impl Value {
128    /// Compute a hash for this value.
129    ///
130    /// Note: This is not the standard Hash trait because some contained types
131    /// (Decimal, Inventory) don't implement Hash. We use byte representations
132    /// for those types.
133    pub(crate) fn hash_value<H: Hasher>(&self, state: &mut H) {
134        std::mem::discriminant(self).hash(state);
135        match self {
136            Self::String(s) => s.hash(state),
137            Self::Number(d) => d.serialize().hash(state),
138            Self::Integer(i) => i.hash(state),
139            Self::Date(d) => {
140                d.year().hash(state);
141                d.month().hash(state);
142                d.day().hash(state);
143            }
144            Self::Boolean(b) => b.hash(state),
145            Self::Amount(a) => {
146                a.number.serialize().hash(state);
147                a.currency.as_str().hash(state);
148            }
149            Self::Position(p) => {
150                // Dereference boxed position
151                p.units.number.serialize().hash(state);
152                p.units.currency.as_str().hash(state);
153                if let Some(cost) = &p.cost {
154                    cost.number.serialize().hash(state);
155                    cost.currency.as_str().hash(state);
156                }
157            }
158            Self::Inventory(inv) => {
159                // Dereference boxed inventory
160                for pos in inv.positions() {
161                    pos.units.number.serialize().hash(state);
162                    pos.units.currency.as_str().hash(state);
163                    if let Some(cost) = &pos.cost {
164                        cost.number.serialize().hash(state);
165                        cost.currency.as_str().hash(state);
166                    }
167                }
168            }
169            Self::StringSet(ss) => {
170                // Hash StringSet in a canonical, order-independent way by sorting first.
171                let mut sorted = ss.clone();
172                sorted.sort();
173                for s in &sorted {
174                    s.hash(state);
175                }
176            }
177            Self::Set(values) => {
178                // Hash each value in order (sets from literals maintain order)
179                for v in values {
180                    v.hash_value(state);
181                }
182            }
183            Self::Metadata(meta) => {
184                // Hash metadata in canonical order by sorting keys (boxed)
185                let mut keys: Vec<_> = meta.keys().collect();
186                keys.sort();
187                for key in keys {
188                    key.hash(state);
189                    // Hash the debug representation of the value
190                    format!("{:?}", meta.get(key)).hash(state);
191                }
192            }
193            Self::Interval(interval) => {
194                interval.count.hash(state);
195                interval.unit.hash(state);
196            }
197            Self::Object(obj) => {
198                // BTreeMap is already sorted by key, so iteration order is deterministic (boxed)
199                for (k, v) in obj.as_ref() {
200                    k.hash(state);
201                    v.hash_value(state);
202                }
203            }
204            Self::Null => {}
205        }
206    }
207}
208
/// A row of query results: an ordered list of [`Value`] cells.
pub type Row = Vec<Value>;
211
212/// Compute a hash for a row (for DISTINCT deduplication).
213///
214/// Uses `FxHasher` (the same non-cryptographic hash backing every
215/// `FxHashMap` in the workspace). DISTINCT / GROUP BY keys are internal
216/// dedup tokens — they need speed, not DoS-resistance.
217pub fn hash_row(row: &Row) -> u64 {
218    let mut hasher = rustc_hash::FxHasher::default();
219    for value in row {
220        value.hash_value(&mut hasher);
221    }
222    hasher.finish()
223}
224
225/// Compute a hash for a single value (for PIVOT lookups).
226pub fn hash_single_value(value: &Value) -> u64 {
227    let mut hasher = rustc_hash::FxHasher::default();
228    value.hash_value(&mut hasher);
229    hasher.finish()
230}
231
/// Query result containing column names and rows.
///
/// **Invariant**: `rows.len() == row_group_keys.len()`. Always. Mutating
/// either field directly will violate this; use the helper methods
/// (`add_row`, `add_aggregate_row`, `truncate`, `sort_by`, etc.) that
/// keep both vectors in lockstep. The invariant is enforced at runtime
/// with `assert_eq!` inside `sort_by`.
///
/// Note that `rows` is a public field while `row_group_keys` is
/// `pub(crate)`, so code outside the crate can still desynchronize the two
/// by pushing to or removing from `rows` directly.
#[derive(Debug, Clone)]
pub struct QueryResult {
    /// Column names.
    pub columns: Vec<String>,
    /// Result rows.
    pub rows: Vec<Row>,
    /// Per-row GROUP BY key values, parallel to `rows`. `None` for rows
    /// produced outside aggregation. Populated by the aggregate execution
    /// path; used by the text renderer to recover the per-row currency
    /// context for `Value::Number` cells emitted by `SUM` / `AVG` (issue
    /// #988 — display-precision fix that stays lossless for JSON/CSV).
    ///
    /// `pub(crate)` so external consumers can't accidentally violate the
    /// parallel-vector invariant; reach in directly only inside this crate
    /// and only with extreme care. External access goes through
    /// [`Self::group_key`].
    pub(crate) row_group_keys: Vec<Option<Vec<Value>>>,
}
257
258impl QueryResult {
259    /// Create a new empty result.
260    pub const fn new(columns: Vec<String>) -> Self {
261        Self {
262            columns,
263            rows: Vec::new(),
264            row_group_keys: Vec::new(),
265        }
266    }
267
268    /// Add a row to the result with no GROUP BY context (non-aggregate path).
269    /// The sidecar (`row_group_keys`) records `None` for this row, so the
270    /// text renderer applies no per-currency quantization (issue #988).
271    /// Aggregate paths must use [`Self::add_aggregate_row`] instead.
272    pub fn add_row(&mut self, row: Row) {
273        self.rows.push(row);
274        self.row_group_keys.push(None);
275    }
276
277    /// Add a row produced by aggregation, recording the GROUP BY key values
278    /// alongside it. The renderer consults the key to quantize numeric
279    /// aggregates against the per-currency display precision (issue #988).
280    ///
281    /// Multi-column GROUP BY note: when several columns are grouped (e.g.
282    /// `GROUP BY account, currency`), the entire key is preserved here.
283    /// The renderer's currency-hint extraction (`currency_hint_for_row`
284    /// in `rustledger/src/cmd/query/output.rs`) takes the *first*
285    /// currency-shaped string in iteration order — so put the currency
286    /// column first if both are currency-shaped, which is rare in
287    /// practice but possible.
288    pub fn add_aggregate_row(&mut self, row: Row, group_key: Vec<Value>) {
289        self.rows.push(row);
290        self.row_group_keys.push(if group_key.is_empty() {
291            None
292        } else {
293            Some(group_key)
294        });
295    }
296
297    /// Get the GROUP BY key for a given row, if it was produced by
298    /// aggregation. Returns `None` for non-aggregate rows or when the
299    /// row index is out of range. This is the public read-side of the
300    /// `row_group_keys` sidecar — prefer it over reaching into the
301    /// field directly.
302    ///
303    /// Returns `&[Value]` rather than `&Vec<Value>` so callers aren't
304    /// tied to the specific container type.
305    #[must_use]
306    pub fn group_key(&self, row_idx: usize) -> Option<&[Value]> {
307        self.row_group_keys.get(row_idx).and_then(|k| k.as_deref())
308    }
309
310    /// Whether any row in the result was produced by aggregation. Lets
311    /// downstream renderers short-circuit per-row hint lookups when
312    /// the cache would be all `None` anyway (issue #988 follow-up).
313    #[must_use]
314    pub fn has_aggregate_rows(&self) -> bool {
315        self.row_group_keys.iter().any(Option::is_some)
316    }
317
318    /// Truncate to the first `len` rows, keeping `row_group_keys` in
319    /// lockstep so the parallel-vector invariant survives LIMIT.
320    pub fn truncate(&mut self, len: usize) {
321        self.rows.truncate(len);
322        self.row_group_keys.truncate(len);
323    }
324
325    /// Sort rows by a comparator, keeping `row_group_keys` in lockstep.
326    /// Pair-sort prevents the sidecar from desynchronizing after ORDER BY
327    /// (otherwise text rendering would apply the wrong currency hint to
328    /// a row).
329    pub fn sort_by<F>(&mut self, mut compare: F)
330    where
331        F: FnMut(&Row, &Row) -> std::cmp::Ordering,
332    {
333        // Hard assert (not debug_assert!): the invariant is load-bearing
334        // for correctness; a release-mode mismatch would silently apply
335        // the wrong currency hint to rows after sort.
336        assert_eq!(
337            self.rows.len(),
338            self.row_group_keys.len(),
339            "QueryResult invariant violated: rows.len() must equal row_group_keys.len()"
340        );
341        let n = self.rows.len();
342        let mut paired: Vec<(Row, Option<Vec<Value>>)> = std::mem::take(&mut self.rows)
343            .into_iter()
344            .zip(std::mem::take(&mut self.row_group_keys))
345            .collect();
346        paired.sort_by(|(a, _), (b, _)| compare(a, b));
347        // Pre-allocate the now-empty Vecs back to known capacity to skip
348        // the incremental-grow allocations during push-back.
349        self.rows.reserve_exact(n);
350        self.row_group_keys.reserve_exact(n);
351        for (row, key) in paired {
352            self.rows.push(row);
353            self.row_group_keys.push(key);
354        }
355    }
356
357    /// Number of rows.
358    pub const fn len(&self) -> usize {
359        self.rows.len()
360    }
361
362    /// Whether the result is empty.
363    pub const fn is_empty(&self) -> bool {
364        self.rows.is_empty()
365    }
366}
367
/// Context for a single posting being evaluated.
///
/// Borrows the owning transaction for `'a` and identifies the posting by its
/// index within that transaction; the two balance fields are optional
/// snapshots carried alongside it.
#[derive(Debug)]
pub struct PostingContext<'a> {
    /// The transaction this posting belongs to.
    pub transaction: &'a Transaction,
    /// The posting index within the transaction.
    pub posting_index: usize,
    /// Cumulative running balance across all WHERE-filtered postings up to and
    /// including this one, in iteration order. This is what bean-query exposes
    /// as the `balance` column — a single Inventory that grows as the result
    /// set is built, regardless of which account each posting belongs to.
    pub balance: Option<Inventory>,
    /// Per-account running balance for this posting's account. Exposed as the
    /// `account_balance` column. Updated for every posting, independent of the
    /// WHERE filter, so it always reflects the true ledger balance for the
    /// account at this point in time.
    pub account_balance: Option<Inventory>,
    /// The directive index (for source location lookup).
    pub directive_index: Option<usize>,
}
388
/// Context for window function evaluation.
///
/// Holds the three per-row ranking numbers; all of them are 1-based.
#[derive(Debug, Clone)]
pub struct WindowContext {
    /// Row number within the partition (1-based).
    pub row_number: usize,
    /// Rank within the partition (1-based, ties get same rank).
    pub rank: usize,
    /// Dense rank within the partition (1-based, no gaps after ties).
    pub dense_rank: usize,
}
399
/// Account information cached from Open/Close directives.
///
/// Both dates are optional, so an account may be recorded without an open
/// date and/or without a close date.
#[derive(Debug, Clone)]
pub struct AccountInfo {
    /// Date the account was opened.
    pub open_date: Option<NaiveDate>,
    /// Date the account was closed (if any).
    pub close_date: Option<NaiveDate>,
    /// Metadata from the Open directive.
    pub open_meta: Metadata,
}
410
/// An in-memory table created by CREATE TABLE.
///
/// Plain column names plus rows of [`Value`]s; nothing in this type checks
/// that a row's length matches the number of columns.
#[derive(Debug, Clone)]
pub struct Table {
    /// Column names.
    pub columns: Vec<String>,
    /// Rows of data.
    pub rows: Vec<Vec<Value>>,
}
419
420impl Table {
421    /// Create a new empty table with the given column names.
422    #[allow(clippy::missing_const_for_fn)] // Vec::new() isn't const with owned columns
423    pub fn new(columns: Vec<String>) -> Self {
424        Self {
425            columns,
426            rows: Vec::new(),
427        }
428    }
429
430    /// Add a row to the table.
431    pub fn add_row(&mut self, row: Vec<Value>) {
432        self.rows.push(row);
433    }
434}
435
// Unit tests: a size guard for `Value`, plus the contract of every
// `QueryResult` helper that must keep `rows` and `row_group_keys` aligned.
#[cfg(test)]
mod tests {
    use super::*;

    /// Verify Value enum size is reasonable after boxing heavy variants.
    /// Previously 120 bytes, now 40 bytes (67% reduction).
    #[test]
    fn test_value_size() {
        use std::mem::size_of;
        // Value should be ~40 bytes with boxed variants (vs 120 unboxed).
        // The bound is 48 rather than exactly 40 to leave some headroom.
        assert!(
            size_of::<Value>() <= 48,
            "Value enum too large: {} bytes",
            size_of::<Value>()
        );
    }

    // ─── QueryResult parallel-vector invariant (issue #988) ───────────
    //
    // The `row_group_keys` sidecar must stay aligned with `rows` across
    // every mutation. These tests pin the contract for the helpers that
    // mutate both vectors. A failure here means future renderer logic
    // would apply the wrong currency hint to a row.

    /// Fixture: three aggregate rows (USD/100, EUR/50, GBP/75), each keyed
    /// by its own currency string.
    fn make_keyed_result() -> QueryResult {
        let mut r = QueryResult::new(vec!["currency".into(), "sum".into()]);
        r.add_aggregate_row(
            vec![Value::String("USD".into()), Value::Integer(100)],
            vec![Value::String("USD".into())],
        );
        r.add_aggregate_row(
            vec![Value::String("EUR".into()), Value::Integer(50)],
            vec![Value::String("EUR".into())],
        );
        r.add_aggregate_row(
            vec![Value::String("GBP".into()), Value::Integer(75)],
            vec![Value::String("GBP".into())],
        );
        r
    }

    /// `sort_by` reorders rows AND `row_group_keys` together.
    #[test]
    fn test_sort_by_keeps_row_group_keys_in_lockstep() {
        let mut r = make_keyed_result();
        // Sort by the integer column ascending: 50 (EUR), 75 (GBP), 100 (USD).
        r.sort_by(|a, b| match (&a[1], &b[1]) {
            (Value::Integer(x), Value::Integer(y)) => x.cmp(y),
            _ => std::cmp::Ordering::Equal,
        });

        // After sort, row[0] is EUR, row[1] is GBP, row[2] is USD.
        // The sidecar MUST have followed.
        assert_eq!(r.group_key(0), Some(&[Value::String("EUR".into())][..]));
        assert_eq!(r.group_key(1), Some(&[Value::String("GBP".into())][..]));
        assert_eq!(r.group_key(2), Some(&[Value::String("USD".into())][..]));
    }

    /// `truncate` drops the same suffix from rows AND `row_group_keys`.
    #[test]
    fn test_truncate_keeps_row_group_keys_in_lockstep() {
        let mut r = make_keyed_result();
        r.truncate(2);

        assert_eq!(r.rows.len(), 2);
        assert_eq!(r.row_group_keys.len(), 2);
        // Surviving keys are the first two: USD, EUR.
        assert_eq!(r.group_key(0), Some(&[Value::String("USD".into())][..]));
        assert_eq!(r.group_key(1), Some(&[Value::String("EUR".into())][..]));
        // Out-of-range index returns None gracefully.
        assert_eq!(r.group_key(2), None);
    }

    /// Mixed aggregate / non-aggregate rows: `add_row` writes `None` to
    /// the sidecar so the invariant is preserved when the two paths
    /// interleave (e.g. a synthetic explanatory row appended after an
    /// aggregate).
    #[test]
    fn test_add_row_and_add_aggregate_row_mixed() {
        let mut r = QueryResult::new(vec!["x".into()]);
        r.add_aggregate_row(vec![Value::Integer(1)], vec![Value::String("USD".into())]);
        r.add_row(vec![Value::Integer(2)]);
        r.add_aggregate_row(vec![Value::Integer(3)], vec![Value::String("EUR".into())]);

        assert_eq!(r.rows.len(), 3);
        assert_eq!(r.row_group_keys.len(), 3);
        assert_eq!(r.group_key(0), Some(&[Value::String("USD".into())][..]));
        assert_eq!(r.group_key(1), None);
        assert_eq!(r.group_key(2), Some(&[Value::String("EUR".into())][..]));
    }

    /// Empty `group_key` arg means "no GROUP BY context" — sidecar
    /// records `None` so callers don't see a misleading `Some(vec![])`.
    #[test]
    fn test_add_aggregate_row_empty_key_records_none() {
        let mut r = QueryResult::new(vec!["count".into()]);
        // Pure aggregate (e.g. SELECT COUNT(*)) has no GROUP BY at all.
        r.add_aggregate_row(vec![Value::Integer(42)], vec![]);

        assert_eq!(r.group_key(0), None);
    }

    /// `sort_by`'s lockstep invariant is enforced by an unconditional
    /// `assert_eq!`. This test deliberately corrupts the sidecar (by
    /// pushing to `rows` without a matching push to `row_group_keys`)
    /// then calls `sort_by`, expecting a panic. Pins the safety net
    /// against accidental removal of the assert.
    #[test]
    #[should_panic(expected = "QueryResult invariant violated")]
    fn test_sort_by_panics_on_lockstep_violation() {
        let mut r = QueryResult::new(vec!["x".into()]);
        // Reach in directly to corrupt the sidecar — the only way to
        // hit the assert without going through the helpers (which are
        // designed to make it impossible). Available because tests live
        // inside `rustledger-query` and `row_group_keys` is `pub(crate)`.
        r.rows.push(vec![Value::Integer(1)]);
        // Deliberately skip pushing to `row_group_keys`.
        r.sort_by(|_, _| std::cmp::Ordering::Equal);
    }

    /// Direct test for `add_row`: the non-aggregate path records `None`
    /// in the sidecar, keeping the parallel-vector invariant. Covered
    /// indirectly by `test_add_row_and_add_aggregate_row_mixed` but
    /// pinned standalone here so the contract is unambiguous.
    #[test]
    fn test_add_row_records_none_in_sidecar() {
        let mut r = QueryResult::new(vec!["x".into()]);
        r.add_row(vec![Value::Integer(1)]);

        assert_eq!(r.rows.len(), 1);
        assert_eq!(r.row_group_keys.len(), 1);
        assert_eq!(r.group_key(0), None);
    }
}
rustledger_query/executor/types.rs

rustledger_query/executor/
types.rs