rustledger_query/executor/types.rs
1//! Types used by the BQL query executor.
2
3// ratchet: fxhash-only — hot path; use FxHashMap/FxHashSet, not std SipHash collections (#1237).
4use std::collections::BTreeMap;
5use std::hash::{Hash, Hasher};
6
7use rust_decimal::Decimal;
8use rustledger_core::{Amount, Inventory, Metadata, NaiveDate, Position, Transaction};
9
/// Source location information for a directive.
///
/// Pairs the file a directive came from with the line it sits on.
#[derive(Debug, Clone)]
pub struct SourceLocation {
    /// File path.
    pub filename: String,
    /// Line number (1-based).
    pub lineno: usize,
}
18
/// The calendar unit an [`Interval`] is counted in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IntervalUnit {
    /// Days.
    Day,
    /// Weeks.
    Week,
    /// Months.
    Month,
    /// Quarters.
    Quarter,
    /// Years.
    Year,
}

impl IntervalUnit {
    /// Parse an interval unit from its textual name.
    ///
    /// Matching is case-insensitive (the input is upper-cased first) and
    /// accepts the singular name, the plural name, or the one-letter
    /// abbreviation of each unit. Surrounding whitespace is not trimmed.
    /// Returns `None` for anything else, including the empty string.
    pub fn parse_unit(s: &str) -> Option<Self> {
        let normalized = s.to_uppercase();
        let unit = match normalized.as_str() {
            "D" | "DAY" | "DAYS" => Self::Day,
            "W" | "WEEK" | "WEEKS" => Self::Week,
            "M" | "MONTH" | "MONTHS" => Self::Month,
            "Q" | "QUARTER" | "QUARTERS" => Self::Quarter,
            "Y" | "YEAR" | "YEARS" => Self::Year,
            _ => return None,
        };
        Some(unit)
    }
}
47
48/// An interval value for date arithmetic.
49#[derive(Debug, Clone, PartialEq, Eq, Hash)]
50pub struct Interval {
51 /// The count (can be negative).
52 pub count: i64,
53 /// The unit.
54 pub unit: IntervalUnit,
55}
56
57impl Interval {
58 /// Create a new interval.
59 pub const fn new(count: i64, unit: IntervalUnit) -> Self {
60 Self { count, unit }
61 }
62
63 /// Convert interval to an approximate number of days for comparison.
64 /// Uses: Day=1, Week=7, Month=30, Quarter=91, Year=365.
65 pub(crate) const fn to_approx_days(&self) -> i64 {
66 let days_per_unit = match self.unit {
67 IntervalUnit::Day => 1,
68 IntervalUnit::Week => 7,
69 IntervalUnit::Month => 30,
70 IntervalUnit::Quarter => 91,
71 IntervalUnit::Year => 365,
72 };
73 self.count.saturating_mul(days_per_unit)
74 }
75
76 /// Add this interval to a date.
77 pub fn add_to_date(&self, date: NaiveDate) -> Option<NaiveDate> {
78 use jiff::ToSpan;
79
80 let span = match self.unit {
81 IntervalUnit::Day => self.count.days(),
82 IntervalUnit::Week => self.count.weeks(),
83 IntervalUnit::Month => self.count.months(),
84 IntervalUnit::Quarter => (self.count * 3).months(),
85 IntervalUnit::Year => self.count.years(),
86 };
87 date.checked_add(span).ok()
88 }
89}
90
/// A value that can result from evaluating a BQL expression.
///
/// Heavy variants (Inventory, Position, Metadata, Object) are boxed to reduce
/// the size of the enum (reportedly from 120 bytes down to about 40; the
/// `test_value_size` unit test caps it at 48 bytes), improving cache
/// efficiency when processing large result sets.
///
/// Do not reorder variants casually: the derived discriminant is fed into
/// `hash_value`, so variant identity is part of every hash.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Value {
    /// String value.
    String(String),
    /// Numeric value.
    Number(Decimal),
    /// Integer value.
    Integer(i64),
    /// Date value.
    Date(NaiveDate),
    /// Boolean value.
    Boolean(bool),
    /// Amount (number + currency).
    Amount(Amount),
    /// Position (amount + optional cost). Boxed to reduce enum size.
    Position(Box<Position>),
    /// Inventory (aggregated positions). Boxed to reduce enum size.
    Inventory(Box<Inventory>),
    /// Set of strings (tags, links).
    StringSet(Vec<String>),
    /// Generic set of values for IN operator (supports mixed types).
    Set(Vec<Self>),
    /// Metadata dictionary. Boxed to reduce enum size.
    Metadata(Box<Metadata>),
    /// Interval for date arithmetic.
    Interval(Interval),
    /// Structured object (for entry, meta columns). Boxed to reduce enum size.
    Object(Box<BTreeMap<String, Self>>),
    /// NULL value.
    Null,
}
127
128impl Value {
129 /// Compute a hash for this value.
130 ///
131 /// Note: This is not the standard Hash trait because some contained types
132 /// (Decimal, Inventory) don't implement Hash. We use byte representations
133 /// for those types.
134 pub(crate) fn hash_value<H: Hasher>(&self, state: &mut H) {
135 std::mem::discriminant(self).hash(state);
136 match self {
137 Self::String(s) => s.hash(state),
138 Self::Number(d) => d.serialize().hash(state),
139 Self::Integer(i) => i.hash(state),
140 Self::Date(d) => {
141 d.year().hash(state);
142 d.month().hash(state);
143 d.day().hash(state);
144 }
145 Self::Boolean(b) => b.hash(state),
146 Self::Amount(a) => {
147 a.number.serialize().hash(state);
148 a.currency.as_str().hash(state);
149 }
150 Self::Position(p) => {
151 // Dereference boxed position
152 p.units.number.serialize().hash(state);
153 p.units.currency.as_str().hash(state);
154 if let Some(cost) = &p.cost {
155 cost.number.serialize().hash(state);
156 cost.currency.as_str().hash(state);
157 }
158 }
159 Self::Inventory(inv) => {
160 // Dereference boxed inventory
161 for pos in inv.positions() {
162 pos.units.number.serialize().hash(state);
163 pos.units.currency.as_str().hash(state);
164 if let Some(cost) = &pos.cost {
165 cost.number.serialize().hash(state);
166 cost.currency.as_str().hash(state);
167 }
168 }
169 }
170 Self::StringSet(ss) => {
171 // Hash StringSet in a canonical, order-independent way by sorting first.
172 let mut sorted = ss.clone();
173 sorted.sort();
174 for s in &sorted {
175 s.hash(state);
176 }
177 }
178 Self::Set(values) => {
179 // Hash each value in order (sets from literals maintain order)
180 for v in values {
181 v.hash_value(state);
182 }
183 }
184 Self::Metadata(meta) => {
185 // Hash metadata in canonical order by sorting keys (boxed)
186 let mut keys: Vec<_> = meta.keys().collect();
187 keys.sort();
188 for key in keys {
189 key.hash(state);
190 // Hash the debug representation of the value
191 format!("{:?}", meta.get(key)).hash(state);
192 }
193 }
194 Self::Interval(interval) => {
195 interval.count.hash(state);
196 interval.unit.hash(state);
197 }
198 Self::Object(obj) => {
199 // BTreeMap is already sorted by key, so iteration order is deterministic (boxed)
200 for (k, v) in obj.as_ref() {
201 k.hash(state);
202 v.hash_value(state);
203 }
204 }
205 Self::Null => {}
206 }
207 }
208}
209
/// A row of query results, stored as an ordered list of [`Value`] cells.
pub type Row = Vec<Value>;
212
213/// Compute a hash for a row (for DISTINCT deduplication).
214///
215/// Uses `FxHasher` (the same non-cryptographic hash backing every
216/// `FxHashMap` in the workspace). DISTINCT / GROUP BY keys are internal
217/// dedup tokens — they need speed, not DoS-resistance.
218pub fn hash_row(row: &Row) -> u64 {
219 let mut hasher = rustc_hash::FxHasher::default();
220 for value in row {
221 value.hash_value(&mut hasher);
222 }
223 hasher.finish()
224}
225
226/// Compute a hash for a single value (for PIVOT lookups).
227pub fn hash_single_value(value: &Value) -> u64 {
228 let mut hasher = rustc_hash::FxHasher::default();
229 value.hash_value(&mut hasher);
230 hasher.finish()
231}
232
/// Query result containing column names and rows.
///
/// **Invariant**: `rows.len() == row_group_keys.len()`. Always. Mutating
/// either field directly will violate this; use the helper methods
/// (`add_row`, `add_aggregate_row`, `truncate`, `sort_by`, etc.) that
/// keep both vectors in lockstep. The invariant is enforced at runtime
/// with `assert_eq!` inside `sort_by`.
#[derive(Debug, Clone)]
pub struct QueryResult {
    /// Column names.
    pub columns: Vec<String>,
    /// Result rows.
    ///
    /// This field is `pub`, so pushing to it directly bypasses the
    /// `row_group_keys` sidecar; prefer `add_row` / `add_aggregate_row`.
    pub rows: Vec<Row>,
    /// Per-row GROUP BY key values, parallel to `rows`. `None` for rows
    /// produced outside aggregation. Populated by the aggregate execution
    /// path; used by the text renderer to recover the per-row currency
    /// context for `Value::Number` cells emitted by `SUM` / `AVG` (issue
    /// #988 — display-precision fix that stays lossless for JSON/CSV).
    ///
    /// `pub(crate)` so external consumers can't accidentally violate the
    /// parallel-vector invariant; reach in directly only inside this crate
    /// and only with extreme care. External access goes through
    /// [`Self::group_key`].
    pub(crate) row_group_keys: Vec<Option<Vec<Value>>>,
}
258
259impl QueryResult {
260 /// Create a new empty result.
261 pub const fn new(columns: Vec<String>) -> Self {
262 Self {
263 columns,
264 rows: Vec::new(),
265 row_group_keys: Vec::new(),
266 }
267 }
268
269 /// Add a row to the result with no GROUP BY context (non-aggregate path).
270 /// The sidecar (`row_group_keys`) records `None` for this row, so the
271 /// text renderer applies no per-currency quantization (issue #988).
272 /// Aggregate paths must use [`Self::add_aggregate_row`] instead.
273 pub fn add_row(&mut self, row: Row) {
274 self.rows.push(row);
275 self.row_group_keys.push(None);
276 }
277
278 /// Add a row produced by aggregation, recording the GROUP BY key values
279 /// alongside it. The renderer consults the key to quantize numeric
280 /// aggregates against the per-currency display precision (issue #988).
281 ///
282 /// Multi-column GROUP BY note: when several columns are grouped (e.g.
283 /// `GROUP BY account, currency`), the entire key is preserved here.
284 /// The renderer's currency-hint extraction (`currency_hint_for_row`
285 /// in `rustledger/src/cmd/query/output.rs`) takes the *first*
286 /// currency-shaped string in iteration order — so put the currency
287 /// column first if both are currency-shaped, which is rare in
288 /// practice but possible.
289 pub fn add_aggregate_row(&mut self, row: Row, group_key: Vec<Value>) {
290 self.rows.push(row);
291 self.row_group_keys.push(if group_key.is_empty() {
292 None
293 } else {
294 Some(group_key)
295 });
296 }
297
298 /// Get the GROUP BY key for a given row, if it was produced by
299 /// aggregation. Returns `None` for non-aggregate rows or when the
300 /// row index is out of range. This is the public read-side of the
301 /// `row_group_keys` sidecar — prefer it over reaching into the
302 /// field directly.
303 ///
304 /// Returns `&[Value]` rather than `&Vec<Value>` so callers aren't
305 /// tied to the specific container type.
306 #[must_use]
307 pub fn group_key(&self, row_idx: usize) -> Option<&[Value]> {
308 self.row_group_keys.get(row_idx).and_then(|k| k.as_deref())
309 }
310
311 /// Whether any row in the result was produced by aggregation. Lets
312 /// downstream renderers short-circuit per-row hint lookups when
313 /// the cache would be all `None` anyway (issue #988 follow-up).
314 #[must_use]
315 pub fn has_aggregate_rows(&self) -> bool {
316 self.row_group_keys.iter().any(Option::is_some)
317 }
318
319 /// Truncate to the first `len` rows, keeping `row_group_keys` in
320 /// lockstep so the parallel-vector invariant survives LIMIT.
321 pub fn truncate(&mut self, len: usize) {
322 self.rows.truncate(len);
323 self.row_group_keys.truncate(len);
324 }
325
326 /// Sort rows by a comparator, keeping `row_group_keys` in lockstep.
327 /// Pair-sort prevents the sidecar from desynchronizing after ORDER BY
328 /// (otherwise text rendering would apply the wrong currency hint to
329 /// a row).
330 pub fn sort_by<F>(&mut self, mut compare: F)
331 where
332 F: FnMut(&Row, &Row) -> std::cmp::Ordering,
333 {
334 // Hard assert (not debug_assert!): the invariant is load-bearing
335 // for correctness; a release-mode mismatch would silently apply
336 // the wrong currency hint to rows after sort.
337 assert_eq!(
338 self.rows.len(),
339 self.row_group_keys.len(),
340 "QueryResult invariant violated: rows.len() must equal row_group_keys.len()"
341 );
342 let n = self.rows.len();
343 let mut paired: Vec<(Row, Option<Vec<Value>>)> = std::mem::take(&mut self.rows)
344 .into_iter()
345 .zip(std::mem::take(&mut self.row_group_keys))
346 .collect();
347 paired.sort_by(|(a, _), (b, _)| compare(a, b));
348 // Pre-allocate the now-empty Vecs back to known capacity to skip
349 // the incremental-grow allocations during push-back.
350 self.rows.reserve_exact(n);
351 self.row_group_keys.reserve_exact(n);
352 for (row, key) in paired {
353 self.rows.push(row);
354 self.row_group_keys.push(key);
355 }
356 }
357
358 /// Number of rows.
359 pub const fn len(&self) -> usize {
360 self.rows.len()
361 }
362
363 /// Whether the result is empty.
364 pub const fn is_empty(&self) -> bool {
365 self.rows.is_empty()
366 }
367}
368
/// Context for a single posting being evaluated.
///
/// Borrows the owning transaction for `'a` and identifies the posting by
/// its index within that transaction.
#[derive(Debug)]
pub struct PostingContext<'a> {
    /// The transaction this posting belongs to.
    pub transaction: &'a Transaction,
    /// The posting index within the transaction.
    pub posting_index: usize,
    /// Cumulative running balance across all WHERE-filtered postings up to and
    /// including this one, in iteration order. This is what bean-query exposes
    /// as the `balance` column — a single Inventory that grows as the result
    /// set is built, regardless of which account each posting belongs to.
    pub balance: Option<Inventory>,
    /// Per-account running balance for this posting's account. Exposed as the
    /// `account_balance` column. Updated for every posting, independent of the
    /// WHERE filter, so it always reflects the true ledger balance for the
    /// account at this point in time.
    pub account_balance: Option<Inventory>,
    /// The directive index (for source location lookup).
    pub directive_index: Option<usize>,
}
389
/// Context for window function evaluation.
///
/// Carries the three ranking values for the current row; all are 1-based.
#[derive(Debug, Clone)]
pub struct WindowContext {
    /// Row number within the partition (1-based).
    pub row_number: usize,
    /// Rank within the partition (1-based, ties get same rank).
    pub rank: usize,
    /// Dense rank within the partition (1-based, no gaps after ties).
    pub dense_rank: usize,
}
400
/// Account information cached from Open/Close directives.
///
/// Both dates are optional; `open_meta` is always present.
#[derive(Debug, Clone)]
pub struct AccountInfo {
    /// Date the account was opened.
    pub open_date: Option<NaiveDate>,
    /// Date the account was closed (if any).
    pub close_date: Option<NaiveDate>,
    /// Metadata from the Open directive.
    pub open_meta: Metadata,
}
411
412/// An in-memory table created by CREATE TABLE.
413#[derive(Debug, Clone)]
414pub struct Table {
415 /// Column names.
416 pub columns: Vec<String>,
417 /// Rows of data.
418 pub rows: Vec<Vec<Value>>,
419}
420
421impl Table {
422 /// Create a new empty table with the given column names.
423 #[allow(clippy::missing_const_for_fn)] // Vec::new() isn't const with owned columns
424 pub fn new(columns: Vec<String>) -> Self {
425 Self {
426 columns,
427 rows: Vec::new(),
428 }
429 }
430
431 /// Add a row to the table.
432 pub fn add_row(&mut self, row: Vec<Value>) {
433 self.rows.push(row);
434 }
435}
436
#[cfg(test)]
mod tests {
    use super::*;

    /// Verify Value enum size is reasonable after boxing heavy variants.
    /// Reportedly 120 bytes unboxed and about 40 bytes boxed (67% reduction).
    /// The assertion allows up to 48 bytes, leaving some headroom over that.
    #[test]
    fn test_value_size() {
        use std::mem::size_of;
        // Value should be ~40 bytes with boxed variants (vs 120 unboxed);
        // 48 is the hard ceiling this test enforces.
        assert!(
            size_of::<Value>() <= 48,
            "Value enum too large: {} bytes",
            size_of::<Value>()
        );
    }

    // ─── QueryResult parallel-vector invariant (issue #988) ───────────
    //
    // The `row_group_keys` sidecar must stay aligned with `rows` across
    // every mutation. These tests pin the contract for the helpers that
    // mutate both vectors. A failure here means future renderer logic
    // would apply the wrong currency hint to a row.

    /// Fixture: three aggregate rows (USD=100, EUR=50, GBP=75), each keyed
    /// by its own currency string, inserted in that order.
    fn make_keyed_result() -> QueryResult {
        let mut r = QueryResult::new(vec!["currency".into(), "sum".into()]);
        r.add_aggregate_row(
            vec![Value::String("USD".into()), Value::Integer(100)],
            vec![Value::String("USD".into())],
        );
        r.add_aggregate_row(
            vec![Value::String("EUR".into()), Value::Integer(50)],
            vec![Value::String("EUR".into())],
        );
        r.add_aggregate_row(
            vec![Value::String("GBP".into()), Value::Integer(75)],
            vec![Value::String("GBP".into())],
        );
        r
    }

    /// `sort_by` reorders rows AND `row_group_keys` together.
    #[test]
    fn test_sort_by_keeps_row_group_keys_in_lockstep() {
        let mut r = make_keyed_result();
        // Sort by the integer column ascending: 50 (EUR), 75 (GBP), 100 (USD).
        r.sort_by(|a, b| match (&a[1], &b[1]) {
            (Value::Integer(x), Value::Integer(y)) => x.cmp(y),
            _ => std::cmp::Ordering::Equal,
        });

        // After sort, row[0] is EUR, row[1] is GBP, row[2] is USD.
        // The sidecar MUST have followed.
        assert_eq!(r.group_key(0), Some(&[Value::String("EUR".into())][..]));
        assert_eq!(r.group_key(1), Some(&[Value::String("GBP".into())][..]));
        assert_eq!(r.group_key(2), Some(&[Value::String("USD".into())][..]));
    }

    /// `truncate` drops the same suffix from rows AND `row_group_keys`.
    #[test]
    fn test_truncate_keeps_row_group_keys_in_lockstep() {
        let mut r = make_keyed_result();
        r.truncate(2);

        assert_eq!(r.rows.len(), 2);
        assert_eq!(r.row_group_keys.len(), 2);
        // Surviving keys are the first two: USD, EUR.
        assert_eq!(r.group_key(0), Some(&[Value::String("USD".into())][..]));
        assert_eq!(r.group_key(1), Some(&[Value::String("EUR".into())][..]));
        // Out-of-range index returns None gracefully.
        assert_eq!(r.group_key(2), None);
    }

    /// Mixed aggregate / non-aggregate rows: `add_row` writes `None` to
    /// the sidecar so the invariant is preserved when the two paths
    /// interleave (e.g. a synthetic explanatory row appended after an
    /// aggregate).
    #[test]
    fn test_add_row_and_add_aggregate_row_mixed() {
        let mut r = QueryResult::new(vec!["x".into()]);
        r.add_aggregate_row(vec![Value::Integer(1)], vec![Value::String("USD".into())]);
        r.add_row(vec![Value::Integer(2)]);
        r.add_aggregate_row(vec![Value::Integer(3)], vec![Value::String("EUR".into())]);

        assert_eq!(r.rows.len(), 3);
        assert_eq!(r.row_group_keys.len(), 3);
        assert_eq!(r.group_key(0), Some(&[Value::String("USD".into())][..]));
        assert_eq!(r.group_key(1), None);
        assert_eq!(r.group_key(2), Some(&[Value::String("EUR".into())][..]));
    }

    /// Empty `group_key` arg means "no GROUP BY context" — sidecar
    /// records `None` so callers don't see a misleading `Some(vec![])`.
    #[test]
    fn test_add_aggregate_row_empty_key_records_none() {
        let mut r = QueryResult::new(vec!["count".into()]);
        // Pure aggregate (e.g. SELECT COUNT(*)) has no GROUP BY at all.
        r.add_aggregate_row(vec![Value::Integer(42)], vec![]);

        assert_eq!(r.group_key(0), None);
    }

    /// `sort_by`'s lockstep invariant is enforced by an unconditional
    /// `assert_eq!`. This test deliberately corrupts the sidecar (by
    /// pushing to `rows` without a matching push to `row_group_keys`)
    /// then calls `sort_by`, expecting a panic. Pins the safety net
    /// against accidental removal of the assert.
    #[test]
    #[should_panic(expected = "QueryResult invariant violated")]
    fn test_sort_by_panics_on_lockstep_violation() {
        let mut r = QueryResult::new(vec!["x".into()]);
        // Reach in directly to corrupt the sidecar — the only way to
        // hit the assert without going through the helpers (which are
        // designed to make it impossible). Available because tests live
        // inside `rustledger-query` and `row_group_keys` is `pub(crate)`.
        r.rows.push(vec![Value::Integer(1)]);
        // Deliberately skip pushing to `row_group_keys`.
        r.sort_by(|_, _| std::cmp::Ordering::Equal);
    }

    /// Direct test for `add_row`: the non-aggregate path records `None`
    /// in the sidecar, keeping the parallel-vector invariant. Covered
    /// indirectly by `test_add_row_and_add_aggregate_row_mixed` but
    /// pinned standalone here so the contract is unambiguous.
    #[test]
    fn test_add_row_records_none_in_sidecar() {
        let mut r = QueryResult::new(vec!["x".into()]);
        r.add_row(vec![Value::Integer(1)]);

        assert_eq!(r.rows.len(), 1);
        assert_eq!(r.row_group_keys.len(), 1);
        assert_eq!(r.group_key(0), None);
    }
}
570}