rustledger_query/executor/types.rs
1//! Types used by the BQL query executor.
2
3use std::collections::BTreeMap;
4use std::hash::{Hash, Hasher};
5
6use rust_decimal::Decimal;
7use rustledger_core::{Amount, Inventory, Metadata, NaiveDate, Position, Transaction};
8
/// Where a directive came from in the input: the file it was read from and
/// the line it starts on.
#[derive(Debug, Clone)]
pub struct SourceLocation {
    /// Path of the file containing the directive.
    pub filename: String,
    /// Line number within `filename`, counted from 1.
    pub lineno: usize,
}
17
/// The calendar unit an [`Interval`] is expressed in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IntervalUnit {
    /// Calendar days.
    Day,
    /// Calendar weeks.
    Week,
    /// Calendar months.
    Month,
    /// Calendar quarters.
    Quarter,
    /// Calendar years.
    Year,
}

impl IntervalUnit {
    /// Look up the unit named by `s`.
    ///
    /// The input is upper-cased before matching, so the lookup is
    /// case-insensitive. Each unit accepts its singular name, its plural
    /// name, and a one-letter abbreviation (`D`, `W`, `M`, `Q`, `Y`).
    /// Anything else yields `None`.
    pub fn parse_unit(s: &str) -> Option<Self> {
        let normalized = s.to_uppercase();
        let unit = match normalized.as_str() {
            "D" | "DAY" | "DAYS" => Self::Day,
            "W" | "WEEK" | "WEEKS" => Self::Week,
            "M" | "MONTH" | "MONTHS" => Self::Month,
            "Q" | "QUARTER" | "QUARTERS" => Self::Quarter,
            "Y" | "YEAR" | "YEARS" => Self::Year,
            _ => return None,
        };
        Some(unit)
    }
}
46
47/// An interval value for date arithmetic.
48#[derive(Debug, Clone, PartialEq, Eq, Hash)]
49pub struct Interval {
50 /// The count (can be negative).
51 pub count: i64,
52 /// The unit.
53 pub unit: IntervalUnit,
54}
55
56impl Interval {
57 /// Create a new interval.
58 pub const fn new(count: i64, unit: IntervalUnit) -> Self {
59 Self { count, unit }
60 }
61
62 /// Convert interval to an approximate number of days for comparison.
63 /// Uses: Day=1, Week=7, Month=30, Quarter=91, Year=365.
64 pub(crate) const fn to_approx_days(&self) -> i64 {
65 let days_per_unit = match self.unit {
66 IntervalUnit::Day => 1,
67 IntervalUnit::Week => 7,
68 IntervalUnit::Month => 30,
69 IntervalUnit::Quarter => 91,
70 IntervalUnit::Year => 365,
71 };
72 self.count.saturating_mul(days_per_unit)
73 }
74
75 /// Add this interval to a date.
76 pub fn add_to_date(&self, date: NaiveDate) -> Option<NaiveDate> {
77 use jiff::ToSpan;
78
79 let span = match self.unit {
80 IntervalUnit::Day => self.count.days(),
81 IntervalUnit::Week => self.count.weeks(),
82 IntervalUnit::Month => self.count.months(),
83 IntervalUnit::Quarter => (self.count * 3).months(),
84 IntervalUnit::Year => self.count.years(),
85 };
86 date.checked_add(span).ok()
87 }
88}
89
90/// A value that can result from evaluating a BQL expression.
91///
92/// Heavy variants (Inventory, Position, Metadata, Object) are boxed to reduce
93/// the size of the enum from 120 bytes to 32 bytes, improving cache efficiency
94/// when processing large result sets.
95#[derive(Debug, Clone, PartialEq, Eq)]
96pub enum Value {
97 /// String value.
98 String(String),
99 /// Numeric value.
100 Number(Decimal),
101 /// Integer value.
102 Integer(i64),
103 /// Date value.
104 Date(NaiveDate),
105 /// Boolean value.
106 Boolean(bool),
107 /// Amount (number + currency).
108 Amount(Amount),
109 /// Position (amount + optional cost). Boxed to reduce enum size.
110 Position(Box<Position>),
111 /// Inventory (aggregated positions). Boxed to reduce enum size.
112 Inventory(Box<Inventory>),
113 /// Set of strings (tags, links).
114 StringSet(Vec<String>),
115 /// Generic set of values for IN operator (supports mixed types).
116 Set(Vec<Self>),
117 /// Metadata dictionary. Boxed to reduce enum size.
118 Metadata(Box<Metadata>),
119 /// Interval for date arithmetic.
120 Interval(Interval),
121 /// Structured object (for entry, meta columns). Boxed to reduce enum size.
122 Object(Box<BTreeMap<String, Self>>),
123 /// NULL value.
124 Null,
125}
126
127impl Value {
128 /// Compute a hash for this value.
129 ///
130 /// Note: This is not the standard Hash trait because some contained types
131 /// (Decimal, Inventory) don't implement Hash. We use byte representations
132 /// for those types.
133 pub(crate) fn hash_value<H: Hasher>(&self, state: &mut H) {
134 std::mem::discriminant(self).hash(state);
135 match self {
136 Self::String(s) => s.hash(state),
137 Self::Number(d) => d.serialize().hash(state),
138 Self::Integer(i) => i.hash(state),
139 Self::Date(d) => {
140 d.year().hash(state);
141 d.month().hash(state);
142 d.day().hash(state);
143 }
144 Self::Boolean(b) => b.hash(state),
145 Self::Amount(a) => {
146 a.number.serialize().hash(state);
147 a.currency.as_str().hash(state);
148 }
149 Self::Position(p) => {
150 // Dereference boxed position
151 p.units.number.serialize().hash(state);
152 p.units.currency.as_str().hash(state);
153 if let Some(cost) = &p.cost {
154 cost.number.serialize().hash(state);
155 cost.currency.as_str().hash(state);
156 }
157 }
158 Self::Inventory(inv) => {
159 // Dereference boxed inventory
160 for pos in inv.positions() {
161 pos.units.number.serialize().hash(state);
162 pos.units.currency.as_str().hash(state);
163 if let Some(cost) = &pos.cost {
164 cost.number.serialize().hash(state);
165 cost.currency.as_str().hash(state);
166 }
167 }
168 }
169 Self::StringSet(ss) => {
170 // Hash StringSet in a canonical, order-independent way by sorting first.
171 let mut sorted = ss.clone();
172 sorted.sort();
173 for s in &sorted {
174 s.hash(state);
175 }
176 }
177 Self::Set(values) => {
178 // Hash each value in order (sets from literals maintain order)
179 for v in values {
180 v.hash_value(state);
181 }
182 }
183 Self::Metadata(meta) => {
184 // Hash metadata in canonical order by sorting keys (boxed)
185 let mut keys: Vec<_> = meta.keys().collect();
186 keys.sort();
187 for key in keys {
188 key.hash(state);
189 // Hash the debug representation of the value
190 format!("{:?}", meta.get(key)).hash(state);
191 }
192 }
193 Self::Interval(interval) => {
194 interval.count.hash(state);
195 interval.unit.hash(state);
196 }
197 Self::Object(obj) => {
198 // BTreeMap is already sorted by key, so iteration order is deterministic (boxed)
199 for (k, v) in obj.as_ref() {
200 k.hash(state);
201 v.hash_value(state);
202 }
203 }
204 Self::Null => {}
205 }
206 }
207}
208
209/// A row of query results.
210pub type Row = Vec<Value>;
211
212/// Compute a hash for a row (for DISTINCT deduplication).
213///
214/// Uses `FxHasher` (the same non-cryptographic hash backing every
215/// `FxHashMap` in the workspace). DISTINCT / GROUP BY keys are internal
216/// dedup tokens — they need speed, not DoS-resistance.
217pub fn hash_row(row: &Row) -> u64 {
218 let mut hasher = rustc_hash::FxHasher::default();
219 for value in row {
220 value.hash_value(&mut hasher);
221 }
222 hasher.finish()
223}
224
225/// Compute a hash for a single value (for PIVOT lookups).
226pub fn hash_single_value(value: &Value) -> u64 {
227 let mut hasher = rustc_hash::FxHasher::default();
228 value.hash_value(&mut hasher);
229 hasher.finish()
230}
231
232/// Query result containing column names and rows.
233///
234/// **Invariant**: `rows.len() == row_group_keys.len()`. Always. Mutating
235/// either field directly will violate this; use the helper methods
236/// (`add_row`, `add_aggregate_row`, `truncate`, `sort_by`, etc.) that
237/// keep both vectors in lockstep. The invariant is enforced at runtime
238/// with `assert_eq!` inside `sort_by`.
239#[derive(Debug, Clone)]
240pub struct QueryResult {
241 /// Column names.
242 pub columns: Vec<String>,
243 /// Result rows.
244 pub rows: Vec<Row>,
245 /// Per-row GROUP BY key values, parallel to `rows`. `None` for rows
246 /// produced outside aggregation. Populated by the aggregate execution
247 /// path; used by the text renderer to recover the per-row currency
248 /// context for `Value::Number` cells emitted by `SUM` / `AVG` (issue
249 /// #988 — display-precision fix that stays lossless for JSON/CSV).
250 ///
251 /// `pub(crate)` so external consumers can't accidentally violate the
252 /// parallel-vector invariant; reach in directly only inside this crate
253 /// and only with extreme care. External access goes through
254 /// [`Self::group_key`].
255 pub(crate) row_group_keys: Vec<Option<Vec<Value>>>,
256}
257
258impl QueryResult {
259 /// Create a new empty result.
260 pub const fn new(columns: Vec<String>) -> Self {
261 Self {
262 columns,
263 rows: Vec::new(),
264 row_group_keys: Vec::new(),
265 }
266 }
267
268 /// Add a row to the result with no GROUP BY context (non-aggregate path).
269 /// The sidecar (`row_group_keys`) records `None` for this row, so the
270 /// text renderer applies no per-currency quantization (issue #988).
271 /// Aggregate paths must use [`Self::add_aggregate_row`] instead.
272 pub fn add_row(&mut self, row: Row) {
273 self.rows.push(row);
274 self.row_group_keys.push(None);
275 }
276
277 /// Add a row produced by aggregation, recording the GROUP BY key values
278 /// alongside it. The renderer consults the key to quantize numeric
279 /// aggregates against the per-currency display precision (issue #988).
280 ///
281 /// Multi-column GROUP BY note: when several columns are grouped (e.g.
282 /// `GROUP BY account, currency`), the entire key is preserved here.
283 /// The renderer's currency-hint extraction (`currency_hint_for_row`
284 /// in `rustledger/src/cmd/query/output.rs`) takes the *first*
285 /// currency-shaped string in iteration order — so put the currency
286 /// column first if both are currency-shaped, which is rare in
287 /// practice but possible.
288 pub fn add_aggregate_row(&mut self, row: Row, group_key: Vec<Value>) {
289 self.rows.push(row);
290 self.row_group_keys.push(if group_key.is_empty() {
291 None
292 } else {
293 Some(group_key)
294 });
295 }
296
297 /// Get the GROUP BY key for a given row, if it was produced by
298 /// aggregation. Returns `None` for non-aggregate rows or when the
299 /// row index is out of range. This is the public read-side of the
300 /// `row_group_keys` sidecar — prefer it over reaching into the
301 /// field directly.
302 ///
303 /// Returns `&[Value]` rather than `&Vec<Value>` so callers aren't
304 /// tied to the specific container type.
305 #[must_use]
306 pub fn group_key(&self, row_idx: usize) -> Option<&[Value]> {
307 self.row_group_keys.get(row_idx).and_then(|k| k.as_deref())
308 }
309
310 /// Whether any row in the result was produced by aggregation. Lets
311 /// downstream renderers short-circuit per-row hint lookups when
312 /// the cache would be all `None` anyway (issue #988 follow-up).
313 #[must_use]
314 pub fn has_aggregate_rows(&self) -> bool {
315 self.row_group_keys.iter().any(Option::is_some)
316 }
317
318 /// Truncate to the first `len` rows, keeping `row_group_keys` in
319 /// lockstep so the parallel-vector invariant survives LIMIT.
320 pub fn truncate(&mut self, len: usize) {
321 self.rows.truncate(len);
322 self.row_group_keys.truncate(len);
323 }
324
325 /// Sort rows by a comparator, keeping `row_group_keys` in lockstep.
326 /// Pair-sort prevents the sidecar from desynchronizing after ORDER BY
327 /// (otherwise text rendering would apply the wrong currency hint to
328 /// a row).
329 pub fn sort_by<F>(&mut self, mut compare: F)
330 where
331 F: FnMut(&Row, &Row) -> std::cmp::Ordering,
332 {
333 // Hard assert (not debug_assert!): the invariant is load-bearing
334 // for correctness; a release-mode mismatch would silently apply
335 // the wrong currency hint to rows after sort.
336 assert_eq!(
337 self.rows.len(),
338 self.row_group_keys.len(),
339 "QueryResult invariant violated: rows.len() must equal row_group_keys.len()"
340 );
341 let n = self.rows.len();
342 let mut paired: Vec<(Row, Option<Vec<Value>>)> = std::mem::take(&mut self.rows)
343 .into_iter()
344 .zip(std::mem::take(&mut self.row_group_keys))
345 .collect();
346 paired.sort_by(|(a, _), (b, _)| compare(a, b));
347 // Pre-allocate the now-empty Vecs back to known capacity to skip
348 // the incremental-grow allocations during push-back.
349 self.rows.reserve_exact(n);
350 self.row_group_keys.reserve_exact(n);
351 for (row, key) in paired {
352 self.rows.push(row);
353 self.row_group_keys.push(key);
354 }
355 }
356
357 /// Number of rows.
358 pub const fn len(&self) -> usize {
359 self.rows.len()
360 }
361
362 /// Whether the result is empty.
363 pub const fn is_empty(&self) -> bool {
364 self.rows.is_empty()
365 }
366}
367
368/// Context for a single posting being evaluated.
369#[derive(Debug)]
370pub struct PostingContext<'a> {
371 /// The transaction this posting belongs to.
372 pub transaction: &'a Transaction,
373 /// The posting index within the transaction.
374 pub posting_index: usize,
375 /// Cumulative running balance across all WHERE-filtered postings up to and
376 /// including this one, in iteration order. This is what bean-query exposes
377 /// as the `balance` column — a single Inventory that grows as the result
378 /// set is built, regardless of which account each posting belongs to.
379 pub balance: Option<Inventory>,
380 /// Per-account running balance for this posting's account. Exposed as the
381 /// `account_balance` column. Updated for every posting, independent of the
382 /// WHERE filter, so it always reflects the true ledger balance for the
383 /// account at this point in time.
384 pub account_balance: Option<Inventory>,
385 /// The directive index (for source location lookup).
386 pub directive_index: Option<usize>,
387}
388
/// Per-row positional information made available to window functions.
#[derive(Debug, Clone)]
pub struct WindowContext {
    /// Position of the row inside its partition, starting at 1.
    pub row_number: usize,
    /// Rank inside the partition, starting at 1; tied rows share a rank.
    pub rank: usize,
    /// Dense rank inside the partition, starting at 1; ties leave no gaps.
    pub dense_rank: usize,
}
399
400/// Account information cached from Open/Close directives.
401#[derive(Debug, Clone)]
402pub struct AccountInfo {
403 /// Date the account was opened.
404 pub open_date: Option<NaiveDate>,
405 /// Date the account was closed (if any).
406 pub close_date: Option<NaiveDate>,
407 /// Metadata from the Open directive.
408 pub open_meta: Metadata,
409}
410
411/// An in-memory table created by CREATE TABLE.
412#[derive(Debug, Clone)]
413pub struct Table {
414 /// Column names.
415 pub columns: Vec<String>,
416 /// Rows of data.
417 pub rows: Vec<Vec<Value>>,
418}
419
420impl Table {
421 /// Create a new empty table with the given column names.
422 #[allow(clippy::missing_const_for_fn)] // Vec::new() isn't const with owned columns
423 pub fn new(columns: Vec<String>) -> Self {
424 Self {
425 columns,
426 rows: Vec::new(),
427 }
428 }
429
430 /// Add a row to the table.
431 pub fn add_row(&mut self, row: Vec<Value>) {
432 self.rows.push(row);
433 }
434}
435
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the single-column GROUP BY key used throughout these tests.
    fn currency_key(code: &str) -> Vec<Value> {
        vec![Value::String(code.into())]
    }

    /// Build a `[currency, amount]` result row.
    fn currency_row(code: &str, amount: i64) -> Row {
        vec![Value::String(code.into()), Value::Integer(amount)]
    }

    /// Assert that row `row_idx` carries exactly the given currency as its
    /// GROUP BY key. `None` means "no key" (non-aggregate row or index out
    /// of range).
    #[track_caller]
    fn assert_group_key(result: &QueryResult, row_idx: usize, expected: Option<&str>) {
        let expected_key = expected.map(currency_key);
        assert_eq!(result.group_key(row_idx), expected_key.as_deref());
    }

    /// Verify Value enum size is reasonable after boxing heavy variants.
    /// Unboxed it was 120 bytes; the boxed layout must stay within 48.
    #[test]
    fn test_value_size() {
        use std::mem::size_of;
        assert!(
            size_of::<Value>() <= 48,
            "Value enum too large: {} bytes",
            size_of::<Value>()
        );
    }

    // ─── QueryResult parallel-vector invariant (issue #988) ───────────
    //
    // The `row_group_keys` sidecar must stay aligned with `rows` across
    // every mutation. These tests pin the contract for the helpers that
    // mutate both vectors, checking BOTH vectors each time: inspecting only
    // the sidecar would not notice a helper that updates one vector and
    // forgets the other. A failure here means future renderer logic would
    // apply the wrong currency hint to a row.

    /// Three aggregate rows, in insertion order USD(100), EUR(50), GBP(75).
    fn make_keyed_result() -> QueryResult {
        let mut r = QueryResult::new(vec!["currency".into(), "sum".into()]);
        for (code, amount) in [("USD", 100), ("EUR", 50), ("GBP", 75)] {
            r.add_aggregate_row(currency_row(code, amount), currency_key(code));
        }
        r
    }

    /// `sort_by` reorders rows AND `row_group_keys` together.
    #[test]
    fn test_sort_by_keeps_row_group_keys_in_lockstep() {
        let mut r = make_keyed_result();
        // Sort by the integer column ascending: 50 (EUR), 75 (GBP), 100 (USD).
        r.sort_by(|a, b| match (&a[1], &b[1]) {
            (Value::Integer(x), Value::Integer(y)) => x.cmp(y),
            _ => std::cmp::Ordering::Equal,
        });

        // The rows themselves must be in sorted order ...
        assert_eq!(
            r.rows,
            vec![
                currency_row("EUR", 50),
                currency_row("GBP", 75),
                currency_row("USD", 100),
            ]
        );
        // ... and the sidecar MUST have followed.
        assert_eq!(r.row_group_keys.len(), r.rows.len());
        assert_group_key(&r, 0, Some("EUR"));
        assert_group_key(&r, 1, Some("GBP"));
        assert_group_key(&r, 2, Some("USD"));
    }

    /// `truncate` drops the same suffix from rows AND `row_group_keys`.
    #[test]
    fn test_truncate_keeps_row_group_keys_in_lockstep() {
        let mut r = make_keyed_result();
        r.truncate(2);

        assert_eq!(r.rows.len(), 2);
        assert_eq!(r.row_group_keys.len(), 2);
        // Surviving rows and keys are the first two: USD, EUR.
        assert_eq!(
            r.rows,
            vec![currency_row("USD", 100), currency_row("EUR", 50)]
        );
        assert_group_key(&r, 0, Some("USD"));
        assert_group_key(&r, 1, Some("EUR"));
        // Out-of-range index returns None gracefully.
        assert_group_key(&r, 2, None);

        // Truncating to more rows than exist changes nothing.
        r.truncate(10);
        assert_eq!(r.rows.len(), 2);
        assert_eq!(r.row_group_keys.len(), 2);
    }

    /// Mixed aggregate / non-aggregate rows: `add_row` writes `None` to
    /// the sidecar so the invariant is preserved when the two paths
    /// interleave (e.g. a synthetic explanatory row appended after an
    /// aggregate).
    #[test]
    fn test_add_row_and_add_aggregate_row_mixed() {
        let mut r = QueryResult::new(vec!["x".into()]);
        r.add_aggregate_row(vec![Value::Integer(1)], currency_key("USD"));
        r.add_row(vec![Value::Integer(2)]);
        r.add_aggregate_row(vec![Value::Integer(3)], currency_key("EUR"));

        assert_eq!(r.rows.len(), 3);
        assert_eq!(r.row_group_keys.len(), 3);
        assert_group_key(&r, 0, Some("USD"));
        assert_group_key(&r, 1, None);
        assert_group_key(&r, 2, Some("EUR"));
    }

    /// Empty `group_key` arg means "no GROUP BY context" — sidecar
    /// records `None` so callers don't see a misleading `Some(vec![])`.
    #[test]
    fn test_add_aggregate_row_empty_key_records_none() {
        let mut r = QueryResult::new(vec!["count".into()]);
        // Pure aggregate (e.g. SELECT COUNT(*)) has no GROUP BY at all.
        r.add_aggregate_row(vec![Value::Integer(42)], vec![]);

        assert_eq!(r.group_key(0), None);
    }

    /// `sort_by`'s lockstep invariant is enforced by an unconditional
    /// `assert_eq!`. This test deliberately corrupts the sidecar (by
    /// pushing to `rows` without a matching push to `row_group_keys`)
    /// then calls `sort_by`, expecting a panic. Pins the safety net
    /// against accidental removal of the assert.
    #[test]
    #[should_panic(expected = "QueryResult invariant violated")]
    fn test_sort_by_panics_on_lockstep_violation() {
        let mut r = QueryResult::new(vec!["x".into()]);
        // Reach in directly to corrupt the sidecar — the only way to
        // hit the assert without going through the helpers (which are
        // designed to make it impossible). Available because tests live
        // inside `rustledger-query` and `row_group_keys` is `pub(crate)`.
        r.rows.push(vec![Value::Integer(1)]);
        // Deliberately skip pushing to `row_group_keys`.
        r.sort_by(|_, _| std::cmp::Ordering::Equal);
    }

    /// Direct test for `add_row`: the non-aggregate path records `None`
    /// in the sidecar, keeping the parallel-vector invariant. Covered
    /// indirectly by `test_add_row_and_add_aggregate_row_mixed` but
    /// pinned standalone here so the contract is unambiguous.
    #[test]
    fn test_add_row_records_none_in_sidecar() {
        let mut r = QueryResult::new(vec!["x".into()]);
        r.add_row(vec![Value::Integer(1)]);

        assert_eq!(r.rows.len(), 1);
        assert_eq!(r.row_group_keys.len(), 1);
        assert_eq!(r.group_key(0), None);
    }

    /// `has_aggregate_rows`, `len` and `is_empty` track what was added:
    /// only a row with a non-empty GROUP BY key counts as an aggregate row.
    #[test]
    fn test_has_aggregate_rows_len_and_is_empty() {
        let mut r = QueryResult::new(vec!["x".into()]);
        assert!(r.is_empty());
        assert_eq!(r.len(), 0);
        assert!(!r.has_aggregate_rows());

        // A plain row carries no key.
        r.add_row(vec![Value::Integer(1)]);
        assert!(!r.is_empty());
        assert_eq!(r.len(), 1);
        assert!(!r.has_aggregate_rows());

        // An aggregate row with an empty key is recorded as `None` too.
        r.add_aggregate_row(vec![Value::Integer(2)], vec![]);
        assert_eq!(r.len(), 2);
        assert!(!r.has_aggregate_rows());

        // Only a real key flips the flag.
        r.add_aggregate_row(vec![Value::Integer(3)], currency_key("USD"));
        assert_eq!(r.len(), 3);
        assert!(r.has_aggregate_rows());
    }
}