vibesql_executor/memory/
mod.rs

1//! Memory management utilities for efficient query execution
2//!
3//! This module provides memory-bounded execution for SQL query operators,
4//! enabling processing of datasets larger than available memory through
5//! disk spilling.
6//!
7//! # Components
8//!
9//! - **Memory Controller** (`MemoryController`): Budget management and tracking
10//! - **Memory Reservation** (`MemoryReservation`): Per-operator memory tracking
11//! - **External Sort** (`ExternalSort`): Disk-spilling merge sort
12//! - **External Aggregate** (`ExternalAggregate`): Partition-based GROUP BY
13//! - **External Hash Join** (`ExternalHashJoin`): Grace hash join with spilling
14//! - **Spill Files** (`SpillFile`): Temporary file management with auto-cleanup
15//! - **Arena Allocator** (`QueryArena`): Fast bump-pointer allocator
16//!
17//! # Architecture
18//!
19//! ```text
20//! ┌─────────────────────────────────────────────────────────────────┐
21//! │                       MemoryController                          │
22//! │  ┌────────────────┐ ┌────────────────┐ ┌──────────────────────┐ │
23//! │  │  Budget Pool   │ │    Tracking    │ │       Metrics        │ │
24//! │  │ (configurable) │ │ (per-operator) │ │ (spills, peak, etc.) │ │
25//! │  └────────────────┘ └────────────────┘ └──────────────────────┘ │
26//! └─────────────────────────────────────────────────────────────────┘
27//!            │                │                │
28//!            ▼                ▼                ▼
29//! ┌──────────────┐  ┌──────────────┐  ┌──────────────┐
30//! │ External     │  │ External     │  │ External     │
31//! │ Sort         │  │ Aggregate    │  │ Hash Join    │
32//! │ (merge sort) │  │ (partitioned)│  │ (grace join) │
33//! └──────────────┘  └──────────────┘  └──────────────┘
34//!            │                │                │
35//!            ▼                ▼                ▼
36//! ┌─────────────────────────────────────────────────────────────────┐
37//! │                        SpillFile (temp files)                   │
38//! │         Auto-cleanup on drop, buffered I/O, seeking             │
39//! └─────────────────────────────────────────────────────────────────┘
40//! ```
41//!
42//! # Memory-Bounded Execution
43//!
44//! ```text
45//! use std::sync::Arc;
46//! use vibesql_executor::memory::{MemoryController, MemoryConfig};
47//!
48//! // Create controller with 1GB budget
49//! let controller = Arc::new(MemoryController::with_budget(1024 * 1024 * 1024));
50//!
51//! // Operators create reservations to track their memory
52//! let mut reservation = controller.create_reservation();
53//!
54//! // When memory is exhausted, spill to disk
55//! if !reservation.try_grow(batch_size) {
56//!     spill_to_disk(&data);
57//!     reservation.shrink(data.size());
58//! }
59//!
60//! // Check statistics after execution
61//! let stats = controller.stats();
62//! println!("{}", stats); // "Memory: 512MB/1GB (50%), peak: 950MB, spilled: 2GB (3 ops)"
63//! ```
64//!
65//! # External Operators
66//!
67//! ## External Sort
68//!
69//! Two-phase external merge sort:
70//! 1. **Run generation**: Sort in-memory chunks, spill as sorted runs
71//! 2. **K-way merge**: Merge runs using a tournament tree
72//!
73//! ```text
74//! let mut sort = ExternalSort::new(controller, config, sort_keys);
75//! for row in input {
76//!     sort.add_row(&row)?;  // Automatically spills when needed
77//! }
78//! for result in sort.finish()? {
79//!     // Rows come out in sorted order
80//! }
81//! ```
82//!
83//! ## External Aggregate
84//!
85//! Partition-based aggregation for GROUP BY:
86//! 1. Hash rows to partitions
87//! 2. Spill partitions when memory is exhausted
88//! 3. Process each partition's groups
89//!
90//! ```text
91//! let specs = vec![AggregateSpec { function_name: "SUM".into(), .. }];
92//! let mut agg = ExternalAggregate::new(controller, config, specs, 2);
93//! for row in input {
94//!     agg.add_row(&row)?;
95//! }
96//! for result in agg.finish()? {
97//!     // (group_key..., aggregate_values...)
98//! }
99//! ```
100//!
101//! ## External Hash Join
102//!
103//! Grace hash join with partition-based spilling:
104//! 1. Partition both build and probe sides by join key hash
105//! 2. Spill partitions when memory is exhausted
106//! 3. Process matching partitions together
107//!
108//! ```text
109//! let mut join = ExternalHashJoin::new(
110//!     controller, config,
111//!     vec![0],  // build key columns
112//!     vec![0],  // probe key columns
113//!     JoinType::Inner,
114//! );
115//! for row in build_side { join.add_build_row(&row)?; }
116//! for row in probe_side { join.add_probe_row(&row)?; }
117//! for result in join.finish()? {
118//!     // Joined rows
119//! }
120//! ```
121//!
122//! # Configuration
123//!
124//! Environment variables:
125//!
126//! | Variable | Description | Default |
127//! |----------|-------------|---------|
128//! | `VIBESQL_MEMORY_LIMIT` | Total memory budget (e.g., "4GB") | 1GB |
129//! | `VIBESQL_TEMP_DIR` | Directory for spill files | system temp |
130//! | `VIBESQL_SPILL_THRESHOLD` | When to start spilling (0.0-1.0) | 0.8 |
131//! | `VIBESQL_PARTITION_SIZE` | Target partition size | 64MB |
132
133mod arena;
134mod controller;
135mod external_aggregate;
136mod external_hash_join;
137mod external_sort;
138pub mod row_serialization;
139mod spill;
140
141pub use arena::QueryArena;
142pub use controller::{
143    MemoryConfig, MemoryController, MemoryReservation, MemoryStats, DEFAULT_MEMORY_BUDGET,
144    DEFAULT_SPILL_THRESHOLD, DEFAULT_TARGET_PARTITION_BYTES, MIN_OPERATOR_MEMORY,
145};
146pub use external_aggregate::{
147    AggregateResultIterator, AggregateSpec, ExternalAggregate, ExternalAggregateConfig,
148};
149pub use external_hash_join::{
150    ExternalHashJoin, ExternalHashJoinConfig, HashJoinResultIterator, JoinType,
151};
152pub use external_sort::{ExternalSort, ExternalSortConfig, SortKey, SortedIterator};
153pub use spill::{SpillFile, SpillFileSet};