vibesql_executor/select/late_materialization/
row_ref.rs

1//! Row Reference Implementation
2//!
3//! A row reference is a lightweight pointer to a row in source data,
4//! avoiding data copying during intermediate query operations.
5
6use std::sync::Arc;
7use vibesql_storage::Row;
8use vibesql_types::SqlValue;
9
/// A lightweight reference to a row in source data
///
/// Instead of cloning entire rows during query processing, `RowReference`
/// stores just enough information to locate the row when materialization
/// is needed: the identifier of the source table and the row's index in it.
///
/// # Memory Comparison
///
/// For a row with 10 columns averaging 32 bytes each:
/// - Full Row: 320+ bytes (plus heap allocations for strings)
/// - RowReference: 8 bytes on typical targets (`u16` table_id + `u32`
///   row_index, padded to 4-byte alignment)
///
/// This is roughly **40x** more memory efficient for intermediate results.
///
/// # Example
///
/// ```text
/// // Create references instead of copying rows
/// let refs: Vec<RowReference> = qualifying_indices
///     .iter()
///     .map(|&idx| RowReference::new(0, idx as u32))
///     .collect();
///
/// // Only materialize at output boundary
/// let output_rows: Vec<Row> = refs
///     .iter()
///     .map(|r| source_tables[r.table_id() as usize].row(r.row_index()))
///     .collect();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RowReference {
    /// Identifier for the source table (index into a table registry)
    table_id: u16,
    /// Row index within the source table
    row_index: u32,
}
46
47impl RowReference {
48    /// Create a new row reference
49    #[inline]
50    pub const fn new(table_id: u16, row_index: u32) -> Self {
51        Self { table_id, row_index }
52    }
53
54    /// Get the table identifier
55    #[inline]
56    pub const fn table_id(&self) -> u16 {
57        self.table_id
58    }
59
60    /// Get the row index within the table
61    #[inline]
62    pub const fn row_index(&self) -> u32 {
63        self.row_index
64    }
65
66    /// Create a vector of row references for a range of rows
67    #[inline]
68    pub fn range(table_id: u16, start: u32, end: u32) -> Vec<Self> {
69        (start..end).map(|idx| Self::new(table_id, idx)).collect()
70    }
71
72    /// Create row references from a selection vector
73    #[inline]
74    pub fn from_selection(table_id: u16, indices: &[u32]) -> Vec<Self> {
75        indices.iter().map(|&idx| Self::new(table_id, idx)).collect()
76    }
77}
78
79/// A pair of row references for join results
80///
81/// When joining tables, we track which rows from each side matched
82/// without materializing the combined row.
83#[derive(Debug, Clone, Copy, PartialEq, Eq)]
84pub struct JoinedRowRef {
85    /// Reference to the left (probe) side row
86    pub left: RowReference,
87    /// Reference to the right (build) side row
88    /// None for LEFT OUTER join when there's no match
89    pub right: Option<RowReference>,
90}
91
92impl JoinedRowRef {
93    /// Create a new joined row reference (inner join match)
94    #[inline]
95    pub const fn matched(left: RowReference, right: RowReference) -> Self {
96        Self { left, right: Some(right) }
97    }
98
99    /// Create a new joined row reference (left outer, no match)
100    #[inline]
101    pub const fn left_only(left: RowReference) -> Self {
102        Self { left, right: None }
103    }
104
105    /// Check if this is a matched pair
106    #[inline]
107    pub const fn is_matched(&self) -> bool {
108        self.right.is_some()
109    }
110}
111
112/// A resolver that can materialize row references into actual rows
113///
114/// This trait abstracts over different source data formats (row-based, columnar)
115/// allowing late materialization to work with various storage layouts.
116pub trait RowResolver {
117    /// Get a row by its reference
118    fn resolve(&self, reference: &RowReference) -> Option<&Row>;
119
120    /// Get a specific column value from a row reference
121    fn resolve_column(&self, reference: &RowReference, column_idx: usize) -> Option<&SqlValue>;
122
123    /// Batch resolve multiple row references
124    ///
125    /// Default implementation calls resolve() for each, but implementations
126    /// can override for better performance with columnar storage.
127    fn resolve_batch(&self, references: &[RowReference]) -> Vec<Option<&Row>> {
128        references.iter().map(|r| self.resolve(r)).collect()
129    }
130}
131
132/// Simple row resolver backed by a vector of rows
133///
134/// This is the most common case for row-based table scans.
135pub struct VecRowResolver<'a> {
136    table_id: u16,
137    rows: &'a [Row],
138}
139
140impl<'a> VecRowResolver<'a> {
141    /// Create a new resolver for a specific table
142    pub fn new(table_id: u16, rows: &'a [Row]) -> Self {
143        Self { table_id, rows }
144    }
145}
146
147impl<'a> RowResolver for VecRowResolver<'a> {
148    fn resolve(&self, reference: &RowReference) -> Option<&Row> {
149        if reference.table_id == self.table_id {
150            self.rows.get(reference.row_index as usize)
151        } else {
152            None
153        }
154    }
155
156    fn resolve_column(&self, reference: &RowReference, column_idx: usize) -> Option<&SqlValue> {
157        self.resolve(reference).and_then(|row| row.get(column_idx))
158    }
159}
160
161/// Multi-table row resolver for joins
162///
163/// Resolves row references across multiple source tables.
164pub struct MultiTableResolver<'a> {
165    tables: Vec<(u16, &'a [Row])>,
166}
167
168impl<'a> MultiTableResolver<'a> {
169    /// Create a new multi-table resolver
170    pub fn new() -> Self {
171        Self { tables: Vec::new() }
172    }
173
174    /// Register a table with the resolver
175    pub fn add_table(&mut self, table_id: u16, rows: &'a [Row]) {
176        self.tables.push((table_id, rows));
177    }
178
179    /// Create from a list of tables
180    pub fn from_tables(tables: Vec<(u16, &'a [Row])>) -> Self {
181        Self { tables }
182    }
183}
184
185impl<'a> Default for MultiTableResolver<'a> {
186    fn default() -> Self {
187        Self::new()
188    }
189}
190
191impl<'a> RowResolver for MultiTableResolver<'a> {
192    fn resolve(&self, reference: &RowReference) -> Option<&Row> {
193        for (table_id, rows) in &self.tables {
194            if *table_id == reference.table_id {
195                return rows.get(reference.row_index as usize);
196            }
197        }
198        None
199    }
200
201    fn resolve_column(&self, reference: &RowReference, column_idx: usize) -> Option<&SqlValue> {
202        self.resolve(reference).and_then(|row| row.get(column_idx))
203    }
204}
205
206/// Owned row data for late materialization
207///
208/// When rows need to be owned (e.g., for cross-thread operations),
209/// this wrapper allows efficient reference-based access while
210/// maintaining ownership of the underlying data.
211pub struct OwnedRowSource {
212    table_id: u16,
213    rows: Arc<Vec<Row>>,
214}
215
216impl OwnedRowSource {
217    /// Create a new owned row source
218    pub fn new(table_id: u16, rows: Vec<Row>) -> Self {
219        Self { table_id, rows: Arc::new(rows) }
220    }
221
222    /// Get the table ID
223    #[inline]
224    pub fn table_id(&self) -> u16 {
225        self.table_id
226    }
227
228    /// Get the number of rows
229    #[inline]
230    pub fn row_count(&self) -> usize {
231        self.rows.len()
232    }
233
234    /// Get a reference to the rows
235    #[inline]
236    pub fn rows(&self) -> &[Row] {
237        &self.rows
238    }
239
240    /// Create a row reference for an index
241    #[inline]
242    pub fn reference(&self, row_index: u32) -> RowReference {
243        RowReference::new(self.table_id, row_index)
244    }
245
246    /// Resolve a row reference
247    #[inline]
248    pub fn resolve(&self, reference: &RowReference) -> Option<&Row> {
249        if reference.table_id == self.table_id {
250            self.rows.get(reference.row_index as usize)
251        } else {
252            None
253        }
254    }
255
256    /// Clone the Arc (cheap, just bumps reference count)
257    pub fn share(&self) -> Self {
258        Self { table_id: self.table_id, rows: Arc::clone(&self.rows) }
259    }
260}
261
262impl Clone for OwnedRowSource {
263    fn clone(&self) -> Self {
264        self.share()
265    }
266}
267
#[cfg(test)]
mod row_ref_tests {
    use super::*;

    /// `new` stores both components and the accessors hand them back.
    #[test]
    fn test_row_reference_creation() {
        let reference = RowReference::new(1, 42);
        assert_eq!((reference.table_id(), reference.row_index()), (1, 42));
    }

    /// `range` covers the half-open interval `start..end`.
    #[test]
    fn test_row_reference_range() {
        let refs = RowReference::range(0, 10, 15);
        assert_eq!(refs.len(), 5);

        let first = refs.first().map(|r| r.row_index());
        let last = refs.last().map(|r| r.row_index());
        assert_eq!(first, Some(10));
        assert_eq!(last, Some(14));
    }

    /// A single-table resolver finds rows of its own table only.
    #[test]
    fn test_vec_resolver() {
        let rows = vec![
            Row::new(vec![SqlValue::Integer(1)]),
            Row::new(vec![SqlValue::Integer(2)]),
            Row::new(vec![SqlValue::Integer(3)]),
        ];
        let resolver = VecRowResolver::new(0, &rows);

        let in_table = RowReference::new(0, 1);
        let first_column = resolver.resolve(&in_table).and_then(|row| row.get(0));
        assert_eq!(first_column, Some(&SqlValue::Integer(2)));

        // A reference into another table must not resolve.
        let other_table = RowReference::new(1, 1);
        assert!(resolver.resolve(&other_table).is_none());
    }

    /// Each reference is routed to the table registered under its id.
    #[test]
    fn test_multi_table_resolver() {
        let table0 = [Row::new(vec![SqlValue::Varchar("A".into())])];
        let table1 = [Row::new(vec![SqlValue::Varchar("B".into())])];
        let resolver =
            MultiTableResolver::from_tables(vec![(0, &table0[..]), (1, &table1[..])]);

        let expectations = [(RowReference::new(0, 0), "A"), (RowReference::new(1, 0), "B")];
        for (reference, text) in &expectations {
            assert_eq!(
                resolver.resolve_column(reference, 0),
                Some(&SqlValue::Varchar((*text).into()))
            );
        }
    }

    /// Matched pairs carry both sides; left-only pairs report no match.
    #[test]
    fn test_joined_row_ref() {
        let left = RowReference::new(0, 10);
        let right = RowReference::new(1, 20);

        let matched = JoinedRowRef::matched(left, right);
        assert!(matched.is_matched());
        assert_eq!(matched.left.row_index(), 10);
        assert_eq!(matched.right.map(|r| r.row_index()), Some(20));

        assert!(!JoinedRowRef::left_only(left).is_matched());
    }

    /// An owned source resolves its own references and can be shared.
    #[test]
    fn test_owned_row_source() {
        let source = OwnedRowSource::new(
            0,
            vec![Row::new(vec![SqlValue::Integer(100)]), Row::new(vec![SqlValue::Integer(200)])],
        );
        assert_eq!(source.row_count(), 2);

        let second = source.reference(1);
        let value = source.resolve(&second).and_then(|row| row.get(0));
        assert_eq!(value, Some(&SqlValue::Integer(200)));

        // Sharing only clones the Arc; the row count is unchanged.
        let shared = source.share();
        assert_eq!(shared.row_count(), 2);
    }
}