vibesql_executor/select/late_materialization/
row_ref.rs

1//! Row Reference Implementation
2//!
3//! A row reference is a lightweight pointer to a row in source data,
4//! avoiding data copying during intermediate query operations.
5
6use std::sync::Arc;
7use vibesql_storage::Row;
8use vibesql_types::SqlValue;
9
/// A lightweight reference to a row in source data
///
/// Instead of cloning entire rows during query processing, `RowReference`
/// stores just enough information to locate the row when materialization
/// is needed: the identifier of the source table and the row's index
/// within that table.
///
/// # Memory Comparison
///
/// For a row with 10 columns averaging 32 bytes each:
/// - Full Row: 320+ bytes (plus heap allocations for strings)
/// - RowReference: 8 bytes on typical targets (`u16` table_id + `u32`
///   row_index, padded to the alignment of `u32`)
///
/// This is roughly **40x** more memory efficient for intermediate results.
///
/// # Example
///
/// ```text
/// // Create references instead of copying rows
/// let refs: Vec<RowReference> = qualifying_indices
///     .iter()
///     .map(|&idx| RowReference::new(0, idx as u32))
///     .collect();
///
/// // Only materialize at output boundary
/// let output_rows: Vec<Row> = refs
///     .iter()
///     .map(|r| source_tables[usize::from(r.table_id())].row(r.row_index()))
///     .collect();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RowReference {
    /// Identifier for the source table (index into a table registry)
    table_id: u16,
    /// Row index within the source table
    row_index: u32,
}

impl RowReference {
    /// Create a new row reference
    ///
    /// No validation is performed: the reference is only meaningful to a
    /// resolver that knows `table_id` and holds at least `row_index + 1` rows.
    #[inline]
    pub const fn new(table_id: u16, row_index: u32) -> Self {
        Self { table_id, row_index }
    }

    /// Get the table identifier
    #[inline]
    pub const fn table_id(&self) -> u16 {
        self.table_id
    }

    /// Get the row index within the table
    #[inline]
    pub const fn row_index(&self) -> u32 {
        self.row_index
    }

    /// Create a vector of row references for a range of rows
    ///
    /// Produces one reference per index in the half-open range
    /// `start..end`, in ascending order. The result is empty when
    /// `start >= end`.
    #[inline]
    pub fn range(table_id: u16, start: u32, end: u32) -> Vec<Self> {
        (start..end)
            .map(|row_index| Self { table_id, row_index })
            .collect()
    }

    /// Create row references from a selection vector
    ///
    /// The output has the same length and order as `indices`; duplicate
    /// indices are preserved.
    #[inline]
    pub fn from_selection(table_id: u16, indices: &[u32]) -> Vec<Self> {
        indices
            .iter()
            .map(|&row_index| Self { table_id, row_index })
            .collect()
    }
}
78
79/// A pair of row references for join results
80///
81/// When joining tables, we track which rows from each side matched
82/// without materializing the combined row.
83#[derive(Debug, Clone, Copy, PartialEq, Eq)]
84pub struct JoinedRowRef {
85    /// Reference to the left (probe) side row
86    pub left: RowReference,
87    /// Reference to the right (build) side row
88    /// None for LEFT OUTER join when there's no match
89    pub right: Option<RowReference>,
90}
91
92impl JoinedRowRef {
93    /// Create a new joined row reference (inner join match)
94    #[inline]
95    pub const fn matched(left: RowReference, right: RowReference) -> Self {
96        Self {
97            left,
98            right: Some(right),
99        }
100    }
101
102    /// Create a new joined row reference (left outer, no match)
103    #[inline]
104    pub const fn left_only(left: RowReference) -> Self {
105        Self { left, right: None }
106    }
107
108    /// Check if this is a matched pair
109    #[inline]
110    pub const fn is_matched(&self) -> bool {
111        self.right.is_some()
112    }
113}
114
115/// A resolver that can materialize row references into actual rows
116///
117/// This trait abstracts over different source data formats (row-based, columnar)
118/// allowing late materialization to work with various storage layouts.
119pub trait RowResolver {
120    /// Get a row by its reference
121    fn resolve(&self, reference: &RowReference) -> Option<&Row>;
122
123    /// Get a specific column value from a row reference
124    fn resolve_column(&self, reference: &RowReference, column_idx: usize) -> Option<&SqlValue>;
125
126    /// Batch resolve multiple row references
127    ///
128    /// Default implementation calls resolve() for each, but implementations
129    /// can override for better performance with columnar storage.
130    fn resolve_batch(&self, references: &[RowReference]) -> Vec<Option<&Row>> {
131        references.iter().map(|r| self.resolve(r)).collect()
132    }
133}
134
135/// Simple row resolver backed by a vector of rows
136///
137/// This is the most common case for row-based table scans.
138pub struct VecRowResolver<'a> {
139    table_id: u16,
140    rows: &'a [Row],
141}
142
143impl<'a> VecRowResolver<'a> {
144    /// Create a new resolver for a specific table
145    pub fn new(table_id: u16, rows: &'a [Row]) -> Self {
146        Self { table_id, rows }
147    }
148}
149
150impl<'a> RowResolver for VecRowResolver<'a> {
151    fn resolve(&self, reference: &RowReference) -> Option<&Row> {
152        if reference.table_id == self.table_id {
153            self.rows.get(reference.row_index as usize)
154        } else {
155            None
156        }
157    }
158
159    fn resolve_column(&self, reference: &RowReference, column_idx: usize) -> Option<&SqlValue> {
160        self.resolve(reference)
161            .and_then(|row| row.get(column_idx))
162    }
163}
164
165/// Multi-table row resolver for joins
166///
167/// Resolves row references across multiple source tables.
168pub struct MultiTableResolver<'a> {
169    tables: Vec<(u16, &'a [Row])>,
170}
171
172impl<'a> MultiTableResolver<'a> {
173    /// Create a new multi-table resolver
174    pub fn new() -> Self {
175        Self { tables: Vec::new() }
176    }
177
178    /// Register a table with the resolver
179    pub fn add_table(&mut self, table_id: u16, rows: &'a [Row]) {
180        self.tables.push((table_id, rows));
181    }
182
183    /// Create from a list of tables
184    pub fn from_tables(tables: Vec<(u16, &'a [Row])>) -> Self {
185        Self { tables }
186    }
187}
188
189impl<'a> Default for MultiTableResolver<'a> {
190    fn default() -> Self {
191        Self::new()
192    }
193}
194
195impl<'a> RowResolver for MultiTableResolver<'a> {
196    fn resolve(&self, reference: &RowReference) -> Option<&Row> {
197        for (table_id, rows) in &self.tables {
198            if *table_id == reference.table_id {
199                return rows.get(reference.row_index as usize);
200            }
201        }
202        None
203    }
204
205    fn resolve_column(&self, reference: &RowReference, column_idx: usize) -> Option<&SqlValue> {
206        self.resolve(reference)
207            .and_then(|row| row.get(column_idx))
208    }
209}
210
211/// Owned row data for late materialization
212///
213/// When rows need to be owned (e.g., for cross-thread operations),
214/// this wrapper allows efficient reference-based access while
215/// maintaining ownership of the underlying data.
216pub struct OwnedRowSource {
217    table_id: u16,
218    rows: Arc<Vec<Row>>,
219}
220
221impl OwnedRowSource {
222    /// Create a new owned row source
223    pub fn new(table_id: u16, rows: Vec<Row>) -> Self {
224        Self {
225            table_id,
226            rows: Arc::new(rows),
227        }
228    }
229
230    /// Get the table ID
231    #[inline]
232    pub fn table_id(&self) -> u16 {
233        self.table_id
234    }
235
236    /// Get the number of rows
237    #[inline]
238    pub fn row_count(&self) -> usize {
239        self.rows.len()
240    }
241
242    /// Get a reference to the rows
243    #[inline]
244    pub fn rows(&self) -> &[Row] {
245        &self.rows
246    }
247
248    /// Create a row reference for an index
249    #[inline]
250    pub fn reference(&self, row_index: u32) -> RowReference {
251        RowReference::new(self.table_id, row_index)
252    }
253
254    /// Resolve a row reference
255    #[inline]
256    pub fn resolve(&self, reference: &RowReference) -> Option<&Row> {
257        if reference.table_id == self.table_id {
258            self.rows.get(reference.row_index as usize)
259        } else {
260            None
261        }
262    }
263
264    /// Clone the Arc (cheap, just bumps reference count)
265    pub fn share(&self) -> Self {
266        Self {
267            table_id: self.table_id,
268            rows: Arc::clone(&self.rows),
269        }
270    }
271}
272
273impl Clone for OwnedRowSource {
274    fn clone(&self) -> Self {
275        self.share()
276    }
277}
278
#[cfg(test)]
mod row_ref_tests {
    use super::*;

    /// Accessors return exactly what the constructor was given.
    #[test]
    fn test_row_reference_creation() {
        let r = RowReference::new(1, 42);
        assert_eq!(r.table_id(), 1);
        assert_eq!(r.row_index(), 42);
    }

    /// `range` covers the half-open interval `start..end`.
    #[test]
    fn test_row_reference_range() {
        let refs = RowReference::range(0, 10, 15);
        assert_eq!(refs.len(), 5);
        assert_eq!(refs[0].row_index(), 10);
        assert_eq!(refs[4].row_index(), 14);
    }

    /// Equal or reversed bounds produce no references.
    #[test]
    fn test_row_reference_range_empty() {
        assert!(RowReference::range(0, 7, 7).is_empty());
        assert!(RowReference::range(0, 9, 3).is_empty());
    }

    /// `from_selection` keeps the order and duplicates of the input indices.
    #[test]
    fn test_row_reference_from_selection() {
        let refs = RowReference::from_selection(2, &[5, 1, 5]);
        assert_eq!(
            refs,
            vec![
                RowReference::new(2, 5),
                RowReference::new(2, 1),
                RowReference::new(2, 5),
            ]
        );
        assert!(RowReference::from_selection(2, &[]).is_empty());
    }

    #[test]
    fn test_vec_resolver() {
        let rows = vec![
            Row::new(vec![SqlValue::Integer(1)]),
            Row::new(vec![SqlValue::Integer(2)]),
            Row::new(vec![SqlValue::Integer(3)]),
        ];

        let resolver = VecRowResolver::new(0, &rows);

        let ref1 = RowReference::new(0, 1);
        let resolved = resolver.resolve(&ref1).unwrap();
        assert_eq!(resolved.get(0), Some(&SqlValue::Integer(2)));

        // Wrong table_id returns None
        let ref_wrong = RowReference::new(1, 1);
        assert!(resolver.resolve(&ref_wrong).is_none());
    }

    /// Row indices past the end and unresolvable references yield `None`.
    #[test]
    fn test_vec_resolver_out_of_range_and_columns() {
        let rows = vec![
            Row::new(vec![SqlValue::Integer(1)]),
            Row::new(vec![SqlValue::Integer(2)]),
        ];

        let resolver = VecRowResolver::new(0, &rows);

        // Index equal to the row count is already out of range
        assert!(resolver.resolve(&RowReference::new(0, 2)).is_none());

        assert_eq!(
            resolver.resolve_column(&RowReference::new(0, 1), 0),
            Some(&SqlValue::Integer(2))
        );
        assert!(resolver.resolve_column(&RowReference::new(0, 2), 0).is_none());
        assert!(resolver.resolve_column(&RowReference::new(9, 0), 0).is_none());
    }

    /// The default `resolve_batch` returns one entry per reference, in order.
    #[test]
    fn test_resolve_batch() {
        let rows = vec![
            Row::new(vec![SqlValue::Integer(1)]),
            Row::new(vec![SqlValue::Integer(2)]),
        ];

        let resolver = VecRowResolver::new(0, &rows);

        let refs = [
            RowReference::new(0, 1),
            RowReference::new(0, 5),
            RowReference::new(1, 0),
        ];
        let batch = resolver.resolve_batch(&refs);

        assert_eq!(batch.len(), 3);
        assert_eq!(
            batch[0].and_then(|row| row.get(0)),
            Some(&SqlValue::Integer(2))
        );
        assert!(batch[1].is_none());
        assert!(batch[2].is_none());

        assert!(resolver.resolve_batch(&[]).is_empty());
    }

    #[test]
    fn test_multi_table_resolver() {
        let table0 = [Row::new(vec![SqlValue::Varchar("A".into())])];
        let table1 = [Row::new(vec![SqlValue::Varchar("B".into())])];

        let resolver = MultiTableResolver::from_tables(vec![
            (0, &table0[..]),
            (1, &table1[..]),
        ]);

        let ref0 = RowReference::new(0, 0);
        let ref1 = RowReference::new(1, 0);

        assert_eq!(
            resolver.resolve_column(&ref0, 0),
            Some(&SqlValue::Varchar("A".into()))
        );
        assert_eq!(
            resolver.resolve_column(&ref1, 0),
            Some(&SqlValue::Varchar("B".into()))
        );
    }

    /// Tables added incrementally are resolvable; unknown ids and
    /// out-of-range indices are not.
    #[test]
    fn test_multi_table_resolver_add_table_and_misses() {
        let table0 = [Row::new(vec![SqlValue::Integer(10)])];
        let table7 = [Row::new(vec![SqlValue::Integer(70)])];

        let mut resolver = MultiTableResolver::default();
        assert!(resolver.resolve(&RowReference::new(0, 0)).is_none());

        resolver.add_table(0, &table0[..]);
        resolver.add_table(7, &table7[..]);

        assert_eq!(
            resolver.resolve_column(&RowReference::new(7, 0), 0),
            Some(&SqlValue::Integer(70))
        );
        assert!(resolver.resolve(&RowReference::new(3, 0)).is_none());
        assert!(resolver.resolve(&RowReference::new(7, 1)).is_none());
    }

    #[test]
    fn test_joined_row_ref() {
        let left = RowReference::new(0, 10);
        let right = RowReference::new(1, 20);

        let matched = JoinedRowRef::matched(left, right);
        assert!(matched.is_matched());
        assert_eq!(matched.left.row_index(), 10);
        assert_eq!(matched.right.unwrap().row_index(), 20);

        let left_only = JoinedRowRef::left_only(left);
        assert!(!left_only.is_matched());
        assert_eq!(left_only.left, left);
        assert_eq!(left_only.right, None);
        assert_ne!(matched, left_only);
    }

    #[test]
    fn test_owned_row_source() {
        let rows = vec![
            Row::new(vec![SqlValue::Integer(100)]),
            Row::new(vec![SqlValue::Integer(200)]),
        ];

        let source = OwnedRowSource::new(0, rows);
        assert_eq!(source.row_count(), 2);

        let r = source.reference(1);
        assert_eq!(source.resolve(&r).unwrap().get(0), Some(&SqlValue::Integer(200)));

        // Test sharing (Arc clone)
        let shared = source.share();
        assert_eq!(shared.row_count(), 2);
    }

    /// Misses return `None`, and clones see the same rows under the same id.
    #[test]
    fn test_owned_row_source_misses_and_clone() {
        let rows = vec![
            Row::new(vec![SqlValue::Integer(100)]),
            Row::new(vec![SqlValue::Integer(200)]),
        ];

        let source = OwnedRowSource::new(4, rows);
        assert_eq!(source.table_id(), 4);
        assert_eq!(source.rows().len(), 2);
        assert_eq!(source.reference(1), RowReference::new(4, 1));

        // Wrong table id and out-of-range index both miss
        assert!(source.resolve(&RowReference::new(0, 0)).is_none());
        assert!(source.resolve(&RowReference::new(4, 2)).is_none());

        let cloned = source.clone();
        assert_eq!(cloned.table_id(), 4);
        assert_eq!(cloned.row_count(), 2);
        assert_eq!(
            cloned.resolve(&cloned.reference(0)).unwrap().get(0),
            Some(&SqlValue::Integer(100))
        );
    }
}