vibesql_executor/select/late_materialization/
row_ref.rs

1//! Row Reference Implementation
2//!
3//! A row reference is a lightweight pointer to a row in source data,
4//! avoiding data copying during intermediate query operations.
5
6use std::sync::Arc;
7
8use vibesql_storage::Row;
9use vibesql_types::SqlValue;
10
/// A lightweight reference to a row in source data
///
/// Instead of cloning entire rows during query processing, `RowReference`
/// stores just enough information to locate the row when materialization
/// is needed.
///
/// # Memory Comparison
///
/// For a row with 10 columns averaging 32 bytes each:
/// - Full Row: 320+ bytes (plus heap allocations for strings)
/// - RowReference: 8 bytes on common 32/64-bit targets (`u16` table_id +
///   `u32` row_index, padded to the `u32` alignment)
///
/// This is roughly **40x** more memory efficient for intermediate results.
///
/// # Example
///
/// ```text
/// // Create references instead of copying rows
/// let refs: Vec<RowReference> = qualifying_indices
///     .iter()
///     .map(|&idx| RowReference::new(0, idx as u32))
///     .collect();
///
/// // Only materialize at output boundary
/// // (table_id is a u16, so widen it before using it as an index)
/// let output_rows: Vec<Row> = refs
///     .iter()
///     .map(|r| source_tables[usize::from(r.table_id())].row(r.row_index()))
///     .collect();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RowReference {
    /// Identifier for the source table (index into a table registry)
    table_id: u16,
    /// Row index within the source table
    row_index: u32,
}

impl RowReference {
    /// Create a new row reference
    ///
    /// No validation is performed; whether the pair actually points at an
    /// existing row is only known once it is resolved against source data.
    #[inline]
    pub const fn new(table_id: u16, row_index: u32) -> Self {
        Self { table_id, row_index }
    }

    /// Get the table identifier
    #[inline]
    pub const fn table_id(&self) -> u16 {
        self.table_id
    }

    /// Get the row index within the table
    #[inline]
    pub const fn row_index(&self) -> u32 {
        self.row_index
    }

    /// Create a vector of row references for a range of rows
    ///
    /// The range is half-open: `start` is included, `end` is excluded.
    /// An empty vector is returned when `start >= end`.
    #[inline]
    pub fn range(table_id: u16, start: u32, end: u32) -> Vec<Self> {
        (start..end).map(|idx| Self::new(table_id, idx)).collect()
    }

    /// Create row references from a selection vector
    ///
    /// One reference is produced per entry of `indices`, in the same order;
    /// duplicates in `indices` are preserved.
    #[inline]
    pub fn from_selection(table_id: u16, indices: &[u32]) -> Vec<Self> {
        indices.iter().map(|&idx| Self::new(table_id, idx)).collect()
    }
}
79
80/// A pair of row references for join results
81///
82/// When joining tables, we track which rows from each side matched
83/// without materializing the combined row.
84#[derive(Debug, Clone, Copy, PartialEq, Eq)]
85pub struct JoinedRowRef {
86    /// Reference to the left (probe) side row
87    pub left: RowReference,
88    /// Reference to the right (build) side row
89    /// None for LEFT OUTER join when there's no match
90    pub right: Option<RowReference>,
91}
92
93impl JoinedRowRef {
94    /// Create a new joined row reference (inner join match)
95    #[inline]
96    pub const fn matched(left: RowReference, right: RowReference) -> Self {
97        Self { left, right: Some(right) }
98    }
99
100    /// Create a new joined row reference (left outer, no match)
101    #[inline]
102    pub const fn left_only(left: RowReference) -> Self {
103        Self { left, right: None }
104    }
105
106    /// Check if this is a matched pair
107    #[inline]
108    pub const fn is_matched(&self) -> bool {
109        self.right.is_some()
110    }
111}
112
113/// A resolver that can materialize row references into actual rows
114///
115/// This trait abstracts over different source data formats (row-based, columnar)
116/// allowing late materialization to work with various storage layouts.
117pub trait RowResolver {
118    /// Get a row by its reference
119    fn resolve(&self, reference: &RowReference) -> Option<&Row>;
120
121    /// Get a specific column value from a row reference
122    fn resolve_column(&self, reference: &RowReference, column_idx: usize) -> Option<&SqlValue>;
123
124    /// Batch resolve multiple row references
125    ///
126    /// Default implementation calls resolve() for each, but implementations
127    /// can override for better performance with columnar storage.
128    fn resolve_batch(&self, references: &[RowReference]) -> Vec<Option<&Row>> {
129        references.iter().map(|r| self.resolve(r)).collect()
130    }
131}
132
133/// Simple row resolver backed by a vector of rows
134///
135/// This is the most common case for row-based table scans.
136pub struct VecRowResolver<'a> {
137    table_id: u16,
138    rows: &'a [Row],
139}
140
141impl<'a> VecRowResolver<'a> {
142    /// Create a new resolver for a specific table
143    pub fn new(table_id: u16, rows: &'a [Row]) -> Self {
144        Self { table_id, rows }
145    }
146}
147
148impl<'a> RowResolver for VecRowResolver<'a> {
149    fn resolve(&self, reference: &RowReference) -> Option<&Row> {
150        if reference.table_id == self.table_id {
151            self.rows.get(reference.row_index as usize)
152        } else {
153            None
154        }
155    }
156
157    fn resolve_column(&self, reference: &RowReference, column_idx: usize) -> Option<&SqlValue> {
158        self.resolve(reference).and_then(|row| row.get(column_idx))
159    }
160}
161
162/// Multi-table row resolver for joins
163///
164/// Resolves row references across multiple source tables.
165pub struct MultiTableResolver<'a> {
166    tables: Vec<(u16, &'a [Row])>,
167}
168
169impl<'a> MultiTableResolver<'a> {
170    /// Create a new multi-table resolver
171    pub fn new() -> Self {
172        Self { tables: Vec::new() }
173    }
174
175    /// Register a table with the resolver
176    pub fn add_table(&mut self, table_id: u16, rows: &'a [Row]) {
177        self.tables.push((table_id, rows));
178    }
179
180    /// Create from a list of tables
181    pub fn from_tables(tables: Vec<(u16, &'a [Row])>) -> Self {
182        Self { tables }
183    }
184}
185
186impl<'a> Default for MultiTableResolver<'a> {
187    fn default() -> Self {
188        Self::new()
189    }
190}
191
192impl<'a> RowResolver for MultiTableResolver<'a> {
193    fn resolve(&self, reference: &RowReference) -> Option<&Row> {
194        for (table_id, rows) in &self.tables {
195            if *table_id == reference.table_id {
196                return rows.get(reference.row_index as usize);
197            }
198        }
199        None
200    }
201
202    fn resolve_column(&self, reference: &RowReference, column_idx: usize) -> Option<&SqlValue> {
203        self.resolve(reference).and_then(|row| row.get(column_idx))
204    }
205}
206
207/// Owned row data for late materialization
208///
209/// When rows need to be owned (e.g., for cross-thread operations),
210/// this wrapper allows efficient reference-based access while
211/// maintaining ownership of the underlying data.
212pub struct OwnedRowSource {
213    table_id: u16,
214    rows: Arc<Vec<Row>>,
215}
216
217impl OwnedRowSource {
218    /// Create a new owned row source
219    pub fn new(table_id: u16, rows: Vec<Row>) -> Self {
220        Self { table_id, rows: Arc::new(rows) }
221    }
222
223    /// Get the table ID
224    #[inline]
225    pub fn table_id(&self) -> u16 {
226        self.table_id
227    }
228
229    /// Get the number of rows
230    #[inline]
231    pub fn row_count(&self) -> usize {
232        self.rows.len()
233    }
234
235    /// Get a reference to the rows
236    #[inline]
237    pub fn rows(&self) -> &[Row] {
238        &self.rows
239    }
240
241    /// Create a row reference for an index
242    #[inline]
243    pub fn reference(&self, row_index: u32) -> RowReference {
244        RowReference::new(self.table_id, row_index)
245    }
246
247    /// Resolve a row reference
248    #[inline]
249    pub fn resolve(&self, reference: &RowReference) -> Option<&Row> {
250        if reference.table_id == self.table_id {
251            self.rows.get(reference.row_index as usize)
252        } else {
253            None
254        }
255    }
256
257    /// Clone the Arc (cheap, just bumps reference count)
258    pub fn share(&self) -> Self {
259        Self { table_id: self.table_id, rows: Arc::clone(&self.rows) }
260    }
261}
262
263impl Clone for OwnedRowSource {
264    fn clone(&self) -> Self {
265        self.share()
266    }
267}
268
#[cfg(test)]
mod row_ref_tests {
    use super::*;

    /// Accessors return exactly what the constructor was given.
    #[test]
    fn test_row_reference_creation() {
        let reference = RowReference::new(1, 42);
        assert_eq!(reference.table_id(), 1);
        assert_eq!(reference.row_index(), 42);
    }

    /// `range` is half-open: 10..15 yields indices 10 through 14.
    #[test]
    fn test_row_reference_range() {
        let generated = RowReference::range(0, 10, 15);
        assert_eq!(generated.len(), 5);
        assert_eq!(generated[0].row_index(), 10);
        assert_eq!(generated[4].row_index(), 14);
    }

    /// A single-table resolver finds rows of its own table only.
    #[test]
    fn test_vec_resolver() {
        let rows: Vec<Row> = vec![
            Row::new(vec![SqlValue::Integer(1)]),
            Row::new(vec![SqlValue::Integer(2)]),
            Row::new(vec![SqlValue::Integer(3)]),
        ];
        let resolver = VecRowResolver::new(0, &rows);

        let second = RowReference::new(0, 1);
        let row = resolver.resolve(&second).unwrap();
        assert_eq!(row.get(0), Some(&SqlValue::Integer(2)));

        // A reference naming another table must not resolve
        let foreign = RowReference::new(1, 1);
        assert!(resolver.resolve(&foreign).is_none());
    }

    /// Each reference is resolved against the table whose id it carries.
    #[test]
    fn test_multi_table_resolver() {
        let table0 = [Row::new(vec![SqlValue::Varchar(arcstr::ArcStr::from("A"))])];
        let table1 = [Row::new(vec![SqlValue::Varchar(arcstr::ArcStr::from("B"))])];

        let registered = vec![(0, &table0[..]), (1, &table1[..])];
        let resolver = MultiTableResolver::from_tables(registered);

        let into_first = RowReference::new(0, 0);
        let into_second = RowReference::new(1, 0);

        let expected_a = SqlValue::Varchar(arcstr::ArcStr::from("A"));
        let expected_b = SqlValue::Varchar(arcstr::ArcStr::from("B"));

        assert_eq!(resolver.resolve_column(&into_first, 0), Some(&expected_a));
        assert_eq!(resolver.resolve_column(&into_second, 0), Some(&expected_b));
    }

    /// Matched pairs expose both sides; left-only pairs report no match.
    #[test]
    fn test_joined_row_ref() {
        let probe = RowReference::new(0, 10);
        let build = RowReference::new(1, 20);

        let pair = JoinedRowRef::matched(probe, build);
        assert!(pair.is_matched());
        assert_eq!(pair.left.row_index(), 10);
        assert_eq!(pair.right.unwrap().row_index(), 20);

        let unmatched = JoinedRowRef::left_only(probe);
        assert!(!unmatched.is_matched());
    }

    /// An owned source resolves its own references and can be shared.
    #[test]
    fn test_owned_row_source() {
        let rows = vec![
            Row::new(vec![SqlValue::Integer(100)]),
            Row::new(vec![SqlValue::Integer(200)]),
        ];

        let source = OwnedRowSource::new(0, rows);
        assert_eq!(source.row_count(), 2);

        let last = source.reference(1);
        let resolved = source.resolve(&last).unwrap();
        assert_eq!(resolved.get(0), Some(&SqlValue::Integer(200)));

        // Sharing hands out another handle onto the same rows (Arc clone)
        let other_handle = source.share();
        assert_eq!(other_handle.row_count(), 2);
    }
}
356}