vibesql_storage/columnar/
table.rs

//! Columnar table storage.
//!
//! This module provides the `ColumnarTable` struct for storing data in a
//! column-oriented format for analytical query performance.
5
6use std::collections::HashMap;
7
8use super::{builder::ColumnBuilder, data::ColumnData, types::ColumnTypeClass};
9use crate::Row;
10
11/// Columnar table storage
12///
13/// Stores data in column-oriented format for analytical query performance.
14/// Each column is stored as a typed vector with a separate NULL bitmap.
15///
16/// # Example
17///
18/// ```text
19/// use vibesql_storage::{Row, ColumnarTable};
20/// use vibesql_types::SqlValue;
21///
22/// // Create rows
23/// let rows = vec![
24///     Row::new(vec![SqlValue::Integer(1), SqlValue::Double(3.14)]),
25///     Row::new(vec![SqlValue::Integer(2), SqlValue::Double(2.71)]),
26/// ];
27///
28/// // Convert to columnar
29/// let column_names = vec!["id".to_string(), "value".to_string()];
30/// let columnar = ColumnarTable::from_rows(&rows, &column_names).unwrap();
31///
32/// // Access column data
33/// assert_eq!(columnar.row_count(), 2);
34/// assert_eq!(columnar.column_count(), 2);
35/// ```
36#[derive(Debug, Clone)]
37pub struct ColumnarTable {
38    /// Column data indexed by column name
39    columns: HashMap<String, ColumnData>,
40    /// Column names in order (for iteration)
41    column_names: Vec<String>,
42    /// Number of rows
43    row_count: usize,
44}
45
46impl ColumnarTable {
47    /// Create a new empty columnar table
48    pub fn new() -> Self {
49        ColumnarTable { columns: HashMap::new(), column_names: Vec::new(), row_count: 0 }
50    }
51
52    /// Convert row-oriented data to columnar format (optimized single-pass)
53    ///
54    /// # Arguments
55    /// * `rows` - Vector of rows to convert
56    /// * `column_names` - Names of columns in order
57    ///
58    /// # Returns
59    /// * `Ok(ColumnarTable)` on success
60    /// * `Err(String)` if column count mismatch or incompatible types
61    ///
62    /// # Performance
63    /// O(n * m) single pass through all data
64    pub fn from_rows(rows: &[Row], column_names: &[String]) -> Result<Self, String> {
65        if rows.is_empty() {
66            return Ok(ColumnarTable {
67                columns: HashMap::new(),
68                column_names: column_names.to_vec(),
69                row_count: 0,
70            });
71        }
72
73        let row_count = rows.len();
74        let col_count = column_names.len();
75
76        // Validate first row column count
77        if rows[0].len() != col_count {
78            return Err(format!("Row 0 has {} columns, expected {}", rows[0].len(), col_count));
79        }
80
81        // Infer column types from first non-null value in each column
82        let col_types: Vec<_> = (0..col_count)
83            .map(|col_idx| {
84                rows.iter()
85                    .filter_map(|row| row.get(col_idx))
86                    .find(|v| !v.is_null())
87                    .map(ColumnTypeClass::from_sql_value)
88                    .unwrap_or(ColumnTypeClass::Null)
89            })
90            .collect();
91
92        // Pre-allocate column storage based on inferred types
93        let mut column_builders: Vec<ColumnBuilder> =
94            col_types.iter().map(|t| ColumnBuilder::new(*t, row_count)).collect();
95
96        // Single pass through rows - distribute values to columns
97        for (row_idx, row) in rows.iter().enumerate() {
98            if row.len() != col_count {
99                return Err(format!(
100                    "Row {} has {} columns, expected {}",
101                    row_idx,
102                    row.len(),
103                    col_count
104                ));
105            }
106
107            for (col_idx, value) in row.values.iter().enumerate() {
108                column_builders[col_idx].push(value)?;
109            }
110        }
111
112        // Build final columns HashMap - consume builders
113        let mut columns = HashMap::with_capacity(col_count);
114        for (col_name, builder) in column_names.iter().zip(column_builders.into_iter()) {
115            columns.insert(col_name.clone(), builder.build());
116        }
117
118        Ok(ColumnarTable { columns, column_names: column_names.to_vec(), row_count })
119    }
120
121    /// Create a ColumnarTable from a slice of row references
122    ///
123    /// This is useful when you have filtered rows (e.g., skipping deleted rows)
124    /// and want to avoid cloning the entire row just to pass to from_rows.
125    ///
126    /// # Arguments
127    /// * `rows` - Slice of row references to convert
128    /// * `column_names` - Column names for the table schema
129    ///
130    /// # Returns
131    /// * `Ok(ColumnarTable)` on success
132    /// * `Err(String)` if column count mismatch or incompatible types
133    pub fn from_row_refs(rows: &[&Row], column_names: &[String]) -> Result<Self, String> {
134        if rows.is_empty() {
135            return Ok(ColumnarTable {
136                columns: HashMap::new(),
137                column_names: column_names.to_vec(),
138                row_count: 0,
139            });
140        }
141
142        let row_count = rows.len();
143        let col_count = column_names.len();
144
145        // Validate first row column count
146        if rows[0].len() != col_count {
147            return Err(format!("Row 0 has {} columns, expected {}", rows[0].len(), col_count));
148        }
149
150        // Infer column types from first non-null value in each column
151        let col_types: Vec<_> = (0..col_count)
152            .map(|col_idx| {
153                rows.iter()
154                    .filter_map(|row| row.get(col_idx))
155                    .find(|v| !v.is_null())
156                    .map(ColumnTypeClass::from_sql_value)
157                    .unwrap_or(ColumnTypeClass::Null)
158            })
159            .collect();
160
161        // Pre-allocate column storage based on inferred types
162        let mut column_builders: Vec<ColumnBuilder> =
163            col_types.iter().map(|t| ColumnBuilder::new(*t, row_count)).collect();
164
165        // Single pass through rows - distribute values to columns
166        for (row_idx, row) in rows.iter().enumerate() {
167            if row.len() != col_count {
168                return Err(format!(
169                    "Row {} has {} columns, expected {}",
170                    row_idx,
171                    row.len(),
172                    col_count
173                ));
174            }
175
176            for (col_idx, value) in row.values.iter().enumerate() {
177                column_builders[col_idx].push(value)?;
178            }
179        }
180
181        // Build final columns HashMap - consume builders
182        let mut columns = HashMap::with_capacity(col_count);
183        for (col_name, builder) in column_names.iter().zip(column_builders.into_iter()) {
184            columns.insert(col_name.clone(), builder.build());
185        }
186
187        Ok(ColumnarTable { columns, column_names: column_names.to_vec(), row_count })
188    }
189
190    /// Convert columnar data back to row-oriented format
191    ///
192    /// # Returns
193    /// Vector of rows reconstructed from columnar data
194    ///
195    /// # Performance
196    /// O(n * m) where n = rows, m = columns
197    pub fn to_rows(&self) -> Vec<Row> {
198        let mut rows = Vec::with_capacity(self.row_count);
199
200        for row_idx in 0..self.row_count {
201            let mut values = Vec::with_capacity(self.column_names.len());
202
203            for col_name in &self.column_names {
204                if let Some(column) = self.columns.get(col_name) {
205                    values.push(column.get(row_idx));
206                } else {
207                    values.push(vibesql_types::SqlValue::Null);
208                }
209            }
210
211            rows.push(Row::new(values));
212        }
213
214        rows
215    }
216
217    /// Get the number of rows
218    pub fn row_count(&self) -> usize {
219        self.row_count
220    }
221
222    /// Get the number of columns
223    pub fn column_count(&self) -> usize {
224        self.column_names.len()
225    }
226
227    /// Get column data by name
228    pub fn get_column(&self, name: &str) -> Option<&ColumnData> {
229        self.columns.get(name)
230    }
231
232    /// Get all column names
233    pub fn column_names(&self) -> &[String] {
234        &self.column_names
235    }
236
237    /// Estimate the memory size of this columnar table in bytes
238    ///
239    /// This is used for memory budgeting in the columnar cache.
240    /// The estimate includes:
241    /// - All column data (via ColumnData::size_in_bytes)
242    /// - HashMap overhead for columns
243    /// - Vec overhead for column_names
244    /// - String storage for column names
245    pub fn size_in_bytes(&self) -> usize {
246        const VEC_OVERHEAD: usize = 3 * std::mem::size_of::<usize>();
247        // HashMap has ~48 bytes base overhead plus per-bucket overhead
248        const HASHMAP_BASE_OVERHEAD: usize = 48;
249        const HASHMAP_ENTRY_OVERHEAD: usize = 8; // approximate per-entry overhead
250
251        let columns_size: usize = self.columns.values().map(|c| c.size_in_bytes()).sum();
252
253        let column_names_size: usize =
254            self.column_names.iter().map(|s| std::mem::size_of::<String>() + s.capacity()).sum();
255
256        // HashMap keys (column names stored again)
257        let hashmap_keys_size: usize =
258            self.columns.keys().map(|s| std::mem::size_of::<String>() + s.capacity()).sum();
259
260        std::mem::size_of::<Self>() // Base struct size
261            + columns_size
262            + HASHMAP_BASE_OVERHEAD
263            + self.columns.len() * HASHMAP_ENTRY_OVERHEAD
264            + hashmap_keys_size
265            + VEC_OVERHEAD
266            + column_names_size
267    }
268}
269
270impl Default for ColumnarTable {
271    fn default() -> Self {
272        Self::new()
273    }
274}