vibesql_storage/columnar/table.rs
1//! Columnar table storage.
2//!
3//! This module provides the `ColumnarTable` struct for storing data in
4//! column-oriented format for analytical query performance.
5
6use std::collections::HashMap;
7
8use super::{builder::ColumnBuilder, data::ColumnData, types::ColumnTypeClass};
9use crate::Row;
10
11/// Columnar table storage
12///
13/// Stores data in column-oriented format for analytical query performance.
14/// Each column is stored as a typed vector with a separate NULL bitmap.
15///
16/// # Example
17///
18/// ```text
19/// use vibesql_storage::{Row, ColumnarTable};
20/// use vibesql_types::SqlValue;
21///
22/// // Create rows
23/// let rows = vec![
24/// Row::new(vec![SqlValue::Integer(1), SqlValue::Double(3.14)]),
25/// Row::new(vec![SqlValue::Integer(2), SqlValue::Double(2.71)]),
26/// ];
27///
28/// // Convert to columnar
29/// let column_names = vec!["id".to_string(), "value".to_string()];
30/// let columnar = ColumnarTable::from_rows(&rows, &column_names).unwrap();
31///
32/// // Access column data
33/// assert_eq!(columnar.row_count(), 2);
34/// assert_eq!(columnar.column_count(), 2);
35/// ```
36#[derive(Debug, Clone)]
37pub struct ColumnarTable {
38 /// Column data indexed by column name
39 columns: HashMap<String, ColumnData>,
40 /// Column names in order (for iteration)
41 column_names: Vec<String>,
42 /// Number of rows
43 row_count: usize,
44}
45
46impl ColumnarTable {
47 /// Create a new empty columnar table
48 pub fn new() -> Self {
49 ColumnarTable { columns: HashMap::new(), column_names: Vec::new(), row_count: 0 }
50 }
51
52 /// Convert row-oriented data to columnar format (optimized single-pass)
53 ///
54 /// # Arguments
55 /// * `rows` - Vector of rows to convert
56 /// * `column_names` - Names of columns in order
57 ///
58 /// # Returns
59 /// * `Ok(ColumnarTable)` on success
60 /// * `Err(String)` if column count mismatch or incompatible types
61 ///
62 /// # Performance
63 /// O(n * m) single pass through all data
64 pub fn from_rows(rows: &[Row], column_names: &[String]) -> Result<Self, String> {
65 if rows.is_empty() {
66 return Ok(ColumnarTable {
67 columns: HashMap::new(),
68 column_names: column_names.to_vec(),
69 row_count: 0,
70 });
71 }
72
73 let row_count = rows.len();
74 let col_count = column_names.len();
75
76 // Validate first row column count
77 if rows[0].len() != col_count {
78 return Err(format!("Row 0 has {} columns, expected {}", rows[0].len(), col_count));
79 }
80
81 // Infer column types from first non-null value in each column
82 let col_types: Vec<_> = (0..col_count)
83 .map(|col_idx| {
84 rows.iter()
85 .filter_map(|row| row.get(col_idx))
86 .find(|v| !v.is_null())
87 .map(ColumnTypeClass::from_sql_value)
88 .unwrap_or(ColumnTypeClass::Null)
89 })
90 .collect();
91
92 // Pre-allocate column storage based on inferred types
93 let mut column_builders: Vec<ColumnBuilder> =
94 col_types.iter().map(|t| ColumnBuilder::new(*t, row_count)).collect();
95
96 // Single pass through rows - distribute values to columns
97 for (row_idx, row) in rows.iter().enumerate() {
98 if row.len() != col_count {
99 return Err(format!(
100 "Row {} has {} columns, expected {}",
101 row_idx,
102 row.len(),
103 col_count
104 ));
105 }
106
107 for (col_idx, value) in row.values.iter().enumerate() {
108 column_builders[col_idx].push(value)?;
109 }
110 }
111
112 // Build final columns HashMap - consume builders
113 let mut columns = HashMap::with_capacity(col_count);
114 for (col_name, builder) in column_names.iter().zip(column_builders.into_iter()) {
115 columns.insert(col_name.clone(), builder.build());
116 }
117
118 Ok(ColumnarTable { columns, column_names: column_names.to_vec(), row_count })
119 }
120
121 /// Create a ColumnarTable from a slice of row references
122 ///
123 /// This is useful when you have filtered rows (e.g., skipping deleted rows)
124 /// and want to avoid cloning the entire row just to pass to from_rows.
125 ///
126 /// # Arguments
127 /// * `rows` - Slice of row references to convert
128 /// * `column_names` - Column names for the table schema
129 ///
130 /// # Returns
131 /// * `Ok(ColumnarTable)` on success
132 /// * `Err(String)` if column count mismatch or incompatible types
133 pub fn from_row_refs(rows: &[&Row], column_names: &[String]) -> Result<Self, String> {
134 if rows.is_empty() {
135 return Ok(ColumnarTable {
136 columns: HashMap::new(),
137 column_names: column_names.to_vec(),
138 row_count: 0,
139 });
140 }
141
142 let row_count = rows.len();
143 let col_count = column_names.len();
144
145 // Validate first row column count
146 if rows[0].len() != col_count {
147 return Err(format!("Row 0 has {} columns, expected {}", rows[0].len(), col_count));
148 }
149
150 // Infer column types from first non-null value in each column
151 let col_types: Vec<_> = (0..col_count)
152 .map(|col_idx| {
153 rows.iter()
154 .filter_map(|row| row.get(col_idx))
155 .find(|v| !v.is_null())
156 .map(ColumnTypeClass::from_sql_value)
157 .unwrap_or(ColumnTypeClass::Null)
158 })
159 .collect();
160
161 // Pre-allocate column storage based on inferred types
162 let mut column_builders: Vec<ColumnBuilder> =
163 col_types.iter().map(|t| ColumnBuilder::new(*t, row_count)).collect();
164
165 // Single pass through rows - distribute values to columns
166 for (row_idx, row) in rows.iter().enumerate() {
167 if row.len() != col_count {
168 return Err(format!(
169 "Row {} has {} columns, expected {}",
170 row_idx,
171 row.len(),
172 col_count
173 ));
174 }
175
176 for (col_idx, value) in row.values.iter().enumerate() {
177 column_builders[col_idx].push(value)?;
178 }
179 }
180
181 // Build final columns HashMap - consume builders
182 let mut columns = HashMap::with_capacity(col_count);
183 for (col_name, builder) in column_names.iter().zip(column_builders.into_iter()) {
184 columns.insert(col_name.clone(), builder.build());
185 }
186
187 Ok(ColumnarTable { columns, column_names: column_names.to_vec(), row_count })
188 }
189
190 /// Convert columnar data back to row-oriented format
191 ///
192 /// # Returns
193 /// Vector of rows reconstructed from columnar data
194 ///
195 /// # Performance
196 /// O(n * m) where n = rows, m = columns
197 pub fn to_rows(&self) -> Vec<Row> {
198 let mut rows = Vec::with_capacity(self.row_count);
199
200 for row_idx in 0..self.row_count {
201 let mut values = Vec::with_capacity(self.column_names.len());
202
203 for col_name in &self.column_names {
204 if let Some(column) = self.columns.get(col_name) {
205 values.push(column.get(row_idx));
206 } else {
207 values.push(vibesql_types::SqlValue::Null);
208 }
209 }
210
211 rows.push(Row::new(values));
212 }
213
214 rows
215 }
216
217 /// Get the number of rows
218 pub fn row_count(&self) -> usize {
219 self.row_count
220 }
221
222 /// Get the number of columns
223 pub fn column_count(&self) -> usize {
224 self.column_names.len()
225 }
226
227 /// Get column data by name
228 pub fn get_column(&self, name: &str) -> Option<&ColumnData> {
229 self.columns.get(name)
230 }
231
232 /// Get all column names
233 pub fn column_names(&self) -> &[String] {
234 &self.column_names
235 }
236
237 /// Estimate the memory size of this columnar table in bytes
238 ///
239 /// This is used for memory budgeting in the columnar cache.
240 /// The estimate includes:
241 /// - All column data (via ColumnData::size_in_bytes)
242 /// - HashMap overhead for columns
243 /// - Vec overhead for column_names
244 /// - String storage for column names
245 pub fn size_in_bytes(&self) -> usize {
246 const VEC_OVERHEAD: usize = 3 * std::mem::size_of::<usize>();
247 // HashMap has ~48 bytes base overhead plus per-bucket overhead
248 const HASHMAP_BASE_OVERHEAD: usize = 48;
249 const HASHMAP_ENTRY_OVERHEAD: usize = 8; // approximate per-entry overhead
250
251 let columns_size: usize = self.columns.values().map(|c| c.size_in_bytes()).sum();
252
253 let column_names_size: usize =
254 self.column_names.iter().map(|s| std::mem::size_of::<String>() + s.capacity()).sum();
255
256 // HashMap keys (column names stored again)
257 let hashmap_keys_size: usize =
258 self.columns.keys().map(|s| std::mem::size_of::<String>() + s.capacity()).sum();
259
260 std::mem::size_of::<Self>() // Base struct size
261 + columns_size
262 + HASHMAP_BASE_OVERHEAD
263 + self.columns.len() * HASHMAP_ENTRY_OVERHEAD
264 + hashmap_keys_size
265 + VEC_OVERHEAD
266 + column_names_size
267 }
268}
269
270impl Default for ColumnarTable {
271 fn default() -> Self {
272 Self::new()
273 }
274}