vibesql_storage/columnar/
data.rs1use std::sync::Arc;
14
15use vibesql_types::{Date, Interval, SqlValue, Time, Timestamp};
16
17#[derive(Debug, Clone)]
27pub enum ColumnData {
28 Int64 { values: Arc<Vec<i64>>, nulls: Arc<Vec<bool>> },
30 Float64 { values: Arc<Vec<f64>>, nulls: Arc<Vec<bool>> },
32 String { values: Arc<Vec<Arc<str>>>, nulls: Arc<Vec<bool>> },
34 Bool { values: Arc<Vec<bool>>, nulls: Arc<Vec<bool>> },
36 Date { values: Arc<Vec<Date>>, nulls: Arc<Vec<bool>> },
38 Time { values: Arc<Vec<Time>>, nulls: Arc<Vec<bool>> },
40 Timestamp { values: Arc<Vec<Timestamp>>, nulls: Arc<Vec<bool>> },
42 Interval { values: Arc<Vec<Interval>>, nulls: Arc<Vec<bool>> },
44 Vector { values: Arc<Vec<Vec<f32>>>, nulls: Arc<Vec<bool>> },
46 Blob { values: Arc<Vec<Vec<u8>>>, nulls: Arc<Vec<bool>> },
48}
49
50#[allow(clippy::type_complexity)]
51impl ColumnData {
52 pub fn len(&self) -> usize {
54 match self {
55 ColumnData::Int64 { nulls, .. } => nulls.len(),
56 ColumnData::Float64 { nulls, .. } => nulls.len(),
57 ColumnData::String { nulls, .. } => nulls.len(),
58 ColumnData::Bool { nulls, .. } => nulls.len(),
59 ColumnData::Date { nulls, .. } => nulls.len(),
60 ColumnData::Time { nulls, .. } => nulls.len(),
61 ColumnData::Timestamp { nulls, .. } => nulls.len(),
62 ColumnData::Interval { nulls, .. } => nulls.len(),
63 ColumnData::Vector { nulls, .. } => nulls.len(),
64 ColumnData::Blob { nulls, .. } => nulls.len(),
65 }
66 }
67
68 pub fn is_empty(&self) -> bool {
70 self.len() == 0
71 }
72
73 pub fn size_in_bytes(&self) -> usize {
81 const VEC_OVERHEAD: usize = 3 * std::mem::size_of::<usize>(); match self {
84 ColumnData::Int64 { values, nulls } => {
85 VEC_OVERHEAD * 2
86 + values.capacity() * std::mem::size_of::<i64>()
87 + nulls.capacity() * std::mem::size_of::<bool>()
88 }
89 ColumnData::Float64 { values, nulls } => {
90 VEC_OVERHEAD * 2
91 + values.capacity() * std::mem::size_of::<f64>()
92 + nulls.capacity() * std::mem::size_of::<bool>()
93 }
94 ColumnData::String { values, nulls } => {
95 let arc_overhead = std::mem::size_of::<Arc<str>>(); let string_data: usize = values.iter().map(|s| s.len()).sum();
99 VEC_OVERHEAD * 2
100 + values.capacity() * arc_overhead
101 + string_data
102 + nulls.capacity() * std::mem::size_of::<bool>()
103 }
104 ColumnData::Bool { values, nulls } => {
105 VEC_OVERHEAD * 2
106 + values.capacity() * std::mem::size_of::<bool>()
107 + nulls.capacity() * std::mem::size_of::<bool>()
108 }
109 ColumnData::Date { values, nulls } => {
110 VEC_OVERHEAD * 2
111 + values.capacity() * std::mem::size_of::<Date>()
112 + nulls.capacity() * std::mem::size_of::<bool>()
113 }
114 ColumnData::Time { values, nulls } => {
115 VEC_OVERHEAD * 2
116 + values.capacity() * std::mem::size_of::<Time>()
117 + nulls.capacity() * std::mem::size_of::<bool>()
118 }
119 ColumnData::Timestamp { values, nulls } => {
120 VEC_OVERHEAD * 2
121 + values.capacity() * std::mem::size_of::<Timestamp>()
122 + nulls.capacity() * std::mem::size_of::<bool>()
123 }
124 ColumnData::Interval { values, nulls } => {
125 let interval_overhead = std::mem::size_of::<Interval>();
127 let string_data: usize = values.iter().map(|i| i.value.capacity()).sum();
128 VEC_OVERHEAD * 2
129 + values.capacity() * interval_overhead
130 + string_data
131 + nulls.capacity() * std::mem::size_of::<bool>()
132 }
133 ColumnData::Vector { values, nulls } => {
134 let vec_overhead = std::mem::size_of::<Vec<f32>>();
136 let vector_data: usize =
137 values.iter().map(|v| v.capacity() * std::mem::size_of::<f32>()).sum();
138 VEC_OVERHEAD * 2
139 + values.capacity() * vec_overhead
140 + vector_data
141 + nulls.capacity() * std::mem::size_of::<bool>()
142 }
143 ColumnData::Blob { values, nulls } => {
144 let vec_overhead = std::mem::size_of::<Vec<u8>>();
146 let blob_data: usize = values.iter().map(|v| v.capacity()).sum();
147 VEC_OVERHEAD * 2
148 + values.capacity() * vec_overhead
149 + blob_data
150 + nulls.capacity() * std::mem::size_of::<bool>()
151 }
152 }
153 }
154
155 pub fn is_null(&self, index: usize) -> bool {
157 match self {
158 ColumnData::Int64 { nulls, .. } => nulls[index],
159 ColumnData::Float64 { nulls, .. } => nulls[index],
160 ColumnData::String { nulls, .. } => nulls[index],
161 ColumnData::Bool { nulls, .. } => nulls[index],
162 ColumnData::Date { nulls, .. } => nulls[index],
163 ColumnData::Time { nulls, .. } => nulls[index],
164 ColumnData::Timestamp { nulls, .. } => nulls[index],
165 ColumnData::Interval { nulls, .. } => nulls[index],
166 ColumnData::Vector { nulls, .. } => nulls[index],
167 ColumnData::Blob { nulls, .. } => nulls[index],
168 }
169 }
170
171 pub fn get(&self, index: usize) -> SqlValue {
173 if self.is_null(index) {
174 return SqlValue::Null;
175 }
176
177 match self {
178 ColumnData::Int64 { values, .. } => SqlValue::Integer(values[index]),
179 ColumnData::Float64 { values, .. } => SqlValue::Double(values[index]),
180 ColumnData::String { values, .. } => {
181 SqlValue::Varchar(arcstr::ArcStr::from(values[index].as_ref()))
182 }
183 ColumnData::Bool { values, .. } => SqlValue::Boolean(values[index]),
184 ColumnData::Date { values, .. } => SqlValue::Date(values[index]),
185 ColumnData::Time { values, .. } => SqlValue::Time(values[index]),
186 ColumnData::Timestamp { values, .. } => SqlValue::Timestamp(values[index]),
187 ColumnData::Interval { values, .. } => SqlValue::Interval(values[index].clone()),
188 ColumnData::Vector { values, .. } => SqlValue::Vector(values[index].clone()),
189 ColumnData::Blob { values, .. } => SqlValue::Blob(values[index].clone()),
190 }
191 }
192
193 pub fn as_i64_arc(&self) -> Option<(&Arc<Vec<i64>>, &Arc<Vec<bool>>)> {
195 match self {
196 ColumnData::Int64 { values, nulls } => Some((values, nulls)),
197 _ => None,
198 }
199 }
200
201 pub fn as_f64_arc(&self) -> Option<(&Arc<Vec<f64>>, &Arc<Vec<bool>>)> {
203 match self {
204 ColumnData::Float64 { values, nulls } => Some((values, nulls)),
205 _ => None,
206 }
207 }
208
209 pub fn as_string_arc(&self) -> Option<(&Arc<Vec<Arc<str>>>, &Arc<Vec<bool>>)> {
211 match self {
212 ColumnData::String { values, nulls } => Some((values, nulls)),
213 _ => None,
214 }
215 }
216
217 pub fn as_bool_arc(&self) -> Option<(&Arc<Vec<bool>>, &Arc<Vec<bool>>)> {
219 match self {
220 ColumnData::Bool { values, nulls } => Some((values, nulls)),
221 _ => None,
222 }
223 }
224
225 pub fn as_date_arc(&self) -> Option<(&Arc<Vec<Date>>, &Arc<Vec<bool>>)> {
227 match self {
228 ColumnData::Date { values, nulls } => Some((values, nulls)),
229 _ => None,
230 }
231 }
232
233 pub fn as_timestamp_arc(&self) -> Option<(&Arc<Vec<Timestamp>>, &Arc<Vec<bool>>)> {
235 match self {
236 ColumnData::Timestamp { values, nulls } => Some((values, nulls)),
237 _ => None,
238 }
239 }
240
241 pub fn as_time_arc(&self) -> Option<(&Arc<Vec<Time>>, &Arc<Vec<bool>>)> {
243 match self {
244 ColumnData::Time { values, nulls } => Some((values, nulls)),
245 _ => None,
246 }
247 }
248
249 pub fn as_interval_arc(&self) -> Option<(&Arc<Vec<Interval>>, &Arc<Vec<bool>>)> {
251 match self {
252 ColumnData::Interval { values, nulls } => Some((values, nulls)),
253 _ => None,
254 }
255 }
256}