1use roaring::RoaringBitmap;
24use rustc_hash::FxHashMap;
25use std::collections::HashMap;
26
/// Compact handle identifying a string stored in a [`StringTable`].
///
/// The wrapped `u32` is the position of the string in the table's
/// id-to-string list, so ids are dense and start at zero.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct StringId(u32);
30
31#[derive(Debug, Default)]
33pub struct StringTable {
34 string_to_id: FxHashMap<String, StringId>,
36 id_to_string: Vec<String>,
38}
39
40impl StringTable {
41 #[must_use]
43 pub fn new() -> Self {
44 Self::default()
45 }
46
47 pub fn intern(&mut self, s: &str) -> StringId {
51 if let Some(&id) = self.string_to_id.get(s) {
52 return id;
53 }
54
55 #[allow(clippy::cast_possible_truncation)]
56 let id = StringId(self.id_to_string.len() as u32);
57 self.id_to_string.push(s.to_string());
58 self.string_to_id.insert(s.to_string(), id);
59 id
60 }
61
62 #[must_use]
64 pub fn get(&self, id: StringId) -> Option<&str> {
65 self.id_to_string.get(id.0 as usize).map(String::as_str)
66 }
67
68 #[must_use]
70 pub fn get_id(&self, s: &str) -> Option<StringId> {
71 self.string_to_id.get(s).copied()
72 }
73
74 #[must_use]
76 pub fn len(&self) -> usize {
77 self.id_to_string.len()
78 }
79
80 #[must_use]
82 pub fn is_empty(&self) -> bool {
83 self.id_to_string.is_empty()
84 }
85}
86
87#[derive(Debug)]
89pub enum TypedColumn {
90 Int(Vec<Option<i64>>),
92 Float(Vec<Option<f64>>),
94 String(Vec<Option<StringId>>),
96 Bool(Vec<Option<bool>>),
98}
99
100impl TypedColumn {
101 #[must_use]
103 pub fn new_int(capacity: usize) -> Self {
104 Self::Int(Vec::with_capacity(capacity))
105 }
106
107 #[must_use]
109 pub fn new_float(capacity: usize) -> Self {
110 Self::Float(Vec::with_capacity(capacity))
111 }
112
113 #[must_use]
115 pub fn new_string(capacity: usize) -> Self {
116 Self::String(Vec::with_capacity(capacity))
117 }
118
119 #[must_use]
121 pub fn new_bool(capacity: usize) -> Self {
122 Self::Bool(Vec::with_capacity(capacity))
123 }
124
125 #[must_use]
127 pub fn len(&self) -> usize {
128 match self {
129 Self::Int(v) => v.len(),
130 Self::Float(v) => v.len(),
131 Self::String(v) => v.len(),
132 Self::Bool(v) => v.len(),
133 }
134 }
135
136 #[must_use]
138 pub fn is_empty(&self) -> bool {
139 self.len() == 0
140 }
141
142 pub fn push_null(&mut self) {
144 match self {
145 Self::Int(v) => v.push(None),
146 Self::Float(v) => v.push(None),
147 Self::String(v) => v.push(None),
148 Self::Bool(v) => v.push(None),
149 }
150 }
151}
152
153#[derive(Debug, Default)]
155pub struct ColumnStore {
156 columns: HashMap<String, TypedColumn>,
158 string_table: StringTable,
160 row_count: usize,
162}
163
164impl ColumnStore {
165 #[must_use]
167 pub fn new() -> Self {
168 Self::default()
169 }
170
171 #[must_use]
177 pub fn with_schema(fields: &[(&str, ColumnType)]) -> Self {
178 let mut store = Self::new();
179 for (name, col_type) in fields {
180 store.add_column(name, *col_type);
181 }
182 store
183 }
184
185 pub fn add_column(&mut self, name: &str, col_type: ColumnType) {
187 let column = match col_type {
188 ColumnType::Int => TypedColumn::new_int(0),
189 ColumnType::Float => TypedColumn::new_float(0),
190 ColumnType::String => TypedColumn::new_string(0),
191 ColumnType::Bool => TypedColumn::new_bool(0),
192 };
193 self.columns.insert(name.to_string(), column);
194 }
195
196 #[must_use]
198 pub fn row_count(&self) -> usize {
199 self.row_count
200 }
201
202 #[must_use]
204 pub fn string_table(&self) -> &StringTable {
205 &self.string_table
206 }
207
208 pub fn string_table_mut(&mut self) -> &mut StringTable {
210 &mut self.string_table
211 }
212
213 pub fn push_row(&mut self, values: &[(&str, ColumnValue)]) {
217 let value_map: FxHashMap<&str, &ColumnValue> =
219 values.iter().map(|(k, v)| (*k, v)).collect();
220
221 for (name, column) in &mut self.columns {
223 if let Some(value) = value_map.get(name.as_str()) {
224 match value {
225 ColumnValue::Null => column.push_null(),
226 ColumnValue::Int(v) => {
227 if let TypedColumn::Int(col) = column {
228 col.push(Some(*v));
229 } else {
230 column.push_null();
231 }
232 }
233 ColumnValue::Float(v) => {
234 if let TypedColumn::Float(col) = column {
235 col.push(Some(*v));
236 } else {
237 column.push_null();
238 }
239 }
240 ColumnValue::String(id) => {
241 if let TypedColumn::String(col) = column {
242 col.push(Some(*id));
243 } else {
244 column.push_null();
245 }
246 }
247 ColumnValue::Bool(v) => {
248 if let TypedColumn::Bool(col) = column {
249 col.push(Some(*v));
250 } else {
251 column.push_null();
252 }
253 }
254 }
255 } else {
256 column.push_null();
257 }
258 }
259
260 self.row_count += 1;
261 }
262
263 #[must_use]
265 pub fn get_column(&self, name: &str) -> Option<&TypedColumn> {
266 self.columns.get(name)
267 }
268
269 #[must_use]
273 pub fn filter_eq_int(&self, column: &str, value: i64) -> Vec<usize> {
274 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
275 return Vec::new();
276 };
277
278 col.iter()
279 .enumerate()
280 .filter_map(|(idx, v)| if *v == Some(value) { Some(idx) } else { None })
281 .collect()
282 }
283
284 #[must_use]
288 pub fn filter_eq_string(&self, column: &str, value: &str) -> Vec<usize> {
289 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
290 return Vec::new();
291 };
292
293 let Some(string_id) = self.string_table.get_id(value) else {
294 return Vec::new(); };
296
297 col.iter()
298 .enumerate()
299 .filter_map(|(idx, v)| {
300 if *v == Some(string_id) {
301 Some(idx)
302 } else {
303 None
304 }
305 })
306 .collect()
307 }
308
309 #[must_use]
313 pub fn filter_gt_int(&self, column: &str, threshold: i64) -> Vec<usize> {
314 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
315 return Vec::new();
316 };
317
318 col.iter()
319 .enumerate()
320 .filter_map(|(idx, v)| match v {
321 Some(val) if *val > threshold => Some(idx),
322 _ => None,
323 })
324 .collect()
325 }
326
327 #[must_use]
329 pub fn filter_lt_int(&self, column: &str, threshold: i64) -> Vec<usize> {
330 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
331 return Vec::new();
332 };
333
334 col.iter()
335 .enumerate()
336 .filter_map(|(idx, v)| match v {
337 Some(val) if *val < threshold => Some(idx),
338 _ => None,
339 })
340 .collect()
341 }
342
343 #[must_use]
345 pub fn filter_range_int(&self, column: &str, low: i64, high: i64) -> Vec<usize> {
346 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
347 return Vec::new();
348 };
349
350 col.iter()
351 .enumerate()
352 .filter_map(|(idx, v)| match v {
353 Some(val) if *val > low && *val < high => Some(idx),
354 _ => None,
355 })
356 .collect()
357 }
358
359 #[must_use]
363 pub fn filter_in_string(&self, column: &str, values: &[&str]) -> Vec<usize> {
364 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
365 return Vec::new();
366 };
367
368 let ids: Vec<StringId> = values
370 .iter()
371 .filter_map(|s| self.string_table.get_id(s))
372 .collect();
373
374 if ids.is_empty() {
375 return Vec::new();
376 }
377
378 if ids.len() > 16 {
381 let id_set: rustc_hash::FxHashSet<StringId> = ids.into_iter().collect();
382 col.iter()
383 .enumerate()
384 .filter_map(|(idx, v)| match v {
385 Some(id) if id_set.contains(id) => Some(idx),
386 _ => None,
387 })
388 .collect()
389 } else {
390 col.iter()
391 .enumerate()
392 .filter_map(|(idx, v)| match v {
393 Some(id) if ids.contains(id) => Some(idx),
394 _ => None,
395 })
396 .collect()
397 }
398 }
399
400 #[must_use]
404 pub fn count_eq_int(&self, column: &str, value: i64) -> usize {
405 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
406 return 0;
407 };
408
409 col.iter().filter(|v| **v == Some(value)).count()
410 }
411
412 #[must_use]
414 pub fn count_eq_string(&self, column: &str, value: &str) -> usize {
415 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
416 return 0;
417 };
418
419 let Some(string_id) = self.string_table.get_id(value) else {
420 return 0;
421 };
422
423 col.iter().filter(|v| **v == Some(string_id)).count()
424 }
425
426 #[must_use]
435 #[allow(clippy::cast_possible_truncation)]
436 pub fn filter_eq_int_bitmap(&self, column: &str, value: i64) -> RoaringBitmap {
437 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
438 return RoaringBitmap::new();
439 };
440
441 col.iter()
442 .enumerate()
443 .filter_map(|(idx, v)| {
444 if *v == Some(value) {
445 Some(idx as u32)
446 } else {
447 None
448 }
449 })
450 .collect()
451 }
452
453 #[must_use]
455 #[allow(clippy::cast_possible_truncation)]
456 pub fn filter_eq_string_bitmap(&self, column: &str, value: &str) -> RoaringBitmap {
457 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
458 return RoaringBitmap::new();
459 };
460
461 let Some(string_id) = self.string_table.get_id(value) else {
462 return RoaringBitmap::new();
463 };
464
465 col.iter()
466 .enumerate()
467 .filter_map(|(idx, v)| {
468 if *v == Some(string_id) {
469 Some(idx as u32)
470 } else {
471 None
472 }
473 })
474 .collect()
475 }
476
477 #[must_use]
479 #[allow(clippy::cast_possible_truncation)]
480 pub fn filter_range_int_bitmap(&self, column: &str, low: i64, high: i64) -> RoaringBitmap {
481 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
482 return RoaringBitmap::new();
483 };
484
485 col.iter()
486 .enumerate()
487 .filter_map(|(idx, v)| match v {
488 Some(val) if *val > low && *val < high => Some(idx as u32),
489 _ => None,
490 })
491 .collect()
492 }
493
494 #[must_use]
498 pub fn bitmap_and(a: &RoaringBitmap, b: &RoaringBitmap) -> RoaringBitmap {
499 a & b
500 }
501
502 #[must_use]
506 pub fn bitmap_or(a: &RoaringBitmap, b: &RoaringBitmap) -> RoaringBitmap {
507 a | b
508 }
509}
510
/// Value type of a column, used when declaring a schema.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ColumnType {
    /// 64-bit signed integers.
    Int,
    /// 64-bit floating point numbers.
    Float,
    /// Interned strings.
    String,
    /// Booleans.
    Bool,
}
523
524#[derive(Debug, Clone)]
526pub enum ColumnValue {
527 Int(i64),
529 Float(f64),
531 String(StringId),
533 Bool(bool),
535 Null,
537}