locustdb/mem_store/
raw_col.rs

1use std::mem;
2use std::ops::BitOr;
3use std::sync::Arc;
4
5use ordered_float::OrderedFloat;
6use serde::{Deserialize, Serialize};
7
8use crate::ingest::raw_val::RawVal;
9use crate::mem_store::column_builder::*;
10use crate::mem_store::*;
11
12// Can eliminate this? Used by in-memory buffer.
13#[derive(PartialEq, Debug, Clone, Serialize, Deserialize)]
14pub struct MixedCol {
15    types: ColType,
16    data: Vec<RawVal>,
17}
18
19impl MixedCol {
20    pub fn with_nulls(count: usize) -> MixedCol {
21        let mut c = Self::default();
22        c.push_nulls(count);
23        c
24    }
25
26    pub fn push(&mut self, elem: RawVal) {
27        self.types = self.types | ColType::determine(&elem);
28        self.data.push(elem);
29    }
30
31    pub fn push_ints(&mut self, ints: Vec<i64>) {
32        self.types = self.types | ColType::int();
33        self.data.extend(ints.into_iter().map(RawVal::Int));
34    }
35
36    pub fn push_floats(&mut self, floats: Vec<f64>) {
37        self.types = self.types | ColType::float();
38        self.data.extend(floats.into_iter().map(|f| RawVal::Float(OrderedFloat(f))));
39    }
40
41    pub fn push_strings(&mut self, strs: Vec<String>) {
42        self.types = self.types | ColType::string();
43        self.data.extend(strs.into_iter().map(RawVal::Str));
44    }
45
46    pub fn push_nulls(&mut self, count: usize) {
47        if count > 0 {
48            self.types = self.types | ColType::null();
49            self.data.extend(std::iter::repeat_n(RawVal::Null, count));
50        }
51    }
52
53    pub fn len(&self) -> usize {
54        self.data.len()
55    }
56
57    pub fn finalize(self, name: &str) -> Arc<Column> {
58        let present =  if self.types.contains_null {
59            let mut present = vec![0u8; self.data.len().div_ceil(8)];
60            for (i, v) in self.data.iter().enumerate() {
61                if *v != RawVal::Null {
62                    present[i / 8] |= 1 << (i % 8);
63                }
64            }
65            Some(present)
66        } else {
67            None
68        };
69        if self.types.contains_string {
70            let mut builder = StringColBuilder::default();
71            for v in self.data {
72                match v {
73                    RawVal::Str(s) => builder.push(&s),
74                    RawVal::Int(i) => builder.push(&i.to_string()),
75                    RawVal::Null => builder.push(&""),
76                    RawVal::Float(f) => builder.push(&f.to_string()),
77                }
78            }
79            ColumnBuilder::<String>::finalize(builder, name, present)
80        } else if self.types.contains_float {
81            let mut builder = FloatColBuilder::default();
82            for v in self.data {
83                match v {
84                    RawVal::Str(_) => panic!("Unexpected string in float column!"),
85                    RawVal::Int(i) => builder.push(&Some(i as f64)),
86                    RawVal::Null => builder.push(&None),
87                    RawVal::Float(f) => builder.push(&Some(f.into_inner())),
88                }
89            }
90            builder.finalize(name, present)
91        } else if self.types.contains_int {
92            let mut builder = IntColBuilder::default();
93            for v in self.data {
94                match v {
95                    RawVal::Str(_) => panic!("Unexpected string in int column!"),
96                    RawVal::Int(i) => builder.push(&Some(i)),
97                    RawVal::Null => builder.push(&None),
98                    RawVal::Float(_) => todo!("Unexpected float in int column!"),
99                }
100            }
101            builder.finalize(name, present)
102        } else {
103            Arc::new(Column::null(name, self.data.len()))
104        }
105    }
106
107    pub fn heap_size_of_children(&self) -> usize {
108        let data_size = self
109            .data
110            .iter()
111            .map(|v| v.heap_size_of_children())
112            .sum::<usize>()
113            + self.data.capacity() * mem::size_of::<RawVal>();
114        let type_size = mem::size_of::<ColType>();
115
116        data_size + type_size
117    }
118}
119
120impl Default for MixedCol {
121    fn default() -> MixedCol {
122        MixedCol {
123            types: ColType::nothing(),
124            data: Vec::new(),
125        }
126    }
127}
128
129#[derive(PartialEq, Debug, Copy, Clone, Serialize, Deserialize)]
130struct ColType {
131    contains_string: bool,
132    contains_int: bool,
133    contains_float: bool,
134    contains_null: bool,
135}
136
137impl ColType {
138    fn new(string: bool, int: bool, float: bool, null: bool) -> ColType {
139        ColType {
140            contains_string: string,
141            contains_int: int,
142            contains_float: float,
143            contains_null: null,
144        }
145    }
146
147    fn string() -> ColType {
148        ColType::new(true, false, false, false)
149    }
150
151    fn int() -> ColType {
152        ColType::new(false, true, false, false)
153    }
154
155    fn float() -> ColType {
156        ColType::new(false, false, true, false)
157    }
158
159    fn null() -> ColType {
160        ColType::new(false, false, false, true)
161    }
162
163    fn nothing() -> ColType {
164        ColType::new(false, false, false, false)
165    }
166
167    fn determine(v: &RawVal) -> ColType {
168        match *v {
169            RawVal::Null => ColType::null(),
170            RawVal::Str(_) => ColType::string(),
171            RawVal::Int(_) => ColType::int(),
172            RawVal::Float(_) => ColType::float(),
173        }
174    }
175}
176
177impl BitOr for ColType {
178    type Output = Self;
179    fn bitor(self, rhs: ColType) -> Self::Output {
180        ColType {
181            contains_string: self.contains_string | rhs.contains_string,
182            contains_int: self.contains_int | rhs.contains_int,
183            contains_float: self.contains_float | rhs.contains_float,
184            contains_null: self.contains_null | rhs.contains_null,
185        }
186    }
187}