hexane/
boolean.rs

1use super::aggregate::Acc;
2use super::columndata::ColumnData;
3use super::cursor::{ColumnCursor, Run, ScanMeta, SpliceDel};
4use super::encoder::{Encoder, EncoderState, SpliceEncoder, Writer};
5use super::pack::{PackError, Packable};
6use super::slab::{Slab, SlabWeight, SlabWriter};
7use super::Cow;
8
9use std::ops::Range;
10
/// Pending run tracked while boolean data is being encoded.
///
/// A boolean column is stored as a sequence of run lengths whose values
/// alternate, so a run is fully described by its value and its length.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct BooleanState {
    /// Value shared by every element of the run.
    pub(crate) value: bool,
    /// Number of elements in the run.
    pub(crate) count: usize,
    /// NOTE(review): set to `true` when the state is built from an existing
    /// run; presumably tells the encoder that the run is already part of the
    /// written output. The consumer is not visible in this file, so confirm.
    pub(crate) flushed: bool,
}
17
18impl BooleanState {
19    fn acc(&self) -> Acc {
20        if self.value {
21            Acc::from(self.count)
22        } else {
23            Acc::new()
24        }
25    }
26}
27
28impl<'a> From<Run<'a, bool>> for BooleanState {
29    fn from(run: Run<'a, bool>) -> Self {
30        let count = run.count;
31        let value = *run.value.as_deref().unwrap_or(&false);
32        let flushed = true;
33        Self {
34            count,
35            value,
36            flushed,
37        }
38    }
39}
40
41#[derive(Debug, Default, Clone, Copy, PartialEq)]
42pub struct BooleanCursorInternal<const B: usize> {
43    value: bool,
44    index: usize,
45    offset: usize,
46    acc: Acc,
47    last_offset: usize,
48}
49
50pub type BooleanCursor = BooleanCursorInternal<64>;
51
52impl<const B: usize> ColumnCursor for BooleanCursorInternal<B> {
53    type Item = bool;
54    type State<'a> = BooleanState;
55    type PostState<'a> = Option<BooleanState>;
56    type Export = bool;
57    type SlabIndex = SlabWeight;
58
59    fn empty() -> Self {
60        Self::default()
61    }
62
63    fn load_with(data: &[u8], m: &ScanMeta) -> Result<ColumnData<Self>, PackError> {
64        let mut cursor = Self::empty();
65        let mut last_cursor = Self::empty();
66        let mut writer = SlabWriter::<bool>::new(B, true);
67        let mut last_copy = Self::empty();
68        while let Some(run) = cursor.try_next(data)? {
69            bool::validate(run.value.as_deref(), m)?;
70            if cursor.offset - last_copy.offset >= B {
71                if !cursor.value {
72                    cursor_copy(data, &mut writer, &last_copy, &cursor);
73                    last_copy = cursor;
74                } else {
75                    cursor_copy(data, &mut writer, &last_copy, &last_cursor);
76                    last_copy = last_cursor;
77                }
78                writer.manual_slab_break();
79            }
80            last_cursor = cursor;
81        }
82        cursor_copy(data, &mut writer, &last_copy, &cursor);
83        Ok(writer.into_column(cursor.index))
84    }
85
86    fn finish<'a>(slab: &'a Slab, writer: &mut SlabWriter<'a, bool>, cursor: Self) {
87        writer.copy(
88            slab.as_slice(),
89            cursor.offset..slab.as_slice().len(),
90            0,
91            slab.len() - cursor.index,
92            slab.acc() - cursor.acc,
93            Some(cursor.value),
94        );
95    }
96
97    fn finalize_state<'a>(
98        slab: &'a Slab,
99        encoder: &mut Encoder<'a, Self>,
100        post: Self::PostState<'a>,
101        mut cursor: Self,
102    ) -> Option<Self> {
103        if let Some(post) = post {
104            if post.value == encoder.state.value {
105                encoder.state.count += post.count;
106                Self::finalize_state(slab, encoder, None, cursor)
107            } else {
108                encoder
109                    .writer
110                    .flush_bool_run(encoder.state.count, encoder.state.value);
111                encoder.writer.flush_bool_run(post.count, post.value);
112                Some(cursor)
113            }
114        } else {
115            let old_cursor = cursor;
116            if let Ok(Some(val)) = cursor.try_next(slab.as_slice()) {
117                if val.count == 0 {
118                    Self::finalize_state(slab, encoder, None, cursor)
119                } else if val.value == Some(Cow::Owned(encoder.state.value)) {
120                    encoder
121                        .writer
122                        .flush_bool_run(encoder.state.count + val.count, encoder.state.value);
123                    Some(cursor)
124                } else {
125                    encoder
126                        .writer
127                        .flush_bool_run(encoder.state.count, encoder.state.value);
128                    Some(old_cursor)
129                }
130            } else {
131                encoder
132                    .writer
133                    .flush_bool_run(encoder.state.count, encoder.state.value);
134                None
135            }
136        }
137    }
138
139    fn is_empty(v: Option<Cow<'_, bool>>) -> bool {
140        v.as_deref() != Some(&true)
141    }
142
143    fn copy_between<'a>(
144        slab: &'a [u8],
145        writer: &mut SlabWriter<'a, bool>,
146        c0: Self,
147        c1: Self,
148        run: Run<'a, bool>,
149        size: usize,
150    ) -> Self::State<'a> {
151        writer.copy(slab, c0.offset..c1.last_offset, 0, size, Acc::new(), None);
152        let mut next_state = BooleanState {
153            value: run.value.as_deref().copied().unwrap_or_default(),
154            count: 0,
155            flushed: true,
156        };
157        next_state.append_chunk(writer, run);
158        next_state
159    }
160
161    fn slab_size() -> usize {
162        B
163    }
164
165    fn splice_encoder(index: usize, del: usize, slab: &Slab) -> SpliceEncoder<'_, Self> {
166        // FIXME encode
167        let (run, cursor) = Self::seek(index, slab);
168
169        let flushed = run.is_some();
170        let count = run.as_ref().map(|r| r.count).unwrap_or(0);
171        let value = run
172            .as_ref()
173            .and_then(|r| r.value.as_deref().cloned())
174            .unwrap_or_default();
175
176        let mut state = BooleanState {
177            count,
178            value,
179            flushed,
180        };
181        let acc = cursor.acc - state.acc();
182        //let state2 = BooleanState::from(run.unwrap_or_default());
183        //assert_eq!(state, state2);
184        let mut post = None;
185
186        let delta = cursor.index - index;
187        if delta > 0 {
188            state.count -= delta;
189            post = Some(Run {
190                count: delta,
191                value: Some(Cow::Owned(value)),
192            });
193        }
194
195        let range = 0..cursor.last_offset;
196        let size = cursor.index - count;
197        let mut current = SlabWriter::new(B, false);
198        current.copy(slab.as_slice(), range, 0, size, acc, None);
199
200        let SpliceDel {
201            deleted,
202            overflow,
203            cursor,
204            post,
205        } = Self::splice_delete(post, cursor, del, slab);
206        let post = post.map(BooleanState::from);
207        let acc = Acc::new();
208
209        SpliceEncoder {
210            encoder: Encoder::init(current, state),
211            slab,
212            post,
213            acc,
214            deleted,
215            overflow,
216            cursor,
217        }
218    }
219
220    fn export_splice<'a, I>(data: &mut Vec<Self::Export>, range: Range<usize>, values: I)
221    where
222        I: Iterator<Item = Option<Cow<'a, bool>>>,
223    {
224        data.splice(range, values.map(|e| *e.unwrap_or_default()));
225    }
226
227    fn try_next<'a>(&mut self, slab: &'a [u8]) -> Result<Option<Run<'a, Self::Item>>, PackError> {
228        if self.offset >= slab.len() {
229            return Ok(None);
230        }
231        let data = &slab[self.offset..];
232        let (bytes, count) = u64::unpack(data)?;
233        let count = *count as usize;
234        let value = self.value;
235        self.value = !value;
236        self.index += count;
237        self.last_offset = self.offset;
238        self.offset += bytes;
239        /*
240                if value {
241                    self.acc += Acc::from(count); // agg(1) * count
242                }
243        */
244        let run = Run {
245            count,
246            value: Some(Cow::Owned(value)),
247        };
248        self.acc += run.acc();
249        Ok(Some(run))
250    }
251
252    fn index(&self) -> usize {
253        self.index
254    }
255
256    fn offset(&self) -> usize {
257        self.offset
258    }
259
260    fn init_empty(len: usize) -> Slab {
261        if len > 0 {
262            let mut writer = SlabWriter::<bool>::new(usize::MAX, false);
263            writer.flush_bool_run(len, false);
264            writer.finish().pop().unwrap_or_default()
265        } else {
266            Slab::default()
267        }
268    }
269
270    fn acc(&self) -> Acc {
271        self.acc
272    }
273}
274
275fn cursor_copy<'a, const B: usize>(
276    data: &'a [u8],
277    writer: &mut SlabWriter<'a, bool>,
278    from: &BooleanCursorInternal<B>,
279    to: &BooleanCursorInternal<B>,
280) {
281    if from.offset == to.offset {
282        return;
283    }
284    writer.copy(
285        data,
286        from.offset..to.offset,
287        0,
288        to.index - from.index,
289        to.acc - from.acc,
290        None,
291    );
292}
293
#[cfg(test)]
pub(crate) mod tests {
    use super::super::columndata::ColumnData;
    use super::super::test::ColExport;
    use super::*;

    /// Column with the slab size parameter set to 4, so that short inputs
    /// already exercise slab handling.
    type SmallColumn = ColumnData<BooleanCursorInternal<4>>;

    #[test]
    fn column_data_boolean_split_merge_semantics() {
        // Alternating single-element runs spread over several slabs:
        // true, false, true, false, ... (10 values).
        let mut alternating = SmallColumn::new();
        let alternating_values: Vec<bool> = (0..10).map(|i| i % 2 == 0).collect();
        alternating.splice(0, 0, alternating_values);

        let alternating_slabs = vec![
            vec![
                ColExport::run(1, true),
                ColExport::run(1, false),
                ColExport::run(1, true),
            ],
            vec![
                ColExport::run(1, false),
                ColExport::run(1, true),
                ColExport::run(1, false),
                ColExport::run(1, true),
            ],
            vec![
                ColExport::run(1, false),
                ColExport::run(1, true),
                ColExport::run(1, false),
            ],
        ];
        assert_eq!(alternating.test_dump(), alternating_slabs);
        // The data starts with `true`, so the saved form opens with an empty
        // `false` run.
        assert_eq!(alternating.save(), vec![0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]);

        // Runs of three, alternating false/true, 36 values in total.
        let mut triples = SmallColumn::new();
        let triple_values: Vec<bool> = [false, false, false, true, true, true]
            .iter()
            .copied()
            .cycle()
            .take(36)
            .collect();
        triples.splice(0, 0, triple_values);

        let triple_slabs: Vec<_> = (0..3)
            .map(|_| {
                vec![
                    ColExport::run(3, false),
                    ColExport::run(3, true),
                    ColExport::run(3, false),
                    ColExport::run(3, true),
                ]
            })
            .collect();
        assert_eq!(triples.test_dump(), triple_slabs);
        assert_eq!(triples.save(), vec![3; 12]);

        // A column that never received any data.
        let untouched: ColumnData<BooleanCursor> = ColumnData::new();
        assert_eq!(untouched.test_dump(), vec![vec![]]);
        assert_eq!(untouched.save(), vec![0]);
    }

    #[test]
    fn column_data_boolean_splice_del() {
        // Base column: 4 x true, 4 x false, 2 x true.
        let mut base = SmallColumn::new();
        base.splice(0, 0, [vec![true; 4], vec![false; 4], vec![true; 2]].concat());
        assert_eq!(
            base.test_dump(),
            vec![vec![
                ColExport::run(4, true),
                ColExport::run(4, false),
                ColExport::run(2, true),
            ]]
        );

        // Delete inside the first run.
        {
            let mut col = base.clone();
            col.splice::<bool, _>(2, 2, vec![]);
            assert_eq!(
                col.test_dump(),
                vec![vec![
                    ColExport::run(2, true),
                    ColExport::run(4, false),
                    ColExport::run(2, true),
                ]]
            );
        }

        // Replace the tail of the first run with values that merge into the
        // following run.
        {
            let mut col = base.clone();
            col.splice(2, 2, vec![false, false]);
            assert_eq!(
                col.test_dump(),
                vec![vec![
                    ColExport::run(2, true),
                    ColExport::run(6, false),
                    ColExport::run(2, true),
                ]]
            );
        }

        // Delete across the boundary between the first and second run.
        {
            let mut col = base.clone();
            col.splice::<bool, _>(2, 4, vec![]);
            assert_eq!(
                col.test_dump(),
                vec![vec![
                    ColExport::run(2, true),
                    ColExport::run(2, false),
                    ColExport::run(2, true),
                ]]
            );
        }

        // Delete the whole middle run so that the outer runs merge.
        {
            let mut col = base.clone();
            col.splice::<bool, _>(2, 7, vec![]);
            assert_eq!(col.test_dump(), vec![vec![ColExport::run(3, true),]]);
        }

        // Delete exactly the first run.
        {
            let mut col = base.clone();
            col.splice::<bool, _>(0, 4, vec![]);
            assert_eq!(
                col.test_dump(),
                vec![vec![ColExport::run(4, false), ColExport::run(2, true),]]
            );
        }

        // Replace the entire column with a single value.
        {
            let mut col = base.clone();
            col.splice(0, 10, vec![false]);
            assert_eq!(col.test_dump(), vec![vec![ColExport::run(1, false),]]);
        }

        // Delete exactly the middle run.
        {
            let mut col = base.clone();
            col.splice::<bool, _>(4, 4, vec![]);
            assert_eq!(col.test_dump(), vec![vec![ColExport::run(6, true),]]);
        }
    }

    #[test]
    fn load_empty_bool_data() {
        // Loading zero bytes must succeed and give an empty column.
        let loaded = BooleanCursor::load(&[]).unwrap();
        assert!(loaded.is_empty());
    }
}
459}