ella_common/row/
format.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
use std::{fmt::Debug, marker::PhantomData, sync::Arc};

use datafusion::arrow::{
    array::ArrayRef,
    datatypes::{Field, SchemaRef},
    record_batch::{RecordBatch, RecordBatchOptions},
};

/// A typed row format with an associated batch builder and an associated view.
///
/// NOTE(review): presumably implemented by row types that map onto a fixed set of
/// Arrow columns — confirm against the implementors.
pub trait RowFormat: Debug + Clone + 'static {
    /// Column count associated with this format.
    ///
    /// NOTE(review): presumably the number of Arrow columns one row occupies — confirm.
    const COLUMNS: usize;

    /// Builder type that accepts rows of this format (see [`RowBatchBuilder`]).
    type Builder: RowBatchBuilder<Self>;
    /// View type that yields rows of this format (see [`RowFormatView`]).
    type View: RowFormatView<Self>;

    /// Creates a builder for rows of this format from the given `fields`.
    ///
    /// Returns an error if the implementation cannot build from `fields`.
    fn builder(fields: &[Arc<Field>]) -> crate::Result<Self::Builder>;
    /// Creates a view from a row count, the given `fields`, and the given `arrays`.
    ///
    /// Returns an error if the implementation cannot construct a view from the inputs.
    /// NOTE(review): presumably `fields` and `arrays` are expected to line up
    /// one-to-one — confirm against the implementors.
    fn view(rows: usize, fields: &[Arc<Field>], arrays: &[ArrayRef]) -> crate::Result<Self::View>;
}

/// Accumulates rows of type `R` and turns them into Arrow columns or a [`RecordBatch`].
pub trait RowBatchBuilder<R>: Debug + Clone + 'static {
    /// Returns the builder's current row count.
    fn len(&self) -> usize;

    /// Returns `true` when [`len`](Self::len) is `0`.
    fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }

    /// Appends `row` to the builder.
    fn push(&mut self, row: R);

    /// Produces the Arrow arrays for the accumulated rows.
    fn build_columns(&mut self) -> crate::Result<Vec<ArrayRef>>;

    /// Builds a [`RecordBatch`] with the given `schema` from the accumulated rows.
    ///
    /// The batch is created with an explicit row count taken from [`len`](Self::len).
    ///
    /// # Errors
    /// Propagates any error from [`build_columns`](Self::build_columns) or from
    /// constructing the record batch.
    fn build(&mut self, schema: SchemaRef) -> crate::Result<RecordBatch> {
        // Read the row count before `build_columns` runs: that call takes `&mut self`
        // and may change what `len()` reports.
        let row_count = self.len();
        let options = RecordBatchOptions::new().with_row_count(Some(row_count));
        let columns = self.build_columns()?;
        let batch = RecordBatch::try_new_with_options(schema, columns, &options)?;
        Ok(batch)
    }
}

/// Typed, row-indexed wrapper over Arrow data.
///
/// A view can be consumed into a [`RowViewIter`] that yields one `R` per row.
pub trait RowFormatView<R>:
    Debug + IntoIterator<Item = R, IntoIter = RowViewIter<R, Self>> + Clone + 'static
{
    /// Number of rows wrapped by this view.
    fn len(&self) -> usize;

    /// Whether the view wraps zero rows.
    fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }

    /// Returns the row at index `i`.
    ///
    /// # Panics
    /// Panics if `i >= len`.
    fn row(&self, i: usize) -> R;

    /// Returns the row at index `i`, skipping the bounds check.
    ///
    /// # Safety
    /// Implementations must return a valid result when `i < len`.
    /// Calling this method where `i >= len` is undefined behavior.
    unsafe fn row_unchecked(&self, i: usize) -> R;

    /// Returns an iterator over the rows of this view.
    ///
    /// The view itself is left untouched: a clone of it is consumed by the iterator.
    fn iter(&self) -> RowViewIter<R, Self> {
        let owned = self.clone();
        owned.into_iter()
    }
}

/// Iterator state over a view `V` that yields rows of type `R`.
///
/// The iterator owns its view and tracks the index of the next row to yield.
#[derive(Debug, Clone)]
pub struct RowViewIter<R, V> {
    /// The view whose rows are yielded.
    view: V,
    /// Marker tying the iterator to the row type `R` without storing a value of it.
    _row: PhantomData<R>,
    /// Index of the next row to yield; starts at `0`.
    i: usize,
}

impl<R, V> RowViewIter<R, V> {
    pub fn new(view: V) -> Self {
        Self {
            view,
            _row: PhantomData,
            i: 0,
        }
    }
}

/// Yields the rows of the wrapped view in index order, starting at the cursor `i`.
impl<R, V> Iterator for RowViewIter<R, V>
where
    V: RowFormatView<R>,
{
    type Item = R;

    /// Returns the row at the cursor and advances it, or `None` once every row
    /// has been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        if self.i < self.view.len() {
            // SAFETY: the branch condition guarantees `self.i < self.view.len()`,
            // which is the precondition of `row_unchecked`.
            let item = unsafe { self.view.row_unchecked(self.i) };
            self.i += 1;
            Some(item)
        } else {
            None
        }
    }

    /// Reports the exact number of rows that have not been yielded yet.
    fn size_hint(&self) -> (usize, Option<usize>) {
        // The hint must describe the *remaining* items, not the total: subtract the
        // rows already consumed. `saturating_sub` keeps the result at 0 rather than
        // underflowing should the cursor ever exceed the view length.
        let remaining = self.view.len().saturating_sub(self.i);
        (remaining, Some(remaining))
    }
}

/// Marks [`RowViewIter`] as an exact-size iterator.
///
/// The provided `len` method is derived from `size_hint`, so this impl relies on
/// `size_hint` reporting the exact number of rows still to be yielded.
impl<R, V> ExactSizeIterator for RowViewIter<R, V> where V: RowFormatView<R> {}