polars_core/chunked_array/builder/
mod.rs

1mod boolean;
2#[cfg(feature = "dtype-array")]
3pub mod fixed_size_list;
4pub mod list;
5mod null;
6mod primitive;
7mod string;
8
9use std::sync::Arc;
10
11use arrow::array::*;
12use arrow::bitmap::Bitmap;
13pub use boolean::*;
14#[cfg(feature = "dtype-array")]
15pub(crate) use fixed_size_list::*;
16pub use list::*;
17pub use null::*;
18pub use primitive::*;
19pub use string::*;
20
21use crate::chunked_array::to_primitive;
22use crate::prelude::*;
23use crate::utils::{NoNull, get_iter_capacity};
24
25// N: the value type; T: the sentinel type
26pub trait ChunkedBuilder<N, T: PolarsDataType> {
27    fn append_value(&mut self, val: N);
28    fn append_null(&mut self);
29    fn append_option(&mut self, opt_val: Option<N>) {
30        match opt_val {
31            Some(v) => self.append_value(v),
32            None => self.append_null(),
33        }
34    }
35    fn finish(self) -> ChunkedArray<T>;
36
37    fn shrink_to_fit(&mut self);
38}
39
40// Used in polars/src/chunked_array/apply.rs:24 to collect from aligned vecs and null bitmaps
41impl<T> FromIterator<(Vec<T::Native>, Option<Bitmap>)> for ChunkedArray<T>
42where
43    T: PolarsNumericType,
44{
45    fn from_iter<I: IntoIterator<Item = (Vec<T::Native>, Option<Bitmap>)>>(iter: I) -> Self {
46        let chunks = iter
47            .into_iter()
48            .map(|(values, opt_buffer)| to_primitive::<T>(values, opt_buffer));
49        ChunkedArray::from_chunk_iter(PlSmallStr::EMPTY, chunks)
50    }
51}
52
53pub trait NewChunkedArray<T, N> {
54    fn from_slice(name: PlSmallStr, v: &[N]) -> Self;
55    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<N>]) -> Self;
56
57    /// Create a new ChunkedArray from an iterator.
58    fn from_iter_options(name: PlSmallStr, it: impl Iterator<Item = Option<N>>) -> Self;
59
60    /// Create a new ChunkedArray from an iterator.
61    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = N>) -> Self;
62}
63
64impl<T> NewChunkedArray<T, T::Native> for ChunkedArray<T>
65where
66    T: PolarsNumericType,
67{
68    fn from_slice(name: PlSmallStr, v: &[T::Native]) -> Self {
69        let arr =
70            PrimitiveArray::from_slice(v).to(T::get_static_dtype().to_arrow(CompatLevel::newest()));
71        ChunkedArray::with_chunk(name, arr)
72    }
73
74    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<T::Native>]) -> Self {
75        Self::from_iter_options(name, opt_v.iter().copied())
76    }
77
78    fn from_iter_options(
79        name: PlSmallStr,
80        it: impl Iterator<Item = Option<T::Native>>,
81    ) -> ChunkedArray<T> {
82        let mut builder = PrimitiveChunkedBuilder::new(name, get_iter_capacity(&it));
83        it.for_each(|opt| builder.append_option(opt));
84        builder.finish()
85    }
86
87    /// Create a new ChunkedArray from an iterator.
88    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = T::Native>) -> ChunkedArray<T> {
89        let ca: NoNull<ChunkedArray<_>> = it.collect();
90        let mut ca = ca.into_inner();
91        ca.rename(name);
92        ca
93    }
94}
95
96impl NewChunkedArray<BooleanType, bool> for BooleanChunked {
97    fn from_slice(name: PlSmallStr, v: &[bool]) -> Self {
98        Self::from_iter_values(name, v.iter().copied())
99    }
100
101    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<bool>]) -> Self {
102        Self::from_iter_options(name, opt_v.iter().copied())
103    }
104
105    fn from_iter_options(
106        name: PlSmallStr,
107        it: impl Iterator<Item = Option<bool>>,
108    ) -> ChunkedArray<BooleanType> {
109        let mut builder = BooleanChunkedBuilder::new(name, get_iter_capacity(&it));
110        it.for_each(|opt| builder.append_option(opt));
111        builder.finish()
112    }
113
114    /// Create a new ChunkedArray from an iterator.
115    fn from_iter_values(
116        name: PlSmallStr,
117        it: impl Iterator<Item = bool>,
118    ) -> ChunkedArray<BooleanType> {
119        let mut ca: ChunkedArray<_> = it.collect();
120        ca.rename(name);
121        ca
122    }
123}
124
125impl<S> NewChunkedArray<StringType, S> for StringChunked
126where
127    S: AsRef<str>,
128{
129    fn from_slice(name: PlSmallStr, v: &[S]) -> Self {
130        let arr = Utf8ViewArray::from_slice_values(v);
131        ChunkedArray::with_chunk(name, arr)
132    }
133
134    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<S>]) -> Self {
135        let arr = Utf8ViewArray::from_slice(opt_v);
136        ChunkedArray::with_chunk(name, arr)
137    }
138
139    fn from_iter_options(name: PlSmallStr, it: impl Iterator<Item = Option<S>>) -> Self {
140        let arr = MutableBinaryViewArray::from_iterator(it).freeze();
141        ChunkedArray::with_chunk(name, arr)
142    }
143
144    /// Create a new ChunkedArray from an iterator.
145    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = S>) -> Self {
146        let arr = MutableBinaryViewArray::from_values_iter(it).freeze();
147        ChunkedArray::with_chunk(name, arr)
148    }
149}
150
151impl<B> NewChunkedArray<BinaryType, B> for BinaryChunked
152where
153    B: AsRef<[u8]>,
154{
155    fn from_slice(name: PlSmallStr, v: &[B]) -> Self {
156        let arr = BinaryViewArray::from_slice_values(v);
157        ChunkedArray::with_chunk(name, arr)
158    }
159
160    fn from_slice_options(name: PlSmallStr, opt_v: &[Option<B>]) -> Self {
161        let arr = BinaryViewArray::from_slice(opt_v);
162        ChunkedArray::with_chunk(name, arr)
163    }
164
165    fn from_iter_options(name: PlSmallStr, it: impl Iterator<Item = Option<B>>) -> Self {
166        let arr = MutableBinaryViewArray::from_iterator(it).freeze();
167        ChunkedArray::with_chunk(name, arr)
168    }
169
170    /// Create a new ChunkedArray from an iterator.
171    fn from_iter_values(name: PlSmallStr, it: impl Iterator<Item = B>) -> Self {
172        let arr = MutableBinaryViewArray::from_values_iter(it).freeze();
173        ChunkedArray::with_chunk(name, arr)
174    }
175}
176
#[cfg(test)]
mod test {
    use super::*;

    /// Values and nulls appended through `append_option` come back unchanged.
    #[test]
    fn test_primitive_builder() {
        let values = &[Some(1), None, Some(2), Some(3), None, Some(4)];
        let mut builder =
            PrimitiveChunkedBuilder::<UInt32Type>::new(PlSmallStr::from_static("foo"), 6);
        for val in values.iter().copied() {
            builder.append_option(val);
        }
        let ca = builder.finish();
        assert_eq!(Vec::from(&ca), values);
    }

    /// Appending series (and a null) to the list builder yields one list
    /// entry per append, both via the builder and via `collect`.
    #[test]
    fn test_list_builder() {
        // Both halves of the test start from an identically configured builder.
        let new_builder = || {
            ListPrimitiveChunkedBuilder::<Int32Type>::new(
                PlSmallStr::from_static("a"),
                10,
                5,
                DataType::Int32,
            )
        };

        // Create a series containing two chunks.
        let s2 = Int32Chunked::from_slice(PlSmallStr::from_static("b"), &[4, 5, 6]).into_series();
        let mut s1 =
            Int32Chunked::from_slice(PlSmallStr::from_static("a"), &[1, 2, 3]).into_series();
        s1.append(&s2).unwrap();

        let mut builder = new_builder();
        builder.append_series(&s1).unwrap();
        builder.append_series(&s2).unwrap();
        let ls = builder.finish();

        // Length of the list stored at `idx`; anything but a list is a failure.
        let list_len = |idx: usize| match ls.get_any_value(idx).unwrap() {
            AnyValue::List(s) => s.len(),
            _ => panic!(),
        };
        // many chunks are aggregated to one in the ListArray
        assert_eq!(list_len(0), 6);
        assert_eq!(list_len(1), 3);

        // Test list collect.
        let out = [&s1, &s2].iter().copied().collect::<ListChunked>();
        let first = out.get_as_series(0).unwrap();
        let second = out.get_as_series(1).unwrap();
        assert_eq!(first.len(), 6);
        assert_eq!(second.len(), 3);

        let mut builder = new_builder();
        builder.append_series(&s1).unwrap();
        builder.append_null();

        let exploded = builder.finish().explode(false).unwrap();
        assert_eq!(exploded.len(), 7);
        assert_eq!(exploded.get(6).unwrap(), AnyValue::Null);
    }
}