polars_arrow/legacy/array/
list.rs

1use polars_error::PolarsResult;
2
3use crate::array::{Array, ArrayRef, ListArray, NullArray, new_null_array};
4use crate::bitmap::BitmapBuilder;
5use crate::compute::concatenate;
6use crate::datatypes::ArrowDataType;
7use crate::legacy::array::is_nested_null;
8use crate::legacy::prelude::*;
9use crate::offset::Offsets;
10
11pub struct AnonymousBuilder<'a> {
12    arrays: Vec<&'a dyn Array>,
13    offsets: Vec<i64>,
14    validity: Option<BitmapBuilder>,
15    size: i64,
16}
17
18impl<'a> AnonymousBuilder<'a> {
19    pub fn new(size: usize) -> Self {
20        let mut offsets = Vec::with_capacity(size + 1);
21        offsets.push(0i64);
22        Self {
23            arrays: Vec::with_capacity(size),
24            offsets,
25            validity: None,
26            size: 0,
27        }
28    }
29    #[inline]
30    fn last_offset(&self) -> i64 {
31        *self.offsets.last().unwrap()
32    }
33
34    pub fn is_empty(&self) -> bool {
35        self.offsets.len() == 1
36    }
37
38    pub fn offsets(&self) -> &[i64] {
39        &self.offsets
40    }
41
42    pub fn take_offsets(self) -> Offsets<i64> {
43        // SAFETY: offsets are correct
44        unsafe { Offsets::new_unchecked(self.offsets) }
45    }
46
47    #[inline]
48    pub fn push(&mut self, arr: &'a dyn Array) {
49        self.size += arr.len() as i64;
50        self.offsets.push(self.size);
51        self.arrays.push(arr);
52
53        if let Some(validity) = &mut self.validity {
54            validity.push(true)
55        }
56    }
57
58    pub fn push_multiple(&mut self, arrs: &'a [ArrayRef]) {
59        for arr in arrs {
60            self.size += arr.len() as i64;
61            self.arrays.push(arr.as_ref());
62        }
63        self.offsets.push(self.size);
64        self.update_validity()
65    }
66
67    #[inline]
68    pub fn push_null(&mut self) {
69        self.offsets.push(self.last_offset());
70        match &mut self.validity {
71            Some(validity) => validity.push(false),
72            None => self.init_validity(),
73        }
74    }
75
76    #[inline]
77    pub fn push_opt(&mut self, arr: Option<&'a dyn Array>) {
78        match arr {
79            None => self.push_null(),
80            Some(arr) => self.push(arr),
81        }
82    }
83
84    pub fn push_empty(&mut self) {
85        self.offsets.push(self.last_offset());
86        self.update_validity()
87    }
88
89    fn init_validity(&mut self) {
90        let len = self.offsets.len() - 1;
91        let mut validity = BitmapBuilder::with_capacity(self.offsets.capacity());
92        if len > 0 {
93            validity.extend_constant(len - 1, true);
94            validity.push(false);
95        }
96        self.validity = Some(validity)
97    }
98
99    fn update_validity(&mut self) {
100        if let Some(validity) = &mut self.validity {
101            validity.push(true)
102        }
103    }
104
105    pub fn finish(self, inner_dtype: Option<&ArrowDataType>) -> PolarsResult<ListArray<i64>> {
106        // SAFETY:
107        // offsets are monotonically increasing
108        let offsets = unsafe { Offsets::new_unchecked(self.offsets) };
109        let (inner_dtype, values) = if self.arrays.is_empty() {
110            let len = *offsets.last() as usize;
111            match inner_dtype {
112                None => {
113                    let values = NullArray::new(ArrowDataType::Null, len).boxed();
114                    (ArrowDataType::Null, values)
115                },
116                Some(inner_dtype) => {
117                    let values = new_null_array(inner_dtype.clone(), len);
118                    (inner_dtype.clone(), values)
119                },
120            }
121        } else {
122            let inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].dtype());
123
124            // check if there is a dtype that is not `Null`
125            // if we find it, we will convert the null arrays
126            // to empty arrays of this dtype, otherwise the concat kernel fails.
127            let mut non_null_dtype = None;
128            if is_nested_null(inner_dtype) {
129                for arr in &self.arrays {
130                    if !is_nested_null(arr.dtype()) {
131                        non_null_dtype = Some(arr.dtype());
132                        break;
133                    }
134                }
135            };
136
137            // there are null arrays found, ensure the types are correct.
138            if let Some(dtype) = non_null_dtype {
139                let arrays = self
140                    .arrays
141                    .iter()
142                    .map(|arr| {
143                        if is_nested_null(arr.dtype()) {
144                            convert_inner_type(&**arr, dtype)
145                        } else {
146                            arr.to_boxed()
147                        }
148                    })
149                    .collect::<Vec<_>>();
150
151                let values = concatenate::concatenate_unchecked(&arrays)?;
152                (dtype.clone(), values)
153            } else {
154                let values = concatenate::concatenate(&self.arrays)?;
155                (inner_dtype.clone(), values)
156            }
157        };
158        let dtype = ListArray::<i64>::default_datatype(inner_dtype);
159        Ok(ListArray::<i64>::new(
160            dtype,
161            offsets.into(),
162            values,
163            self.validity
164                .and_then(|validity| validity.into_opt_validity()),
165        ))
166    }
167}