Skip to main content

polars_arrow/array/dictionary/
mutable.rs

1use std::hash::Hash;
2use std::sync::Arc;
3
4use polars_error::PolarsResult;
5
6use super::value_map::ValueMap;
7use super::{DictionaryArray, DictionaryKey};
8use crate::array::indexable::{AsIndexed, Indexable};
9use crate::array::primitive::MutablePrimitiveArray;
10use crate::array::{Array, MutableArray, TryExtend, TryPush};
11use crate::bitmap::MutableBitmap;
12use crate::datatypes::ArrowDataType;
13
14#[derive(Debug)]
15pub struct MutableDictionaryArray<K: DictionaryKey, M: MutableArray> {
16    dtype: ArrowDataType,
17    map: ValueMap<K, M>,
18    // invariant: `max(keys) < map.values().len()`
19    keys: MutablePrimitiveArray<K>,
20}
21
22impl<K: DictionaryKey, M: MutableArray> From<MutableDictionaryArray<K, M>> for DictionaryArray<K> {
23    fn from(other: MutableDictionaryArray<K, M>) -> Self {
24        // SAFETY: the invariant of this struct ensures that this is up-held
25        unsafe {
26            DictionaryArray::<K>::try_new_unchecked(
27                other.dtype,
28                other.keys.into(),
29                other.map.into_values().as_box(),
30            )
31            .unwrap()
32        }
33    }
34}
35
36impl<K: DictionaryKey, M: MutableArray + Default> MutableDictionaryArray<K, M> {
37    /// Creates an empty [`MutableDictionaryArray`].
38    pub fn new() -> Self {
39        Self::try_empty(M::default()).unwrap()
40    }
41
42    /// Creates an empty [`MutableDictionaryArray`] with the given value dtype.
43    pub fn empty_with_value_dtype(value_dtype: ArrowDataType) -> Self {
44        let keys = MutablePrimitiveArray::<K>::new();
45        let dtype = ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_dtype), false);
46        Self {
47            dtype,
48            map: ValueMap::<K, M>::try_empty(M::default()).unwrap(),
49            keys,
50        }
51    }
52}
53
54impl<K: DictionaryKey, M: MutableArray + Default> Default for MutableDictionaryArray<K, M> {
55    fn default() -> Self {
56        Self::new()
57    }
58}
59
60impl<K: DictionaryKey, M: MutableArray> MutableDictionaryArray<K, M> {
61    /// Creates an empty [`MutableDictionaryArray`] from a given empty values array.
62    /// # Errors
63    /// Errors if the array is non-empty.
64    pub fn try_empty(values: M) -> PolarsResult<Self> {
65        Ok(Self::from_value_map(ValueMap::<K, M>::try_empty(values)?))
66    }
67
68    /// Creates an empty [`MutableDictionaryArray`] preloaded with a given dictionary of values.
69    /// Indices associated with those values are automatically assigned based on the order of
70    /// the values.
71    /// # Errors
72    /// Errors if there's more values than the maximum value of `K` or if values are not unique.
73    pub fn from_values(values: M) -> PolarsResult<Self>
74    where
75        M: Indexable,
76        M::Type: Eq + Hash,
77    {
78        Ok(Self::from_value_map(ValueMap::<K, M>::from_values(values)?))
79    }
80
81    fn from_value_map(value_map: ValueMap<K, M>) -> Self {
82        let keys = MutablePrimitiveArray::<K>::new();
83        let dtype =
84            ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_map.dtype().clone()), false);
85        Self {
86            dtype,
87            map: value_map,
88            keys,
89        }
90    }
91
92    /// Creates an empty [`MutableDictionaryArray`] retaining the same dictionary as the current
93    /// mutable dictionary array, but with no data. This may come useful when serializing the
94    /// array into multiple chunks, where there's a requirement that the dictionary is the same.
95    /// No copying is performed, the value map is moved over to the new array.
96    pub fn into_empty(self) -> Self {
97        Self::from_value_map(self.map)
98    }
99
100    /// Same as `into_empty` but clones the inner value map instead of taking full ownership.
101    pub fn to_empty(&self) -> Self
102    where
103        M: Clone,
104    {
105        Self::from_value_map(self.map.clone())
106    }
107
108    /// pushes a null value
109    pub fn push_null(&mut self) {
110        self.keys.push(None)
111    }
112
113    /// returns a reference to the inner values.
114    pub fn values(&self) -> &M {
115        self.map.values()
116    }
117
118    /// converts itself into [`Arc<dyn Array>`]
119    pub fn into_arc(self) -> Arc<dyn Array> {
120        let a: DictionaryArray<K> = self.into();
121        Arc::new(a)
122    }
123
124    /// converts itself into [`Box<dyn Array>`]
125    pub fn into_box(self) -> Box<dyn Array> {
126        let a: DictionaryArray<K> = self.into();
127        Box::new(a)
128    }
129
130    /// Reserves `additional` slots.
131    pub fn reserve(&mut self, additional: usize) {
132        self.keys.reserve(additional);
133    }
134
135    /// Shrinks the capacity of the [`MutableDictionaryArray`] to fit its current length.
136    pub fn shrink_to_fit(&mut self) {
137        self.map.shrink_to_fit();
138        self.keys.shrink_to_fit();
139    }
140
141    /// Returns the dictionary keys
142    pub fn keys(&self) -> &MutablePrimitiveArray<K> {
143        &self.keys
144    }
145
146    fn take_into(&mut self) -> DictionaryArray<K> {
147        DictionaryArray::<K>::try_new(
148            self.dtype.clone(),
149            std::mem::take(&mut self.keys).into(),
150            self.map.take_into(),
151        )
152        .unwrap()
153    }
154}
155
156impl<K: DictionaryKey, M: 'static + MutableArray> MutableArray for MutableDictionaryArray<K, M> {
157    fn len(&self) -> usize {
158        self.keys.len()
159    }
160
161    fn validity(&self) -> Option<&MutableBitmap> {
162        self.keys.validity()
163    }
164
165    fn as_box(&mut self) -> Box<dyn Array> {
166        Box::new(self.take_into())
167    }
168
169    fn as_arc(&mut self) -> Arc<dyn Array> {
170        Arc::new(self.take_into())
171    }
172
173    fn dtype(&self) -> &ArrowDataType {
174        &self.dtype
175    }
176
177    fn as_any(&self) -> &dyn std::any::Any {
178        self
179    }
180
181    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
182        self
183    }
184
185    fn push_null(&mut self) {
186        self.keys.push(None)
187    }
188
189    fn reserve(&mut self, additional: usize) {
190        self.reserve(additional)
191    }
192
193    fn shrink_to_fit(&mut self) {
194        self.shrink_to_fit()
195    }
196}
197
198impl<K, M, T> TryExtend<Option<T>> for MutableDictionaryArray<K, M>
199where
200    K: DictionaryKey,
201    M: MutableArray + Indexable + TryExtend<Option<T>> + TryPush<Option<T>>,
202    T: AsIndexed<M>,
203    M::Type: Eq + Hash,
204{
205    fn try_extend<II: IntoIterator<Item = Option<T>>>(&mut self, iter: II) -> PolarsResult<()> {
206        for value in iter {
207            if let Some(value) = value {
208                let key = self
209                    .map
210                    .try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;
211                self.keys.try_push(Some(key))?;
212            } else {
213                self.push_null();
214            }
215        }
216        Ok(())
217    }
218}
219
220impl<K, M, T> TryPush<Option<T>> for MutableDictionaryArray<K, M>
221where
222    K: DictionaryKey,
223    M: MutableArray + Indexable + TryPush<Option<T>>,
224    T: AsIndexed<M>,
225    M::Type: Eq + Hash,
226{
227    fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> {
228        if let Some(value) = item {
229            let key = self
230                .map
231                .try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;
232            self.keys.try_push(Some(key))?;
233        } else {
234            self.push_null();
235        }
236        Ok(())
237    }
238}