Skip to main content

polars_arrow/array/dictionary/
mutable.rs

1use std::hash::Hash;
2use std::sync::Arc;
3
4use polars_error::PolarsResult;
5
6use super::value_map::ValueMap;
7use super::{DictionaryArray, DictionaryKey};
8use crate::array::indexable::{AsIndexed, Indexable};
9use crate::array::primitive::MutablePrimitiveArray;
10use crate::array::{Array, MutableArray, TryExtend, TryPush};
11use crate::bitmap::MutableBitmap;
12use crate::datatypes::ArrowDataType;
13
14#[derive(Debug)]
15pub struct MutableDictionaryArray<K: DictionaryKey, M: MutableArray> {
16    dtype: ArrowDataType,
17    map: ValueMap<K, M>,
18    // invariant: `max(keys) < map.values().len()`
19    keys: MutablePrimitiveArray<K>,
20}
21
22impl<K: DictionaryKey, M: MutableArray> From<MutableDictionaryArray<K, M>> for DictionaryArray<K> {
23    fn from(other: MutableDictionaryArray<K, M>) -> Self {
24        // SAFETY: the invariant of this struct ensures that this is up-held
25        unsafe {
26            DictionaryArray::<K>::try_new_unchecked(
27                other.dtype,
28                other.keys.into(),
29                other.map.into_values().as_box(),
30            )
31            .unwrap()
32        }
33    }
34}
35
36impl<K: DictionaryKey, M: MutableArray + Default> MutableDictionaryArray<K, M> {
37    /// Creates an empty [`MutableDictionaryArray`].
38    pub fn new(ordered: bool) -> Self {
39        Self::try_empty(M::default(), ordered).unwrap()
40    }
41
42    /// Creates an empty [`MutableDictionaryArray`] with the given value dtype.
43    pub fn empty_with_value_dtype(value_dtype: ArrowDataType, ordered: bool) -> Self {
44        let keys = MutablePrimitiveArray::<K>::new();
45        let dtype = ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_dtype), ordered);
46        Self {
47            dtype,
48            map: ValueMap::<K, M>::try_empty(M::default()).unwrap(),
49            keys,
50        }
51    }
52}
53
54impl<K: DictionaryKey, M: MutableArray + Default> Default for MutableDictionaryArray<K, M> {
55    fn default() -> Self {
56        Self::new(false)
57    }
58}
59
60impl<K: DictionaryKey, M: MutableArray> MutableDictionaryArray<K, M> {
61    /// Creates an empty [`MutableDictionaryArray`] from a given empty values array.
62    /// # Errors
63    /// Errors if the array is non-empty.
64    pub fn try_empty(values: M, ordered: bool) -> PolarsResult<Self> {
65        Ok(Self::from_value_map(
66            ValueMap::<K, M>::try_empty(values)?,
67            ordered,
68        ))
69    }
70
71    /// Creates an empty [`MutableDictionaryArray`] preloaded with a given dictionary of values.
72    /// Indices associated with those values are automatically assigned based on the order of
73    /// the values.
74    /// # Errors
75    /// Errors if there's more values than the maximum value of `K` or if values are not unique.
76    pub fn from_values(values: M, ordered: bool) -> PolarsResult<Self>
77    where
78        M: Indexable,
79        M::Type: Eq + Hash,
80    {
81        Ok(Self::from_value_map(
82            ValueMap::<K, M>::from_values(values)?,
83            ordered,
84        ))
85    }
86
87    fn from_value_map(value_map: ValueMap<K, M>, ordered: bool) -> Self {
88        let keys = MutablePrimitiveArray::<K>::new();
89        let dtype =
90            ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_map.dtype().clone()), ordered);
91        Self {
92            dtype,
93            map: value_map,
94            keys,
95        }
96    }
97
98    /// Creates an empty [`MutableDictionaryArray`] retaining the same dictionary as the current
99    /// mutable dictionary array, but with no data. This may come useful when serializing the
100    /// array into multiple chunks, where there's a requirement that the dictionary is the same.
101    /// No copying is performed, the value map is moved over to the new array.
102    pub fn into_empty(self, ordered: bool) -> Self {
103        Self::from_value_map(self.map, ordered)
104    }
105
106    /// Same as `into_empty` but clones the inner value map instead of taking full ownership.
107    pub fn to_empty(&self, ordered: bool) -> Self
108    where
109        M: Clone,
110    {
111        Self::from_value_map(self.map.clone(), ordered)
112    }
113
114    /// pushes a null value
115    pub fn push_null(&mut self) {
116        self.keys.push(None)
117    }
118
119    /// returns a reference to the inner values.
120    pub fn values(&self) -> &M {
121        self.map.values()
122    }
123
124    /// converts itself into [`Arc<dyn Array>`]
125    pub fn into_arc(self) -> Arc<dyn Array> {
126        let a: DictionaryArray<K> = self.into();
127        Arc::new(a)
128    }
129
130    /// converts itself into [`Box<dyn Array>`]
131    pub fn into_box(self) -> Box<dyn Array> {
132        let a: DictionaryArray<K> = self.into();
133        Box::new(a)
134    }
135
136    /// Reserves `additional` slots.
137    pub fn reserve(&mut self, additional: usize) {
138        self.keys.reserve(additional);
139    }
140
141    /// Shrinks the capacity of the [`MutableDictionaryArray`] to fit its current length.
142    pub fn shrink_to_fit(&mut self) {
143        self.map.shrink_to_fit();
144        self.keys.shrink_to_fit();
145    }
146
147    /// Returns the dictionary keys
148    pub fn keys(&self) -> &MutablePrimitiveArray<K> {
149        &self.keys
150    }
151
152    fn take_into(&mut self) -> DictionaryArray<K> {
153        DictionaryArray::<K>::try_new(
154            self.dtype.clone(),
155            std::mem::take(&mut self.keys).into(),
156            self.map.take_into(),
157        )
158        .unwrap()
159    }
160}
161
162impl<K: DictionaryKey, M: 'static + MutableArray> MutableArray for MutableDictionaryArray<K, M> {
163    fn len(&self) -> usize {
164        self.keys.len()
165    }
166
167    fn validity(&self) -> Option<&MutableBitmap> {
168        self.keys.validity()
169    }
170
171    fn as_box(&mut self) -> Box<dyn Array> {
172        Box::new(self.take_into())
173    }
174
175    fn as_arc(&mut self) -> Arc<dyn Array> {
176        Arc::new(self.take_into())
177    }
178
179    fn dtype(&self) -> &ArrowDataType {
180        &self.dtype
181    }
182
183    fn as_any(&self) -> &dyn std::any::Any {
184        self
185    }
186
187    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
188        self
189    }
190
191    fn push_null(&mut self) {
192        self.keys.push(None)
193    }
194
195    fn reserve(&mut self, additional: usize) {
196        self.reserve(additional)
197    }
198
199    fn shrink_to_fit(&mut self) {
200        self.shrink_to_fit()
201    }
202}
203
204impl<K, M, T> TryExtend<Option<T>> for MutableDictionaryArray<K, M>
205where
206    K: DictionaryKey,
207    M: MutableArray + Indexable + TryExtend<Option<T>> + TryPush<Option<T>>,
208    T: AsIndexed<M>,
209    M::Type: Eq + Hash,
210{
211    fn try_extend<II: IntoIterator<Item = Option<T>>>(&mut self, iter: II) -> PolarsResult<()> {
212        for value in iter {
213            if let Some(value) = value {
214                let key = self
215                    .map
216                    .try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;
217                self.keys.try_push(Some(key))?;
218            } else {
219                self.push_null();
220            }
221        }
222        Ok(())
223    }
224}
225
226impl<K, M, T> TryPush<Option<T>> for MutableDictionaryArray<K, M>
227where
228    K: DictionaryKey,
229    M: MutableArray + Indexable + TryPush<Option<T>>,
230    T: AsIndexed<M>,
231    M::Type: Eq + Hash,
232{
233    fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> {
234        if let Some(value) = item {
235            let key = self
236                .map
237                .try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;
238            self.keys.try_push(Some(key))?;
239        } else {
240            self.push_null();
241        }
242        Ok(())
243    }
244}