polars_core/series/implementations/null.rs

use std::any::Any;

use polars_error::constants::LENGTH_LIMIT_MSG;

use self::compare_inner::TotalOrdInner;
use super::*;
use crate::chunked_array::ops::compare_inner::{IntoTotalEqInner, NonNull, TotalEqInner};
use crate::chunked_array::ops::sort::arg_sort_multiple::arg_sort_multiple_impl;
use crate::prelude::*;
use crate::series::private::{PrivateSeries, PrivateSeriesNumeric};
use crate::series::*;

impl Series {
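    /// Construct a new all-null `Series` of dtype `Null` with the given name and length.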
    pub fn new_null(name: PlSmallStr, len: usize) -> Series {
        NullChunked::new(name, len).into_series()
    }
}

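/// Backing implementation of an all-null `Series`: it only tracks a name, a length and
/// placeholder `NullArray` chunks, since most series consumers expect chunks to exist.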
#[derive(Clone)]
pub struct NullChunked {
    pub(crate) name: PlSmallStr,
    length: IdxSize,
    // we still need chunks as many series consumers expect
    // chunks to be there
    chunks: Vec<ArrayRef>,
}

impl NullChunked {
    pub(crate) fn new(name: PlSmallStr, len: usize) -> Self {
        Self {
            name,
            length: len as IdxSize,
            chunks: vec![Box::new(arrow::array::NullArray::new(
                ArrowDataType::Null,
                len,
            ))],
        }
    }

    pub fn len(&self) -> usize {
        self.length as usize
    }
}
impl PrivateSeriesNumeric for NullChunked {
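    // Expose an all-null 32-bit representation for code paths that require a bit representation.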
    fn bit_repr(&self) -> Option<BitRepr> {
        Some(BitRepr::Small(UInt32Chunked::full_null(
            self.name.clone(),
            self.len(),
        )))
    }
}

impl PrivateSeries for NullChunked {
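    // Recompute the cached `length` from the chunks, e.g. after the chunks were mutated.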
    fn compute_len(&mut self) {
        fn inner(chunks: &[ArrayRef]) -> usize {
            match chunks.len() {
                // fast path
                1 => chunks[0].len(),
                _ => chunks.iter().fold(0, |acc, arr| acc + arr.len()),
            }
        }
        self.length = IdxSize::try_from(inner(&self.chunks)).expect(LENGTH_LIMIT_MSG);
    }
    fn _field(&self) -> Cow<Field> {
        Cow::Owned(Field::new(self.name().clone(), DataType::Null))
    }

    #[allow(unused)]
    fn _set_flags(&mut self, flags: StatisticsFlags) {}

    fn _dtype(&self) -> &DataType {
        &DataType::Null
    }

    #[cfg(feature = "zip_with")]
    fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult<Series> {
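        // Only the output length matters here: equal lengths pass through, unit-length
        // arguments broadcast, and an empty mask yields an empty result; the output is
        // all-null regardless of the mask's values.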
        let len = match (self.len(), mask.len(), other.len()) {
            (a, b, c) if a == b && b == c => a,
            (1, a, b) | (a, 1, b) | (a, b, 1) if a == b => a,
            (a, 1, 1) | (1, a, 1) | (1, 1, a) => a,
            (_, 0, _) => 0,
            _ => {
                polars_bail!(ShapeMismatch: "shapes of `self`, `mask` and `other` are not suitable for `zip_with` operation")
            },
        };

        Ok(Self::new(self.name().clone(), len).into_series())
    }

    fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
        IntoTotalEqInner::into_total_eq_inner(self)
    }
    fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
        IntoTotalOrdInner::into_total_ord_inner(self)
    }

    fn subtract(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "subtract")
    }

    fn add_to(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "add_to")
    }
    fn multiply(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "multiply")
    }
    fn divide(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "divide")
    }
    fn remainder(&self, _rhs: &Series) -> PolarsResult<Series> {
        null_arithmetic(self, _rhs, "remainder")
    }

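    // Every row is null, so all rows belong to a single slice group (or none when empty).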
    #[cfg(feature = "algorithm_group_by")]
    fn group_tuples(&self, _multithreaded: bool, _sorted: bool) -> PolarsResult<GroupsType> {
        Ok(if self.is_empty() {
            GroupsType::default()
        } else {
            GroupsType::Slice {
                groups: vec![[0, self.length]],
                rolling: false,
            }
        })
    }

    #[cfg(feature = "algorithm_group_by")]
    unsafe fn agg_list(&self, groups: &GroupsType) -> Series {
        AggList::agg_list(self, groups)
    }

    fn _get_flags(&self) -> StatisticsFlags {
        StatisticsFlags::empty()
    }

    fn vec_hash(
        &self,
        random_state: PlSeedableRandomStateQuality,
        buf: &mut Vec<u64>,
    ) -> PolarsResult<()> {
        VecHash::vec_hash(self, random_state, buf)?;
        Ok(())
    }

    fn vec_hash_combine(
        &self,
        build_hasher: PlSeedableRandomStateQuality,
        hashes: &mut [u64],
    ) -> PolarsResult<()> {
        VecHash::vec_hash_combine(self, build_hasher, hashes)?;
        Ok(())
    }

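    // Every value is null and compares equal, so the primary sort key is constant and the
    // ordering is effectively decided by the `by` columns.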
    fn arg_sort_multiple(
        &self,
        by: &[Column],
        options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        let vals = (0..self.len())
            .map(|i| (i as IdxSize, NonNull(())))
            .collect();
        arg_sort_multiple_impl(vals, by, options)
    }
}

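// Arithmetic with an all-null operand produces an all-null result; a unit-length operand
// broadcasts, otherwise both lengths must match.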
fn null_arithmetic(lhs: &NullChunked, rhs: &Series, op: &str) -> PolarsResult<Series> {
    let output_len = match (lhs.len(), rhs.len()) {
        (1, len_r) => len_r,
        (len_l, 1) => len_l,
        (len_l, len_r) if len_l == len_r => len_l,
        _ => polars_bail!(ComputeError: "Cannot {:?} two series of different lengths.", op),
    };
    Ok(NullChunked::new(lhs.name().clone(), output_len).into_series())
}

impl SeriesTrait for NullChunked {
    fn name(&self) -> &PlSmallStr {
        &self.name
    }

    fn rename(&mut self, name: PlSmallStr) {
        self.name = name
    }

    fn chunks(&self) -> &Vec<ArrayRef> {
        &self.chunks
    }
    unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
        &mut self.chunks
    }

    fn chunk_lengths(&self) -> ChunkLenIter {
        self.chunks.iter().map(|chunk| chunk.len())
    }

    fn take(&self, indices: &IdxCa) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_unchecked(&self, indices: &IdxCa) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Series> {
        Ok(NullChunked::new(self.name.clone(), indices.len()).into_series())
    }

    unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Series {
        NullChunked::new(self.name.clone(), indices.len()).into_series()
    }

    fn len(&self) -> usize {
        self.length as usize
    }

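    // Every element is null, so any non-empty null series has nulls.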
    fn has_nulls(&self) -> bool {
        self.len() > 0
    }

    fn rechunk(&self) -> Series {
        NullChunked::new(self.name.clone(), self.len()).into_series()
    }

    fn drop_nulls(&self) -> Series {
        NullChunked::new(self.name.clone(), 0).into_series()
    }

    fn cast(&self, dtype: &DataType, _cast_options: CastOptions) -> PolarsResult<Series> {
        Ok(Series::full_null(self.name.clone(), self.len(), dtype))
    }

    fn null_count(&self) -> usize {
        self.len()
    }

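    // All values are identical (null), so a non-empty series has exactly one unique value.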
    #[cfg(feature = "algorithm_group_by")]
    fn unique(&self) -> PolarsResult<Series> {
        let ca = NullChunked::new(self.name.clone(), self.n_unique().unwrap());
        Ok(ca.into_series())
    }

    #[cfg(feature = "algorithm_group_by")]
    fn n_unique(&self) -> PolarsResult<usize> {
        let n = if self.is_empty() { 0 } else { 1 };
        Ok(n)
    }

    #[cfg(feature = "algorithm_group_by")]
    fn arg_unique(&self) -> PolarsResult<IdxCa> {
        let idxs: Vec<IdxSize> = (0..self.n_unique().unwrap() as IdxSize).collect();
        Ok(IdxCa::new(self.name().clone(), idxs))
    }

    fn new_from_index(&self, _index: usize, length: usize) -> Series {
        NullChunked::new(self.name.clone(), length).into_series()
    }

    unsafe fn get_unchecked(&self, _index: usize) -> AnyValue {
        AnyValue::Null
    }

    fn slice(&self, offset: i64, length: usize) -> Series {
        let (chunks, len) = chunkops::slice(&self.chunks, offset, length, self.len());
        NullChunked {
            name: self.name.clone(),
            length: len as IdxSize,
            chunks,
        }
        .into_series()
    }

    fn split_at(&self, offset: i64) -> (Series, Series) {
        let (l, r) = chunkops::split_at(self.chunks(), offset, self.len());
        (
            NullChunked {
                name: self.name.clone(),
                length: l.iter().map(|arr| arr.len() as IdxSize).sum(),
                chunks: l,
            }
            .into_series(),
            NullChunked {
                name: self.name.clone(),
                length: r.iter().map(|arr| arr.len() as IdxSize).sum(),
                chunks: r,
            }
            .into_series(),
        )
    }

    fn sort_with(&self, _options: SortOptions) -> PolarsResult<Series> {
        Ok(self.clone().into_series())
    }

    fn arg_sort(&self, _options: SortOptions) -> IdxCa {
        IdxCa::from_vec(self.name().clone(), (0..self.len() as IdxSize).collect())
    }

    fn is_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), true, self.len())
    }

    fn is_not_null(&self) -> BooleanChunked {
        BooleanChunked::full(self.name().clone(), false, self.len())
    }

    fn reverse(&self) -> Series {
        self.clone().into_series()
    }

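    // Filtering only needs to produce the right output length: an empty series accepts a
    // filter of length <= 1 (it could be `lit(true)`), a unit-length filter broadcasts, and
    // otherwise the lengths must match and the result length is the number of `true` values.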
    fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Series> {
        let len = if self.is_empty() {
            // We still allow a length of `1` because it could be `lit(true)`.
            polars_ensure!(filter.len() <= 1, ShapeMismatch: "filter's length: {} differs from that of the series: 0", filter.len());
            0
        } else if filter.len() == 1 {
            return match filter.get(0) {
                Some(true) => Ok(self.clone().into_series()),
                None | Some(false) => Ok(NullChunked::new(self.name.clone(), 0).into_series()),
            };
        } else {
            polars_ensure!(filter.len() == self.len(), ShapeMismatch: "filter's length: {} differs from that of the series: {}", filter.len(), self.len());
            filter.sum().unwrap_or(0) as usize
        };
        Ok(NullChunked::new(self.name.clone(), len).into_series())
    }

    fn shift(&self, _periods: i64) -> Series {
        self.clone().into_series()
    }

    fn append(&mut self, other: &Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        self.length += other.len() as IdxSize;
        self.chunks.extend(other.chunks().iter().cloned());
        Ok(())
    }
    fn append_owned(&mut self, mut other: Series) -> PolarsResult<()> {
        polars_ensure!(other.dtype() == &DataType::Null, ComputeError: "expected null dtype");
        // we don't create a new null array to keep probability of aligned chunks higher
        let other: &mut NullChunked = other._get_inner_mut().as_any_mut().downcast_mut().unwrap();
        self.length += other.len() as IdxSize;
        self.chunks.extend(std::mem::take(&mut other.chunks));
        Ok(())
    }

    fn extend(&mut self, other: &Series) -> PolarsResult<()> {
        *self = NullChunked::new(self.name.clone(), self.len() + other.len());
        Ok(())
    }

    fn clone_inner(&self) -> Arc<dyn SeriesTrait> {
        Arc::new(self.clone())
    }

    fn find_validity_mismatch(&self, other: &Series, idxs: &mut Vec<IdxSize>) {
        ChunkNestingUtils::find_validity_mismatch(self, other, idxs)
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }

    fn as_phys_any(&self) -> &dyn Any {
        self
    }

    fn as_arc_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self as _
    }
}

unsafe impl IntoSeries for NullChunked {
    fn into_series(self) -> Series
    where
        Self: Sized,
    {
        Series(Arc::new(self))
    }
}