polars_core/chunked_array/array/
mod.rs

1//! Special fixed-size-list utility methods
2
3mod iterator;
4
5use std::borrow::Cow;
6
7use crate::prelude::*;
8
9impl ArrayChunked {
10    /// Get the inner data type of the fixed size list.
11    pub fn inner_dtype(&self) -> &DataType {
12        match self.dtype() {
13            DataType::Array(dt, _size) => dt.as_ref(),
14            _ => unreachable!(),
15        }
16    }
17
18    pub fn width(&self) -> usize {
19        match self.dtype() {
20            DataType::Array(_dt, size) => *size,
21            _ => unreachable!(),
22        }
23    }
24
25    /// # Safety
26    /// The caller must ensure that the logical type given fits the physical type of the array.
27    pub unsafe fn to_logical(&mut self, inner_dtype: DataType) {
28        debug_assert_eq!(&inner_dtype.to_physical(), self.inner_dtype());
29        let width = self.width();
30        let fld = Arc::make_mut(&mut self.field);
31        fld.coerce(DataType::Array(Box::new(inner_dtype), width))
32    }
33
34    /// Convert the datatype of the array into the physical datatype.
35    pub fn to_physical_repr(&self) -> Cow<ArrayChunked> {
36        let Cow::Owned(physical_repr) = self.get_inner().to_physical_repr() else {
37            return Cow::Borrowed(self);
38        };
39
40        assert_eq!(self.chunks().len(), physical_repr.chunks().len());
41
42        let width = self.width();
43        let chunks: Vec<_> = self
44            .downcast_iter()
45            .zip(physical_repr.into_chunks())
46            .map(|(chunk, values)| {
47                FixedSizeListArray::new(
48                    ArrowDataType::FixedSizeList(
49                        Box::new(ArrowField::new(
50                            PlSmallStr::from_static("item"),
51                            values.dtype().clone(),
52                            true,
53                        )),
54                        width,
55                    ),
56                    chunk.len(),
57                    values,
58                    chunk.validity().cloned(),
59                )
60                .to_boxed()
61            })
62            .collect();
63
64        let name = self.name().clone();
65        let dtype = DataType::Array(Box::new(self.inner_dtype().to_physical()), width);
66        Cow::Owned(unsafe { ArrayChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
67    }
68
69    /// Convert a non-logical [`ArrayChunked`] back into a logical [`ArrayChunked`] without casting.
70    ///
71    /// # Safety
72    ///
73    /// This can lead to invalid memory access in downstream code.
74    pub unsafe fn from_physical_unchecked(&self, to_inner_dtype: DataType) -> PolarsResult<Self> {
75        debug_assert!(!self.inner_dtype().is_logical());
76
77        let chunks = self
78            .downcast_iter()
79            .map(|chunk| chunk.values())
80            .cloned()
81            .collect();
82
83        let inner = unsafe {
84            Series::from_chunks_and_dtype_unchecked(PlSmallStr::EMPTY, chunks, self.inner_dtype())
85        };
86        let inner = unsafe { inner.from_physical_unchecked(&to_inner_dtype) }?;
87
88        let chunks: Vec<_> = self
89            .downcast_iter()
90            .zip(inner.into_chunks())
91            .map(|(chunk, values)| {
92                FixedSizeListArray::new(
93                    ArrowDataType::FixedSizeList(
94                        Box::new(ArrowField::new(
95                            PlSmallStr::from_static("item"),
96                            values.dtype().clone(),
97                            true,
98                        )),
99                        self.width(),
100                    ),
101                    chunk.len(),
102                    values,
103                    chunk.validity().cloned(),
104                )
105                .to_boxed()
106            })
107            .collect();
108
109        let name = self.name().clone();
110        let dtype = DataType::Array(Box::new(to_inner_dtype), self.width());
111        Ok(unsafe { Self::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
112    }
113
114    /// Get the inner values as `Series`
115    pub fn get_inner(&self) -> Series {
116        let chunks: Vec<_> = self.downcast_iter().map(|c| c.values().clone()).collect();
117
118        // SAFETY: Data type of arrays matches because they are chunks from the same array.
119        unsafe {
120            Series::from_chunks_and_dtype_unchecked(self.name().clone(), chunks, self.inner_dtype())
121        }
122    }
123
124    /// Ignore the list indices and apply `func` to the inner type as [`Series`].
125    pub fn apply_to_inner(
126        &self,
127        func: &dyn Fn(Series) -> PolarsResult<Series>,
128    ) -> PolarsResult<ArrayChunked> {
129        // Rechunk or the generated Series will have wrong length.
130        let ca = self.rechunk();
131        let field = self
132            .inner_dtype()
133            .to_arrow_field(PlSmallStr::from_static("item"), CompatLevel::newest());
134
135        let chunks = ca.downcast_iter().map(|arr| {
136            let elements = unsafe {
137                Series::_try_from_arrow_unchecked_with_md(
138                    self.name().clone(),
139                    vec![(*arr.values()).clone()],
140                    &field.dtype,
141                    field.metadata.as_deref(),
142                )
143                .unwrap()
144            };
145
146            let expected_len = elements.len();
147            let out: Series = func(elements)?;
148            polars_ensure!(
149                out.len() == expected_len,
150                ComputeError: "the function should apply element-wise, it removed elements instead"
151            );
152            let out = out.rechunk();
153            let values = out.chunks()[0].clone();
154
155            let inner_dtype = FixedSizeListArray::default_datatype(
156                out.dtype().to_arrow(CompatLevel::newest()),
157                ca.width(),
158            );
159            let arr =
160                FixedSizeListArray::new(inner_dtype, arr.len(), values, arr.validity().cloned());
161            Ok(arr)
162        });
163
164        ArrayChunked::try_from_chunk_iter(self.name().clone(), chunks)
165    }
166
167    /// Recurse nested types until we are at the leaf array.
168    pub fn get_leaf_array(&self) -> Series {
169        let mut current = self.get_inner();
170        while let Some(child_array) = current.try_array() {
171            current = child_array.get_inner();
172        }
173        current
174    }
175}