polars_core/chunked_array/list/
mod.rs1pub(super) mod iterator;
3
4use std::borrow::Cow;
5
6use polars_utils::itertools::Itertools;
7
8use crate::prelude::*;
9
10impl ListChunked {
11 pub fn inner_dtype(&self) -> &DataType {
13 match self.dtype() {
14 DataType::List(dt) => dt.as_ref(),
15 _ => unreachable!(),
16 }
17 }
18
19 pub fn set_inner_dtype(&mut self, dtype: DataType) {
23 assert_eq!(dtype.to_physical(), self.inner_dtype().to_physical());
24 let field = Arc::make_mut(&mut self.field);
25 field.coerce(DataType::List(Box::new(dtype)));
26 }
27
28 pub fn set_fast_explode(&mut self) {
29 self.set_fast_explode_list(true)
30 }
31
32 pub fn _can_fast_explode(&self) -> bool {
33 self.get_fast_explode_list()
34 }
35
36 pub unsafe fn to_logical(&mut self, inner_dtype: DataType) {
41 debug_assert_eq!(&inner_dtype.to_physical(), self.inner_dtype());
42 let fld = Arc::make_mut(&mut self.field);
43 fld.coerce(DataType::List(Box::new(inner_dtype)))
44 }
45
46 pub fn to_physical_repr(&self) -> Cow<'_, ListChunked> {
48 let Cow::Owned(physical_repr) = self.get_inner().to_physical_repr() else {
49 return Cow::Borrowed(self);
50 };
51
52 let ca = if physical_repr.chunks().len() == 1 && self.chunks().len() > 1 {
53 self.rechunk()
55 } else {
56 Cow::Borrowed(self)
57 };
58
59 assert_eq!(ca.chunks().len(), physical_repr.chunks().len());
60
61 let chunks: Vec<_> = ca
62 .downcast_iter()
63 .zip(physical_repr.into_chunks())
64 .map(|(chunk, values)| {
65 LargeListArray::new(
66 ArrowDataType::LargeList(Box::new(ArrowField::new(
67 LIST_VALUES_NAME,
68 values.dtype().clone(),
69 true,
70 ))),
71 chunk.offsets().clone(),
72 values,
73 chunk.validity().cloned(),
74 )
75 .to_boxed()
76 })
77 .collect();
78
79 let name = self.name().clone();
80 let dtype = DataType::List(Box::new(self.inner_dtype().to_physical()));
81 Cow::Owned(unsafe { ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
82 }
83
84 pub unsafe fn from_physical_unchecked(
90 &self,
91 to_inner_dtype: DataType,
92 ) -> PolarsResult<ListChunked> {
93 debug_assert!(!self.inner_dtype().is_logical());
94
95 let inner_chunks = self
96 .downcast_iter()
97 .map(|chunk| chunk.values())
98 .cloned()
99 .collect();
100
101 let inner = unsafe {
102 Series::from_chunks_and_dtype_unchecked(
103 PlSmallStr::EMPTY,
104 inner_chunks,
105 self.inner_dtype(),
106 )
107 };
108 let inner = unsafe { inner.from_physical_unchecked(&to_inner_dtype) }?;
109
110 let chunks: Vec<_> = self
111 .downcast_iter()
112 .zip(inner.into_chunks())
113 .map(|(chunk, values)| {
114 LargeListArray::new(
115 ArrowDataType::LargeList(Box::new(ArrowField::new(
116 LIST_VALUES_NAME,
117 values.dtype().clone(),
118 true,
119 ))),
120 chunk.offsets().clone(),
121 values,
122 chunk.validity().cloned(),
123 )
124 .to_boxed()
125 })
126 .collect();
127
128 let name = self.name().clone();
129 let dtype = DataType::List(Box::new(to_inner_dtype));
130 Ok(unsafe { ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
131 }
132
133 pub fn get_inner(&self) -> Series {
135 let chunks: Vec<_> = self.downcast_iter().map(|c| c.values().clone()).collect();
136
137 unsafe {
139 Series::from_chunks_and_dtype_unchecked(self.name().clone(), chunks, self.inner_dtype())
140 }
141 }
142
143 pub fn inner_length(&self) -> usize {
144 self.downcast_iter().map(|c| c.values().len()).sum()
145 }
146
147 pub fn apply_to_inner(
149 &self,
150 func: &dyn Fn(Series) -> PolarsResult<Series>,
151 ) -> PolarsResult<ListChunked> {
152 let ca = self.rechunk();
154 let arr = ca.downcast_as_array();
155
156 let elements = unsafe {
159 Series::from_chunks_and_dtype_unchecked(
160 self.name().clone(),
161 vec![arr.values().clone()],
162 ca.inner_dtype(),
163 )
164 };
165
166 let expected_len = elements.len();
167 let out: Series = func(elements)?;
168 polars_ensure!(
169 out.len() == expected_len,
170 ComputeError: "the function should apply element-wise, it removed elements instead"
171 );
172 let out = out.rechunk();
173 let values = out.chunks()[0].clone();
174
175 let inner_dtype = LargeListArray::default_datatype(values.dtype().clone());
176 let arr = LargeListArray::new(
177 inner_dtype,
178 (*arr.offsets()).clone(),
179 values,
180 arr.validity().cloned(),
181 );
182
183 Ok(unsafe {
185 ListChunked::from_chunks_and_dtype_unchecked(
186 ca.name().clone(),
187 vec![Box::new(arr)],
188 DataType::List(Box::new(out.dtype().clone())),
189 )
190 })
191 }
192
193 pub fn with_inner_values(&self, values: &Series) -> ListChunked {
194 if cfg!(debug_assertions) {
195 assert_eq!(values.len(), self.inner_length());
196 }
197
198 fn align_inner_chunks(ca: &'_ ListChunked, values: &'_ Series) -> Series {
200 if ca.chunks().len() == values.chunks().len()
201 && ca
202 .downcast_iter()
203 .map(|arr| arr.values().len())
204 .zip(values.chunks().iter().map(|arr| arr.len()))
205 .all_equal()
206 {
207 return values.clone();
208 }
209
210 let mut values = values.rechunk();
211 let chunks = unsafe { values.chunks_mut() };
212 let mut arr = chunks.pop().unwrap();
213 chunks.extend(ca.downcast_iter().map(|ca_arr| {
214 let chunk;
215 (chunk, arr) = arr.split_at_boxed(ca_arr.values().len());
216 chunk
217 }));
218 assert!(arr.is_empty());
219 values
220 }
221
222 let values = align_inner_chunks(self, values);
223 let values_dtype = values.dtype().clone();
224
225 let chunks = self
226 .downcast_iter()
227 .zip(values.into_chunks())
228 .map(|(ca_arr, v_arr)| {
229 debug_assert_eq!(ca_arr.values().len(), v_arr.len());
230 LargeListArray::new(
231 LargeListArray::default_datatype(v_arr.dtype().clone()),
232 (ca_arr.offsets()).clone(),
233 v_arr,
234 ca_arr.validity().cloned(),
235 )
236 .to_boxed()
237 })
238 .collect::<Vec<_>>();
239
240 unsafe {
242 ListChunked::from_chunks_and_dtype_unchecked(
243 self.name().clone(),
244 chunks,
245 DataType::List(Box::new(values_dtype)),
246 )
247 }
248 }
249}