polars_core/chunked_array/ops/chunkops.rs

use std::cell::Cell;

use arrow::bitmap::{Bitmap, BitmapBuilder};
use arrow::legacy::kernels::concatenate::concatenate_owned_unchecked;
use polars_error::constants::LENGTH_LIMIT_MSG;

use super::*;
use crate::chunked_array::flags::StatisticsFlags;
#[cfg(feature = "object")]
use crate::chunked_array::object::builder::ObjectChunkedBuilder;
use crate::utils::slice_offsets;

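/// Split `chunks` into the chunks before and after the row `offset` (counted over
/// all chunks). A negative `offset` is resolved relative to the end via
/// `slice_offsets`. Both halves always contain at least one (possibly empty) chunk.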
pub(crate) fn split_at(
    chunks: &[ArrayRef],
    offset: i64,
    own_length: usize,
) -> (Vec<ArrayRef>, Vec<ArrayRef>) {
    let mut new_chunks_left = Vec::with_capacity(1);
    let mut new_chunks_right = Vec::with_capacity(1);
    let (raw_offset, _) = slice_offsets(offset, 0, own_length);

    let mut remaining_offset = raw_offset;
    let mut iter = chunks.iter();

    for chunk in &mut iter {
        let chunk_len = chunk.len();
        if remaining_offset > 0 && remaining_offset >= chunk_len {
            remaining_offset -= chunk_len;
            new_chunks_left.push(chunk.clone());
            continue;
        }

        let (l, r) = chunk.split_at_boxed(remaining_offset);
        new_chunks_left.push(l);
        new_chunks_right.push(r);
        break;
    }

    for chunk in iter {
        new_chunks_right.push(chunk.clone())
    }
    if new_chunks_left.is_empty() {
        new_chunks_left.push(chunks[0].sliced(0, 0));
    }
    if new_chunks_right.is_empty() {
        new_chunks_right.push(chunks[0].sliced(0, 0));
    }
    (new_chunks_left, new_chunks_right)
}

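/// Slice `chunks` to `slice_length` rows starting at `offset` (counted over all
/// chunks), returning the new chunks and their total length. A negative `offset`
/// counts from the end; the result always contains at least one (possibly empty) chunk.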
pub(crate) fn slice(
    chunks: &[ArrayRef],
    offset: i64,
    slice_length: usize,
    own_length: usize,
) -> (Vec<ArrayRef>, usize) {
    let mut new_chunks = Vec::with_capacity(1);
    let (raw_offset, slice_len) = slice_offsets(offset, slice_length, own_length);

    let mut remaining_length = slice_len;
    let mut remaining_offset = raw_offset;
    let mut new_len = 0;

    for chunk in chunks {
        let chunk_len = chunk.len();
        if remaining_offset > 0 && remaining_offset >= chunk_len {
            remaining_offset -= chunk_len;
            continue;
        }
        let take_len = if remaining_length + remaining_offset > chunk_len {
            chunk_len - remaining_offset
        } else {
            remaining_length
        };
        new_len += take_len;

        debug_assert!(remaining_offset + take_len <= chunk.len());
        unsafe {
            new_chunks.push(chunk.sliced_unchecked(remaining_offset, take_len));
        }
        remaining_length -= take_len;
        remaining_offset = 0;
        if remaining_length == 0 {
            break;
        }
    }
    if new_chunks.is_empty() {
        new_chunks.push(chunks[0].sliced(0, 0));
    }
    (new_chunks, new_len)
}

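// Thread-local toggle for the row-count limit check performed in `compute_len`.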
thread_local!(static CHECK_LENGTH: Cell<bool> = const { Cell::new(true) });

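/// Disable (or re-enable) the length-limit check performed by `compute_len`.
///
/// # Safety
/// Meant for internal use. If the check is disabled, the caller must ensure the
/// total row count still fits the assumptions of downstream `IdxSize`-based indexing.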
pub unsafe fn _set_check_length(check: bool) {
    CHECK_LENGTH.set(check)
}

impl<T: PolarsDataType> ChunkedArray<T> {
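    /// Get the length of the ChunkedArray.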
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

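    /// Get the cached number of null values.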
    #[inline]
    pub fn null_count(&self) -> usize {
        self.null_count
    }

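    /// Set the cached null count directly.
    ///
    /// # Safety
    /// The caller must ensure the given count matches the actual number of null
    /// values in the underlying chunks.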
    pub unsafe fn set_null_count(&mut self, null_count: usize) {
        self.null_count = null_count;
    }

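    /// Check if the ChunkedArray is empty.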
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

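    /// Recompute `length` and `null_count` from the chunks, panicking if the total
    /// row count reaches the `IdxSize` limit (unless the check was disabled via
    /// `_set_check_length`).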
    pub(crate) fn compute_len(&mut self) {
        fn inner(chunks: &[ArrayRef]) -> usize {
            match chunks.len() {
                1 => chunks[0].len(),
                _ => chunks.iter().fold(0, |acc, arr| acc + arr.len()),
            }
        }
        let len = inner(&self.chunks);
        if len >= (IdxSize::MAX as usize) && CHECK_LENGTH.get() {
            panic!("{}", LENGTH_LIMIT_MSG);
        }
        self.length = len;
        self.null_count = self
            .chunks
            .iter()
            .map(|arr| arr.null_count())
            .sum::<usize>();
    }

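    /// Rechunk the ChunkedArray into a single contiguous chunk, retaining the
    /// sortedness and fast-explode statistics flags.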
    pub fn rechunk(&self) -> Self {
        match self.dtype() {
            #[cfg(feature = "object")]
            DataType::Object(_, _) => {
                panic!("implementation error")
            },
            _ => {
                fn inner_rechunk(chunks: &[ArrayRef]) -> Vec<ArrayRef> {
                    vec![concatenate_owned_unchecked(chunks).unwrap()]
                }

                if self.chunks.len() == 1 {
                    self.clone()
                } else {
                    let chunks = inner_rechunk(&self.chunks);

                    let mut ca = unsafe { self.copy_with_chunks(chunks) };

                    use StatisticsFlags as F;
                    ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
                    ca
                }
            },
        }
    }

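    /// Combine the validity bitmaps of all chunks into a single bitmap, or `None`
    /// if the array has no nulls (or is empty).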
    pub fn rechunk_validity(&self) -> Option<Bitmap> {
        if self.chunks.len() == 1 {
            return self.chunks[0].validity().cloned();
        }

        if !self.has_nulls() || self.is_empty() {
            return None;
        }

        let mut bm = BitmapBuilder::with_capacity(self.len());
        for arr in self.downcast_iter() {
            if let Some(v) = arr.validity() {
                bm.extend_from_bitmap(v);
            } else {
                bm.extend_constant(arr.len(), true);
            }
        }
        bm.into_opt_validity()
    }

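    /// Split the ChunkedArray at the given row `offset`; a negative `offset`
    /// counts from the end. Statistics flags are retained on both halves.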
    pub fn split_at(&self, offset: i64) -> (Self, Self) {
        let (l, r) = split_at(&self.chunks, offset, self.len());
        let mut out_l = unsafe { self.copy_with_chunks(l) };
        let mut out_r = unsafe { self.copy_with_chunks(r) };

        use StatisticsFlags as F;
        out_l.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
        out_r.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);

        (out_l, out_r)
    }

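    /// Slice the ChunkedArray by `offset` and `length`.
    ///
    /// A negative `offset` is counted from the end. An illustrative sketch (not a
    /// doctest; assumes an `Int32Chunked` constructed via `new`):
    ///
    /// ```ignore
    /// let ca = Int32Chunked::new("a".into(), &[1, 2, 3, 4, 5]);
    /// assert_eq!(ca.slice(1, 2).len(), 2);  // rows at index 1 and 2
    /// assert_eq!(ca.slice(-2, 2).len(), 2); // last two rows
    /// ```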
    pub fn slice(&self, offset: i64, length: usize) -> Self {
        let exec = || {
            let (chunks, len) = slice(&self.chunks, offset, length, self.len());
            let mut out = unsafe { self.copy_with_chunks(chunks) };

            use StatisticsFlags as F;
            out.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
            out.length = len;

            out
        };

        match length {
            0 => match self.dtype() {
                #[cfg(feature = "object")]
                DataType::Object(_, _) => exec(),
                _ => self.clear(),
            },
            _ => exec(),
        }
    }

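    /// Take at most `num_elements` rows from the start; equivalent to
    /// `self.slice(0, num_elements)`.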
    #[must_use]
    pub fn limit(&self, num_elements: usize) -> Self
    where
        Self: Sized,
    {
        self.slice(0, num_elements)
    }

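    /// Get the first `length` rows (at most 10 if `None` is passed).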
    #[must_use]
    pub fn head(&self, length: Option<usize>) -> Self
    where
        Self: Sized,
    {
        match length {
            Some(len) => self.slice(0, std::cmp::min(len, self.len())),
            None => self.slice(0, std::cmp::min(10, self.len())),
        }
    }

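    /// Get the last `length` rows (at most 10 if `None` is passed), e.g.
    /// `ca.tail(Some(3))` returns the last three rows or fewer for shorter arrays.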
    #[must_use]
    pub fn tail(&self, length: Option<usize>) -> Self
    where
        Self: Sized,
    {
        let len = match length {
            Some(len) => std::cmp::min(len, self.len()),
            None => std::cmp::min(10, self.len()),
        };
        self.slice(-(len as i64), len)
    }

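    /// Remove empty chunks, always keeping the first chunk so the ChunkedArray
    /// never ends up with zero chunks.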
    pub fn prune_empty_chunks(&mut self) {
        let mut count = 0u32;
        unsafe {
            self.chunks_mut().retain(|arr| {
                count += 1;
                // Always keep at least one chunk.
                if count == 1 {
                    true
                } else {
                    // Drop any further chunk that is empty.
                    arr.len() > 0
                }
            })
        }
    }
}

#[cfg(feature = "object")]
impl<T: PolarsObject> ObjectChunked<T> {
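    /// Rechunk an ObjectChunked into a single chunk by rebuilding it with an
    /// `ObjectChunkedBuilder`, since the generic `rechunk` path panics for the
    /// object dtype.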
    pub(crate) fn rechunk_object(&self) -> Self {
        if self.chunks.len() == 1 {
            self.clone()
        } else {
            let mut builder = ObjectChunkedBuilder::new(self.name().clone(), self.len());
            let chunks = self.downcast_iter();

            if !self.has_nulls() {
                for arr in chunks {
                    for idx in 0..arr.len() {
                        builder.append_value(arr.value(idx).clone())
                    }
                }
            } else {
                for arr in chunks {
                    for idx in 0..arr.len() {
                        if arr.is_valid(idx) {
                            builder.append_value(arr.value(idx).clone())
                        } else {
                            builder.append_null()
                        }
                    }
                }
            }
            builder.finish()
        }
    }
}

#[cfg(test)]
mod test {
    #[cfg(feature = "dtype-categorical")]
    use crate::prelude::*;

    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_categorical_map_after_rechunk() {
        let s = Series::new(PlSmallStr::EMPTY, &["foo", "bar", "spam"]);
        let mut a = s
            .cast(&DataType::Categorical(None, Default::default()))
            .unwrap();

        a.append(&a.slice(0, 2)).unwrap();
        let a = a.rechunk();
        assert!(a.categorical().unwrap().get_rev_map().len() > 0);
    }
}