1#![allow(unsafe_op_in_unsafe_fn)]
2use std::hint::unreachable_unchecked;
4use std::ops::Deref;
5
6use polars_error::{PolarsError, PolarsResult, polars_bail, polars_err};
7
8use crate::array::Splitable;
9use crate::buffer::Buffer;
10pub use crate::types::Offset;
11
12#[derive(Debug, Clone, PartialEq, Eq)]
17pub struct Offsets<O: Offset>(Vec<O>);
18
19impl<O: Offset> Default for Offsets<O> {
20 #[inline]
21 fn default() -> Self {
22 Self::new()
23 }
24}
25
26impl<O: Offset> Deref for Offsets<O> {
27 type Target = [O];
28
29 fn deref(&self) -> &Self::Target {
30 self.as_slice()
31 }
32}
33
34impl<O: Offset> TryFrom<Vec<O>> for Offsets<O> {
35 type Error = PolarsError;
36
37 #[inline]
38 fn try_from(offsets: Vec<O>) -> Result<Self, Self::Error> {
39 try_check_offsets(&offsets)?;
40 Ok(Self(offsets))
41 }
42}
43
44impl<O: Offset> TryFrom<Buffer<O>> for OffsetsBuffer<O> {
45 type Error = PolarsError;
46
47 #[inline]
48 fn try_from(offsets: Buffer<O>) -> Result<Self, Self::Error> {
49 try_check_offsets(&offsets)?;
50 Ok(Self(offsets))
51 }
52}
53
54impl<O: Offset> TryFrom<Vec<O>> for OffsetsBuffer<O> {
55 type Error = PolarsError;
56
57 #[inline]
58 fn try_from(offsets: Vec<O>) -> Result<Self, Self::Error> {
59 try_check_offsets(&offsets)?;
60 Ok(Self(offsets.into()))
61 }
62}
63
64impl<O: Offset> From<Offsets<O>> for OffsetsBuffer<O> {
65 #[inline]
66 fn from(offsets: Offsets<O>) -> Self {
67 Self(offsets.0.into())
68 }
69}
70
71impl<O: Offset> Offsets<O> {
72 #[inline]
74 pub fn new() -> Self {
75 Self(vec![O::zero()])
76 }
77
78 #[inline]
80 pub fn new_zeroed(length: usize) -> Self {
81 Self(vec![O::zero(); length + 1])
82 }
83
84 #[inline]
86 pub fn try_from_iter<I: IntoIterator<Item = usize>>(iter: I) -> PolarsResult<Self> {
87 let iterator = iter.into_iter();
88 let (lower, _) = iterator.size_hint();
89 let mut offsets = Self::with_capacity(lower);
90 for item in iterator {
91 offsets.try_push(item)?
92 }
93 Ok(offsets)
94 }
95
96 pub fn with_capacity(capacity: usize) -> Self {
98 let mut offsets = Vec::with_capacity(capacity + 1);
99 offsets.push(O::zero());
100 Self(offsets)
101 }
102
103 pub fn capacity(&self) -> usize {
105 self.0.capacity() - 1
106 }
107
108 pub fn reserve(&mut self, additional: usize) {
110 self.0.reserve(additional);
111 }
112
113 pub fn shrink_to_fit(&mut self) {
115 self.0.shrink_to_fit();
116 }
117
118 #[inline]
125 pub fn try_push(&mut self, length: usize) -> PolarsResult<()> {
126 if O::IS_LARGE {
127 let length = O::from_as_usize(length);
128 let old_length = self.last();
129 let new_length = *old_length + length;
130 self.0.push(new_length);
131 Ok(())
132 } else {
133 let length =
134 O::from_usize(length).ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
135
136 let old_length = self.last();
137 let new_length = old_length
138 .checked_add(&length)
139 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
140 self.0.push(new_length);
141 Ok(())
142 }
143 }
144
145 #[inline]
150 pub unsafe fn new_unchecked(offsets: Vec<O>) -> Self {
151 #[cfg(debug_assertions)]
152 {
153 let mut prev_offset = O::default();
154 let mut is_monotonely_increasing = true;
155 for offset in &offsets {
156 is_monotonely_increasing &= *offset >= prev_offset;
157 prev_offset = *offset;
158 }
159 assert!(
160 is_monotonely_increasing,
161 "Unsafe precondition violated. Invariant of offsets broken."
162 );
163 }
164
165 Self(offsets)
166 }
167
168 #[inline]
170 pub fn last(&self) -> &O {
171 match self.0.last() {
172 Some(element) => element,
173 None => unsafe { unreachable_unchecked() },
174 }
175 }
176
177 #[inline]
181 pub fn length_at(&self, index: usize) -> usize {
182 let (start, end) = self.start_end(index);
183 end - start
184 }
185
186 #[inline]
190 pub fn start_end(&self, index: usize) -> (usize, usize) {
191 assert!(index < self.len_proxy());
193 unsafe { self.start_end_unchecked(index) }
194 }
195
196 #[inline]
201 pub unsafe fn start_end_unchecked(&self, index: usize) -> (usize, usize) {
202 let start = self.0.get_unchecked(index).to_usize();
204 let end = self.0.get_unchecked(index + 1).to_usize();
205 (start, end)
206 }
207
208 #[inline]
210 pub fn len_proxy(&self) -> usize {
211 self.0.len() - 1
212 }
213
214 #[inline]
216 pub fn as_slice(&self) -> &[O] {
217 self.0.as_slice()
218 }
219
220 #[inline]
222 pub fn pop(&mut self) -> Option<O> {
223 if self.len_proxy() == 0 {
224 None
225 } else {
226 self.0.pop()
227 }
228 }
229
230 #[inline]
233 pub fn extend_constant(&mut self, additional: usize) {
234 let offset = *self.last();
235 if additional == 1 {
236 self.0.push(offset)
237 } else {
238 self.0.resize(self.0.len() + additional, offset)
239 }
240 }
241
242 #[inline]
246 pub fn try_from_lengths<I: Iterator<Item = usize>>(lengths: I) -> PolarsResult<Self> {
247 let mut self_ = Self::with_capacity(lengths.size_hint().0);
248 self_.try_extend_from_lengths(lengths)?;
249 Ok(self_)
250 }
251
252 #[inline]
256 pub fn try_extend_from_lengths<I: Iterator<Item = usize>>(
257 &mut self,
258 lengths: I,
259 ) -> PolarsResult<()> {
260 let mut total_length = 0;
261 let mut offset = *self.last();
262 let original_offset = offset.to_usize();
263
264 let lengths = lengths.map(|length| {
265 total_length += length;
266 O::from_as_usize(length)
267 });
268
269 let offsets = lengths.map(|length| {
270 offset += length; offset
272 });
273 self.0.extend(offsets);
274
275 let last_offset = original_offset
276 .checked_add(total_length)
277 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
278 O::from_usize(last_offset).ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
279 Ok(())
280 }
281
282 pub fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
286 let mut length = *self.last();
287 let other_length = *other.last();
288 length
290 .checked_add(&other_length)
291 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
292
293 let lengths = other.as_slice().windows(2).map(|w| w[1] - w[0]);
294 let offsets = lengths.map(|new_length| {
295 length += new_length;
296 length
297 });
298 self.0.extend(offsets);
299 Ok(())
300 }
301
302 pub fn try_extend_from_slice(
306 &mut self,
307 other: &OffsetsBuffer<O>,
308 start: usize,
309 length: usize,
310 ) -> PolarsResult<()> {
311 if length == 0 {
312 return Ok(());
313 }
314 let other = &other.0[start..start + length + 1];
315 let other_length = other.last().expect("Length to be non-zero");
316 let mut length = *self.last();
317 length
319 .checked_add(other_length)
320 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
321
322 let lengths = other.windows(2).map(|w| w[1] - w[0]);
323 let offsets = lengths.map(|new_length| {
324 length += new_length;
325 length
326 });
327 self.0.extend(offsets);
328 Ok(())
329 }
330
331 #[inline]
333 pub fn into_inner(self) -> Vec<O> {
334 self.0
335 }
336}
337
338fn try_check_offsets<O: Offset>(offsets: &[O]) -> PolarsResult<()> {
340 match offsets.first() {
342 None => polars_bail!(ComputeError: "offsets must have at least one element"),
343 Some(first) => {
344 if *first < O::zero() {
345 polars_bail!(ComputeError: "offsets must be larger than 0")
346 }
347 let mut previous = *first;
348 let mut any_invalid = false;
349
350 for offset in offsets {
353 if previous > *offset {
354 any_invalid = true
355 }
356 previous = *offset;
357 }
358
359 if any_invalid {
360 polars_bail!(ComputeError: "offsets must be monotonically increasing")
361 } else {
362 Ok(())
363 }
364 },
365 }
366}
367
368#[derive(Clone, PartialEq, Debug)]
373pub struct OffsetsBuffer<O: Offset>(Buffer<O>);
374
375impl<O: Offset> Default for OffsetsBuffer<O> {
376 #[inline]
377 fn default() -> Self {
378 Self(vec![O::zero()].into())
379 }
380}
381
382impl<O: Offset> OffsetsBuffer<O> {
383 #[inline]
386 pub unsafe fn new_unchecked(offsets: Buffer<O>) -> Self {
387 Self(offsets)
388 }
389
390 #[inline]
392 pub fn new() -> Self {
393 Self(vec![O::zero()].into())
394 }
395
396 #[inline]
397 pub fn one_with_length(length: O) -> Self {
398 Self(vec![O::zero(), length].into())
399 }
400
401 #[inline]
403 pub fn into_mut(self) -> either::Either<Self, Offsets<O>> {
404 self.0
405 .into_mut()
406 .map_right(|offsets| unsafe { Offsets::new_unchecked(offsets) })
408 .map_left(Self)
409 }
410
411 #[inline]
413 pub fn buffer(&self) -> &Buffer<O> {
414 &self.0
415 }
416
417 #[inline]
419 pub fn len_proxy(&self) -> usize {
420 self.0.len() - 1
421 }
422
423 #[inline]
425 pub fn len(&self) -> usize {
426 self.0.len()
427 }
428
429 #[inline]
431 pub fn as_slice(&self) -> &[O] {
432 self.0.as_slice()
433 }
434
435 #[inline]
437 pub fn range(&self) -> O {
438 *self.last() - *self.first()
439 }
440
441 #[inline]
443 pub fn first(&self) -> &O {
444 match self.0.first() {
445 Some(element) => element,
446 None => unsafe { unreachable_unchecked() },
447 }
448 }
449
450 #[inline]
452 pub fn last(&self) -> &O {
453 match self.0.last() {
454 Some(element) => element,
455 None => unsafe { unreachable_unchecked() },
456 }
457 }
458
459 #[inline]
463 pub fn length_at(&self, index: usize) -> usize {
464 let (start, end) = self.start_end(index);
465 end - start
466 }
467
468 #[inline]
472 pub fn start_end(&self, index: usize) -> (usize, usize) {
473 assert!(index < self.len_proxy());
475 unsafe { self.start_end_unchecked(index) }
476 }
477
478 #[inline]
483 pub unsafe fn start_end_unchecked(&self, index: usize) -> (usize, usize) {
484 let start = self.0.get_unchecked(index).to_usize();
486 let end = self.0.get_unchecked(index + 1).to_usize();
487 (start, end)
488 }
489
490 #[inline]
495 pub fn slice(&mut self, offset: usize, length: usize) {
496 assert!(length > 0);
497 self.0.slice(offset, length);
498 }
499
500 #[inline]
505 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
506 self.0.slice_unchecked(offset, length);
507 }
508
509 #[inline]
511 pub fn lengths(&self) -> impl ExactSizeIterator<Item = usize> + '_ {
512 self.0.windows(2).map(|w| (w[1] - w[0]).to_usize())
513 }
514
515 #[inline]
517 pub fn offset_and_length_iter(&self) -> impl ExactSizeIterator<Item = (usize, usize)> + '_ {
518 self.windows(2).map(|x| {
519 let [l, r] = x else { unreachable!() };
520 let l = l.to_usize();
521 let r = r.to_usize();
522 (l, r - l)
523 })
524 }
525
526 pub fn leaf_ranges_iter(
529 offsets: &[Self],
530 ) -> impl Iterator<Item = core::ops::Range<usize>> + '_ {
531 let others = &offsets[1..];
532
533 offsets[0].windows(2).map(move |x| {
534 let [l, r] = x else { unreachable!() };
535 let mut l = l.to_usize();
536 let mut r = r.to_usize();
537
538 for o in others {
539 let slc = o.as_slice();
540 l = slc[l].to_usize();
541 r = slc[r].to_usize();
542 }
543
544 l..r
545 })
546 }
547
548 pub fn leaf_full_start_end(offsets: &[Self]) -> core::ops::Range<usize> {
550 let mut l = offsets[0].first().to_usize();
551 let mut r = offsets[0].last().to_usize();
552
553 for o in &offsets[1..] {
554 let slc = o.as_slice();
555 l = slc[l].to_usize();
556 r = slc[r].to_usize();
557 }
558
559 l..r
560 }
561
562 #[inline]
564 pub fn into_inner(self) -> Buffer<O> {
565 self.0
566 }
567
568 #[inline]
570 pub fn delta(&self, start: usize, end: usize) -> usize {
571 assert!(start <= end);
572
573 (self.0[end + 1] - self.0[start]).to_usize()
574 }
575}
576
577impl From<&OffsetsBuffer<i32>> for OffsetsBuffer<i64> {
578 fn from(offsets: &OffsetsBuffer<i32>) -> Self {
579 Self(
581 offsets
582 .buffer()
583 .iter()
584 .map(|x| *x as i64)
585 .collect::<Vec<_>>()
586 .into(),
587 )
588 }
589}
590
591impl TryFrom<&OffsetsBuffer<i64>> for OffsetsBuffer<i32> {
592 type Error = PolarsError;
593
594 fn try_from(offsets: &OffsetsBuffer<i64>) -> Result<Self, Self::Error> {
595 i32::try_from(*offsets.last()).map_err(|_| polars_err!(ComputeError: "overflow"))?;
596
597 Ok(Self(
599 offsets
600 .buffer()
601 .iter()
602 .map(|x| *x as i32)
603 .collect::<Vec<_>>()
604 .into(),
605 ))
606 }
607}
608
609impl From<Offsets<i32>> for Offsets<i64> {
610 fn from(offsets: Offsets<i32>) -> Self {
611 Self(
613 offsets
614 .as_slice()
615 .iter()
616 .map(|x| *x as i64)
617 .collect::<Vec<_>>(),
618 )
619 }
620}
621
622impl TryFrom<Offsets<i64>> for Offsets<i32> {
623 type Error = PolarsError;
624
625 fn try_from(offsets: Offsets<i64>) -> Result<Self, Self::Error> {
626 i32::try_from(*offsets.last()).map_err(|_| polars_err!(ComputeError: "overflow"))?;
627
628 Ok(Self(
630 offsets
631 .as_slice()
632 .iter()
633 .map(|x| *x as i32)
634 .collect::<Vec<_>>(),
635 ))
636 }
637}
638
639impl<O: Offset> std::ops::Deref for OffsetsBuffer<O> {
640 type Target = [O];
641
642 #[inline]
643 fn deref(&self) -> &[O] {
644 self.0.as_slice()
645 }
646}
647
648impl<O: Offset> Splitable for OffsetsBuffer<O> {
649 fn check_bound(&self, offset: usize) -> bool {
650 offset <= self.len_proxy()
651 }
652
653 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
654 let mut lhs = self.0.clone();
655 let mut rhs = self.0.clone();
656
657 lhs.slice(0, offset + 1);
658 rhs.slice(offset, self.0.len() - offset);
659
660 (Self(lhs), Self(rhs))
661 }
662}