1#![allow(unsafe_op_in_unsafe_fn)]
2use std::hint::unreachable_unchecked;
4use std::ops::Deref;
5
6use polars_error::{PolarsError, PolarsResult, polars_bail, polars_err};
7
8use crate::array::Splitable;
9use crate::buffer::Buffer;
10pub use crate::types::Offset;
11
12#[derive(Debug, Clone, PartialEq, Eq)]
17pub struct Offsets<O: Offset>(Vec<O>);
18
19impl<O: Offset> Default for Offsets<O> {
20 #[inline]
21 fn default() -> Self {
22 Self::new()
23 }
24}
25
26impl<O: Offset> Deref for Offsets<O> {
27 type Target = [O];
28
29 fn deref(&self) -> &Self::Target {
30 self.as_slice()
31 }
32}
33
34impl<O: Offset> TryFrom<Vec<O>> for Offsets<O> {
35 type Error = PolarsError;
36
37 #[inline]
38 fn try_from(offsets: Vec<O>) -> Result<Self, Self::Error> {
39 try_check_offsets(&offsets)?;
40 Ok(Self(offsets))
41 }
42}
43
44impl<O: Offset> TryFrom<Buffer<O>> for OffsetsBuffer<O> {
45 type Error = PolarsError;
46
47 #[inline]
48 fn try_from(offsets: Buffer<O>) -> Result<Self, Self::Error> {
49 try_check_offsets(&offsets)?;
50 Ok(Self(offsets))
51 }
52}
53
54impl<O: Offset> TryFrom<Vec<O>> for OffsetsBuffer<O> {
55 type Error = PolarsError;
56
57 #[inline]
58 fn try_from(offsets: Vec<O>) -> Result<Self, Self::Error> {
59 try_check_offsets(&offsets)?;
60 Ok(Self(offsets.into()))
61 }
62}
63
64impl<O: Offset> From<Offsets<O>> for OffsetsBuffer<O> {
65 #[inline]
66 fn from(offsets: Offsets<O>) -> Self {
67 Self(offsets.0.into())
68 }
69}
70
71impl<O: Offset> Offsets<O> {
72 #[inline]
74 pub fn new() -> Self {
75 Self(vec![O::zero()])
76 }
77
78 #[inline]
80 pub fn new_zeroed(length: usize) -> Self {
81 Self(vec![O::zero(); length + 1])
82 }
83
84 #[inline]
86 pub fn try_from_iter<I: IntoIterator<Item = usize>>(iter: I) -> PolarsResult<Self> {
87 let iterator = iter.into_iter();
88 let (lower, _) = iterator.size_hint();
89 let mut offsets = Self::with_capacity(lower);
90 for item in iterator {
91 offsets.try_push(item)?
92 }
93 Ok(offsets)
94 }
95
96 pub fn with_capacity(capacity: usize) -> Self {
98 let mut offsets = Vec::with_capacity(capacity + 1);
99 offsets.push(O::zero());
100 Self(offsets)
101 }
102
103 pub fn capacity(&self) -> usize {
105 self.0.capacity() - 1
106 }
107
108 pub fn reserve(&mut self, additional: usize) {
110 self.0.reserve(additional);
111 }
112
113 pub fn shrink_to_fit(&mut self) {
115 self.0.shrink_to_fit();
116 }
117
118 #[inline]
125 pub fn try_push(&mut self, length: usize) -> PolarsResult<()> {
126 if O::IS_LARGE {
127 let length = O::from_as_usize(length);
128 let old_length = self.last();
129 let new_length = *old_length + length;
130 self.0.push(new_length);
131 Ok(())
132 } else {
133 let length =
134 O::from_usize(length).ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
135
136 let old_length = self.last();
137 let new_length = old_length
138 .checked_add(&length)
139 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
140 self.0.push(new_length);
141 Ok(())
142 }
143 }
144
145 #[inline]
150 pub unsafe fn new_unchecked(offsets: Vec<O>) -> Self {
151 #[cfg(debug_assertions)]
152 {
153 let mut prev_offset = O::default();
154 let mut is_monotonely_increasing = true;
155 for offset in &offsets {
156 is_monotonely_increasing &= *offset >= prev_offset;
157 prev_offset = *offset;
158 }
159 assert!(
160 is_monotonely_increasing,
161 "Unsafe precondition violated. Invariant of offsets broken."
162 );
163 }
164
165 Self(offsets)
166 }
167
168 #[inline]
170 pub fn last(&self) -> &O {
171 match self.0.last() {
172 Some(element) => element,
173 None => unsafe { unreachable_unchecked() },
174 }
175 }
176
177 #[inline]
181 pub fn length_at(&self, index: usize) -> usize {
182 let (start, end) = self.start_end(index);
183 end - start
184 }
185
186 #[inline]
190 pub fn start_end(&self, index: usize) -> (usize, usize) {
191 assert!(index < self.len_proxy());
193 unsafe { self.start_end_unchecked(index) }
194 }
195
196 #[inline]
201 pub unsafe fn start_end_unchecked(&self, index: usize) -> (usize, usize) {
202 let start = self.0.get_unchecked(index).to_usize();
204 let end = self.0.get_unchecked(index + 1).to_usize();
205 (start, end)
206 }
207
208 #[inline]
210 pub fn len_proxy(&self) -> usize {
211 self.0.len() - 1
212 }
213
214 #[inline]
216 pub fn as_slice(&self) -> &[O] {
217 self.0.as_slice()
218 }
219
220 #[inline]
222 pub fn pop(&mut self) -> Option<O> {
223 if self.len_proxy() == 0 {
224 None
225 } else {
226 self.0.pop()
227 }
228 }
229
230 #[inline]
233 pub fn extend_constant(&mut self, additional: usize) {
234 let offset = *self.last();
235 if additional == 1 {
236 self.0.push(offset)
237 } else {
238 self.0.resize(self.0.len() + additional, offset)
239 }
240 }
241
242 #[inline]
246 pub fn try_from_lengths<I: Iterator<Item = usize>>(lengths: I) -> PolarsResult<Self> {
247 let mut self_ = Self::with_capacity(lengths.size_hint().0);
248 self_.try_extend_from_lengths(lengths)?;
249 Ok(self_)
250 }
251
252 #[inline]
256 pub fn try_extend_from_lengths<I: Iterator<Item = usize>>(
257 &mut self,
258 lengths: I,
259 ) -> PolarsResult<()> {
260 let mut total_length = 0;
261 let mut offset = *self.last();
262 let original_offset = offset.to_usize();
263
264 let lengths = lengths.map(|length| {
265 total_length += length;
266 O::from_as_usize(length)
267 });
268
269 let offsets = lengths.map(|length| {
270 offset += length; offset
272 });
273 self.0.extend(offsets);
274
275 let last_offset = original_offset
276 .checked_add(total_length)
277 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
278 O::from_usize(last_offset).ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
279 Ok(())
280 }
281
282 pub fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
286 let mut length = *self.last();
287 let other_length = *other.last();
288 length
290 .checked_add(&other_length)
291 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
292
293 let lengths = other.as_slice().windows(2).map(|w| w[1] - w[0]);
294 let offsets = lengths.map(|new_length| {
295 length += new_length;
296 length
297 });
298 self.0.extend(offsets);
299 Ok(())
300 }
301
302 pub fn try_extend_from_slice(
306 &mut self,
307 other: &OffsetsBuffer<O>,
308 start: usize,
309 length: usize,
310 ) -> PolarsResult<()> {
311 if length == 0 {
312 return Ok(());
313 }
314 let other = &other.0[start..start + length + 1];
315 let other_length = other.last().expect("Length to be non-zero");
316 let mut length = *self.last();
317 length
319 .checked_add(other_length)
320 .ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
321
322 let lengths = other.windows(2).map(|w| w[1] - w[0]);
323 let offsets = lengths.map(|new_length| {
324 length += new_length;
325 length
326 });
327 self.0.extend(offsets);
328 Ok(())
329 }
330
331 #[inline]
333 pub fn into_inner(self) -> Vec<O> {
334 self.0
335 }
336}
337
338fn try_check_offsets<O: Offset>(offsets: &[O]) -> PolarsResult<()> {
340 match offsets.first() {
342 None => polars_bail!(ComputeError: "offsets must have at least one element"),
343 Some(first) => {
344 if *first < O::zero() {
345 polars_bail!(ComputeError: "offsets must be larger than 0")
346 }
347 let mut previous = *first;
348 let mut any_invalid = false;
349
350 for offset in offsets {
353 if previous > *offset {
354 any_invalid = true
355 }
356 previous = *offset;
357 }
358
359 if any_invalid {
360 polars_bail!(ComputeError: "offsets must be monotonically increasing")
361 } else {
362 Ok(())
363 }
364 },
365 }
366}
367
368#[derive(Clone, PartialEq, Debug)]
373pub struct OffsetsBuffer<O: Offset>(Buffer<O>);
374
375impl<O: Offset> Default for OffsetsBuffer<O> {
376 #[inline]
377 fn default() -> Self {
378 Self(vec![O::zero()].into())
379 }
380}
381
382impl<O: Offset> OffsetsBuffer<O> {
383 #[inline]
386 pub unsafe fn new_unchecked(offsets: Buffer<O>) -> Self {
387 Self(offsets)
388 }
389
390 #[inline]
392 pub fn new() -> Self {
393 Self(vec![O::zero()].into())
394 }
395
396 #[inline]
397 pub fn one_with_length(length: O) -> Self {
398 Self(vec![O::zero(), length].into())
399 }
400
401 #[inline]
403 pub fn into_mut(self) -> either::Either<Self, Offsets<O>> {
404 self.0
405 .into_mut()
406 .map_right(|offsets| unsafe { Offsets::new_unchecked(offsets) })
408 .map_left(Self)
409 }
410
411 #[inline]
413 pub fn buffer(&self) -> &Buffer<O> {
414 &self.0
415 }
416
417 #[inline]
419 pub fn len_proxy(&self) -> usize {
420 self.0.len() - 1
421 }
422
423 #[inline]
425 pub fn len(&self) -> usize {
426 self.0.len()
427 }
428
429 #[inline]
431 pub fn as_slice(&self) -> &[O] {
432 self.0.as_slice()
433 }
434
435 #[inline]
437 pub fn range(&self) -> O {
438 *self.last() - *self.first()
439 }
440
441 #[inline]
443 pub fn first(&self) -> &O {
444 match self.0.first() {
445 Some(element) => element,
446 None => unsafe { unreachable_unchecked() },
447 }
448 }
449
450 #[inline]
452 pub fn last(&self) -> &O {
453 match self.0.last() {
454 Some(element) => element,
455 None => unsafe { unreachable_unchecked() },
456 }
457 }
458
459 #[inline]
463 pub fn length_at(&self, index: usize) -> usize {
464 let (start, end) = self.start_end(index);
465 end - start
466 }
467
468 #[inline]
473 pub unsafe fn length_at_unchecked(&self, index: usize) -> usize {
474 let (start, end) = unsafe { self.start_end_unchecked(index) };
475 end - start
476 }
477
478 #[inline]
482 pub fn start_end(&self, index: usize) -> (usize, usize) {
483 assert!(index < self.len_proxy());
485 unsafe { self.start_end_unchecked(index) }
486 }
487
488 #[inline]
493 pub unsafe fn start_end_unchecked(&self, index: usize) -> (usize, usize) {
494 let start = self.0.get_unchecked(index).to_usize();
496 let end = self.0.get_unchecked(index + 1).to_usize();
497 (start, end)
498 }
499
500 #[inline]
505 pub fn slice(&mut self, offset: usize, length: usize) {
506 assert!(length > 0);
507 self.0.slice(offset, length);
508 }
509
510 #[inline]
515 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
516 self.0.slice_unchecked(offset, length);
517 }
518
519 #[inline]
521 pub fn lengths(&self) -> impl ExactSizeIterator<Item = usize> + '_ {
522 self.0.windows(2).map(|w| (w[1] - w[0]).to_usize())
523 }
524
525 #[inline]
527 pub fn offset_and_length_iter(&self) -> impl ExactSizeIterator<Item = (usize, usize)> + '_ {
528 self.windows(2).map(|x| {
529 let [l, r] = x else { unreachable!() };
530 let l = l.to_usize();
531 let r = r.to_usize();
532 (l, r - l)
533 })
534 }
535
536 pub fn leaf_ranges_iter(
539 offsets: &[Self],
540 ) -> impl Iterator<Item = core::ops::Range<usize>> + '_ {
541 let others = &offsets[1..];
542
543 offsets[0].windows(2).map(move |x| {
544 let [l, r] = x else { unreachable!() };
545 let mut l = l.to_usize();
546 let mut r = r.to_usize();
547
548 for o in others {
549 let slc = o.as_slice();
550 l = slc[l].to_usize();
551 r = slc[r].to_usize();
552 }
553
554 l..r
555 })
556 }
557
558 pub fn leaf_full_start_end(offsets: &[Self]) -> core::ops::Range<usize> {
560 let mut l = offsets[0].first().to_usize();
561 let mut r = offsets[0].last().to_usize();
562
563 for o in &offsets[1..] {
564 let slc = o.as_slice();
565 l = slc[l].to_usize();
566 r = slc[r].to_usize();
567 }
568
569 l..r
570 }
571
572 #[inline]
574 pub fn into_inner(self) -> Buffer<O> {
575 self.0
576 }
577
578 #[inline]
580 pub fn delta(&self, start: usize, end: usize) -> usize {
581 assert!(start <= end);
582
583 (self.0[end + 1] - self.0[start]).to_usize()
584 }
585}
586
587impl From<&OffsetsBuffer<i32>> for OffsetsBuffer<i64> {
588 fn from(offsets: &OffsetsBuffer<i32>) -> Self {
589 Self(
591 offsets
592 .buffer()
593 .iter()
594 .map(|x| *x as i64)
595 .collect::<Vec<_>>()
596 .into(),
597 )
598 }
599}
600
601impl TryFrom<&OffsetsBuffer<i64>> for OffsetsBuffer<i32> {
602 type Error = PolarsError;
603
604 fn try_from(offsets: &OffsetsBuffer<i64>) -> Result<Self, Self::Error> {
605 i32::try_from(*offsets.last()).map_err(|_| polars_err!(ComputeError: "overflow"))?;
606
607 Ok(Self(
609 offsets
610 .buffer()
611 .iter()
612 .map(|x| *x as i32)
613 .collect::<Vec<_>>()
614 .into(),
615 ))
616 }
617}
618
619impl From<Offsets<i32>> for Offsets<i64> {
620 fn from(offsets: Offsets<i32>) -> Self {
621 Self(
623 offsets
624 .as_slice()
625 .iter()
626 .map(|x| *x as i64)
627 .collect::<Vec<_>>(),
628 )
629 }
630}
631
632impl TryFrom<Offsets<i64>> for Offsets<i32> {
633 type Error = PolarsError;
634
635 fn try_from(offsets: Offsets<i64>) -> Result<Self, Self::Error> {
636 i32::try_from(*offsets.last()).map_err(|_| polars_err!(ComputeError: "overflow"))?;
637
638 Ok(Self(
640 offsets
641 .as_slice()
642 .iter()
643 .map(|x| *x as i32)
644 .collect::<Vec<_>>(),
645 ))
646 }
647}
648
649impl<O: Offset> std::ops::Deref for OffsetsBuffer<O> {
650 type Target = [O];
651
652 #[inline]
653 fn deref(&self) -> &[O] {
654 self.0.as_slice()
655 }
656}
657
658impl<O: Offset> Splitable for OffsetsBuffer<O> {
659 fn check_bound(&self, offset: usize) -> bool {
660 offset <= self.len_proxy()
661 }
662
663 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
664 let mut lhs = self.0.clone();
665 let mut rhs = self.0.clone();
666
667 lhs.slice(0, offset + 1);
668 rhs.slice(offset, self.0.len() - offset);
669
670 (Self(lhs), Self(rhs))
671 }
672}