1use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use itertools::Itertools;
10use num_traits::NumCast;
11use vortex_buffer::BitBuffer;
12use vortex_buffer::BufferMut;
13use vortex_error::VortexError;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_bail;
17use vortex_error::vortex_ensure;
18use vortex_error::vortex_err;
19use vortex_mask::AllOr;
20use vortex_mask::Mask;
21use vortex_mask::MaskMut;
22use vortex_utils::aliases::hash_map::HashMap;
23
24use crate::Array;
25use crate::ArrayRef;
26use crate::ArrayVisitor;
27use crate::ExecutionCtx;
28use crate::IntoArray;
29use crate::ToCanonical;
30use crate::arrays::BoolArray;
31use crate::arrays::PrimitiveArray;
32use crate::builtins::ArrayBuiltins;
33use crate::compute::is_sorted;
34use crate::dtype::DType;
35use crate::dtype::IntegerPType;
36use crate::dtype::NativePType;
37use crate::dtype::Nullability::NonNullable;
38use crate::dtype::PType;
39use crate::dtype::UnsignedPType;
40use crate::match_each_integer_ptype;
41use crate::match_each_unsigned_integer_ptype;
42use crate::scalar::PValue;
43use crate::scalar::Scalar;
44use crate::search_sorted::SearchResult;
45use crate::search_sorted::SearchSorted;
46use crate::search_sorted::SearchSortedSide;
47use crate::validity::Validity;
48use crate::vtable::ValidityHelper;
49
50pub const PATCH_CHUNK_SIZE: usize = 1024;
53
54#[derive(Copy, Clone, prost::Message)]
55pub struct PatchesMetadata {
56 #[prost(uint64, tag = "1")]
57 len: u64,
58 #[prost(uint64, tag = "2")]
59 offset: u64,
60 #[prost(enumeration = "PType", tag = "3")]
61 indices_ptype: i32,
62 #[prost(uint64, optional, tag = "4")]
63 chunk_offsets_len: Option<u64>,
64 #[prost(enumeration = "PType", optional, tag = "5")]
65 chunk_offsets_ptype: Option<i32>,
66 #[prost(uint64, optional, tag = "6")]
67 offset_within_chunk: Option<u64>,
68}
69
70impl PatchesMetadata {
71 #[inline]
72 pub fn new(
73 len: usize,
74 offset: usize,
75 indices_ptype: PType,
76 chunk_offsets_len: Option<usize>,
77 chunk_offsets_ptype: Option<PType>,
78 offset_within_chunk: Option<usize>,
79 ) -> Self {
80 Self {
81 len: len as u64,
82 offset: offset as u64,
83 indices_ptype: indices_ptype as i32,
84 chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
85 chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
86 offset_within_chunk: offset_within_chunk.map(|len| len as u64),
87 }
88 }
89
90 #[inline]
91 pub fn len(&self) -> VortexResult<usize> {
92 usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
93 }
94
95 #[inline]
96 pub fn is_empty(&self) -> bool {
97 self.len == 0
98 }
99
100 #[inline]
101 pub fn offset(&self) -> VortexResult<usize> {
102 usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
103 }
104
105 #[inline]
106 pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
107 self.chunk_offsets_ptype
108 .map(|t| {
109 PType::try_from(t)
110 .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
111 .map(|ptype| DType::Primitive(ptype, NonNullable))
112 })
113 .transpose()
114 }
115
116 #[inline]
117 pub fn indices_dtype(&self) -> VortexResult<DType> {
118 let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
119 vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
120 })?;
121 vortex_ensure!(
122 ptype.is_unsigned_int(),
123 "Patch indices must be unsigned integers"
124 );
125 Ok(DType::Primitive(ptype, NonNullable))
126 }
127}
128
129#[derive(Debug, Clone)]
131pub struct Patches {
132 array_len: usize,
133 offset: usize,
134 indices: ArrayRef,
135 values: ArrayRef,
136 chunk_offsets: Option<ArrayRef>,
143 offset_within_chunk: Option<usize>,
153}
154
155impl Patches {
156 pub fn new(
157 array_len: usize,
158 offset: usize,
159 indices: ArrayRef,
160 values: ArrayRef,
161 chunk_offsets: Option<ArrayRef>,
162 ) -> VortexResult<Self> {
163 vortex_ensure!(
164 indices.len() == values.len(),
165 "Patch indices and values must have the same length"
166 );
167 vortex_ensure!(
168 indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
169 "Patch indices must be non-nullable unsigned integers, got {:?}",
170 indices.dtype()
171 );
172
173 vortex_ensure!(
174 indices.len() <= array_len,
175 "Patch indices must be shorter than the array length"
176 );
177 vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
178
179 if indices.is_host() && values.is_host() {
182 let max = usize::try_from(&indices.scalar_at(indices.len() - 1)?)
183 .map_err(|_| vortex_err!("indices must be a number"))?;
184 vortex_ensure!(
185 max - offset < array_len,
186 "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
187 );
188
189 debug_assert!(
190 is_sorted(&indices).unwrap_or(Some(false)).unwrap_or(false),
191 "Patch indices must be sorted"
192 );
193 }
194
195 Ok(Self {
196 array_len,
197 offset,
198 indices,
199 values,
200 chunk_offsets: chunk_offsets.clone(),
201 offset_within_chunk: chunk_offsets.map(|_| 0),
203 })
204 }
205
206 pub unsafe fn new_unchecked(
216 array_len: usize,
217 offset: usize,
218 indices: ArrayRef,
219 values: ArrayRef,
220 chunk_offsets: Option<ArrayRef>,
221 offset_within_chunk: Option<usize>,
222 ) -> Self {
223 Self {
224 array_len,
225 offset,
226 indices,
227 values,
228 chunk_offsets,
229 offset_within_chunk,
230 }
231 }
232
233 #[inline]
234 pub fn array_len(&self) -> usize {
235 self.array_len
236 }
237
238 #[inline]
239 pub fn num_patches(&self) -> usize {
240 self.indices.len()
241 }
242
243 #[inline]
244 pub fn dtype(&self) -> &DType {
245 self.values.dtype()
246 }
247
248 #[inline]
249 pub fn indices(&self) -> &ArrayRef {
250 &self.indices
251 }
252
253 #[inline]
254 pub fn into_indices(self) -> ArrayRef {
255 self.indices
256 }
257
258 #[inline]
259 pub fn indices_mut(&mut self) -> &mut ArrayRef {
260 &mut self.indices
261 }
262
263 #[inline]
264 pub fn values(&self) -> &ArrayRef {
265 &self.values
266 }
267
268 #[inline]
269 pub fn into_values(self) -> ArrayRef {
270 self.values
271 }
272
273 #[inline]
274 pub fn values_mut(&mut self) -> &mut ArrayRef {
275 &mut self.values
276 }
277
278 #[inline]
279 pub fn offset(&self) -> usize {
281 self.offset
282 }
283
284 #[inline]
285 pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
286 &self.chunk_offsets
287 }
288
289 #[inline]
290 pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
291 let Some(chunk_offsets) = &self.chunk_offsets else {
292 vortex_bail!("chunk_offsets must be set to retrieve offset at index")
293 };
294
295 chunk_offsets
296 .scalar_at(idx)?
297 .as_primitive()
298 .as_::<usize>()
299 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
300 }
301
302 #[inline]
311 pub fn offset_within_chunk(&self) -> Option<usize> {
312 self.offset_within_chunk
313 }
314
315 #[inline]
316 pub fn indices_ptype(&self) -> VortexResult<PType> {
317 PType::try_from(self.indices.dtype())
318 .map_err(|_| vortex_err!("indices dtype is not primitive"))
319 }
320
321 pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
322 if self.indices.len() > len {
323 vortex_bail!(
324 "Patch indices {} are longer than the array length {}",
325 self.indices.len(),
326 len
327 );
328 }
329 if self.values.dtype() != dtype {
330 vortex_bail!(
331 "Patch values dtype {} does not match array dtype {}",
332 self.values.dtype(),
333 dtype
334 );
335 }
336 let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
337 let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
338
339 Ok(PatchesMetadata::new(
340 self.indices.len(),
341 self.offset,
342 self.indices.dtype().as_ptype(),
343 chunk_offsets_len,
344 chunk_offsets_ptype,
345 self.offset_within_chunk,
346 ))
347 }
348
349 pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
350 unsafe {
352 Ok(Self::new_unchecked(
353 self.array_len,
354 self.offset,
355 self.indices,
356 self.values.cast(values_dtype.clone())?,
357 self.chunk_offsets,
358 self.offset_within_chunk,
359 ))
360 }
361 }
362
363 pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
365 self.search_index(index)?
366 .to_found()
367 .map(|patch_idx| self.values().scalar_at(patch_idx))
368 .transpose()
369 }
370
371 pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
389 if self.chunk_offsets.is_some() {
390 return self.search_index_chunked(index);
391 }
392
393 Self::search_index_binary_search(&self.indices, index + self.offset)
394 }
395
396 fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
402 if indices.is_canonical() {
403 let primitive = indices.to_primitive();
404 match_each_integer_ptype!(primitive.ptype(), |T| {
405 let Ok(needle) = T::try_from(needle) else {
406 return Ok(SearchResult::NotFound(primitive.len()));
411 };
412 return primitive
413 .as_slice::<T>()
414 .search_sorted(&needle, SearchSortedSide::Left);
415 });
416 }
417 indices
418 .as_primitive_typed()
419 .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
420 }
421
422 fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
432 let Some(chunk_offsets) = &self.chunk_offsets else {
433 vortex_bail!("chunk_offsets is required to be set")
434 };
435
436 let Some(offset_within_chunk) = self.offset_within_chunk else {
437 vortex_bail!("offset_within_chunk is required to be set")
438 };
439
440 if index >= self.array_len() {
441 return Ok(SearchResult::NotFound(self.indices().len()));
442 }
443
444 let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
445
446 let base_offset = self.chunk_offset_at(0)?;
448
449 let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
450 .saturating_sub(offset_within_chunk);
457
458 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
459 self.chunk_offset_at(chunk_idx + 1)? - base_offset - offset_within_chunk
460 } else {
461 self.indices.len()
462 };
463
464 let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
465 let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
466
467 Ok(match result {
468 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
469 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
470 })
471 }
472
473 fn search_index_chunked_batch<T, O>(
479 &self,
480 indices: &[T],
481 chunk_offsets: &[O],
482 index: T,
483 ) -> VortexResult<SearchResult>
484 where
485 T: UnsignedPType,
486 O: UnsignedPType,
487 usize: TryFrom<T>,
488 usize: TryFrom<O>,
489 {
490 let Some(offset_within_chunk) = self.offset_within_chunk else {
491 vortex_bail!("offset_within_chunk is required to be set")
492 };
493
494 let chunk_idx = {
495 let Ok(index) = usize::try_from(index) else {
496 return Ok(SearchResult::NotFound(indices.len()));
498 };
499
500 if index >= self.array_len() {
501 return Ok(SearchResult::NotFound(self.indices().len()));
502 }
503
504 (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
505 };
506
507 let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
509 .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
510
511 let patches_start_idx = chunk_offset
512 .saturating_sub(offset_within_chunk);
519
520 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
521 let base_offset_end = chunk_offsets[chunk_idx + 1];
522
523 let offset_within_chunk = O::from(offset_within_chunk)
524 .ok_or_else(|| vortex_err!("offset_within_chunk failed to convert to O"))?;
525
526 usize::try_from(base_offset_end - chunk_offsets[0] - offset_within_chunk)
527 .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
528 } else {
529 self.indices.len()
530 };
531
532 let Some(offset) = T::from(self.offset) else {
533 return Ok(SearchResult::NotFound(indices.len()));
535 };
536
537 let chunk_indices = &indices[patches_start_idx..patches_end_idx];
538 let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
539
540 Ok(match result {
541 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
542 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
543 })
544 }
545
546 pub fn min_index(&self) -> VortexResult<usize> {
548 let first = self
549 .indices
550 .scalar_at(0)?
551 .as_primitive()
552 .as_::<usize>()
553 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
554 Ok(first - self.offset)
555 }
556
557 pub fn max_index(&self) -> VortexResult<usize> {
559 let last = self
560 .indices
561 .scalar_at(self.indices.len() - 1)?
562 .as_primitive()
563 .as_::<usize>()
564 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
565 Ok(last - self.offset)
566 }
567
568 pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
570 if mask.len() != self.array_len {
571 vortex_bail!(
572 "Filter mask length {} does not match array length {}",
573 mask.len(),
574 self.array_len
575 );
576 }
577
578 match mask.indices() {
579 AllOr::All => Ok(Some(self.clone())),
580 AllOr::None => Ok(None),
581 AllOr::Some(mask_indices) => {
582 let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
583 match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
584 filter_patches_with_mask(
585 flat_indices.as_slice::<I>(),
586 self.offset(),
587 self.values(),
588 mask_indices,
589 )
590 })
591 }
592 }
593 }
594
595 pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
600 if mask.len() != self.array_len {
601 vortex_bail!(
602 "Filter mask length {} does not match array length {}",
603 mask.len(),
604 self.array_len
605 );
606 }
607
608 let filter_mask = match mask.bit_buffer() {
609 AllOr::All => return Ok(None),
610 AllOr::None => return Ok(Some(self.clone())),
611 AllOr::Some(masked) => {
612 let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
613 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
614 let patch_indices = patch_indices.as_slice::<P>();
615 Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
616 #[allow(clippy::cast_possible_truncation)]
617 let idx = (patch_indices[i] as usize) - self.offset;
618 !masked.value(idx)
619 }))
620 })
621 }
622 };
623
624 if filter_mask.all_false() {
625 return Ok(None);
626 }
627
628 let filtered_indices = self
630 .indices
631 .filter(filter_mask.clone())?
632 .to_canonical()?
633 .into_array();
634 let filtered_values = self
635 .values
636 .filter(filter_mask)?
637 .to_canonical()?
638 .into_array();
639
640 Ok(Some(Self {
641 array_len: self.array_len,
642 offset: self.offset,
643 indices: filtered_indices,
644 values: filtered_values,
645 chunk_offsets: None,
647 offset_within_chunk: self.offset_within_chunk,
648 }))
649 }
650
651 pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
653 let slice_start_idx = self.search_index(range.start)?.to_index();
654 let slice_end_idx = self.search_index(range.end)?.to_index();
655
656 if slice_start_idx == slice_end_idx {
657 return Ok(None);
658 }
659
660 let values = self.values().slice(slice_start_idx..slice_end_idx)?;
661 let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
662
663 let chunk_offsets = self
664 .chunk_offsets
665 .as_ref()
666 .map(|chunk_offsets| -> VortexResult<ArrayRef> {
667 let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
668 let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
669 let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
670 chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
671 })
672 .transpose()?;
673
674 let offset_within_chunk = chunk_offsets
675 .as_ref()
676 .map(|chunk_offsets| -> VortexResult<usize> {
677 let base_offset = chunk_offsets
678 .scalar_at(0)?
679 .as_primitive()
680 .as_::<usize>()
681 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
682 Ok(slice_start_idx - base_offset)
683 })
684 .transpose()?;
685
686 Ok(Some(Self {
687 array_len: range.len(),
688 offset: range.start + self.offset(),
689 indices,
690 values,
691 chunk_offsets,
692 offset_within_chunk,
693 }))
694 }
695
696 const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
698
699 fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
700 (self.num_patches() as f64 / take_indices.len() as f64)
701 < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
702 }
703
704 pub fn take_with_nulls(
708 &self,
709 take_indices: &ArrayRef,
710 ctx: &mut ExecutionCtx,
711 ) -> VortexResult<Option<Self>> {
712 if take_indices.is_empty() {
713 return Ok(None);
714 }
715
716 let take_indices = take_indices.to_array().execute::<PrimitiveArray>(ctx)?;
717 if self.is_map_faster_than_search(&take_indices) {
718 self.take_map(take_indices, true, ctx)
719 } else {
720 self.take_search(take_indices, true, ctx)
721 }
722 }
723
724 pub fn take(
728 &self,
729 take_indices: &ArrayRef,
730 ctx: &mut ExecutionCtx,
731 ) -> VortexResult<Option<Self>> {
732 if take_indices.is_empty() {
733 return Ok(None);
734 }
735
736 let take_indices = take_indices.to_array().execute::<PrimitiveArray>(ctx)?;
737 if self.is_map_faster_than_search(&take_indices) {
738 self.take_map(take_indices, false, ctx)
739 } else {
740 self.take_search(take_indices, false, ctx)
741 }
742 }
743
744 #[expect(
745 clippy::cognitive_complexity,
746 reason = "complexity is from nested match_each_* macros"
747 )]
748 pub fn take_search(
749 &self,
750 take_indices: PrimitiveArray,
751 include_nulls: bool,
752 ctx: &mut ExecutionCtx,
753 ) -> VortexResult<Option<Self>> {
754 let take_indices_validity = take_indices.validity();
755 let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
756 let chunk_offsets = self
757 .chunk_offsets()
758 .as_ref()
759 .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
760 .transpose()?;
761
762 let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
763 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
764 let patch_indices_slice = patch_indices.as_slice::<PatchT>();
765 match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
766 let take_slice = take_indices.as_slice::<TakeT>();
767
768 if let Some(chunk_offsets) = chunk_offsets {
769 match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
770 let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
771 take_indices_with_search_fn(
772 patch_indices_slice,
773 take_slice,
774 take_indices.validity_mask()?,
775 include_nulls,
776 |take_idx| {
777 self.search_index_chunked_batch(
778 patch_indices_slice,
779 chunk_offsets,
780 take_idx,
781 )
782 },
783 )?
784 })
785 } else {
786 take_indices_with_search_fn(
787 patch_indices_slice,
788 take_slice,
789 take_indices.validity_mask()?,
790 include_nulls,
791 |take_idx| {
792 let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
793 return Ok(SearchResult::NotFound(patch_indices_slice.len()));
795 };
796
797 patch_indices_slice
798 .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
799 },
800 )?
801 }
802 })
803 });
804
805 if new_indices.is_empty() {
806 return Ok(None);
807 }
808
809 let new_indices = new_indices.into_array();
810 let new_array_len = take_indices.len();
811 let values_validity = take_indices_validity.take(&new_indices)?;
812
813 Ok(Some(Self {
814 array_len: new_array_len,
815 offset: 0,
816 indices: new_indices.clone(),
817 values: self
818 .values()
819 .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
820 chunk_offsets: None,
821 offset_within_chunk: Some(0), }))
823 }
824
825 pub fn take_map(
826 &self,
827 take_indices: PrimitiveArray,
828 include_nulls: bool,
829 ctx: &mut ExecutionCtx,
830 ) -> VortexResult<Option<Self>> {
831 let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
832 let new_length = take_indices.len();
833
834 let min_index = self.min_index()?;
835 let max_index = self.max_index()?;
836
837 let Some((new_sparse_indices, value_indices)) =
838 match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
839 match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
840 take_map::<_, TakeIndices>(
841 indices.as_slice::<Indices>(),
842 take_indices,
843 self.offset(),
844 min_index,
845 max_index,
846 include_nulls,
847 )?
848 })
849 })
850 else {
851 return Ok(None);
852 };
853
854 let taken_values = self.values().take(value_indices)?;
855
856 Ok(Some(Patches {
857 array_len: new_length,
858 offset: 0,
859 indices: new_sparse_indices,
860 values: taken_values,
861 chunk_offsets: None,
863 offset_within_chunk: self.offset_within_chunk,
864 }))
865 }
866
867 pub unsafe fn apply_to_buffer<P: NativePType>(
878 &self,
879 buffer: &mut [P],
880 validity: &mut MaskMut,
881 ctx: &mut ExecutionCtx,
882 ) {
883 let patch_indices = self
884 .indices
885 .clone()
886 .execute::<PrimitiveArray>(ctx)
887 .vortex_expect("patch indices must be convertible to PrimitiveArray");
888 let patch_values = self
889 .values
890 .clone()
891 .execute::<PrimitiveArray>(ctx)
892 .vortex_expect("patch values must be convertible to PrimitiveArray");
893 let patches_validity = patch_values.validity();
894
895 let patch_values_slice = patch_values.as_slice::<P>();
896 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |I| {
897 let patch_indices_slice = patch_indices.as_slice::<I>();
898
899 unsafe {
905 apply_patches_to_buffer_inner(
906 buffer,
907 validity,
908 patch_indices_slice,
909 self.offset,
910 patch_values_slice,
911 patches_validity,
912 ctx,
913 );
914 }
915 });
916 }
917
918 pub fn map_values<F>(self, f: F) -> VortexResult<Self>
919 where
920 F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
921 {
922 let values = f(self.values)?;
923 if self.indices.len() != values.len() {
924 vortex_bail!(
925 "map_values must preserve length: expected {} received {}",
926 self.indices.len(),
927 values.len()
928 )
929 }
930
931 Ok(Self {
932 array_len: self.array_len,
933 offset: self.offset,
934 indices: self.indices,
935 values,
936 chunk_offsets: self.chunk_offsets,
937 offset_within_chunk: self.offset_within_chunk,
938 })
939 }
940}
941
942unsafe fn apply_patches_to_buffer_inner<P, I>(
952 buffer: &mut [P],
953 validity: &mut MaskMut,
954 patch_indices: &[I],
955 patch_offset: usize,
956 patch_values: &[P],
957 patches_validity: &Validity,
958 ctx: &mut ExecutionCtx,
959) where
960 P: NativePType,
961 I: UnsignedPType,
962{
963 debug_assert!(!patch_indices.is_empty());
964 debug_assert_eq!(patch_indices.len(), patch_values.len());
965 debug_assert_eq!(buffer.len(), validity.len());
966
967 match patches_validity {
968 Validity::NonNullable | Validity::AllValid => {
969 for (&i, &value) in patch_indices.iter().zip_eq(patch_values) {
971 let index = i.as_() - patch_offset;
972
973 unsafe {
976 validity.set_unchecked(index);
977 }
978 buffer[index] = value;
979 }
980 }
981 Validity::AllInvalid => {
982 for &i in patch_indices {
984 let index = i.as_() - patch_offset;
985
986 unsafe {
989 validity.unset_unchecked(index);
990 }
991 }
992 }
993 Validity::Array(array) => {
994 let bool_array = array
996 .clone()
997 .execute::<BoolArray>(ctx)
998 .vortex_expect("validity array must be convertible to BoolArray");
999 let mask = bool_array.to_bit_buffer();
1000 for (patch_idx, (&i, &value)) in patch_indices.iter().zip_eq(patch_values).enumerate() {
1001 let index = i.as_() - patch_offset;
1002
1003 unsafe {
1006 if mask.value_unchecked(patch_idx) {
1007 buffer[index] = value;
1008 validity.set_unchecked(index);
1009 } else {
1010 validity.unset_unchecked(index);
1011 }
1012 }
1013 }
1014 }
1015 }
1016}
1017
1018fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
1019 indices: &[I],
1020 take_indices: PrimitiveArray,
1021 indices_offset: usize,
1022 min_index: usize,
1023 max_index: usize,
1024 include_nulls: bool,
1025) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
1026where
1027 usize: TryFrom<T>,
1028 VortexError: From<<I as TryFrom<usize>>::Error>,
1029{
1030 let take_indices_len = take_indices.len();
1031 let take_indices_validity = take_indices.validity();
1032 let take_indices_validity_mask = take_indices_validity.to_mask(take_indices_len);
1033 let take_indices = take_indices.as_slice::<T>();
1034 let offset_i = I::try_from(indices_offset)?;
1035
1036 let sparse_index_to_value_index: HashMap<I, usize> = indices
1037 .iter()
1038 .copied()
1039 .map(|idx| idx - offset_i)
1040 .enumerate()
1041 .map(|(value_index, sparse_index)| (sparse_index, value_index))
1042 .collect();
1043
1044 let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1045 let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1046
1047 for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
1048 let ti = usize::try_from(take_idx)
1049 .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
1050
1051 let is_null = match take_indices_validity_mask.bit_buffer() {
1053 AllOr::All => false,
1054 AllOr::None => true,
1055 AllOr::Some(buf) => !buf.value(idx_in_take),
1056 };
1057 if is_null {
1058 if include_nulls {
1059 new_sparse_indices.push(idx_in_take as u64);
1060 value_indices.push(0);
1061 }
1062 } else if ti >= min_index && ti <= max_index {
1063 let ti_as_i = I::try_from(ti)
1064 .map_err(|_| vortex_err!("take index does not fit in index type"))?;
1065 if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
1066 new_sparse_indices.push(idx_in_take as u64);
1067 value_indices.push(value_index as u64);
1068 }
1069 }
1070 }
1071
1072 if new_sparse_indices.is_empty() {
1073 return Ok(None);
1074 }
1075
1076 let new_sparse_indices = new_sparse_indices.into_array();
1077 let values_validity = take_indices_validity.take(&new_sparse_indices)?;
1078 Ok(Some((
1079 new_sparse_indices,
1080 PrimitiveArray::new(value_indices, values_validity).into_array(),
1081 )))
1082}
1083
1084fn filter_patches_with_mask<T: IntegerPType>(
1090 patch_indices: &[T],
1091 offset: usize,
1092 patch_values: &ArrayRef,
1093 mask_indices: &[usize],
1094) -> VortexResult<Option<Patches>> {
1095 let true_count = mask_indices.len();
1096 let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
1097 let mut new_mask_indices = Vec::with_capacity(true_count);
1098
1099 const STRIDE: usize = 4;
1103
1104 let mut mask_idx = 0usize;
1105 let mut true_idx = 0usize;
1106
1107 while mask_idx < patch_indices.len() && true_idx < true_count {
1108 if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1115 let left_min = patch_indices[mask_idx]
1117 .to_usize()
1118 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1119 - offset;
1120 let left_max = patch_indices[mask_idx + STRIDE]
1121 .to_usize()
1122 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1123 - offset;
1124 let right_min = mask_indices[true_idx];
1125 let right_max = mask_indices[true_idx + STRIDE];
1126
1127 if left_min > right_max {
1128 true_idx += STRIDE;
1130 continue;
1131 } else if right_min > left_max {
1132 mask_idx += STRIDE;
1133 continue;
1134 } else {
1135 }
1137 }
1138
1139 let left = patch_indices[mask_idx]
1142 .to_usize()
1143 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1144 - offset;
1145 let right = mask_indices[true_idx];
1146
1147 match left.cmp(&right) {
1148 Ordering::Less => {
1149 mask_idx += 1;
1150 }
1151 Ordering::Greater => {
1152 true_idx += 1;
1153 }
1154 Ordering::Equal => {
1155 new_mask_indices.push(mask_idx);
1157 new_patch_indices.push(true_idx as u64);
1158
1159 mask_idx += 1;
1160 true_idx += 1;
1161 }
1162 }
1163 }
1164
1165 if new_mask_indices.is_empty() {
1166 return Ok(None);
1167 }
1168
1169 let new_patch_indices = new_patch_indices.into_array();
1170 let new_patch_values = patch_values
1171 .filter(Mask::from_indices(patch_values.len(), new_mask_indices))?
1172 .to_canonical()?
1173 .into_array();
1174
1175 Ok(Some(Patches::new(
1176 true_count,
1177 0,
1178 new_patch_indices,
1179 new_patch_values,
1180 None,
1182 )?))
1183}
1184
1185fn take_indices_with_search_fn<
1186 I: UnsignedPType,
1187 T: IntegerPType,
1188 F: Fn(I) -> VortexResult<SearchResult>,
1189>(
1190 indices: &[I],
1191 take_indices: &[T],
1192 take_validity: Mask,
1193 include_nulls: bool,
1194 search_fn: F,
1195) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1196 let mut values_indices = BufferMut::with_capacity(take_indices.len());
1197 let mut new_indices = BufferMut::with_capacity(take_indices.len());
1198
1199 for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1200 if !take_validity.value(new_patch_idx) {
1201 if include_nulls {
1202 values_indices.push(0u64);
1204 new_indices.push(new_patch_idx as u64);
1205 }
1206 continue;
1207 } else {
1208 let search_result = match I::from(take_idx) {
1209 Some(idx) => search_fn(idx)?,
1210 None => SearchResult::NotFound(indices.len()),
1211 };
1212
1213 if let Some(patch_idx) = search_result.to_found() {
1214 values_indices.push(patch_idx as u64);
1215 new_indices.push(new_patch_idx as u64);
1216 }
1217 }
1218 }
1219
1220 Ok((values_indices, new_indices))
1221}
1222
1223#[cfg(test)]
1224mod test {
1225 use vortex_buffer::BufferMut;
1226 use vortex_buffer::buffer;
1227 use vortex_mask::Mask;
1228
1229 use crate::IntoArray;
1230 use crate::LEGACY_SESSION;
1231 use crate::ToCanonical;
1232 use crate::VortexSessionExecute;
1233 use crate::arrays::PrimitiveArray;
1234 use crate::assert_arrays_eq;
1235 use crate::patches::Patches;
1236 use crate::search_sorted::SearchResult;
1237 use crate::validity::Validity;
1238
1239 #[test]
1240 fn test_filter() {
1241 let patches = Patches::new(
1242 100,
1243 0,
1244 buffer![10u32, 11, 20].into_array(),
1245 buffer![100, 110, 200].into_array(),
1246 None,
1247 )
1248 .unwrap();
1249
1250 let filtered = patches
1251 .filter(
1252 &Mask::from_indices(100, vec![10, 20, 30]),
1253 &mut LEGACY_SESSION.create_execution_ctx(),
1254 )
1255 .unwrap()
1256 .unwrap();
1257
1258 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1259 assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1260 }
1261
1262 #[test]
1263 fn take_with_nulls() {
1264 let patches = Patches::new(
1265 20,
1266 0,
1267 buffer![2u64, 9, 15].into_array(),
1268 PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1269 None,
1270 )
1271 .unwrap();
1272
1273 let taken = patches
1274 .take(
1275 &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1276 .into_array(),
1277 &mut LEGACY_SESSION.create_execution_ctx(),
1278 )
1279 .unwrap()
1280 .unwrap();
1281 let primitive_values = taken.values().to_primitive();
1282 let primitive_indices = taken.indices().to_primitive();
1283 assert_eq!(taken.array_len(), 2);
1284 assert_arrays_eq!(
1285 primitive_values,
1286 PrimitiveArray::from_option_iter([Some(44i32)])
1287 );
1288 assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1289 assert_eq!(
1290 primitive_values.validity_mask().unwrap(),
1291 Mask::from_iter(vec![true])
1292 );
1293 }
1294
1295 #[test]
1296 fn take_search_with_nulls_chunked() {
1297 let patches = Patches::new(
1298 20,
1299 0,
1300 buffer![2u64, 9, 15].into_array(),
1301 buffer![33_i32, 44, 55].into_array(),
1302 Some(buffer![0u64].into_array()),
1303 )
1304 .unwrap();
1305
1306 let taken = patches
1307 .take_search(
1308 PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1309 true,
1310 &mut LEGACY_SESSION.create_execution_ctx(),
1311 )
1312 .unwrap()
1313 .unwrap();
1314
1315 let primitive_values = taken.values().to_primitive();
1316 assert_eq!(taken.array_len(), 2);
1317 assert_arrays_eq!(
1318 primitive_values,
1319 PrimitiveArray::from_option_iter([Some(44i32), None])
1320 );
1321 assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1322
1323 assert_eq!(
1324 primitive_values.validity_mask().unwrap(),
1325 Mask::from_iter([true, false])
1326 );
1327 }
1328
1329 #[test]
1330 fn take_search_chunked_multiple_chunks() {
1331 let patches = Patches::new(
1332 2048,
1333 0,
1334 buffer![100u64, 500, 1200, 1800].into_array(),
1335 buffer![10_i32, 20, 30, 40].into_array(),
1336 Some(buffer![0u64, 2].into_array()),
1337 )
1338 .unwrap();
1339
1340 let taken = patches
1341 .take_search(
1342 PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1343 true,
1344 &mut LEGACY_SESSION.create_execution_ctx(),
1345 )
1346 .unwrap()
1347 .unwrap();
1348
1349 assert_eq!(taken.array_len(), 3);
1350 assert_arrays_eq!(
1351 taken.values(),
1352 PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1353 );
1354 }
1355
1356 #[test]
1357 fn take_search_chunked_indices_with_no_patches() {
1358 let patches = Patches::new(
1359 20,
1360 0,
1361 buffer![2u64, 9, 15].into_array(),
1362 buffer![33_i32, 44, 55].into_array(),
1363 Some(buffer![0u64].into_array()),
1364 )
1365 .unwrap();
1366
1367 let taken = patches
1368 .take_search(
1369 PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1370 true,
1371 &mut LEGACY_SESSION.create_execution_ctx(),
1372 )
1373 .unwrap();
1374
1375 assert!(taken.is_none());
1376 }
1377
1378 #[test]
1379 fn take_search_chunked_interleaved() {
1380 let patches = Patches::new(
1381 30,
1382 0,
1383 buffer![5u64, 10, 20, 25].into_array(),
1384 buffer![100_i32, 200, 300, 400].into_array(),
1385 Some(buffer![0u64].into_array()),
1386 )
1387 .unwrap();
1388
1389 let taken = patches
1390 .take_search(
1391 PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1392 true,
1393 &mut LEGACY_SESSION.create_execution_ctx(),
1394 )
1395 .unwrap()
1396 .unwrap();
1397
1398 assert_eq!(taken.array_len(), 4);
1399 assert_arrays_eq!(
1400 taken.values(),
1401 PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1402 );
1403 }
1404
1405 #[test]
1406 fn test_take_search_multiple_chunk_offsets() {
1407 let patches = Patches::new(
1408 1500,
1409 0,
1410 BufferMut::from_iter(0..1500u64).into_array(),
1411 BufferMut::from_iter(0..1500i32).into_array(),
1412 Some(buffer![0u64, 1024u64].into_array()),
1413 )
1414 .unwrap();
1415
1416 let taken = patches
1417 .take_search(
1418 PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1419 false,
1420 &mut LEGACY_SESSION.create_execution_ctx(),
1421 )
1422 .unwrap()
1423 .unwrap();
1424
1425 assert_eq!(taken.array_len(), 1500);
1426 }
1427
1428 #[test]
1429 fn test_slice() {
1430 let values = buffer![15_u32, 135, 13531, 42].into_array();
1431 let indices = buffer![10_u64, 11, 50, 100].into_array();
1432
1433 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1434
1435 let sliced = patches.slice(15..100).unwrap().unwrap();
1436 assert_eq!(sliced.array_len(), 100 - 15);
1437 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1438 }
1439
1440 #[test]
1441 fn doubly_sliced() {
1442 let values = buffer![15_u32, 135, 13531, 42].into_array();
1443 let indices = buffer![10_u64, 11, 50, 100].into_array();
1444
1445 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1446
1447 let sliced = patches.slice(15..100).unwrap().unwrap();
1448 assert_eq!(sliced.array_len(), 100 - 15);
1449 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1450
1451 let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1452 assert_arrays_eq!(
1453 doubly_sliced.values(),
1454 PrimitiveArray::from_iter([13531u32])
1455 );
1456 }
1457
1458 #[test]
1459 fn test_mask_all_true() {
1460 let patches = Patches::new(
1461 10,
1462 0,
1463 buffer![2u64, 5, 8].into_array(),
1464 buffer![100i32, 200, 300].into_array(),
1465 None,
1466 )
1467 .unwrap();
1468
1469 let mask = Mask::new_true(10);
1470 let masked = patches
1471 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1472 .unwrap();
1473 assert!(masked.is_none());
1474 }
1475
1476 #[test]
1477 fn test_mask_all_false() {
1478 let patches = Patches::new(
1479 10,
1480 0,
1481 buffer![2u64, 5, 8].into_array(),
1482 buffer![100i32, 200, 300].into_array(),
1483 None,
1484 )
1485 .unwrap();
1486
1487 let mask = Mask::new_false(10);
1488 let masked = patches
1489 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1490 .unwrap()
1491 .unwrap();
1492
1493 assert_arrays_eq!(
1495 masked.values(),
1496 PrimitiveArray::from_iter([100i32, 200, 300])
1497 );
1498 assert!(masked.values().is_valid(0).unwrap());
1499 assert!(masked.values().is_valid(1).unwrap());
1500 assert!(masked.values().is_valid(2).unwrap());
1501
1502 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1504 }
1505
1506 #[test]
1507 fn test_mask_partial() {
1508 let patches = Patches::new(
1509 10,
1510 0,
1511 buffer![2u64, 5, 8].into_array(),
1512 buffer![100i32, 200, 300].into_array(),
1513 None,
1514 )
1515 .unwrap();
1516
1517 let mask = Mask::from_iter([
1519 false, false, true, false, false, false, false, false, true, false,
1520 ]);
1521 let masked = patches
1522 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1523 .unwrap()
1524 .unwrap();
1525
1526 assert_eq!(masked.values().len(), 1);
1528 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1529
1530 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1532 }
1533
1534 #[test]
1535 fn test_mask_with_offset() {
1536 let patches = Patches::new(
1537 10,
1538 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1541 None,
1542 )
1543 .unwrap();
1544
1545 let mask = Mask::from_iter([
1547 false, false, true, false, false, false, false, false, false, false,
1548 ]);
1549
1550 let masked = patches
1551 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1552 .unwrap()
1553 .unwrap();
1554 assert_eq!(masked.array_len(), 10);
1555 assert_eq!(masked.offset(), 5);
1556 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1557 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1558 }
1559
1560 #[test]
1561 fn test_mask_nullable_values() {
1562 let patches = Patches::new(
1563 10,
1564 0,
1565 buffer![2u64, 5, 8].into_array(),
1566 PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1567 None,
1568 )
1569 .unwrap();
1570
1571 let mask = Mask::from_iter([
1573 false, false, true, false, false, false, false, false, false, false,
1574 ]);
1575 let masked = patches
1576 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1577 .unwrap()
1578 .unwrap();
1579
1580 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1582
1583 let masked_values = masked.values().to_primitive();
1585 assert_eq!(masked_values.len(), 2);
1586 assert!(!masked_values.is_valid(0).unwrap()); assert!(masked_values.is_valid(1).unwrap()); assert_eq!(
1589 i32::try_from(&masked_values.scalar_at(1).unwrap()).unwrap(),
1590 300i32
1591 );
1592 }
1593
1594 #[test]
1595 fn test_filter_keep_all() {
1596 let patches = Patches::new(
1597 10,
1598 0,
1599 buffer![2u64, 5, 8].into_array(),
1600 buffer![100i32, 200, 300].into_array(),
1601 None,
1602 )
1603 .unwrap();
1604
1605 let mask = Mask::from_indices(10, (0..10).collect());
1607 let filtered = patches
1608 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1609 .unwrap()
1610 .unwrap();
1611
1612 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1613 assert_arrays_eq!(
1614 filtered.values(),
1615 PrimitiveArray::from_iter([100i32, 200, 300])
1616 );
1617 }
1618
1619 #[test]
1620 fn test_filter_none() {
1621 let patches = Patches::new(
1622 10,
1623 0,
1624 buffer![2u64, 5, 8].into_array(),
1625 buffer![100i32, 200, 300].into_array(),
1626 None,
1627 )
1628 .unwrap();
1629
1630 let mask = Mask::from_indices(10, vec![]);
1632 let filtered = patches
1633 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1634 .unwrap();
1635 assert!(filtered.is_none());
1636 }
1637
1638 #[test]
1639 fn test_filter_with_indices() {
1640 let patches = Patches::new(
1641 10,
1642 0,
1643 buffer![2u64, 5, 8].into_array(),
1644 buffer![100i32, 200, 300].into_array(),
1645 None,
1646 )
1647 .unwrap();
1648
1649 let mask = Mask::from_indices(10, vec![2, 5, 9]);
1651 let filtered = patches
1652 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1653 .unwrap()
1654 .unwrap();
1655
1656 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1658 }
1659
1660 #[test]
1661 fn test_slice_full_range() {
1662 let patches = Patches::new(
1663 10,
1664 0,
1665 buffer![2u64, 5, 8].into_array(),
1666 buffer![100i32, 200, 300].into_array(),
1667 None,
1668 )
1669 .unwrap();
1670
1671 let sliced = patches.slice(0..10).unwrap().unwrap();
1672
1673 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1674 assert_arrays_eq!(
1675 sliced.values(),
1676 PrimitiveArray::from_iter([100i32, 200, 300])
1677 );
1678 }
1679
1680 #[test]
1681 fn test_slice_partial() {
1682 let patches = Patches::new(
1683 10,
1684 0,
1685 buffer![2u64, 5, 8].into_array(),
1686 buffer![100i32, 200, 300].into_array(),
1687 None,
1688 )
1689 .unwrap();
1690
1691 let sliced = patches.slice(3..8).unwrap().unwrap();
1693
1694 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1696 assert_eq!(sliced.array_len(), 5); assert_eq!(sliced.offset(), 3); }
1699
1700 #[test]
1701 fn test_slice_no_patches() {
1702 let patches = Patches::new(
1703 10,
1704 0,
1705 buffer![2u64, 5, 8].into_array(),
1706 buffer![100i32, 200, 300].into_array(),
1707 None,
1708 )
1709 .unwrap();
1710
1711 let sliced = patches.slice(6..7).unwrap();
1713 assert!(sliced.is_none());
1714 }
1715
1716 #[test]
1717 fn test_slice_with_offset() {
1718 let patches = Patches::new(
1719 10,
1720 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1723 None,
1724 )
1725 .unwrap();
1726
1727 let sliced = patches.slice(3..8).unwrap().unwrap();
1729
1730 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1732 assert_eq!(sliced.offset(), 8); }
1734
1735 #[test]
1736 fn test_patch_values() {
1737 let patches = Patches::new(
1738 10,
1739 0,
1740 buffer![2u64, 5, 8].into_array(),
1741 buffer![100i32, 200, 300].into_array(),
1742 None,
1743 )
1744 .unwrap();
1745
1746 let values = patches.values().to_primitive();
1747 assert_eq!(
1748 i32::try_from(&values.scalar_at(0).unwrap()).unwrap(),
1749 100i32
1750 );
1751 assert_eq!(
1752 i32::try_from(&values.scalar_at(1).unwrap()).unwrap(),
1753 200i32
1754 );
1755 assert_eq!(
1756 i32::try_from(&values.scalar_at(2).unwrap()).unwrap(),
1757 300i32
1758 );
1759 }
1760
1761 #[test]
1762 fn test_indices_range() {
1763 let patches = Patches::new(
1764 10,
1765 0,
1766 buffer![2u64, 5, 8].into_array(),
1767 buffer![100i32, 200, 300].into_array(),
1768 None,
1769 )
1770 .unwrap();
1771
1772 assert_eq!(patches.min_index().unwrap(), 2);
1773 assert_eq!(patches.max_index().unwrap(), 8);
1774 }
1775
1776 #[test]
1777 fn test_search_index() {
1778 let patches = Patches::new(
1779 10,
1780 0,
1781 buffer![2u64, 5, 8].into_array(),
1782 buffer![100i32, 200, 300].into_array(),
1783 None,
1784 )
1785 .unwrap();
1786
1787 assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1789 assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1790 assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1791
1792 assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1794 assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1795 assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1796 assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1797 }
1798
1799 #[test]
1800 fn test_mask_boundary_patches() {
1801 let patches = Patches::new(
1803 10,
1804 0,
1805 buffer![0u64, 9].into_array(),
1806 buffer![100i32, 200].into_array(),
1807 None,
1808 )
1809 .unwrap();
1810
1811 let mask = Mask::from_iter([
1812 true, false, false, false, false, false, false, false, false, false,
1813 ]);
1814 let masked = patches
1815 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1816 .unwrap();
1817 assert!(masked.is_some());
1818 let masked = masked.unwrap();
1819 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1820 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1821 }
1822
1823 #[test]
1824 fn test_mask_all_patches_removed() {
1825 let patches = Patches::new(
1827 10,
1828 0,
1829 buffer![2u64, 5, 8].into_array(),
1830 buffer![100i32, 200, 300].into_array(),
1831 None,
1832 )
1833 .unwrap();
1834
1835 let mask = Mask::from_iter([
1837 false, false, true, false, false, true, false, false, true, false,
1838 ]);
1839 let masked = patches
1840 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1841 .unwrap();
1842 assert!(masked.is_none());
1843 }
1844
1845 #[test]
1846 fn test_mask_no_patches_removed() {
1847 let patches = Patches::new(
1849 10,
1850 0,
1851 buffer![2u64, 5, 8].into_array(),
1852 buffer![100i32, 200, 300].into_array(),
1853 None,
1854 )
1855 .unwrap();
1856
1857 let mask = Mask::from_iter([
1859 true, false, false, true, false, false, true, false, false, true,
1860 ]);
1861 let masked = patches
1862 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1863 .unwrap()
1864 .unwrap();
1865
1866 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1867 assert_arrays_eq!(
1868 masked.values(),
1869 PrimitiveArray::from_iter([100i32, 200, 300])
1870 );
1871 }
1872
1873 #[test]
1874 fn test_mask_single_patch() {
1875 let patches = Patches::new(
1877 5,
1878 0,
1879 buffer![2u64].into_array(),
1880 buffer![42i32].into_array(),
1881 None,
1882 )
1883 .unwrap();
1884
1885 let mask = Mask::from_iter([false, false, true, false, false]);
1887 let masked = patches
1888 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1889 .unwrap();
1890 assert!(masked.is_none());
1891
1892 let mask = Mask::from_iter([true, false, false, true, false]);
1894 let masked = patches
1895 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1896 .unwrap()
1897 .unwrap();
1898 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1899 }
1900
1901 #[test]
1902 fn test_mask_contiguous_patches() {
1903 let patches = Patches::new(
1905 10,
1906 0,
1907 buffer![3u64, 4, 5, 6].into_array(),
1908 buffer![100i32, 200, 300, 400].into_array(),
1909 None,
1910 )
1911 .unwrap();
1912
1913 let mask = Mask::from_iter([
1915 false, false, false, false, true, true, false, false, false, false,
1916 ]);
1917 let masked = patches
1918 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1919 .unwrap()
1920 .unwrap();
1921
1922 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1923 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1924 }
1925
1926 #[test]
1927 fn test_mask_with_large_offset() {
1928 let patches = Patches::new(
1930 20,
1931 15,
1932 buffer![16u64, 17, 19].into_array(), buffer![100i32, 200, 300].into_array(),
1934 None,
1935 )
1936 .unwrap();
1937
1938 let mask = Mask::from_iter([
1940 false, false, true, false, false, false, false, false, false, false, false, false,
1941 false, false, false, false, false, false, false, false,
1942 ]);
1943 let masked = patches
1944 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1945 .unwrap()
1946 .unwrap();
1947
1948 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1949 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1950 }
1951
1952 #[test]
1953 #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1954 fn test_mask_wrong_length() {
1955 let patches = Patches::new(
1956 10,
1957 0,
1958 buffer![2u64, 5, 8].into_array(),
1959 buffer![100i32, 200, 300].into_array(),
1960 None,
1961 )
1962 .unwrap();
1963
1964 let mask = Mask::from_iter([false, false, true, false, false]);
1966 patches
1967 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1968 .unwrap();
1969 }
1970
1971 #[test]
1972 fn test_chunk_offsets_search() {
1973 let indices = buffer![100u64, 200, 3000, 3100].into_array();
1974 let values = buffer![10i32, 20, 30, 40].into_array();
1975 let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
1976 let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
1977
1978 assert!(patches.chunk_offsets.is_some());
1979
1980 assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
1982 assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
1983
1984 assert_eq!(
1986 patches.search_index(1500).unwrap(),
1987 SearchResult::NotFound(2)
1988 );
1989 assert_eq!(
1990 patches.search_index(2000).unwrap(),
1991 SearchResult::NotFound(2)
1992 );
1993
1994 assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
1996 assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
1997
1998 assert_eq!(
1999 patches.search_index(1024).unwrap(),
2000 SearchResult::NotFound(2)
2001 );
2002 }
2003
2004 #[test]
2005 fn test_chunk_offsets_with_slice() {
2006 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2007 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2008 let chunk_offsets = buffer![0u64, 2, 6].into_array();
2009 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2010
2011 let sliced = patches.slice(1000..2200).unwrap().unwrap();
2012
2013 assert!(sliced.chunk_offsets.is_some());
2014 assert_eq!(sliced.offset(), 1000);
2015
2016 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
2017 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2018 assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
2019
2020 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
2021 }
2022
2023 #[test]
2024 fn test_chunk_offsets_with_slice_after_first_chunk() {
2025 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2026 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2027 let chunk_offsets = buffer![0u64, 2, 6].into_array();
2028 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2029
2030 let sliced = patches.slice(1300..2200).unwrap().unwrap();
2031
2032 assert!(sliced.chunk_offsets.is_some());
2033 assert_eq!(sliced.offset(), 1300);
2034
2035 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2036 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
2037 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2038 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
2039 assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
2040 }
2041
2042 #[test]
2043 fn test_chunk_offsets_slice_empty_result() {
2044 let indices = buffer![100u64, 200, 3000, 3100].into_array();
2045 let values = buffer![10i32, 20, 30, 40].into_array();
2046 let chunk_offsets = buffer![0u64, 2].into_array();
2047 let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
2048
2049 let sliced = patches.slice(1000..2000).unwrap();
2050 assert!(sliced.is_none());
2051 }
2052
2053 #[test]
2054 fn test_chunk_offsets_slice_single_patch() {
2055 let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2056 let values = buffer![10i32, 20, 30, 40].into_array();
2057 let chunk_offsets = buffer![0u64, 1, 3].into_array();
2058 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2059
2060 let sliced = patches.slice(1100..1250).unwrap().unwrap();
2061
2062 assert_eq!(sliced.num_patches(), 1);
2063 assert_eq!(sliced.offset(), 1100);
2064 assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2066 assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2067 }
2068
2069 #[test]
2070 fn test_chunk_offsets_slice_across_chunks() {
2071 let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2072 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2073 let chunk_offsets = buffer![0u64, 2, 4].into_array();
2074 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2075
2076 let sliced = patches.slice(150..2150).unwrap().unwrap();
2077
2078 assert_eq!(sliced.num_patches(), 4);
2079 assert_eq!(sliced.offset(), 150);
2080
2081 assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); }
2086
2087 #[test]
2088 fn test_chunk_offsets_boundary_searches() {
2089 let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2090 let values = buffer![10i32, 20, 30, 40, 50].into_array();
2091 let chunk_offsets = buffer![0u64, 1, 4].into_array();
2092 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2093
2094 assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2095 assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2096 assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2097 assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2098 assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2099
2100 assert_eq!(
2101 patches.search_index(1022).unwrap(),
2102 SearchResult::NotFound(0)
2103 );
2104 assert_eq!(
2105 patches.search_index(2046).unwrap(),
2106 SearchResult::NotFound(3)
2107 );
2108 }
2109
2110 #[test]
2111 fn test_chunk_offsets_slice_edge_cases() {
2112 let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2113 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2114 let chunk_offsets = buffer![0u64, 3, 5].into_array();
2115 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2116
2117 let sliced = patches.slice(0..10).unwrap().unwrap();
2119 assert_eq!(sliced.num_patches(), 2);
2120 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2121 assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2122
2123 let sliced = patches.slice(2040..3000).unwrap().unwrap();
2125 assert_eq!(sliced.num_patches(), 2); assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); }
2129
2130 #[test]
2131 fn test_chunk_offsets_slice_nested() {
2132 let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2133 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2134 let chunk_offsets = buffer![0u64].into_array();
2135 let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2136
2137 let sliced1 = patches.slice(150..550).unwrap().unwrap();
2138 assert_eq!(sliced1.num_patches(), 4); let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2141 assert_eq!(sliced2.num_patches(), 1); assert_eq!(sliced2.offset(), 250);
2143
2144 assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(
2146 sliced2.search_index(150).unwrap(),
2147 SearchResult::NotFound(1)
2148 );
2149 }
2150
2151 #[test]
2152 fn test_index_larger_than_length() {
2153 let chunk_offsets = buffer![0u64].into_array();
2154 let indices = buffer![1023u64].into_array();
2155 let values = buffer![42i32].into_array();
2156 let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2157 assert_eq!(
2158 patches.search_index(2048).unwrap(),
2159 SearchResult::NotFound(1)
2160 );
2161 }
2162}