1use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use itertools::Itertools;
10use num_traits::NumCast;
11use vortex_buffer::BitBuffer;
12use vortex_buffer::BufferMut;
13use vortex_error::VortexError;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_bail;
17use vortex_error::vortex_ensure;
18use vortex_error::vortex_err;
19use vortex_mask::AllOr;
20use vortex_mask::Mask;
21use vortex_mask::MaskMut;
22use vortex_utils::aliases::hash_map::HashMap;
23
24use crate::ArrayRef;
25use crate::ArrayVisitor;
26use crate::DynArray;
27use crate::ExecutionCtx;
28use crate::IntoArray;
29use crate::ToCanonical;
30use crate::arrays::BoolArray;
31use crate::arrays::PrimitiveArray;
32use crate::builtins::ArrayBuiltins;
33use crate::compute::is_sorted;
34use crate::dtype::DType;
35use crate::dtype::IntegerPType;
36use crate::dtype::NativePType;
37use crate::dtype::Nullability::NonNullable;
38use crate::dtype::PType;
39use crate::dtype::UnsignedPType;
40use crate::match_each_integer_ptype;
41use crate::match_each_unsigned_integer_ptype;
42use crate::scalar::PValue;
43use crate::scalar::Scalar;
44use crate::search_sorted::SearchResult;
45use crate::search_sorted::SearchSorted;
46use crate::search_sorted::SearchSortedSide;
47use crate::validity::Validity;
48use crate::vtable::ValidityHelper;
49
50pub const PATCH_CHUNK_SIZE: usize = 1024;
53
54#[derive(Copy, Clone, prost::Message)]
55pub struct PatchesMetadata {
56 #[prost(uint64, tag = "1")]
57 len: u64,
58 #[prost(uint64, tag = "2")]
59 offset: u64,
60 #[prost(enumeration = "PType", tag = "3")]
61 indices_ptype: i32,
62 #[prost(uint64, optional, tag = "4")]
63 chunk_offsets_len: Option<u64>,
64 #[prost(enumeration = "PType", optional, tag = "5")]
65 chunk_offsets_ptype: Option<i32>,
66 #[prost(uint64, optional, tag = "6")]
67 offset_within_chunk: Option<u64>,
68}
69
70impl PatchesMetadata {
71 #[inline]
72 pub fn new(
73 len: usize,
74 offset: usize,
75 indices_ptype: PType,
76 chunk_offsets_len: Option<usize>,
77 chunk_offsets_ptype: Option<PType>,
78 offset_within_chunk: Option<usize>,
79 ) -> Self {
80 Self {
81 len: len as u64,
82 offset: offset as u64,
83 indices_ptype: indices_ptype as i32,
84 chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
85 chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
86 offset_within_chunk: offset_within_chunk.map(|len| len as u64),
87 }
88 }
89
90 #[inline]
91 pub fn len(&self) -> VortexResult<usize> {
92 usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
93 }
94
95 #[inline]
96 pub fn is_empty(&self) -> bool {
97 self.len == 0
98 }
99
100 #[inline]
101 pub fn offset(&self) -> VortexResult<usize> {
102 usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
103 }
104
105 #[inline]
106 pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
107 self.chunk_offsets_ptype
108 .map(|t| {
109 PType::try_from(t)
110 .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
111 .map(|ptype| DType::Primitive(ptype, NonNullable))
112 })
113 .transpose()
114 }
115
116 #[inline]
117 pub fn indices_dtype(&self) -> VortexResult<DType> {
118 let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
119 vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
120 })?;
121 vortex_ensure!(
122 ptype.is_unsigned_int(),
123 "Patch indices must be unsigned integers"
124 );
125 Ok(DType::Primitive(ptype, NonNullable))
126 }
127}
128
129#[derive(Debug, Clone)]
131pub struct Patches {
132 array_len: usize,
133 offset: usize,
134 indices: ArrayRef,
135 values: ArrayRef,
136 chunk_offsets: Option<ArrayRef>,
143 offset_within_chunk: Option<usize>,
153}
154
155impl Patches {
156 pub fn new(
157 array_len: usize,
158 offset: usize,
159 indices: ArrayRef,
160 values: ArrayRef,
161 chunk_offsets: Option<ArrayRef>,
162 ) -> VortexResult<Self> {
163 vortex_ensure!(
164 indices.len() == values.len(),
165 "Patch indices and values must have the same length"
166 );
167 vortex_ensure!(
168 indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
169 "Patch indices must be non-nullable unsigned integers, got {:?}",
170 indices.dtype()
171 );
172
173 vortex_ensure!(
174 indices.len() <= array_len,
175 "Patch indices must be shorter than the array length"
176 );
177 vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
178
179 if indices.is_host() && values.is_host() {
182 let max = usize::try_from(&indices.scalar_at(indices.len() - 1)?)
183 .map_err(|_| vortex_err!("indices must be a number"))?;
184 vortex_ensure!(
185 max - offset < array_len,
186 "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
187 );
188
189 debug_assert!(
190 is_sorted(&indices).unwrap_or(Some(false)).unwrap_or(false),
191 "Patch indices must be sorted"
192 );
193 }
194
195 Ok(Self {
196 array_len,
197 offset,
198 indices,
199 values,
200 chunk_offsets: chunk_offsets.clone(),
201 offset_within_chunk: chunk_offsets.map(|_| 0),
203 })
204 }
205
206 pub unsafe fn new_unchecked(
216 array_len: usize,
217 offset: usize,
218 indices: ArrayRef,
219 values: ArrayRef,
220 chunk_offsets: Option<ArrayRef>,
221 offset_within_chunk: Option<usize>,
222 ) -> Self {
223 Self {
224 array_len,
225 offset,
226 indices,
227 values,
228 chunk_offsets,
229 offset_within_chunk,
230 }
231 }
232
233 #[inline]
234 pub fn array_len(&self) -> usize {
235 self.array_len
236 }
237
238 #[inline]
239 pub fn num_patches(&self) -> usize {
240 self.indices.len()
241 }
242
243 #[inline]
244 pub fn dtype(&self) -> &DType {
245 self.values.dtype()
246 }
247
248 #[inline]
249 pub fn indices(&self) -> &ArrayRef {
250 &self.indices
251 }
252
253 #[inline]
254 pub fn into_indices(self) -> ArrayRef {
255 self.indices
256 }
257
258 #[inline]
259 pub fn indices_mut(&mut self) -> &mut ArrayRef {
260 &mut self.indices
261 }
262
263 #[inline]
264 pub fn values(&self) -> &ArrayRef {
265 &self.values
266 }
267
268 #[inline]
269 pub fn into_values(self) -> ArrayRef {
270 self.values
271 }
272
273 #[inline]
274 pub fn values_mut(&mut self) -> &mut ArrayRef {
275 &mut self.values
276 }
277
278 #[inline]
279 pub fn offset(&self) -> usize {
281 self.offset
282 }
283
284 #[inline]
285 pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
286 &self.chunk_offsets
287 }
288
289 #[inline]
290 pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
291 let Some(chunk_offsets) = &self.chunk_offsets else {
292 vortex_bail!("chunk_offsets must be set to retrieve offset at index")
293 };
294
295 chunk_offsets
296 .scalar_at(idx)?
297 .as_primitive()
298 .as_::<usize>()
299 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
300 }
301
302 #[inline]
311 pub fn offset_within_chunk(&self) -> Option<usize> {
312 self.offset_within_chunk
313 }
314
315 #[inline]
316 pub fn indices_ptype(&self) -> VortexResult<PType> {
317 PType::try_from(self.indices.dtype())
318 .map_err(|_| vortex_err!("indices dtype is not primitive"))
319 }
320
321 pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
322 if self.indices.len() > len {
323 vortex_bail!(
324 "Patch indices {} are longer than the array length {}",
325 self.indices.len(),
326 len
327 );
328 }
329 if self.values.dtype() != dtype {
330 vortex_bail!(
331 "Patch values dtype {} does not match array dtype {}",
332 self.values.dtype(),
333 dtype
334 );
335 }
336 let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
337 let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
338
339 Ok(PatchesMetadata::new(
340 self.indices.len(),
341 self.offset,
342 self.indices.dtype().as_ptype(),
343 chunk_offsets_len,
344 chunk_offsets_ptype,
345 self.offset_within_chunk,
346 ))
347 }
348
349 pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
350 unsafe {
352 Ok(Self::new_unchecked(
353 self.array_len,
354 self.offset,
355 self.indices,
356 self.values.cast(values_dtype.clone())?,
357 self.chunk_offsets,
358 self.offset_within_chunk,
359 ))
360 }
361 }
362
363 pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
365 self.search_index(index)?
366 .to_found()
367 .map(|patch_idx| self.values().scalar_at(patch_idx))
368 .transpose()
369 }
370
371 pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
389 if self.chunk_offsets.is_some() {
390 return self.search_index_chunked(index);
391 }
392
393 Self::search_index_binary_search(&self.indices, index + self.offset)
394 }
395
396 fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
402 if indices.is_canonical() {
403 let primitive = indices.to_primitive();
404 match_each_integer_ptype!(primitive.ptype(), |T| {
405 let Ok(needle) = T::try_from(needle) else {
406 return Ok(SearchResult::NotFound(primitive.len()));
411 };
412 return primitive
413 .as_slice::<T>()
414 .search_sorted(&needle, SearchSortedSide::Left);
415 });
416 }
417 indices
418 .as_primitive_typed()
419 .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
420 }
421
422 fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
432 let Some(chunk_offsets) = &self.chunk_offsets else {
433 vortex_bail!("chunk_offsets is required to be set")
434 };
435
436 let Some(offset_within_chunk) = self.offset_within_chunk else {
437 vortex_bail!("offset_within_chunk is required to be set")
438 };
439
440 if index >= self.array_len() {
441 return Ok(SearchResult::NotFound(self.indices().len()));
442 }
443
444 let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
445
446 let base_offset = self.chunk_offset_at(0)?;
448
449 let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
450 .saturating_sub(offset_within_chunk);
457
458 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
459 self.chunk_offset_at(chunk_idx + 1)? - base_offset - offset_within_chunk
460 } else {
461 self.indices.len()
462 };
463
464 let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
465 let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
466
467 Ok(match result {
468 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
469 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
470 })
471 }
472
473 fn search_index_chunked_batch<T, O>(
479 &self,
480 indices: &[T],
481 chunk_offsets: &[O],
482 index: T,
483 ) -> VortexResult<SearchResult>
484 where
485 T: UnsignedPType,
486 O: UnsignedPType,
487 usize: TryFrom<T>,
488 usize: TryFrom<O>,
489 {
490 let Some(offset_within_chunk) = self.offset_within_chunk else {
491 vortex_bail!("offset_within_chunk is required to be set")
492 };
493
494 let chunk_idx = {
495 let Ok(index) = usize::try_from(index) else {
496 return Ok(SearchResult::NotFound(indices.len()));
498 };
499
500 if index >= self.array_len() {
501 return Ok(SearchResult::NotFound(self.indices().len()));
502 }
503
504 (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
505 };
506
507 let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
509 .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
510
511 let patches_start_idx = chunk_offset
512 .saturating_sub(offset_within_chunk);
519
520 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
521 let base_offset_end = chunk_offsets[chunk_idx + 1];
522
523 let offset_within_chunk = O::from(offset_within_chunk)
524 .ok_or_else(|| vortex_err!("offset_within_chunk failed to convert to O"))?;
525
526 usize::try_from(base_offset_end - chunk_offsets[0] - offset_within_chunk)
527 .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
528 } else {
529 self.indices.len()
530 };
531
532 let Some(offset) = T::from(self.offset) else {
533 return Ok(SearchResult::NotFound(indices.len()));
535 };
536
537 let chunk_indices = &indices[patches_start_idx..patches_end_idx];
538 let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
539
540 Ok(match result {
541 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
542 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
543 })
544 }
545
546 pub fn min_index(&self) -> VortexResult<usize> {
548 let first = self
549 .indices
550 .scalar_at(0)?
551 .as_primitive()
552 .as_::<usize>()
553 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
554 Ok(first - self.offset)
555 }
556
557 pub fn max_index(&self) -> VortexResult<usize> {
559 let last = self
560 .indices
561 .scalar_at(self.indices.len() - 1)?
562 .as_primitive()
563 .as_::<usize>()
564 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
565 Ok(last - self.offset)
566 }
567
568 pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
570 if mask.len() != self.array_len {
571 vortex_bail!(
572 "Filter mask length {} does not match array length {}",
573 mask.len(),
574 self.array_len
575 );
576 }
577
578 match mask.indices() {
579 AllOr::All => Ok(Some(self.clone())),
580 AllOr::None => Ok(None),
581 AllOr::Some(mask_indices) => {
582 let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
583 match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
584 filter_patches_with_mask(
585 flat_indices.as_slice::<I>(),
586 self.offset(),
587 self.values(),
588 mask_indices,
589 )
590 })
591 }
592 }
593 }
594
595 pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
600 if mask.len() != self.array_len {
601 vortex_bail!(
602 "Filter mask length {} does not match array length {}",
603 mask.len(),
604 self.array_len
605 );
606 }
607
608 let filter_mask = match mask.bit_buffer() {
609 AllOr::All => return Ok(None),
610 AllOr::None => return Ok(Some(self.clone())),
611 AllOr::Some(masked) => {
612 let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
613 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
614 let patch_indices = patch_indices.as_slice::<P>();
615 Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
616 #[allow(clippy::cast_possible_truncation)]
617 let idx = (patch_indices[i] as usize) - self.offset;
618 !masked.value(idx)
619 }))
620 })
621 }
622 };
623
624 if filter_mask.all_false() {
625 return Ok(None);
626 }
627
628 let filtered_indices = self.indices.filter(filter_mask.clone())?;
630 let filtered_values = self.values.filter(filter_mask)?;
631
632 Ok(Some(Self {
633 array_len: self.array_len,
634 offset: self.offset,
635 indices: filtered_indices,
636 values: filtered_values,
637 chunk_offsets: None,
639 offset_within_chunk: self.offset_within_chunk,
640 }))
641 }
642
643 pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
645 let slice_start_idx = self.search_index(range.start)?.to_index();
646 let slice_end_idx = self.search_index(range.end)?.to_index();
647
648 if slice_start_idx == slice_end_idx {
649 return Ok(None);
650 }
651
652 let values = self.values().slice(slice_start_idx..slice_end_idx)?;
653 let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
654
655 let chunk_offsets = self
656 .chunk_offsets
657 .as_ref()
658 .map(|chunk_offsets| -> VortexResult<ArrayRef> {
659 let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
660 let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
661 let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
662 chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
663 })
664 .transpose()?;
665
666 let offset_within_chunk = chunk_offsets
667 .as_ref()
668 .map(|chunk_offsets| -> VortexResult<usize> {
669 let base_offset = chunk_offsets
670 .scalar_at(0)?
671 .as_primitive()
672 .as_::<usize>()
673 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
674 Ok(slice_start_idx - base_offset)
675 })
676 .transpose()?;
677
678 Ok(Some(Self {
679 array_len: range.len(),
680 offset: range.start + self.offset(),
681 indices,
682 values,
683 chunk_offsets,
684 offset_within_chunk,
685 }))
686 }
687
688 const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
690
691 fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
692 (self.num_patches() as f64 / take_indices.len() as f64)
693 < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
694 }
695
696 pub fn take_with_nulls(
700 &self,
701 take_indices: &ArrayRef,
702 ctx: &mut ExecutionCtx,
703 ) -> VortexResult<Option<Self>> {
704 if take_indices.is_empty() {
705 return Ok(None);
706 }
707
708 let take_indices = take_indices.to_array().execute::<PrimitiveArray>(ctx)?;
709 if self.is_map_faster_than_search(&take_indices) {
710 self.take_map(take_indices, true, ctx)
711 } else {
712 self.take_search(take_indices, true, ctx)
713 }
714 }
715
716 pub fn take(
720 &self,
721 take_indices: &ArrayRef,
722 ctx: &mut ExecutionCtx,
723 ) -> VortexResult<Option<Self>> {
724 if take_indices.is_empty() {
725 return Ok(None);
726 }
727
728 let take_indices = take_indices.to_array().execute::<PrimitiveArray>(ctx)?;
729 if self.is_map_faster_than_search(&take_indices) {
730 self.take_map(take_indices, false, ctx)
731 } else {
732 self.take_search(take_indices, false, ctx)
733 }
734 }
735
736 #[expect(
737 clippy::cognitive_complexity,
738 reason = "complexity is from nested match_each_* macros"
739 )]
740 pub fn take_search(
741 &self,
742 take_indices: PrimitiveArray,
743 include_nulls: bool,
744 ctx: &mut ExecutionCtx,
745 ) -> VortexResult<Option<Self>> {
746 let take_indices_validity = take_indices.validity();
747 let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
748 let chunk_offsets = self
749 .chunk_offsets()
750 .as_ref()
751 .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
752 .transpose()?;
753
754 let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
755 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
756 let patch_indices_slice = patch_indices.as_slice::<PatchT>();
757 match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
758 let take_slice = take_indices.as_slice::<TakeT>();
759
760 if let Some(chunk_offsets) = chunk_offsets {
761 match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
762 let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
763 take_indices_with_search_fn(
764 patch_indices_slice,
765 take_slice,
766 take_indices.validity_mask()?,
767 include_nulls,
768 |take_idx| {
769 self.search_index_chunked_batch(
770 patch_indices_slice,
771 chunk_offsets,
772 take_idx,
773 )
774 },
775 )?
776 })
777 } else {
778 take_indices_with_search_fn(
779 patch_indices_slice,
780 take_slice,
781 take_indices.validity_mask()?,
782 include_nulls,
783 |take_idx| {
784 let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
785 return Ok(SearchResult::NotFound(patch_indices_slice.len()));
787 };
788
789 patch_indices_slice
790 .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
791 },
792 )?
793 }
794 })
795 });
796
797 if new_indices.is_empty() {
798 return Ok(None);
799 }
800
801 let new_indices = new_indices.into_array();
802 let new_array_len = take_indices.len();
803 let values_validity = take_indices_validity.take(&new_indices)?;
804
805 Ok(Some(Self {
806 array_len: new_array_len,
807 offset: 0,
808 indices: new_indices.clone(),
809 values: self
810 .values()
811 .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
812 chunk_offsets: None,
813 offset_within_chunk: Some(0), }))
815 }
816
817 pub fn take_map(
818 &self,
819 take_indices: PrimitiveArray,
820 include_nulls: bool,
821 ctx: &mut ExecutionCtx,
822 ) -> VortexResult<Option<Self>> {
823 let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
824 let new_length = take_indices.len();
825
826 let min_index = self.min_index()?;
827 let max_index = self.max_index()?;
828
829 let Some((new_sparse_indices, value_indices)) =
830 match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
831 match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
832 take_map::<_, TakeIndices>(
833 indices.as_slice::<Indices>(),
834 take_indices,
835 self.offset(),
836 min_index,
837 max_index,
838 include_nulls,
839 )?
840 })
841 })
842 else {
843 return Ok(None);
844 };
845
846 let taken_values = self.values().take(value_indices)?;
847
848 Ok(Some(Patches {
849 array_len: new_length,
850 offset: 0,
851 indices: new_sparse_indices,
852 values: taken_values,
853 chunk_offsets: None,
855 offset_within_chunk: self.offset_within_chunk,
856 }))
857 }
858
859 pub unsafe fn apply_to_buffer<P: NativePType>(
870 &self,
871 buffer: &mut [P],
872 validity: &mut MaskMut,
873 ctx: &mut ExecutionCtx,
874 ) {
875 let patch_indices = self
876 .indices
877 .clone()
878 .execute::<PrimitiveArray>(ctx)
879 .vortex_expect("patch indices must be convertible to PrimitiveArray");
880 let patch_values = self
881 .values
882 .clone()
883 .execute::<PrimitiveArray>(ctx)
884 .vortex_expect("patch values must be convertible to PrimitiveArray");
885 let patches_validity = patch_values.validity();
886
887 let patch_values_slice = patch_values.as_slice::<P>();
888 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |I| {
889 let patch_indices_slice = patch_indices.as_slice::<I>();
890
891 unsafe {
897 apply_patches_to_buffer_inner(
898 buffer,
899 validity,
900 patch_indices_slice,
901 self.offset,
902 patch_values_slice,
903 patches_validity,
904 ctx,
905 );
906 }
907 });
908 }
909
910 pub fn map_values<F>(self, f: F) -> VortexResult<Self>
911 where
912 F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
913 {
914 let values = f(self.values)?;
915 if self.indices.len() != values.len() {
916 vortex_bail!(
917 "map_values must preserve length: expected {} received {}",
918 self.indices.len(),
919 values.len()
920 )
921 }
922
923 Ok(Self {
924 array_len: self.array_len,
925 offset: self.offset,
926 indices: self.indices,
927 values,
928 chunk_offsets: self.chunk_offsets,
929 offset_within_chunk: self.offset_within_chunk,
930 })
931 }
932}
933
934unsafe fn apply_patches_to_buffer_inner<P, I>(
944 buffer: &mut [P],
945 validity: &mut MaskMut,
946 patch_indices: &[I],
947 patch_offset: usize,
948 patch_values: &[P],
949 patches_validity: &Validity,
950 ctx: &mut ExecutionCtx,
951) where
952 P: NativePType,
953 I: UnsignedPType,
954{
955 debug_assert!(!patch_indices.is_empty());
956 debug_assert_eq!(patch_indices.len(), patch_values.len());
957 debug_assert_eq!(buffer.len(), validity.len());
958
959 match patches_validity {
960 Validity::NonNullable | Validity::AllValid => {
961 for (&i, &value) in patch_indices.iter().zip_eq(patch_values) {
963 let index = i.as_() - patch_offset;
964
965 unsafe {
968 validity.set_unchecked(index);
969 }
970 buffer[index] = value;
971 }
972 }
973 Validity::AllInvalid => {
974 for &i in patch_indices {
976 let index = i.as_() - patch_offset;
977
978 unsafe {
981 validity.unset_unchecked(index);
982 }
983 }
984 }
985 Validity::Array(array) => {
986 let bool_array = array
988 .clone()
989 .execute::<BoolArray>(ctx)
990 .vortex_expect("validity array must be convertible to BoolArray");
991 let mask = bool_array.to_bit_buffer();
992 for (patch_idx, (&i, &value)) in patch_indices.iter().zip_eq(patch_values).enumerate() {
993 let index = i.as_() - patch_offset;
994
995 unsafe {
998 if mask.value_unchecked(patch_idx) {
999 buffer[index] = value;
1000 validity.set_unchecked(index);
1001 } else {
1002 validity.unset_unchecked(index);
1003 }
1004 }
1005 }
1006 }
1007 }
1008}
1009
1010fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
1011 indices: &[I],
1012 take_indices: PrimitiveArray,
1013 indices_offset: usize,
1014 min_index: usize,
1015 max_index: usize,
1016 include_nulls: bool,
1017) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
1018where
1019 usize: TryFrom<T>,
1020 VortexError: From<<I as TryFrom<usize>>::Error>,
1021{
1022 let take_indices_len = take_indices.len();
1023 let take_indices_validity = take_indices.validity();
1024 let take_indices_validity_mask = take_indices_validity.to_mask(take_indices_len);
1025 let take_indices = take_indices.as_slice::<T>();
1026 let offset_i = I::try_from(indices_offset)?;
1027
1028 let sparse_index_to_value_index: HashMap<I, usize> = indices
1029 .iter()
1030 .copied()
1031 .map(|idx| idx - offset_i)
1032 .enumerate()
1033 .map(|(value_index, sparse_index)| (sparse_index, value_index))
1034 .collect();
1035
1036 let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1037 let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1038
1039 for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
1040 let ti = usize::try_from(take_idx)
1041 .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
1042
1043 let is_null = match take_indices_validity_mask.bit_buffer() {
1045 AllOr::All => false,
1046 AllOr::None => true,
1047 AllOr::Some(buf) => !buf.value(idx_in_take),
1048 };
1049 if is_null {
1050 if include_nulls {
1051 new_sparse_indices.push(idx_in_take as u64);
1052 value_indices.push(0);
1053 }
1054 } else if ti >= min_index && ti <= max_index {
1055 let ti_as_i = I::try_from(ti)
1056 .map_err(|_| vortex_err!("take index does not fit in index type"))?;
1057 if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
1058 new_sparse_indices.push(idx_in_take as u64);
1059 value_indices.push(value_index as u64);
1060 }
1061 }
1062 }
1063
1064 if new_sparse_indices.is_empty() {
1065 return Ok(None);
1066 }
1067
1068 let new_sparse_indices = new_sparse_indices.into_array();
1069 let values_validity = take_indices_validity.take(&new_sparse_indices)?;
1070 Ok(Some((
1071 new_sparse_indices,
1072 PrimitiveArray::new(value_indices, values_validity).into_array(),
1073 )))
1074}
1075
1076fn filter_patches_with_mask<T: IntegerPType>(
1082 patch_indices: &[T],
1083 offset: usize,
1084 patch_values: &ArrayRef,
1085 mask_indices: &[usize],
1086) -> VortexResult<Option<Patches>> {
1087 let true_count = mask_indices.len();
1088 let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
1089 let mut new_mask_indices = Vec::with_capacity(true_count);
1090
1091 const STRIDE: usize = 4;
1095
1096 let mut mask_idx = 0usize;
1097 let mut true_idx = 0usize;
1098
1099 while mask_idx < patch_indices.len() && true_idx < true_count {
1100 if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1107 let left_min = patch_indices[mask_idx]
1109 .to_usize()
1110 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1111 - offset;
1112 let left_max = patch_indices[mask_idx + STRIDE]
1113 .to_usize()
1114 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1115 - offset;
1116 let right_min = mask_indices[true_idx];
1117 let right_max = mask_indices[true_idx + STRIDE];
1118
1119 if left_min > right_max {
1120 true_idx += STRIDE;
1122 continue;
1123 } else if right_min > left_max {
1124 mask_idx += STRIDE;
1125 continue;
1126 } else {
1127 }
1129 }
1130
1131 let left = patch_indices[mask_idx]
1134 .to_usize()
1135 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1136 - offset;
1137 let right = mask_indices[true_idx];
1138
1139 match left.cmp(&right) {
1140 Ordering::Less => {
1141 mask_idx += 1;
1142 }
1143 Ordering::Greater => {
1144 true_idx += 1;
1145 }
1146 Ordering::Equal => {
1147 new_mask_indices.push(mask_idx);
1149 new_patch_indices.push(true_idx as u64);
1150
1151 mask_idx += 1;
1152 true_idx += 1;
1153 }
1154 }
1155 }
1156
1157 if new_mask_indices.is_empty() {
1158 return Ok(None);
1159 }
1160
1161 let new_patch_indices = new_patch_indices.into_array();
1162 let new_patch_values =
1163 patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1164
1165 Ok(Some(Patches::new(
1166 true_count,
1167 0,
1168 new_patch_indices,
1169 new_patch_values,
1170 None,
1172 )?))
1173}
1174
1175fn take_indices_with_search_fn<
1176 I: UnsignedPType,
1177 T: IntegerPType,
1178 F: Fn(I) -> VortexResult<SearchResult>,
1179>(
1180 indices: &[I],
1181 take_indices: &[T],
1182 take_validity: Mask,
1183 include_nulls: bool,
1184 search_fn: F,
1185) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1186 let mut values_indices = BufferMut::with_capacity(take_indices.len());
1187 let mut new_indices = BufferMut::with_capacity(take_indices.len());
1188
1189 for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1190 if !take_validity.value(new_patch_idx) {
1191 if include_nulls {
1192 values_indices.push(0u64);
1194 new_indices.push(new_patch_idx as u64);
1195 }
1196 continue;
1197 } else {
1198 let search_result = match I::from(take_idx) {
1199 Some(idx) => search_fn(idx)?,
1200 None => SearchResult::NotFound(indices.len()),
1201 };
1202
1203 if let Some(patch_idx) = search_result.to_found() {
1204 values_indices.push(patch_idx as u64);
1205 new_indices.push(new_patch_idx as u64);
1206 }
1207 }
1208 }
1209
1210 Ok((values_indices, new_indices))
1211}
1212
1213#[cfg(test)]
1214mod test {
1215 use vortex_buffer::BufferMut;
1216 use vortex_buffer::buffer;
1217 use vortex_mask::Mask;
1218
1219 use crate::IntoArray;
1220 use crate::LEGACY_SESSION;
1221 use crate::ToCanonical;
1222 use crate::VortexSessionExecute;
1223 use crate::assert_arrays_eq;
1224 use crate::patches::Patches;
1225 use crate::patches::PrimitiveArray;
1226 use crate::search_sorted::SearchResult;
1227 use crate::validity::Validity;
1228
1229 #[test]
1230 fn test_filter() {
1231 let patches = Patches::new(
1232 100,
1233 0,
1234 buffer![10u32, 11, 20].into_array(),
1235 buffer![100, 110, 200].into_array(),
1236 None,
1237 )
1238 .unwrap();
1239
1240 let filtered = patches
1241 .filter(
1242 &Mask::from_indices(100, vec![10, 20, 30]),
1243 &mut LEGACY_SESSION.create_execution_ctx(),
1244 )
1245 .unwrap()
1246 .unwrap();
1247
1248 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1249 assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1250 }
1251
1252 #[test]
1253 fn take_with_nulls() {
1254 let patches = Patches::new(
1255 20,
1256 0,
1257 buffer![2u64, 9, 15].into_array(),
1258 PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1259 None,
1260 )
1261 .unwrap();
1262
1263 let taken = patches
1264 .take(
1265 &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1266 .into_array(),
1267 &mut LEGACY_SESSION.create_execution_ctx(),
1268 )
1269 .unwrap()
1270 .unwrap();
1271 let primitive_values = taken.values().to_primitive();
1272 let primitive_indices = taken.indices().to_primitive();
1273 assert_eq!(taken.array_len(), 2);
1274 assert_arrays_eq!(
1275 primitive_values,
1276 PrimitiveArray::from_option_iter([Some(44i32)])
1277 );
1278 assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1279 assert_eq!(
1280 primitive_values.validity_mask().unwrap(),
1281 Mask::from_iter(vec![true])
1282 );
1283 }
1284
1285 #[test]
1286 fn take_search_with_nulls_chunked() {
1287 let patches = Patches::new(
1288 20,
1289 0,
1290 buffer![2u64, 9, 15].into_array(),
1291 buffer![33_i32, 44, 55].into_array(),
1292 Some(buffer![0u64].into_array()),
1293 )
1294 .unwrap();
1295
1296 let taken = patches
1297 .take_search(
1298 PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1299 true,
1300 &mut LEGACY_SESSION.create_execution_ctx(),
1301 )
1302 .unwrap()
1303 .unwrap();
1304
1305 let primitive_values = taken.values().to_primitive();
1306 assert_eq!(taken.array_len(), 2);
1307 assert_arrays_eq!(
1308 primitive_values,
1309 PrimitiveArray::from_option_iter([Some(44i32), None])
1310 );
1311 assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1312
1313 assert_eq!(
1314 primitive_values.validity_mask().unwrap(),
1315 Mask::from_iter([true, false])
1316 );
1317 }
1318
1319 #[test]
1320 fn take_search_chunked_multiple_chunks() {
1321 let patches = Patches::new(
1322 2048,
1323 0,
1324 buffer![100u64, 500, 1200, 1800].into_array(),
1325 buffer![10_i32, 20, 30, 40].into_array(),
1326 Some(buffer![0u64, 2].into_array()),
1327 )
1328 .unwrap();
1329
1330 let taken = patches
1331 .take_search(
1332 PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1333 true,
1334 &mut LEGACY_SESSION.create_execution_ctx(),
1335 )
1336 .unwrap()
1337 .unwrap();
1338
1339 assert_eq!(taken.array_len(), 3);
1340 assert_arrays_eq!(
1341 taken.values(),
1342 PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1343 );
1344 }
1345
1346 #[test]
1347 fn take_search_chunked_indices_with_no_patches() {
1348 let patches = Patches::new(
1349 20,
1350 0,
1351 buffer![2u64, 9, 15].into_array(),
1352 buffer![33_i32, 44, 55].into_array(),
1353 Some(buffer![0u64].into_array()),
1354 )
1355 .unwrap();
1356
1357 let taken = patches
1358 .take_search(
1359 PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1360 true,
1361 &mut LEGACY_SESSION.create_execution_ctx(),
1362 )
1363 .unwrap();
1364
1365 assert!(taken.is_none());
1366 }
1367
1368 #[test]
1369 fn take_search_chunked_interleaved() {
1370 let patches = Patches::new(
1371 30,
1372 0,
1373 buffer![5u64, 10, 20, 25].into_array(),
1374 buffer![100_i32, 200, 300, 400].into_array(),
1375 Some(buffer![0u64].into_array()),
1376 )
1377 .unwrap();
1378
1379 let taken = patches
1380 .take_search(
1381 PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1382 true,
1383 &mut LEGACY_SESSION.create_execution_ctx(),
1384 )
1385 .unwrap()
1386 .unwrap();
1387
1388 assert_eq!(taken.array_len(), 4);
1389 assert_arrays_eq!(
1390 taken.values(),
1391 PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1392 );
1393 }
1394
1395 #[test]
1396 fn test_take_search_multiple_chunk_offsets() {
1397 let patches = Patches::new(
1398 1500,
1399 0,
1400 BufferMut::from_iter(0..1500u64).into_array(),
1401 BufferMut::from_iter(0..1500i32).into_array(),
1402 Some(buffer![0u64, 1024u64].into_array()),
1403 )
1404 .unwrap();
1405
1406 let taken = patches
1407 .take_search(
1408 PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1409 false,
1410 &mut LEGACY_SESSION.create_execution_ctx(),
1411 )
1412 .unwrap()
1413 .unwrap();
1414
1415 assert_eq!(taken.array_len(), 1500);
1416 }
1417
1418 #[test]
1419 fn test_slice() {
1420 let values = buffer![15_u32, 135, 13531, 42].into_array();
1421 let indices = buffer![10_u64, 11, 50, 100].into_array();
1422
1423 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1424
1425 let sliced = patches.slice(15..100).unwrap().unwrap();
1426 assert_eq!(sliced.array_len(), 100 - 15);
1427 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1428 }
1429
1430 #[test]
1431 fn doubly_sliced() {
1432 let values = buffer![15_u32, 135, 13531, 42].into_array();
1433 let indices = buffer![10_u64, 11, 50, 100].into_array();
1434
1435 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1436
1437 let sliced = patches.slice(15..100).unwrap().unwrap();
1438 assert_eq!(sliced.array_len(), 100 - 15);
1439 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1440
1441 let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1442 assert_arrays_eq!(
1443 doubly_sliced.values(),
1444 PrimitiveArray::from_iter([13531u32])
1445 );
1446 }
1447
1448 #[test]
1449 fn test_mask_all_true() {
1450 let patches = Patches::new(
1451 10,
1452 0,
1453 buffer![2u64, 5, 8].into_array(),
1454 buffer![100i32, 200, 300].into_array(),
1455 None,
1456 )
1457 .unwrap();
1458
1459 let mask = Mask::new_true(10);
1460 let masked = patches
1461 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1462 .unwrap();
1463 assert!(masked.is_none());
1464 }
1465
1466 #[test]
1467 fn test_mask_all_false() {
1468 let patches = Patches::new(
1469 10,
1470 0,
1471 buffer![2u64, 5, 8].into_array(),
1472 buffer![100i32, 200, 300].into_array(),
1473 None,
1474 )
1475 .unwrap();
1476
1477 let mask = Mask::new_false(10);
1478 let masked = patches
1479 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1480 .unwrap()
1481 .unwrap();
1482
1483 assert_arrays_eq!(
1485 masked.values(),
1486 PrimitiveArray::from_iter([100i32, 200, 300])
1487 );
1488 assert!(masked.values().is_valid(0).unwrap());
1489 assert!(masked.values().is_valid(1).unwrap());
1490 assert!(masked.values().is_valid(2).unwrap());
1491
1492 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1494 }
1495
1496 #[test]
1497 fn test_mask_partial() {
1498 let patches = Patches::new(
1499 10,
1500 0,
1501 buffer![2u64, 5, 8].into_array(),
1502 buffer![100i32, 200, 300].into_array(),
1503 None,
1504 )
1505 .unwrap();
1506
1507 let mask = Mask::from_iter([
1509 false, false, true, false, false, false, false, false, true, false,
1510 ]);
1511 let masked = patches
1512 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1513 .unwrap()
1514 .unwrap();
1515
1516 assert_eq!(masked.values().len(), 1);
1518 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1519
1520 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1522 }
1523
1524 #[test]
1525 fn test_mask_with_offset() {
1526 let patches = Patches::new(
1527 10,
1528 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1531 None,
1532 )
1533 .unwrap();
1534
1535 let mask = Mask::from_iter([
1537 false, false, true, false, false, false, false, false, false, false,
1538 ]);
1539
1540 let masked = patches
1541 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1542 .unwrap()
1543 .unwrap();
1544 assert_eq!(masked.array_len(), 10);
1545 assert_eq!(masked.offset(), 5);
1546 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1547 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1548 }
1549
1550 #[test]
1551 fn test_mask_nullable_values() {
1552 let patches = Patches::new(
1553 10,
1554 0,
1555 buffer![2u64, 5, 8].into_array(),
1556 PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1557 None,
1558 )
1559 .unwrap();
1560
1561 let mask = Mask::from_iter([
1563 false, false, true, false, false, false, false, false, false, false,
1564 ]);
1565 let masked = patches
1566 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1567 .unwrap()
1568 .unwrap();
1569
1570 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1572
1573 let masked_values = masked.values().to_primitive();
1575 assert_eq!(masked_values.len(), 2);
1576 assert!(!masked_values.is_valid(0).unwrap()); assert!(masked_values.is_valid(1).unwrap()); assert_eq!(
1579 i32::try_from(&masked_values.scalar_at(1).unwrap()).unwrap(),
1580 300i32
1581 );
1582 }
1583
1584 #[test]
1585 fn test_filter_keep_all() {
1586 let patches = Patches::new(
1587 10,
1588 0,
1589 buffer![2u64, 5, 8].into_array(),
1590 buffer![100i32, 200, 300].into_array(),
1591 None,
1592 )
1593 .unwrap();
1594
1595 let mask = Mask::from_indices(10, (0..10).collect());
1597 let filtered = patches
1598 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1599 .unwrap()
1600 .unwrap();
1601
1602 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1603 assert_arrays_eq!(
1604 filtered.values(),
1605 PrimitiveArray::from_iter([100i32, 200, 300])
1606 );
1607 }
1608
1609 #[test]
1610 fn test_filter_none() {
1611 let patches = Patches::new(
1612 10,
1613 0,
1614 buffer![2u64, 5, 8].into_array(),
1615 buffer![100i32, 200, 300].into_array(),
1616 None,
1617 )
1618 .unwrap();
1619
1620 let mask = Mask::from_indices(10, vec![]);
1622 let filtered = patches
1623 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1624 .unwrap();
1625 assert!(filtered.is_none());
1626 }
1627
1628 #[test]
1629 fn test_filter_with_indices() {
1630 let patches = Patches::new(
1631 10,
1632 0,
1633 buffer![2u64, 5, 8].into_array(),
1634 buffer![100i32, 200, 300].into_array(),
1635 None,
1636 )
1637 .unwrap();
1638
1639 let mask = Mask::from_indices(10, vec![2, 5, 9]);
1641 let filtered = patches
1642 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1643 .unwrap()
1644 .unwrap();
1645
1646 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1648 }
1649
1650 #[test]
1651 fn test_slice_full_range() {
1652 let patches = Patches::new(
1653 10,
1654 0,
1655 buffer![2u64, 5, 8].into_array(),
1656 buffer![100i32, 200, 300].into_array(),
1657 None,
1658 )
1659 .unwrap();
1660
1661 let sliced = patches.slice(0..10).unwrap().unwrap();
1662
1663 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1664 assert_arrays_eq!(
1665 sliced.values(),
1666 PrimitiveArray::from_iter([100i32, 200, 300])
1667 );
1668 }
1669
1670 #[test]
1671 fn test_slice_partial() {
1672 let patches = Patches::new(
1673 10,
1674 0,
1675 buffer![2u64, 5, 8].into_array(),
1676 buffer![100i32, 200, 300].into_array(),
1677 None,
1678 )
1679 .unwrap();
1680
1681 let sliced = patches.slice(3..8).unwrap().unwrap();
1683
1684 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1686 assert_eq!(sliced.array_len(), 5); assert_eq!(sliced.offset(), 3); }
1689
1690 #[test]
1691 fn test_slice_no_patches() {
1692 let patches = Patches::new(
1693 10,
1694 0,
1695 buffer![2u64, 5, 8].into_array(),
1696 buffer![100i32, 200, 300].into_array(),
1697 None,
1698 )
1699 .unwrap();
1700
1701 let sliced = patches.slice(6..7).unwrap();
1703 assert!(sliced.is_none());
1704 }
1705
1706 #[test]
1707 fn test_slice_with_offset() {
1708 let patches = Patches::new(
1709 10,
1710 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1713 None,
1714 )
1715 .unwrap();
1716
1717 let sliced = patches.slice(3..8).unwrap().unwrap();
1719
1720 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1722 assert_eq!(sliced.offset(), 8); }
1724
1725 #[test]
1726 fn test_patch_values() {
1727 let patches = Patches::new(
1728 10,
1729 0,
1730 buffer![2u64, 5, 8].into_array(),
1731 buffer![100i32, 200, 300].into_array(),
1732 None,
1733 )
1734 .unwrap();
1735
1736 let values = patches.values().to_primitive();
1737 assert_eq!(
1738 i32::try_from(&values.scalar_at(0).unwrap()).unwrap(),
1739 100i32
1740 );
1741 assert_eq!(
1742 i32::try_from(&values.scalar_at(1).unwrap()).unwrap(),
1743 200i32
1744 );
1745 assert_eq!(
1746 i32::try_from(&values.scalar_at(2).unwrap()).unwrap(),
1747 300i32
1748 );
1749 }
1750
1751 #[test]
1752 fn test_indices_range() {
1753 let patches = Patches::new(
1754 10,
1755 0,
1756 buffer![2u64, 5, 8].into_array(),
1757 buffer![100i32, 200, 300].into_array(),
1758 None,
1759 )
1760 .unwrap();
1761
1762 assert_eq!(patches.min_index().unwrap(), 2);
1763 assert_eq!(patches.max_index().unwrap(), 8);
1764 }
1765
1766 #[test]
1767 fn test_search_index() {
1768 let patches = Patches::new(
1769 10,
1770 0,
1771 buffer![2u64, 5, 8].into_array(),
1772 buffer![100i32, 200, 300].into_array(),
1773 None,
1774 )
1775 .unwrap();
1776
1777 assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1779 assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1780 assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1781
1782 assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1784 assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1785 assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1786 assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1787 }
1788
1789 #[test]
1790 fn test_mask_boundary_patches() {
1791 let patches = Patches::new(
1793 10,
1794 0,
1795 buffer![0u64, 9].into_array(),
1796 buffer![100i32, 200].into_array(),
1797 None,
1798 )
1799 .unwrap();
1800
1801 let mask = Mask::from_iter([
1802 true, false, false, false, false, false, false, false, false, false,
1803 ]);
1804 let masked = patches
1805 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1806 .unwrap();
1807 assert!(masked.is_some());
1808 let masked = masked.unwrap();
1809 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1810 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1811 }
1812
1813 #[test]
1814 fn test_mask_all_patches_removed() {
1815 let patches = Patches::new(
1817 10,
1818 0,
1819 buffer![2u64, 5, 8].into_array(),
1820 buffer![100i32, 200, 300].into_array(),
1821 None,
1822 )
1823 .unwrap();
1824
1825 let mask = Mask::from_iter([
1827 false, false, true, false, false, true, false, false, true, false,
1828 ]);
1829 let masked = patches
1830 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1831 .unwrap();
1832 assert!(masked.is_none());
1833 }
1834
1835 #[test]
1836 fn test_mask_no_patches_removed() {
1837 let patches = Patches::new(
1839 10,
1840 0,
1841 buffer![2u64, 5, 8].into_array(),
1842 buffer![100i32, 200, 300].into_array(),
1843 None,
1844 )
1845 .unwrap();
1846
1847 let mask = Mask::from_iter([
1849 true, false, false, true, false, false, true, false, false, true,
1850 ]);
1851 let masked = patches
1852 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1853 .unwrap()
1854 .unwrap();
1855
1856 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1857 assert_arrays_eq!(
1858 masked.values(),
1859 PrimitiveArray::from_iter([100i32, 200, 300])
1860 );
1861 }
1862
1863 #[test]
1864 fn test_mask_single_patch() {
1865 let patches = Patches::new(
1867 5,
1868 0,
1869 buffer![2u64].into_array(),
1870 buffer![42i32].into_array(),
1871 None,
1872 )
1873 .unwrap();
1874
1875 let mask = Mask::from_iter([false, false, true, false, false]);
1877 let masked = patches
1878 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1879 .unwrap();
1880 assert!(masked.is_none());
1881
1882 let mask = Mask::from_iter([true, false, false, true, false]);
1884 let masked = patches
1885 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1886 .unwrap()
1887 .unwrap();
1888 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1889 }
1890
1891 #[test]
1892 fn test_mask_contiguous_patches() {
1893 let patches = Patches::new(
1895 10,
1896 0,
1897 buffer![3u64, 4, 5, 6].into_array(),
1898 buffer![100i32, 200, 300, 400].into_array(),
1899 None,
1900 )
1901 .unwrap();
1902
1903 let mask = Mask::from_iter([
1905 false, false, false, false, true, true, false, false, false, false,
1906 ]);
1907 let masked = patches
1908 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1909 .unwrap()
1910 .unwrap();
1911
1912 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1913 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1914 }
1915
1916 #[test]
1917 fn test_mask_with_large_offset() {
1918 let patches = Patches::new(
1920 20,
1921 15,
1922 buffer![16u64, 17, 19].into_array(), buffer![100i32, 200, 300].into_array(),
1924 None,
1925 )
1926 .unwrap();
1927
1928 let mask = Mask::from_iter([
1930 false, false, true, false, false, false, false, false, false, false, false, false,
1931 false, false, false, false, false, false, false, false,
1932 ]);
1933 let masked = patches
1934 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1935 .unwrap()
1936 .unwrap();
1937
1938 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1939 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1940 }
1941
1942 #[test]
1943 #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1944 fn test_mask_wrong_length() {
1945 let patches = Patches::new(
1946 10,
1947 0,
1948 buffer![2u64, 5, 8].into_array(),
1949 buffer![100i32, 200, 300].into_array(),
1950 None,
1951 )
1952 .unwrap();
1953
1954 let mask = Mask::from_iter([false, false, true, false, false]);
1956 patches
1957 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1958 .unwrap();
1959 }
1960
1961 #[test]
1962 fn test_chunk_offsets_search() {
1963 let indices = buffer![100u64, 200, 3000, 3100].into_array();
1964 let values = buffer![10i32, 20, 30, 40].into_array();
1965 let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
1966 let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
1967
1968 assert!(patches.chunk_offsets.is_some());
1969
1970 assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
1972 assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
1973
1974 assert_eq!(
1976 patches.search_index(1500).unwrap(),
1977 SearchResult::NotFound(2)
1978 );
1979 assert_eq!(
1980 patches.search_index(2000).unwrap(),
1981 SearchResult::NotFound(2)
1982 );
1983
1984 assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
1986 assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
1987
1988 assert_eq!(
1989 patches.search_index(1024).unwrap(),
1990 SearchResult::NotFound(2)
1991 );
1992 }
1993
1994 #[test]
1995 fn test_chunk_offsets_with_slice() {
1996 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
1997 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
1998 let chunk_offsets = buffer![0u64, 2, 6].into_array();
1999 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2000
2001 let sliced = patches.slice(1000..2200).unwrap().unwrap();
2002
2003 assert!(sliced.chunk_offsets.is_some());
2004 assert_eq!(sliced.offset(), 1000);
2005
2006 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
2007 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2008 assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
2009
2010 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
2011 }
2012
2013 #[test]
2014 fn test_chunk_offsets_with_slice_after_first_chunk() {
2015 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2016 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2017 let chunk_offsets = buffer![0u64, 2, 6].into_array();
2018 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2019
2020 let sliced = patches.slice(1300..2200).unwrap().unwrap();
2021
2022 assert!(sliced.chunk_offsets.is_some());
2023 assert_eq!(sliced.offset(), 1300);
2024
2025 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2026 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
2027 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2028 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
2029 assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
2030 }
2031
2032 #[test]
2033 fn test_chunk_offsets_slice_empty_result() {
2034 let indices = buffer![100u64, 200, 3000, 3100].into_array();
2035 let values = buffer![10i32, 20, 30, 40].into_array();
2036 let chunk_offsets = buffer![0u64, 2].into_array();
2037 let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
2038
2039 let sliced = patches.slice(1000..2000).unwrap();
2040 assert!(sliced.is_none());
2041 }
2042
2043 #[test]
2044 fn test_chunk_offsets_slice_single_patch() {
2045 let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2046 let values = buffer![10i32, 20, 30, 40].into_array();
2047 let chunk_offsets = buffer![0u64, 1, 3].into_array();
2048 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2049
2050 let sliced = patches.slice(1100..1250).unwrap().unwrap();
2051
2052 assert_eq!(sliced.num_patches(), 1);
2053 assert_eq!(sliced.offset(), 1100);
2054 assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2056 assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2057 }
2058
2059 #[test]
2060 fn test_chunk_offsets_slice_across_chunks() {
2061 let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2062 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2063 let chunk_offsets = buffer![0u64, 2, 4].into_array();
2064 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2065
2066 let sliced = patches.slice(150..2150).unwrap().unwrap();
2067
2068 assert_eq!(sliced.num_patches(), 4);
2069 assert_eq!(sliced.offset(), 150);
2070
2071 assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); }
2076
2077 #[test]
2078 fn test_chunk_offsets_boundary_searches() {
2079 let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2080 let values = buffer![10i32, 20, 30, 40, 50].into_array();
2081 let chunk_offsets = buffer![0u64, 1, 4].into_array();
2082 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2083
2084 assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2085 assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2086 assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2087 assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2088 assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2089
2090 assert_eq!(
2091 patches.search_index(1022).unwrap(),
2092 SearchResult::NotFound(0)
2093 );
2094 assert_eq!(
2095 patches.search_index(2046).unwrap(),
2096 SearchResult::NotFound(3)
2097 );
2098 }
2099
2100 #[test]
2101 fn test_chunk_offsets_slice_edge_cases() {
2102 let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2103 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2104 let chunk_offsets = buffer![0u64, 3, 5].into_array();
2105 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2106
2107 let sliced = patches.slice(0..10).unwrap().unwrap();
2109 assert_eq!(sliced.num_patches(), 2);
2110 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2111 assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2112
2113 let sliced = patches.slice(2040..3000).unwrap().unwrap();
2115 assert_eq!(sliced.num_patches(), 2); assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); }
2119
2120 #[test]
2121 fn test_chunk_offsets_slice_nested() {
2122 let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2123 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2124 let chunk_offsets = buffer![0u64].into_array();
2125 let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2126
2127 let sliced1 = patches.slice(150..550).unwrap().unwrap();
2128 assert_eq!(sliced1.num_patches(), 4); let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2131 assert_eq!(sliced2.num_patches(), 1); assert_eq!(sliced2.offset(), 250);
2133
2134 assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(
2136 sliced2.search_index(150).unwrap(),
2137 SearchResult::NotFound(1)
2138 );
2139 }
2140
2141 #[test]
2142 fn test_index_larger_than_length() {
2143 let chunk_offsets = buffer![0u64].into_array();
2144 let indices = buffer![1023u64].into_array();
2145 let values = buffer![42i32].into_array();
2146 let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2147 assert_eq!(
2148 patches.search_index(2048).unwrap(),
2149 SearchResult::NotFound(1)
2150 );
2151 }
2152}