1use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use itertools::Itertools;
10use num_traits::NumCast;
11use vortex_buffer::BitBuffer;
12use vortex_buffer::BufferMut;
13use vortex_error::VortexError;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_bail;
17use vortex_error::vortex_ensure;
18use vortex_error::vortex_err;
19use vortex_mask::AllOr;
20use vortex_mask::Mask;
21use vortex_mask::MaskMut;
22use vortex_utils::aliases::hash_map::HashMap;
23
24use crate::ArrayRef;
25use crate::ArrayVisitor;
26use crate::DynArray;
27use crate::ExecutionCtx;
28use crate::IntoArray;
29use crate::ToCanonical;
30use crate::arrays::BoolArray;
31use crate::arrays::PrimitiveArray;
32use crate::builtins::ArrayBuiltins;
33use crate::compute::is_sorted;
34use crate::dtype::DType;
35use crate::dtype::IntegerPType;
36use crate::dtype::NativePType;
37use crate::dtype::Nullability;
38use crate::dtype::Nullability::NonNullable;
39use crate::dtype::PType;
40use crate::dtype::UnsignedPType;
41use crate::match_each_integer_ptype;
42use crate::match_each_unsigned_integer_ptype;
43use crate::scalar::PValue;
44use crate::scalar::Scalar;
45use crate::search_sorted::SearchResult;
46use crate::search_sorted::SearchSorted;
47use crate::search_sorted::SearchSortedSide;
48use crate::validity::Validity;
49use crate::vtable::ValidityHelper;
50
51pub const PATCH_CHUNK_SIZE: usize = 1024;
54
55#[derive(Copy, Clone, prost::Message)]
56pub struct PatchesMetadata {
57 #[prost(uint64, tag = "1")]
58 len: u64,
59 #[prost(uint64, tag = "2")]
60 offset: u64,
61 #[prost(enumeration = "PType", tag = "3")]
62 indices_ptype: i32,
63 #[prost(uint64, optional, tag = "4")]
64 chunk_offsets_len: Option<u64>,
65 #[prost(enumeration = "PType", optional, tag = "5")]
66 chunk_offsets_ptype: Option<i32>,
67 #[prost(uint64, optional, tag = "6")]
68 offset_within_chunk: Option<u64>,
69}
70
71impl PatchesMetadata {
72 #[inline]
73 pub fn new(
74 len: usize,
75 offset: usize,
76 indices_ptype: PType,
77 chunk_offsets_len: Option<usize>,
78 chunk_offsets_ptype: Option<PType>,
79 offset_within_chunk: Option<usize>,
80 ) -> Self {
81 Self {
82 len: len as u64,
83 offset: offset as u64,
84 indices_ptype: indices_ptype as i32,
85 chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
86 chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
87 offset_within_chunk: offset_within_chunk.map(|len| len as u64),
88 }
89 }
90
91 #[inline]
92 pub fn len(&self) -> VortexResult<usize> {
93 usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
94 }
95
96 #[inline]
97 pub fn is_empty(&self) -> bool {
98 self.len == 0
99 }
100
101 #[inline]
102 pub fn offset(&self) -> VortexResult<usize> {
103 usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
104 }
105
106 #[inline]
107 pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
108 self.chunk_offsets_ptype
109 .map(|t| {
110 PType::try_from(t)
111 .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
112 .map(|ptype| DType::Primitive(ptype, NonNullable))
113 })
114 .transpose()
115 }
116
117 #[inline]
118 pub fn indices_dtype(&self) -> VortexResult<DType> {
119 let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
120 vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
121 })?;
122 vortex_ensure!(
123 ptype.is_unsigned_int(),
124 "Patch indices must be unsigned integers"
125 );
126 Ok(DType::Primitive(ptype, NonNullable))
127 }
128}
129
130#[derive(Debug, Clone)]
132pub struct Patches {
133 array_len: usize,
134 offset: usize,
135 indices: ArrayRef,
136 values: ArrayRef,
137 chunk_offsets: Option<ArrayRef>,
144 offset_within_chunk: Option<usize>,
154}
155
156impl Patches {
157 pub fn new(
158 array_len: usize,
159 offset: usize,
160 indices: ArrayRef,
161 values: ArrayRef,
162 chunk_offsets: Option<ArrayRef>,
163 ) -> VortexResult<Self> {
164 vortex_ensure!(
165 indices.len() == values.len(),
166 "Patch indices and values must have the same length"
167 );
168 vortex_ensure!(
169 indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
170 "Patch indices must be non-nullable unsigned integers, got {:?}",
171 indices.dtype()
172 );
173
174 vortex_ensure!(
175 indices.len() <= array_len,
176 "Patch indices must be shorter than the array length"
177 );
178 vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
179
180 if indices.is_host() && values.is_host() {
183 let max = usize::try_from(&indices.scalar_at(indices.len() - 1)?)
184 .map_err(|_| vortex_err!("indices must be a number"))?;
185 vortex_ensure!(
186 max - offset < array_len,
187 "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
188 );
189
190 debug_assert!(
191 is_sorted(&indices).unwrap_or(Some(false)).unwrap_or(false),
192 "Patch indices must be sorted"
193 );
194 }
195
196 Ok(Self {
197 array_len,
198 offset,
199 indices,
200 values,
201 chunk_offsets: chunk_offsets.clone(),
202 offset_within_chunk: chunk_offsets.map(|_| 0),
204 })
205 }
206
207 pub unsafe fn new_unchecked(
217 array_len: usize,
218 offset: usize,
219 indices: ArrayRef,
220 values: ArrayRef,
221 chunk_offsets: Option<ArrayRef>,
222 offset_within_chunk: Option<usize>,
223 ) -> Self {
224 Self {
225 array_len,
226 offset,
227 indices,
228 values,
229 chunk_offsets,
230 offset_within_chunk,
231 }
232 }
233
234 #[inline]
235 pub fn array_len(&self) -> usize {
236 self.array_len
237 }
238
239 #[inline]
240 pub fn num_patches(&self) -> usize {
241 self.indices.len()
242 }
243
244 #[inline]
245 pub fn dtype(&self) -> &DType {
246 self.values.dtype()
247 }
248
249 #[inline]
250 pub fn indices(&self) -> &ArrayRef {
251 &self.indices
252 }
253
254 #[inline]
255 pub fn into_indices(self) -> ArrayRef {
256 self.indices
257 }
258
259 #[inline]
260 pub fn indices_mut(&mut self) -> &mut ArrayRef {
261 &mut self.indices
262 }
263
264 #[inline]
265 pub fn values(&self) -> &ArrayRef {
266 &self.values
267 }
268
269 #[inline]
270 pub fn into_values(self) -> ArrayRef {
271 self.values
272 }
273
274 #[inline]
275 pub fn values_mut(&mut self) -> &mut ArrayRef {
276 &mut self.values
277 }
278
279 #[inline]
280 pub fn offset(&self) -> usize {
282 self.offset
283 }
284
285 #[inline]
286 pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
287 &self.chunk_offsets
288 }
289
290 #[inline]
291 pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
292 let Some(chunk_offsets) = &self.chunk_offsets else {
293 vortex_bail!("chunk_offsets must be set to retrieve offset at index")
294 };
295
296 chunk_offsets
297 .scalar_at(idx)?
298 .as_primitive()
299 .as_::<usize>()
300 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
301 }
302
303 #[inline]
312 pub fn offset_within_chunk(&self) -> Option<usize> {
313 self.offset_within_chunk
314 }
315
316 #[inline]
317 pub fn indices_ptype(&self) -> VortexResult<PType> {
318 PType::try_from(self.indices.dtype())
319 .map_err(|_| vortex_err!("indices dtype is not primitive"))
320 }
321
322 pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
323 if self.indices.len() > len {
324 vortex_bail!(
325 "Patch indices {} are longer than the array length {}",
326 self.indices.len(),
327 len
328 );
329 }
330 if self.values.dtype() != dtype {
331 vortex_bail!(
332 "Patch values dtype {} does not match array dtype {}",
333 self.values.dtype(),
334 dtype
335 );
336 }
337 let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
338 let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
339
340 Ok(PatchesMetadata::new(
341 self.indices.len(),
342 self.offset,
343 self.indices.dtype().as_ptype(),
344 chunk_offsets_len,
345 chunk_offsets_ptype,
346 self.offset_within_chunk,
347 ))
348 }
349
350 pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
351 unsafe {
353 Ok(Self::new_unchecked(
354 self.array_len,
355 self.offset,
356 self.indices,
357 self.values.cast(values_dtype.clone())?,
358 self.chunk_offsets,
359 self.offset_within_chunk,
360 ))
361 }
362 }
363
364 pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
366 self.search_index(index)?
367 .to_found()
368 .map(|patch_idx| self.values().scalar_at(patch_idx))
369 .transpose()
370 }
371
372 pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
390 if self.chunk_offsets.is_some() {
391 return self.search_index_chunked(index);
392 }
393
394 Self::search_index_binary_search(&self.indices, index + self.offset)
395 }
396
397 fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
403 if indices.is_canonical() {
404 let primitive = indices.to_primitive();
405 match_each_integer_ptype!(primitive.ptype(), |T| {
406 let Ok(needle) = T::try_from(needle) else {
407 return Ok(SearchResult::NotFound(primitive.len()));
412 };
413 return primitive
414 .as_slice::<T>()
415 .search_sorted(&needle, SearchSortedSide::Left);
416 });
417 }
418 indices
419 .as_primitive_typed()
420 .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
421 }
422
423 fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
433 let Some(chunk_offsets) = &self.chunk_offsets else {
434 vortex_bail!("chunk_offsets is required to be set")
435 };
436
437 let Some(offset_within_chunk) = self.offset_within_chunk else {
438 vortex_bail!("offset_within_chunk is required to be set")
439 };
440
441 if index >= self.array_len() {
442 return Ok(SearchResult::NotFound(self.indices().len()));
443 }
444
445 let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
446
447 let base_offset = self.chunk_offset_at(0)?;
449
450 let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
451 .saturating_sub(offset_within_chunk);
458
459 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
460 self.chunk_offset_at(chunk_idx + 1)? - base_offset - offset_within_chunk
461 } else {
462 self.indices.len()
463 };
464
465 let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
466 let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
467
468 Ok(match result {
469 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
470 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
471 })
472 }
473
474 fn search_index_chunked_batch<T, O>(
480 &self,
481 indices: &[T],
482 chunk_offsets: &[O],
483 index: T,
484 ) -> VortexResult<SearchResult>
485 where
486 T: UnsignedPType,
487 O: UnsignedPType,
488 usize: TryFrom<T>,
489 usize: TryFrom<O>,
490 {
491 let Some(offset_within_chunk) = self.offset_within_chunk else {
492 vortex_bail!("offset_within_chunk is required to be set")
493 };
494
495 let chunk_idx = {
496 let Ok(index) = usize::try_from(index) else {
497 return Ok(SearchResult::NotFound(indices.len()));
499 };
500
501 if index >= self.array_len() {
502 return Ok(SearchResult::NotFound(self.indices().len()));
503 }
504
505 (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
506 };
507
508 let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
510 .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
511
512 let patches_start_idx = chunk_offset
513 .saturating_sub(offset_within_chunk);
520
521 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
522 let base_offset_end = chunk_offsets[chunk_idx + 1];
523
524 let offset_within_chunk = O::from(offset_within_chunk)
525 .ok_or_else(|| vortex_err!("offset_within_chunk failed to convert to O"))?;
526
527 usize::try_from(base_offset_end - chunk_offsets[0] - offset_within_chunk)
528 .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
529 } else {
530 self.indices.len()
531 };
532
533 let Some(offset) = T::from(self.offset) else {
534 return Ok(SearchResult::NotFound(indices.len()));
536 };
537
538 let chunk_indices = &indices[patches_start_idx..patches_end_idx];
539 let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
540
541 Ok(match result {
542 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
543 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
544 })
545 }
546
547 pub fn min_index(&self) -> VortexResult<usize> {
549 let first = self
550 .indices
551 .scalar_at(0)?
552 .as_primitive()
553 .as_::<usize>()
554 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
555 Ok(first - self.offset)
556 }
557
558 pub fn max_index(&self) -> VortexResult<usize> {
560 let last = self
561 .indices
562 .scalar_at(self.indices.len() - 1)?
563 .as_primitive()
564 .as_::<usize>()
565 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
566 Ok(last - self.offset)
567 }
568
569 pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
571 if mask.len() != self.array_len {
572 vortex_bail!(
573 "Filter mask length {} does not match array length {}",
574 mask.len(),
575 self.array_len
576 );
577 }
578
579 match mask.indices() {
580 AllOr::All => Ok(Some(self.clone())),
581 AllOr::None => Ok(None),
582 AllOr::Some(mask_indices) => {
583 let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
584 match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
585 filter_patches_with_mask(
586 flat_indices.as_slice::<I>(),
587 self.offset(),
588 self.values(),
589 mask_indices,
590 )
591 })
592 }
593 }
594 }
595
596 pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
601 if mask.len() != self.array_len {
602 vortex_bail!(
603 "Filter mask length {} does not match array length {}",
604 mask.len(),
605 self.array_len
606 );
607 }
608
609 let filter_mask = match mask.bit_buffer() {
610 AllOr::All => return Ok(None),
611 AllOr::None => return Ok(Some(self.clone())),
612 AllOr::Some(masked) => {
613 let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
614 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
615 let patch_indices = patch_indices.as_slice::<P>();
616 Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
617 #[allow(clippy::cast_possible_truncation)]
618 let idx = (patch_indices[i] as usize) - self.offset;
619 !masked.value(idx)
620 }))
621 })
622 }
623 };
624
625 if filter_mask.all_false() {
626 return Ok(None);
627 }
628
629 let filtered_indices = self.indices.filter(filter_mask.clone())?;
631 let filtered_values = self.values.filter(filter_mask)?;
632
633 Ok(Some(Self {
634 array_len: self.array_len,
635 offset: self.offset,
636 indices: filtered_indices,
637 values: filtered_values,
638 chunk_offsets: None,
640 offset_within_chunk: self.offset_within_chunk,
641 }))
642 }
643
644 pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
646 let slice_start_idx = self.search_index(range.start)?.to_index();
647 let slice_end_idx = self.search_index(range.end)?.to_index();
648
649 if slice_start_idx == slice_end_idx {
650 return Ok(None);
651 }
652
653 let values = self.values().slice(slice_start_idx..slice_end_idx)?;
654 let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
655
656 let chunk_offsets = self
657 .chunk_offsets
658 .as_ref()
659 .map(|chunk_offsets| -> VortexResult<ArrayRef> {
660 let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
661 let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
662 let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
663 chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
664 })
665 .transpose()?;
666
667 let offset_within_chunk = chunk_offsets
668 .as_ref()
669 .map(|chunk_offsets| -> VortexResult<usize> {
670 let base_offset = chunk_offsets
671 .scalar_at(0)?
672 .as_primitive()
673 .as_::<usize>()
674 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
675 Ok(slice_start_idx - base_offset)
676 })
677 .transpose()?;
678
679 Ok(Some(Self {
680 array_len: range.len(),
681 offset: range.start + self.offset(),
682 indices,
683 values,
684 chunk_offsets,
685 offset_within_chunk,
686 }))
687 }
688
689 const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
691
692 fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
693 (self.num_patches() as f64 / take_indices.len() as f64)
694 < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
695 }
696
697 pub fn take_with_nulls(
701 &self,
702 take_indices: &ArrayRef,
703 ctx: &mut ExecutionCtx,
704 ) -> VortexResult<Option<Self>> {
705 if take_indices.is_empty() {
706 return Ok(None);
707 }
708
709 let take_indices = take_indices.to_array().execute::<PrimitiveArray>(ctx)?;
710 if self.is_map_faster_than_search(&take_indices) {
711 self.take_map(take_indices, true, ctx)
712 } else {
713 self.take_search(take_indices, true, ctx)
714 }
715 }
716
717 pub fn take(
721 &self,
722 take_indices: &ArrayRef,
723 ctx: &mut ExecutionCtx,
724 ) -> VortexResult<Option<Self>> {
725 if take_indices.is_empty() {
726 return Ok(None);
727 }
728
729 let take_indices = take_indices.to_array().execute::<PrimitiveArray>(ctx)?;
730 if self.is_map_faster_than_search(&take_indices) {
731 self.take_map(take_indices, false, ctx)
732 } else {
733 self.take_search(take_indices, false, ctx)
734 }
735 }
736
737 #[expect(
738 clippy::cognitive_complexity,
739 reason = "complexity is from nested match_each_* macros"
740 )]
741 pub fn take_search(
742 &self,
743 take_indices: PrimitiveArray,
744 include_nulls: bool,
745 ctx: &mut ExecutionCtx,
746 ) -> VortexResult<Option<Self>> {
747 let take_indices_validity = take_indices.validity();
748 let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
749 let chunk_offsets = self
750 .chunk_offsets()
751 .as_ref()
752 .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
753 .transpose()?;
754
755 let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
756 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
757 let patch_indices_slice = patch_indices.as_slice::<PatchT>();
758 match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
759 let take_slice = take_indices.as_slice::<TakeT>();
760
761 if let Some(chunk_offsets) = chunk_offsets {
762 match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
763 let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
764 take_indices_with_search_fn(
765 patch_indices_slice,
766 take_slice,
767 take_indices.validity_mask()?,
768 include_nulls,
769 |take_idx| {
770 self.search_index_chunked_batch(
771 patch_indices_slice,
772 chunk_offsets,
773 take_idx,
774 )
775 },
776 )?
777 })
778 } else {
779 take_indices_with_search_fn(
780 patch_indices_slice,
781 take_slice,
782 take_indices.validity_mask()?,
783 include_nulls,
784 |take_idx| {
785 let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
786 return Ok(SearchResult::NotFound(patch_indices_slice.len()));
788 };
789
790 patch_indices_slice
791 .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
792 },
793 )?
794 }
795 })
796 });
797
798 if new_indices.is_empty() {
799 return Ok(None);
800 }
801
802 let new_indices = new_indices.into_array();
803 let new_array_len = take_indices.len();
804 let values_validity = take_indices_validity.take(&new_indices)?;
805
806 Ok(Some(Self {
807 array_len: new_array_len,
808 offset: 0,
809 indices: new_indices.clone(),
810 values: self
811 .values()
812 .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
813 chunk_offsets: None,
814 offset_within_chunk: Some(0), }))
816 }
817
818 pub fn take_map(
819 &self,
820 take_indices: PrimitiveArray,
821 include_nulls: bool,
822 ctx: &mut ExecutionCtx,
823 ) -> VortexResult<Option<Self>> {
824 let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
825 let new_length = take_indices.len();
826
827 let min_index = self.min_index()?;
828 let max_index = self.max_index()?;
829
830 let Some((new_sparse_indices, value_indices)) =
831 match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
832 match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
833 let take_validity = take_indices
834 .validity()
835 .execute_mask(take_indices.len(), ctx)?;
836 let take_nullability = take_indices.validity().nullability();
837 let take_slice = take_indices.as_slice::<TakeIndices>();
838 take_map::<_, TakeIndices>(
839 indices.as_slice::<Indices>(),
840 take_slice,
841 take_validity,
842 take_nullability,
843 self.offset(),
844 min_index,
845 max_index,
846 include_nulls,
847 )?
848 })
849 })
850 else {
851 return Ok(None);
852 };
853
854 let taken_values = self.values().take(value_indices)?;
855
856 Ok(Some(Patches {
857 array_len: new_length,
858 offset: 0,
859 indices: new_sparse_indices,
860 values: taken_values,
861 chunk_offsets: None,
863 offset_within_chunk: self.offset_within_chunk,
864 }))
865 }
866
867 pub unsafe fn apply_to_buffer<P: NativePType>(
878 &self,
879 buffer: &mut [P],
880 validity: &mut MaskMut,
881 ctx: &mut ExecutionCtx,
882 ) {
883 let patch_indices = self
884 .indices
885 .clone()
886 .execute::<PrimitiveArray>(ctx)
887 .vortex_expect("patch indices must be convertible to PrimitiveArray");
888 let patch_values = self
889 .values
890 .clone()
891 .execute::<PrimitiveArray>(ctx)
892 .vortex_expect("patch values must be convertible to PrimitiveArray");
893 let patches_validity = patch_values.validity();
894
895 let patch_values_slice = patch_values.as_slice::<P>();
896 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |I| {
897 let patch_indices_slice = patch_indices.as_slice::<I>();
898
899 unsafe {
905 apply_patches_to_buffer_inner(
906 buffer,
907 validity,
908 patch_indices_slice,
909 self.offset,
910 patch_values_slice,
911 patches_validity,
912 ctx,
913 );
914 }
915 });
916 }
917
918 pub fn map_values<F>(self, f: F) -> VortexResult<Self>
919 where
920 F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
921 {
922 let values = f(self.values)?;
923 if self.indices.len() != values.len() {
924 vortex_bail!(
925 "map_values must preserve length: expected {} received {}",
926 self.indices.len(),
927 values.len()
928 )
929 }
930
931 Ok(Self {
932 array_len: self.array_len,
933 offset: self.offset,
934 indices: self.indices,
935 values,
936 chunk_offsets: self.chunk_offsets,
937 offset_within_chunk: self.offset_within_chunk,
938 })
939 }
940}
941
942unsafe fn apply_patches_to_buffer_inner<P, I>(
952 buffer: &mut [P],
953 validity: &mut MaskMut,
954 patch_indices: &[I],
955 patch_offset: usize,
956 patch_values: &[P],
957 patches_validity: &Validity,
958 ctx: &mut ExecutionCtx,
959) where
960 P: NativePType,
961 I: UnsignedPType,
962{
963 debug_assert!(!patch_indices.is_empty());
964 debug_assert_eq!(patch_indices.len(), patch_values.len());
965 debug_assert_eq!(buffer.len(), validity.len());
966
967 match patches_validity {
968 Validity::NonNullable | Validity::AllValid => {
969 for (&i, &value) in patch_indices.iter().zip_eq(patch_values) {
971 let index = i.as_() - patch_offset;
972
973 unsafe {
976 validity.set_unchecked(index);
977 }
978 buffer[index] = value;
979 }
980 }
981 Validity::AllInvalid => {
982 for &i in patch_indices {
984 let index = i.as_() - patch_offset;
985
986 unsafe {
989 validity.unset_unchecked(index);
990 }
991 }
992 }
993 Validity::Array(array) => {
994 let bool_array = array
996 .clone()
997 .execute::<BoolArray>(ctx)
998 .vortex_expect("validity array must be convertible to BoolArray");
999 let mask = bool_array.to_bit_buffer();
1000 for (patch_idx, (&i, &value)) in patch_indices.iter().zip_eq(patch_values).enumerate() {
1001 let index = i.as_() - patch_offset;
1002
1003 unsafe {
1006 if mask.value_unchecked(patch_idx) {
1007 buffer[index] = value;
1008 validity.set_unchecked(index);
1009 } else {
1010 validity.unset_unchecked(index);
1011 }
1012 }
1013 }
1014 }
1015 }
1016}
1017
1018#[allow(clippy::too_many_arguments)] fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
1020 indices: &[I],
1021 take_indices: &[T],
1022 take_validity: Mask,
1023 take_nullability: Nullability,
1024 indices_offset: usize,
1025 min_index: usize,
1026 max_index: usize,
1027 include_nulls: bool,
1028) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
1029where
1030 usize: TryFrom<T>,
1031 VortexError: From<<I as TryFrom<usize>>::Error>,
1032{
1033 let offset_i = I::try_from(indices_offset)?;
1034
1035 let sparse_index_to_value_index: HashMap<I, usize> = indices
1036 .iter()
1037 .copied()
1038 .map(|idx| idx - offset_i)
1039 .enumerate()
1040 .map(|(value_index, sparse_index)| (sparse_index, value_index))
1041 .collect();
1042
1043 let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1044 let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1045
1046 for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
1047 let ti = usize::try_from(take_idx)
1048 .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
1049
1050 let is_null = match take_validity.bit_buffer() {
1052 AllOr::All => false,
1053 AllOr::None => true,
1054 AllOr::Some(buf) => !buf.value(idx_in_take),
1055 };
1056 if is_null {
1057 if include_nulls {
1058 new_sparse_indices.push(idx_in_take as u64);
1059 value_indices.push(0);
1060 }
1061 } else if ti >= min_index && ti <= max_index {
1062 let ti_as_i = I::try_from(ti)
1063 .map_err(|_| vortex_err!("take index does not fit in index type"))?;
1064 if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
1065 new_sparse_indices.push(idx_in_take as u64);
1066 value_indices.push(value_index as u64);
1067 }
1068 }
1069 }
1070
1071 if new_sparse_indices.is_empty() {
1072 return Ok(None);
1073 }
1074
1075 let new_sparse_indices = new_sparse_indices.into_array();
1076 let values_validity =
1077 Validity::from_mask(take_validity, take_nullability).take(&new_sparse_indices)?;
1078 Ok(Some((
1079 new_sparse_indices,
1080 PrimitiveArray::new(value_indices, values_validity).into_array(),
1081 )))
1082}
1083
1084fn filter_patches_with_mask<T: IntegerPType>(
1090 patch_indices: &[T],
1091 offset: usize,
1092 patch_values: &ArrayRef,
1093 mask_indices: &[usize],
1094) -> VortexResult<Option<Patches>> {
1095 let true_count = mask_indices.len();
1096 let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
1097 let mut new_mask_indices = Vec::with_capacity(true_count);
1098
1099 const STRIDE: usize = 4;
1103
1104 let mut mask_idx = 0usize;
1105 let mut true_idx = 0usize;
1106
1107 while mask_idx < patch_indices.len() && true_idx < true_count {
1108 if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1115 let left_min = patch_indices[mask_idx]
1117 .to_usize()
1118 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1119 - offset;
1120 let left_max = patch_indices[mask_idx + STRIDE]
1121 .to_usize()
1122 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1123 - offset;
1124 let right_min = mask_indices[true_idx];
1125 let right_max = mask_indices[true_idx + STRIDE];
1126
1127 if left_min > right_max {
1128 true_idx += STRIDE;
1130 continue;
1131 } else if right_min > left_max {
1132 mask_idx += STRIDE;
1133 continue;
1134 } else {
1135 }
1137 }
1138
1139 let left = patch_indices[mask_idx]
1142 .to_usize()
1143 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1144 - offset;
1145 let right = mask_indices[true_idx];
1146
1147 match left.cmp(&right) {
1148 Ordering::Less => {
1149 mask_idx += 1;
1150 }
1151 Ordering::Greater => {
1152 true_idx += 1;
1153 }
1154 Ordering::Equal => {
1155 new_mask_indices.push(mask_idx);
1157 new_patch_indices.push(true_idx as u64);
1158
1159 mask_idx += 1;
1160 true_idx += 1;
1161 }
1162 }
1163 }
1164
1165 if new_mask_indices.is_empty() {
1166 return Ok(None);
1167 }
1168
1169 let new_patch_indices = new_patch_indices.into_array();
1170 let new_patch_values =
1171 patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1172
1173 Ok(Some(Patches::new(
1174 true_count,
1175 0,
1176 new_patch_indices,
1177 new_patch_values,
1178 None,
1180 )?))
1181}
1182
1183fn take_indices_with_search_fn<
1184 I: UnsignedPType,
1185 T: IntegerPType,
1186 F: Fn(I) -> VortexResult<SearchResult>,
1187>(
1188 indices: &[I],
1189 take_indices: &[T],
1190 take_validity: Mask,
1191 include_nulls: bool,
1192 search_fn: F,
1193) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1194 let mut values_indices = BufferMut::with_capacity(take_indices.len());
1195 let mut new_indices = BufferMut::with_capacity(take_indices.len());
1196
1197 for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1198 if !take_validity.value(new_patch_idx) {
1199 if include_nulls {
1200 values_indices.push(0u64);
1202 new_indices.push(new_patch_idx as u64);
1203 }
1204 continue;
1205 } else {
1206 let search_result = match I::from(take_idx) {
1207 Some(idx) => search_fn(idx)?,
1208 None => SearchResult::NotFound(indices.len()),
1209 };
1210
1211 if let Some(patch_idx) = search_result.to_found() {
1212 values_indices.push(patch_idx as u64);
1213 new_indices.push(new_patch_idx as u64);
1214 }
1215 }
1216 }
1217
1218 Ok((values_indices, new_indices))
1219}
1220
1221#[cfg(test)]
1222mod test {
1223 use vortex_buffer::BufferMut;
1224 use vortex_buffer::buffer;
1225 use vortex_mask::Mask;
1226
1227 use crate::IntoArray;
1228 use crate::LEGACY_SESSION;
1229 use crate::ToCanonical;
1230 use crate::VortexSessionExecute;
1231 use crate::assert_arrays_eq;
1232 use crate::patches::Patches;
1233 use crate::patches::PrimitiveArray;
1234 use crate::search_sorted::SearchResult;
1235 use crate::validity::Validity;
1236
1237 #[test]
1238 fn test_filter() {
1239 let patches = Patches::new(
1240 100,
1241 0,
1242 buffer![10u32, 11, 20].into_array(),
1243 buffer![100, 110, 200].into_array(),
1244 None,
1245 )
1246 .unwrap();
1247
1248 let filtered = patches
1249 .filter(
1250 &Mask::from_indices(100, vec![10, 20, 30]),
1251 &mut LEGACY_SESSION.create_execution_ctx(),
1252 )
1253 .unwrap()
1254 .unwrap();
1255
1256 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1257 assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1258 }
1259
1260 #[test]
1261 fn take_with_nulls() {
1262 let patches = Patches::new(
1263 20,
1264 0,
1265 buffer![2u64, 9, 15].into_array(),
1266 PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1267 None,
1268 )
1269 .unwrap();
1270
1271 let taken = patches
1272 .take(
1273 &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1274 .into_array(),
1275 &mut LEGACY_SESSION.create_execution_ctx(),
1276 )
1277 .unwrap()
1278 .unwrap();
1279 let primitive_values = taken.values().to_primitive();
1280 let primitive_indices = taken.indices().to_primitive();
1281 assert_eq!(taken.array_len(), 2);
1282 assert_arrays_eq!(
1283 primitive_values,
1284 PrimitiveArray::from_option_iter([Some(44i32)])
1285 );
1286 assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1287 assert_eq!(
1288 primitive_values.validity_mask().unwrap(),
1289 Mask::from_iter(vec![true])
1290 );
1291 }
1292
1293 #[test]
1294 fn take_search_with_nulls_chunked() {
1295 let patches = Patches::new(
1296 20,
1297 0,
1298 buffer![2u64, 9, 15].into_array(),
1299 buffer![33_i32, 44, 55].into_array(),
1300 Some(buffer![0u64].into_array()),
1301 )
1302 .unwrap();
1303
1304 let taken = patches
1305 .take_search(
1306 PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1307 true,
1308 &mut LEGACY_SESSION.create_execution_ctx(),
1309 )
1310 .unwrap()
1311 .unwrap();
1312
1313 let primitive_values = taken.values().to_primitive();
1314 assert_eq!(taken.array_len(), 2);
1315 assert_arrays_eq!(
1316 primitive_values,
1317 PrimitiveArray::from_option_iter([Some(44i32), None])
1318 );
1319 assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1320
1321 assert_eq!(
1322 primitive_values.validity_mask().unwrap(),
1323 Mask::from_iter([true, false])
1324 );
1325 }
1326
1327 #[test]
1328 fn take_search_chunked_multiple_chunks() {
1329 let patches = Patches::new(
1330 2048,
1331 0,
1332 buffer![100u64, 500, 1200, 1800].into_array(),
1333 buffer![10_i32, 20, 30, 40].into_array(),
1334 Some(buffer![0u64, 2].into_array()),
1335 )
1336 .unwrap();
1337
1338 let taken = patches
1339 .take_search(
1340 PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1341 true,
1342 &mut LEGACY_SESSION.create_execution_ctx(),
1343 )
1344 .unwrap()
1345 .unwrap();
1346
1347 assert_eq!(taken.array_len(), 3);
1348 assert_arrays_eq!(
1349 taken.values(),
1350 PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1351 );
1352 }
1353
1354 #[test]
1355 fn take_search_chunked_indices_with_no_patches() {
1356 let patches = Patches::new(
1357 20,
1358 0,
1359 buffer![2u64, 9, 15].into_array(),
1360 buffer![33_i32, 44, 55].into_array(),
1361 Some(buffer![0u64].into_array()),
1362 )
1363 .unwrap();
1364
1365 let taken = patches
1366 .take_search(
1367 PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1368 true,
1369 &mut LEGACY_SESSION.create_execution_ctx(),
1370 )
1371 .unwrap();
1372
1373 assert!(taken.is_none());
1374 }
1375
1376 #[test]
1377 fn take_search_chunked_interleaved() {
1378 let patches = Patches::new(
1379 30,
1380 0,
1381 buffer![5u64, 10, 20, 25].into_array(),
1382 buffer![100_i32, 200, 300, 400].into_array(),
1383 Some(buffer![0u64].into_array()),
1384 )
1385 .unwrap();
1386
1387 let taken = patches
1388 .take_search(
1389 PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1390 true,
1391 &mut LEGACY_SESSION.create_execution_ctx(),
1392 )
1393 .unwrap()
1394 .unwrap();
1395
1396 assert_eq!(taken.array_len(), 4);
1397 assert_arrays_eq!(
1398 taken.values(),
1399 PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1400 );
1401 }
1402
1403 #[test]
1404 fn test_take_search_multiple_chunk_offsets() {
1405 let patches = Patches::new(
1406 1500,
1407 0,
1408 BufferMut::from_iter(0..1500u64).into_array(),
1409 BufferMut::from_iter(0..1500i32).into_array(),
1410 Some(buffer![0u64, 1024u64].into_array()),
1411 )
1412 .unwrap();
1413
1414 let taken = patches
1415 .take_search(
1416 PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1417 false,
1418 &mut LEGACY_SESSION.create_execution_ctx(),
1419 )
1420 .unwrap()
1421 .unwrap();
1422
1423 assert_eq!(taken.array_len(), 1500);
1424 }
1425
1426 #[test]
1427 fn test_slice() {
1428 let values = buffer![15_u32, 135, 13531, 42].into_array();
1429 let indices = buffer![10_u64, 11, 50, 100].into_array();
1430
1431 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1432
1433 let sliced = patches.slice(15..100).unwrap().unwrap();
1434 assert_eq!(sliced.array_len(), 100 - 15);
1435 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1436 }
1437
1438 #[test]
1439 fn doubly_sliced() {
1440 let values = buffer![15_u32, 135, 13531, 42].into_array();
1441 let indices = buffer![10_u64, 11, 50, 100].into_array();
1442
1443 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1444
1445 let sliced = patches.slice(15..100).unwrap().unwrap();
1446 assert_eq!(sliced.array_len(), 100 - 15);
1447 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1448
1449 let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1450 assert_arrays_eq!(
1451 doubly_sliced.values(),
1452 PrimitiveArray::from_iter([13531u32])
1453 );
1454 }
1455
1456 #[test]
1457 fn test_mask_all_true() {
1458 let patches = Patches::new(
1459 10,
1460 0,
1461 buffer![2u64, 5, 8].into_array(),
1462 buffer![100i32, 200, 300].into_array(),
1463 None,
1464 )
1465 .unwrap();
1466
1467 let mask = Mask::new_true(10);
1468 let masked = patches
1469 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1470 .unwrap();
1471 assert!(masked.is_none());
1472 }
1473
1474 #[test]
1475 fn test_mask_all_false() {
1476 let patches = Patches::new(
1477 10,
1478 0,
1479 buffer![2u64, 5, 8].into_array(),
1480 buffer![100i32, 200, 300].into_array(),
1481 None,
1482 )
1483 .unwrap();
1484
1485 let mask = Mask::new_false(10);
1486 let masked = patches
1487 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1488 .unwrap()
1489 .unwrap();
1490
1491 assert_arrays_eq!(
1493 masked.values(),
1494 PrimitiveArray::from_iter([100i32, 200, 300])
1495 );
1496 assert!(masked.values().is_valid(0).unwrap());
1497 assert!(masked.values().is_valid(1).unwrap());
1498 assert!(masked.values().is_valid(2).unwrap());
1499
1500 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1502 }
1503
1504 #[test]
1505 fn test_mask_partial() {
1506 let patches = Patches::new(
1507 10,
1508 0,
1509 buffer![2u64, 5, 8].into_array(),
1510 buffer![100i32, 200, 300].into_array(),
1511 None,
1512 )
1513 .unwrap();
1514
1515 let mask = Mask::from_iter([
1517 false, false, true, false, false, false, false, false, true, false,
1518 ]);
1519 let masked = patches
1520 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1521 .unwrap()
1522 .unwrap();
1523
1524 assert_eq!(masked.values().len(), 1);
1526 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1527
1528 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1530 }
1531
1532 #[test]
1533 fn test_mask_with_offset() {
1534 let patches = Patches::new(
1535 10,
1536 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1539 None,
1540 )
1541 .unwrap();
1542
1543 let mask = Mask::from_iter([
1545 false, false, true, false, false, false, false, false, false, false,
1546 ]);
1547
1548 let masked = patches
1549 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1550 .unwrap()
1551 .unwrap();
1552 assert_eq!(masked.array_len(), 10);
1553 assert_eq!(masked.offset(), 5);
1554 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1555 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1556 }
1557
1558 #[test]
1559 fn test_mask_nullable_values() {
1560 let patches = Patches::new(
1561 10,
1562 0,
1563 buffer![2u64, 5, 8].into_array(),
1564 PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1565 None,
1566 )
1567 .unwrap();
1568
1569 let mask = Mask::from_iter([
1571 false, false, true, false, false, false, false, false, false, false,
1572 ]);
1573 let masked = patches
1574 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1575 .unwrap()
1576 .unwrap();
1577
1578 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1580
1581 let masked_values = masked.values().to_primitive();
1583 assert_eq!(masked_values.len(), 2);
1584 assert!(!masked_values.is_valid(0).unwrap()); assert!(masked_values.is_valid(1).unwrap()); assert_eq!(
1587 i32::try_from(&masked_values.scalar_at(1).unwrap()).unwrap(),
1588 300i32
1589 );
1590 }
1591
1592 #[test]
1593 fn test_filter_keep_all() {
1594 let patches = Patches::new(
1595 10,
1596 0,
1597 buffer![2u64, 5, 8].into_array(),
1598 buffer![100i32, 200, 300].into_array(),
1599 None,
1600 )
1601 .unwrap();
1602
1603 let mask = Mask::from_indices(10, (0..10).collect());
1605 let filtered = patches
1606 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1607 .unwrap()
1608 .unwrap();
1609
1610 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1611 assert_arrays_eq!(
1612 filtered.values(),
1613 PrimitiveArray::from_iter([100i32, 200, 300])
1614 );
1615 }
1616
1617 #[test]
1618 fn test_filter_none() {
1619 let patches = Patches::new(
1620 10,
1621 0,
1622 buffer![2u64, 5, 8].into_array(),
1623 buffer![100i32, 200, 300].into_array(),
1624 None,
1625 )
1626 .unwrap();
1627
1628 let mask = Mask::from_indices(10, vec![]);
1630 let filtered = patches
1631 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1632 .unwrap();
1633 assert!(filtered.is_none());
1634 }
1635
1636 #[test]
1637 fn test_filter_with_indices() {
1638 let patches = Patches::new(
1639 10,
1640 0,
1641 buffer![2u64, 5, 8].into_array(),
1642 buffer![100i32, 200, 300].into_array(),
1643 None,
1644 )
1645 .unwrap();
1646
1647 let mask = Mask::from_indices(10, vec![2, 5, 9]);
1649 let filtered = patches
1650 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1651 .unwrap()
1652 .unwrap();
1653
1654 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1656 }
1657
1658 #[test]
1659 fn test_slice_full_range() {
1660 let patches = Patches::new(
1661 10,
1662 0,
1663 buffer![2u64, 5, 8].into_array(),
1664 buffer![100i32, 200, 300].into_array(),
1665 None,
1666 )
1667 .unwrap();
1668
1669 let sliced = patches.slice(0..10).unwrap().unwrap();
1670
1671 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1672 assert_arrays_eq!(
1673 sliced.values(),
1674 PrimitiveArray::from_iter([100i32, 200, 300])
1675 );
1676 }
1677
1678 #[test]
1679 fn test_slice_partial() {
1680 let patches = Patches::new(
1681 10,
1682 0,
1683 buffer![2u64, 5, 8].into_array(),
1684 buffer![100i32, 200, 300].into_array(),
1685 None,
1686 )
1687 .unwrap();
1688
1689 let sliced = patches.slice(3..8).unwrap().unwrap();
1691
1692 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1694 assert_eq!(sliced.array_len(), 5); assert_eq!(sliced.offset(), 3); }
1697
1698 #[test]
1699 fn test_slice_no_patches() {
1700 let patches = Patches::new(
1701 10,
1702 0,
1703 buffer![2u64, 5, 8].into_array(),
1704 buffer![100i32, 200, 300].into_array(),
1705 None,
1706 )
1707 .unwrap();
1708
1709 let sliced = patches.slice(6..7).unwrap();
1711 assert!(sliced.is_none());
1712 }
1713
1714 #[test]
1715 fn test_slice_with_offset() {
1716 let patches = Patches::new(
1717 10,
1718 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1721 None,
1722 )
1723 .unwrap();
1724
1725 let sliced = patches.slice(3..8).unwrap().unwrap();
1727
1728 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1730 assert_eq!(sliced.offset(), 8); }
1732
1733 #[test]
1734 fn test_patch_values() {
1735 let patches = Patches::new(
1736 10,
1737 0,
1738 buffer![2u64, 5, 8].into_array(),
1739 buffer![100i32, 200, 300].into_array(),
1740 None,
1741 )
1742 .unwrap();
1743
1744 let values = patches.values().to_primitive();
1745 assert_eq!(
1746 i32::try_from(&values.scalar_at(0).unwrap()).unwrap(),
1747 100i32
1748 );
1749 assert_eq!(
1750 i32::try_from(&values.scalar_at(1).unwrap()).unwrap(),
1751 200i32
1752 );
1753 assert_eq!(
1754 i32::try_from(&values.scalar_at(2).unwrap()).unwrap(),
1755 300i32
1756 );
1757 }
1758
1759 #[test]
1760 fn test_indices_range() {
1761 let patches = Patches::new(
1762 10,
1763 0,
1764 buffer![2u64, 5, 8].into_array(),
1765 buffer![100i32, 200, 300].into_array(),
1766 None,
1767 )
1768 .unwrap();
1769
1770 assert_eq!(patches.min_index().unwrap(), 2);
1771 assert_eq!(patches.max_index().unwrap(), 8);
1772 }
1773
1774 #[test]
1775 fn test_search_index() {
1776 let patches = Patches::new(
1777 10,
1778 0,
1779 buffer![2u64, 5, 8].into_array(),
1780 buffer![100i32, 200, 300].into_array(),
1781 None,
1782 )
1783 .unwrap();
1784
1785 assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1787 assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1788 assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1789
1790 assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1792 assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1793 assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1794 assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1795 }
1796
1797 #[test]
1798 fn test_mask_boundary_patches() {
1799 let patches = Patches::new(
1801 10,
1802 0,
1803 buffer![0u64, 9].into_array(),
1804 buffer![100i32, 200].into_array(),
1805 None,
1806 )
1807 .unwrap();
1808
1809 let mask = Mask::from_iter([
1810 true, false, false, false, false, false, false, false, false, false,
1811 ]);
1812 let masked = patches
1813 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1814 .unwrap();
1815 assert!(masked.is_some());
1816 let masked = masked.unwrap();
1817 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1818 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1819 }
1820
1821 #[test]
1822 fn test_mask_all_patches_removed() {
1823 let patches = Patches::new(
1825 10,
1826 0,
1827 buffer![2u64, 5, 8].into_array(),
1828 buffer![100i32, 200, 300].into_array(),
1829 None,
1830 )
1831 .unwrap();
1832
1833 let mask = Mask::from_iter([
1835 false, false, true, false, false, true, false, false, true, false,
1836 ]);
1837 let masked = patches
1838 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1839 .unwrap();
1840 assert!(masked.is_none());
1841 }
1842
1843 #[test]
1844 fn test_mask_no_patches_removed() {
1845 let patches = Patches::new(
1847 10,
1848 0,
1849 buffer![2u64, 5, 8].into_array(),
1850 buffer![100i32, 200, 300].into_array(),
1851 None,
1852 )
1853 .unwrap();
1854
1855 let mask = Mask::from_iter([
1857 true, false, false, true, false, false, true, false, false, true,
1858 ]);
1859 let masked = patches
1860 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1861 .unwrap()
1862 .unwrap();
1863
1864 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1865 assert_arrays_eq!(
1866 masked.values(),
1867 PrimitiveArray::from_iter([100i32, 200, 300])
1868 );
1869 }
1870
1871 #[test]
1872 fn test_mask_single_patch() {
1873 let patches = Patches::new(
1875 5,
1876 0,
1877 buffer![2u64].into_array(),
1878 buffer![42i32].into_array(),
1879 None,
1880 )
1881 .unwrap();
1882
1883 let mask = Mask::from_iter([false, false, true, false, false]);
1885 let masked = patches
1886 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1887 .unwrap();
1888 assert!(masked.is_none());
1889
1890 let mask = Mask::from_iter([true, false, false, true, false]);
1892 let masked = patches
1893 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1894 .unwrap()
1895 .unwrap();
1896 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1897 }
1898
1899 #[test]
1900 fn test_mask_contiguous_patches() {
1901 let patches = Patches::new(
1903 10,
1904 0,
1905 buffer![3u64, 4, 5, 6].into_array(),
1906 buffer![100i32, 200, 300, 400].into_array(),
1907 None,
1908 )
1909 .unwrap();
1910
1911 let mask = Mask::from_iter([
1913 false, false, false, false, true, true, false, false, false, false,
1914 ]);
1915 let masked = patches
1916 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1917 .unwrap()
1918 .unwrap();
1919
1920 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1921 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1922 }
1923
1924 #[test]
1925 fn test_mask_with_large_offset() {
1926 let patches = Patches::new(
1928 20,
1929 15,
1930 buffer![16u64, 17, 19].into_array(), buffer![100i32, 200, 300].into_array(),
1932 None,
1933 )
1934 .unwrap();
1935
1936 let mask = Mask::from_iter([
1938 false, false, true, false, false, false, false, false, false, false, false, false,
1939 false, false, false, false, false, false, false, false,
1940 ]);
1941 let masked = patches
1942 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1943 .unwrap()
1944 .unwrap();
1945
1946 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1947 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1948 }
1949
1950 #[test]
1951 #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1952 fn test_mask_wrong_length() {
1953 let patches = Patches::new(
1954 10,
1955 0,
1956 buffer![2u64, 5, 8].into_array(),
1957 buffer![100i32, 200, 300].into_array(),
1958 None,
1959 )
1960 .unwrap();
1961
1962 let mask = Mask::from_iter([false, false, true, false, false]);
1964 patches
1965 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1966 .unwrap();
1967 }
1968
1969 #[test]
1970 fn test_chunk_offsets_search() {
1971 let indices = buffer![100u64, 200, 3000, 3100].into_array();
1972 let values = buffer![10i32, 20, 30, 40].into_array();
1973 let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
1974 let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
1975
1976 assert!(patches.chunk_offsets.is_some());
1977
1978 assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
1980 assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
1981
1982 assert_eq!(
1984 patches.search_index(1500).unwrap(),
1985 SearchResult::NotFound(2)
1986 );
1987 assert_eq!(
1988 patches.search_index(2000).unwrap(),
1989 SearchResult::NotFound(2)
1990 );
1991
1992 assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
1994 assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
1995
1996 assert_eq!(
1997 patches.search_index(1024).unwrap(),
1998 SearchResult::NotFound(2)
1999 );
2000 }
2001
2002 #[test]
2003 fn test_chunk_offsets_with_slice() {
2004 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2005 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2006 let chunk_offsets = buffer![0u64, 2, 6].into_array();
2007 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2008
2009 let sliced = patches.slice(1000..2200).unwrap().unwrap();
2010
2011 assert!(sliced.chunk_offsets.is_some());
2012 assert_eq!(sliced.offset(), 1000);
2013
2014 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
2015 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2016 assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
2017
2018 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
2019 }
2020
2021 #[test]
2022 fn test_chunk_offsets_with_slice_after_first_chunk() {
2023 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2024 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2025 let chunk_offsets = buffer![0u64, 2, 6].into_array();
2026 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2027
2028 let sliced = patches.slice(1300..2200).unwrap().unwrap();
2029
2030 assert!(sliced.chunk_offsets.is_some());
2031 assert_eq!(sliced.offset(), 1300);
2032
2033 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2034 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
2035 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2036 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
2037 assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
2038 }
2039
2040 #[test]
2041 fn test_chunk_offsets_slice_empty_result() {
2042 let indices = buffer![100u64, 200, 3000, 3100].into_array();
2043 let values = buffer![10i32, 20, 30, 40].into_array();
2044 let chunk_offsets = buffer![0u64, 2].into_array();
2045 let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
2046
2047 let sliced = patches.slice(1000..2000).unwrap();
2048 assert!(sliced.is_none());
2049 }
2050
2051 #[test]
2052 fn test_chunk_offsets_slice_single_patch() {
2053 let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2054 let values = buffer![10i32, 20, 30, 40].into_array();
2055 let chunk_offsets = buffer![0u64, 1, 3].into_array();
2056 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2057
2058 let sliced = patches.slice(1100..1250).unwrap().unwrap();
2059
2060 assert_eq!(sliced.num_patches(), 1);
2061 assert_eq!(sliced.offset(), 1100);
2062 assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2064 assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2065 }
2066
2067 #[test]
2068 fn test_chunk_offsets_slice_across_chunks() {
2069 let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2070 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2071 let chunk_offsets = buffer![0u64, 2, 4].into_array();
2072 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2073
2074 let sliced = patches.slice(150..2150).unwrap().unwrap();
2075
2076 assert_eq!(sliced.num_patches(), 4);
2077 assert_eq!(sliced.offset(), 150);
2078
2079 assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); }
2084
2085 #[test]
2086 fn test_chunk_offsets_boundary_searches() {
2087 let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2088 let values = buffer![10i32, 20, 30, 40, 50].into_array();
2089 let chunk_offsets = buffer![0u64, 1, 4].into_array();
2090 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2091
2092 assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2093 assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2094 assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2095 assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2096 assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2097
2098 assert_eq!(
2099 patches.search_index(1022).unwrap(),
2100 SearchResult::NotFound(0)
2101 );
2102 assert_eq!(
2103 patches.search_index(2046).unwrap(),
2104 SearchResult::NotFound(3)
2105 );
2106 }
2107
2108 #[test]
2109 fn test_chunk_offsets_slice_edge_cases() {
2110 let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2111 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2112 let chunk_offsets = buffer![0u64, 3, 5].into_array();
2113 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2114
2115 let sliced = patches.slice(0..10).unwrap().unwrap();
2117 assert_eq!(sliced.num_patches(), 2);
2118 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2119 assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2120
2121 let sliced = patches.slice(2040..3000).unwrap().unwrap();
2123 assert_eq!(sliced.num_patches(), 2); assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); }
2127
2128 #[test]
2129 fn test_chunk_offsets_slice_nested() {
2130 let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2131 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2132 let chunk_offsets = buffer![0u64].into_array();
2133 let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2134
2135 let sliced1 = patches.slice(150..550).unwrap().unwrap();
2136 assert_eq!(sliced1.num_patches(), 4); let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2139 assert_eq!(sliced2.num_patches(), 1); assert_eq!(sliced2.offset(), 250);
2141
2142 assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(
2144 sliced2.search_index(150).unwrap(),
2145 SearchResult::NotFound(1)
2146 );
2147 }
2148
2149 #[test]
2150 fn test_index_larger_than_length() {
2151 let chunk_offsets = buffer![0u64].into_array();
2152 let indices = buffer![1023u64].into_array();
2153 let values = buffer![42i32].into_array();
2154 let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2155 assert_eq!(
2156 patches.search_index(2048).unwrap(),
2157 SearchResult::NotFound(1)
2158 );
2159 }
2160}