1use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use num_traits::NumCast;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::BufferMut;
12use vortex_error::VortexError;
13use vortex_error::VortexExpect as _;
14use vortex_error::VortexResult;
15use vortex_error::vortex_bail;
16use vortex_error::vortex_ensure;
17use vortex_error::vortex_err;
18use vortex_mask::AllOr;
19use vortex_mask::Mask;
20use vortex_utils::aliases::hash_map::HashMap;
21
22use crate::ArrayRef;
23use crate::ArraySlots;
24use crate::ExecutionCtx;
25use crate::IntoArray;
26use crate::LEGACY_SESSION;
27#[expect(deprecated)]
28use crate::ToCanonical as _;
29use crate::VortexSessionExecute;
30use crate::arrays::PrimitiveArray;
31use crate::arrays::primitive::PrimitiveArrayExt;
32use crate::builtins::ArrayBuiltins;
33use crate::dtype::DType;
34use crate::dtype::IntegerPType;
35use crate::dtype::NativePType;
36use crate::dtype::Nullability;
37use crate::dtype::Nullability::NonNullable;
38use crate::dtype::PType;
39use crate::dtype::UnsignedPType;
40use crate::match_each_integer_ptype;
41use crate::match_each_unsigned_integer_ptype;
42use crate::scalar::PValue;
43use crate::scalar::Scalar;
44use crate::search_sorted::SearchResult;
45use crate::search_sorted::SearchSorted;
46use crate::search_sorted::SearchSortedSide;
47use crate::validity::Validity;
48
49pub const PATCH_CHUNK_SIZE: usize = 1024;
52
53#[derive(Copy, Clone, prost::Message)]
54pub struct PatchesMetadata {
55 #[prost(uint64, tag = "1")]
56 len: u64,
57 #[prost(uint64, tag = "2")]
58 offset: u64,
59 #[prost(enumeration = "PType", tag = "3")]
60 indices_ptype: i32,
61 #[prost(uint64, optional, tag = "4")]
62 chunk_offsets_len: Option<u64>,
63 #[prost(enumeration = "PType", optional, tag = "5")]
64 chunk_offsets_ptype: Option<i32>,
65 #[prost(uint64, optional, tag = "6")]
66 offset_within_chunk: Option<u64>,
67}
68
69impl PatchesMetadata {
70 #[inline]
71 pub fn new(
72 len: usize,
73 offset: usize,
74 indices_ptype: PType,
75 chunk_offsets_len: Option<usize>,
76 chunk_offsets_ptype: Option<PType>,
77 offset_within_chunk: Option<usize>,
78 ) -> Self {
79 Self {
80 len: len as u64,
81 offset: offset as u64,
82 indices_ptype: indices_ptype as i32,
83 chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
84 chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
85 offset_within_chunk: offset_within_chunk.map(|len| len as u64),
86 }
87 }
88
89 #[inline]
90 pub fn len(&self) -> VortexResult<usize> {
91 usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
92 }
93
94 #[inline]
95 pub fn is_empty(&self) -> bool {
96 self.len == 0
97 }
98
99 #[inline]
100 pub fn offset(&self) -> VortexResult<usize> {
101 usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
102 }
103
104 #[inline]
105 pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
106 self.chunk_offsets_ptype
107 .map(|t| {
108 PType::try_from(t)
109 .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
110 .map(|ptype| DType::Primitive(ptype, NonNullable))
111 })
112 .transpose()
113 }
114
115 #[inline]
116 pub fn indices_dtype(&self) -> VortexResult<DType> {
117 let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
118 vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
119 })?;
120 vortex_ensure!(
121 ptype.is_unsigned_int(),
122 "Patch indices must be unsigned integers"
123 );
124 Ok(DType::Primitive(ptype, NonNullable))
125 }
126}
127
128#[derive(Clone, Debug, Hash, PartialEq, Eq)]
133pub struct PatchesData {
134 offset: usize,
135 offset_within_chunk: Option<usize>,
136}
137
138#[derive(Copy, Clone, Debug)]
140pub struct PatchSlotIndices {
141 pub indices: usize,
142 pub values: usize,
143 pub chunk_offsets: usize,
144}
145
146impl PatchesData {
147 pub fn from_patches(patches: &Patches) -> Self {
149 Self {
150 offset: patches.offset(),
151 offset_within_chunk: patches.offset_within_chunk(),
152 }
153 }
154
155 pub fn patches_from_slots(
160 patches_data: Option<&Self>,
161 len: usize,
162 slots: &[Option<ArrayRef>],
163 slot_idx: PatchSlotIndices,
164 ) -> Option<Patches> {
165 let data = patches_data?;
166 let indices = slots[slot_idx.indices]
167 .as_ref()
168 .vortex_expect("patches_data is set but patch_indices slot is missing");
169 let values = slots[slot_idx.values]
170 .as_ref()
171 .vortex_expect("patches_data is set but patch_values slot is missing");
172 Some(unsafe {
173 Patches::new_unchecked(
174 len,
175 data.offset,
176 indices.clone(),
177 values.clone(),
178 slots[slot_idx.chunk_offsets].clone(),
179 data.offset_within_chunk,
180 )
181 })
182 }
183
184 pub fn push_slots(slots: &mut ArraySlots, patches: Option<&Patches>) {
188 match patches {
189 Some(p) => {
190 slots.push(Some(p.indices().clone()));
191 slots.push(Some(p.values().clone()));
192 slots.push(p.chunk_offsets().clone());
193 }
194 None => {
195 slots.push(None);
196 slots.push(None);
197 slots.push(None);
198 }
199 }
200 }
201
202 #[inline]
204 pub fn offset(&self) -> usize {
205 self.offset
206 }
207
208 #[inline]
210 pub fn offset_within_chunk(&self) -> Option<usize> {
211 self.offset_within_chunk
212 }
213}
214
215#[derive(Debug, Clone)]
217pub struct Patches {
218 array_len: usize,
219 offset: usize,
220 indices: ArrayRef,
221 values: ArrayRef,
222 chunk_offsets: Option<ArrayRef>,
229 offset_within_chunk: Option<usize>,
239}
240
241impl Patches {
242 pub fn new(
243 array_len: usize,
244 offset: usize,
245 indices: ArrayRef,
246 values: ArrayRef,
247 chunk_offsets: Option<ArrayRef>,
248 ) -> VortexResult<Self> {
249 vortex_ensure!(
250 indices.len() == values.len(),
251 "Patch indices and values must have the same length"
252 );
253 vortex_ensure!(
254 indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
255 "Patch indices must be non-nullable unsigned integers, got {:?}",
256 indices.dtype()
257 );
258
259 vortex_ensure!(
260 indices.len() <= array_len,
261 "Patch indices must be shorter than the array length"
262 );
263 vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
264
265 if indices.is_host() && values.is_host() {
268 let max = usize::try_from(&indices.execute_scalar(
269 indices.len() - 1,
270 &mut LEGACY_SESSION.create_execution_ctx(),
271 )?)
272 .map_err(|_| vortex_err!("indices must be a number"))?;
273 vortex_ensure!(
274 max - offset < array_len,
275 "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
276 );
277
278 #[cfg(debug_assertions)]
279 {
280 use crate::VortexSessionExecute;
281 use crate::aggregate_fn::fns::is_sorted::is_sorted;
282 let mut ctx = LEGACY_SESSION.create_execution_ctx();
283 assert!(
284 is_sorted(&indices, &mut ctx).unwrap_or(false),
285 "Patch indices must be sorted"
286 );
287 }
288 }
289
290 Ok(Self {
291 array_len,
292 offset,
293 indices,
294 values,
295 chunk_offsets: chunk_offsets.clone(),
296 offset_within_chunk: chunk_offsets.map(|_| 0),
298 })
299 }
300
301 pub unsafe fn new_unchecked(
311 array_len: usize,
312 offset: usize,
313 indices: ArrayRef,
314 values: ArrayRef,
315 chunk_offsets: Option<ArrayRef>,
316 offset_within_chunk: Option<usize>,
317 ) -> Self {
318 Self {
319 array_len,
320 offset,
321 indices,
322 values,
323 chunk_offsets,
324 offset_within_chunk,
325 }
326 }
327
328 #[inline]
329 pub fn array_len(&self) -> usize {
330 self.array_len
331 }
332
333 #[inline]
334 pub fn num_patches(&self) -> usize {
335 self.indices.len()
336 }
337
338 #[inline]
339 pub fn dtype(&self) -> &DType {
340 self.values.dtype()
341 }
342
343 #[inline]
344 pub fn indices(&self) -> &ArrayRef {
345 &self.indices
346 }
347
348 #[inline]
349 pub fn into_indices(self) -> ArrayRef {
350 self.indices
351 }
352
353 #[inline]
354 pub fn indices_mut(&mut self) -> &mut ArrayRef {
355 &mut self.indices
356 }
357
358 #[inline]
359 pub fn values(&self) -> &ArrayRef {
360 &self.values
361 }
362
363 #[inline]
364 pub fn into_values(self) -> ArrayRef {
365 self.values
366 }
367
368 #[inline]
369 pub fn values_mut(&mut self) -> &mut ArrayRef {
370 &mut self.values
371 }
372
373 #[inline]
374 pub fn offset(&self) -> usize {
376 self.offset
377 }
378
379 #[inline]
380 pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
381 &self.chunk_offsets
382 }
383
384 #[inline]
385 pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
386 let Some(chunk_offsets) = &self.chunk_offsets else {
387 vortex_bail!("chunk_offsets must be set to retrieve offset at index")
388 };
389
390 chunk_offsets
391 .execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())?
392 .as_primitive()
393 .as_::<usize>()
394 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
395 }
396
397 #[inline]
406 pub fn offset_within_chunk(&self) -> Option<usize> {
407 self.offset_within_chunk
408 }
409
410 #[inline]
411 pub fn indices_ptype(&self) -> VortexResult<PType> {
412 PType::try_from(self.indices.dtype())
413 .map_err(|_| vortex_err!("indices dtype is not primitive"))
414 }
415
416 pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
417 if self.indices.len() > len {
418 vortex_bail!(
419 "Patch indices {} are longer than the array length {}",
420 self.indices.len(),
421 len
422 );
423 }
424 if self.values.dtype() != dtype {
425 vortex_bail!(
426 "Patch values dtype {} does not match array dtype {}",
427 self.values.dtype(),
428 dtype
429 );
430 }
431 let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
432 let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
433
434 Ok(PatchesMetadata::new(
435 self.indices.len(),
436 self.offset,
437 self.indices.dtype().as_ptype(),
438 chunk_offsets_len,
439 chunk_offsets_ptype,
440 self.offset_within_chunk,
441 ))
442 }
443
444 pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
445 unsafe {
447 Ok(Self::new_unchecked(
448 self.array_len,
449 self.offset,
450 self.indices,
451 self.values.cast(values_dtype.clone())?,
452 self.chunk_offsets,
453 self.offset_within_chunk,
454 ))
455 }
456 }
457
458 pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
460 self.search_index(index)?
461 .to_found()
462 .map(|patch_idx| {
463 self.values()
464 .execute_scalar(patch_idx, &mut LEGACY_SESSION.create_execution_ctx())
465 })
466 .transpose()
467 }
468
469 pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
487 if self.chunk_offsets.is_some() {
488 return self.search_index_chunked(index);
489 }
490
491 Self::search_index_binary_search(&self.indices, index + self.offset)
492 }
493
494 fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
500 if indices.is_canonical() {
501 #[expect(deprecated)]
502 let primitive = indices.to_primitive();
503 match_each_integer_ptype!(primitive.ptype(), |T| {
504 let Ok(needle) = T::try_from(needle) else {
505 return Ok(SearchResult::NotFound(primitive.len()));
510 };
511 return primitive
512 .as_slice::<T>()
513 .search_sorted(&needle, SearchSortedSide::Left);
514 });
515 }
516 indices
517 .as_primitive_typed()
518 .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
519 }
520
521 fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
531 let Some(chunk_offsets) = &self.chunk_offsets else {
532 vortex_bail!("chunk_offsets is required to be set")
533 };
534
535 let Some(offset_within_chunk) = self.offset_within_chunk else {
536 vortex_bail!("offset_within_chunk is required to be set")
537 };
538
539 if index >= self.array_len() {
540 return Ok(SearchResult::NotFound(self.indices().len()));
541 }
542
543 let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
544
545 let base_offset = self.chunk_offset_at(0)?;
547
548 let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
549 .saturating_sub(offset_within_chunk);
556
557 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
558 (self.chunk_offset_at(chunk_idx + 1)? - base_offset)
559 .saturating_sub(offset_within_chunk)
560 .min(self.indices.len())
561 } else {
562 self.indices.len()
563 };
564
565 let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
566 let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
567
568 Ok(match result {
569 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
570 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
571 })
572 }
573
574 fn search_index_chunked_batch<T, O>(
580 &self,
581 indices: &[T],
582 chunk_offsets: &[O],
583 index: T,
584 ) -> VortexResult<SearchResult>
585 where
586 T: UnsignedPType,
587 O: UnsignedPType,
588 usize: TryFrom<T>,
589 usize: TryFrom<O>,
590 {
591 let Some(offset_within_chunk) = self.offset_within_chunk else {
592 vortex_bail!("offset_within_chunk is required to be set")
593 };
594
595 let chunk_idx = {
596 let Ok(index) = usize::try_from(index) else {
597 return Ok(SearchResult::NotFound(indices.len()));
599 };
600
601 if index >= self.array_len() {
602 return Ok(SearchResult::NotFound(self.indices().len()));
603 }
604
605 (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
606 };
607
608 let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
610 .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
611
612 let patches_start_idx = chunk_offset
613 .saturating_sub(offset_within_chunk);
620
621 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
622 usize::try_from(chunk_offsets[chunk_idx + 1] - chunk_offsets[0])
623 .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
624 .saturating_sub(offset_within_chunk)
625 .min(indices.len())
626 } else {
627 self.indices.len()
628 };
629
630 let Some(offset) = T::from(self.offset) else {
631 return Ok(SearchResult::NotFound(indices.len()));
633 };
634
635 let chunk_indices = &indices[patches_start_idx..patches_end_idx];
636 let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
637
638 Ok(match result {
639 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
640 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
641 })
642 }
643
644 pub fn min_index(&self) -> VortexResult<usize> {
646 let first = self
647 .indices
648 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
649 .as_primitive()
650 .as_::<usize>()
651 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
652 Ok(first - self.offset)
653 }
654
655 pub fn max_index(&self) -> VortexResult<usize> {
657 let last = self
658 .indices
659 .execute_scalar(
660 self.indices.len() - 1,
661 &mut LEGACY_SESSION.create_execution_ctx(),
662 )?
663 .as_primitive()
664 .as_::<usize>()
665 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
666 Ok(last - self.offset)
667 }
668
669 pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
671 if mask.len() != self.array_len {
672 vortex_bail!(
673 "Filter mask length {} does not match array length {}",
674 mask.len(),
675 self.array_len
676 );
677 }
678
679 match mask.indices() {
680 AllOr::All => Ok(Some(self.clone())),
681 AllOr::None => Ok(None),
682 AllOr::Some(mask_indices) => {
683 let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
684 match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
685 filter_patches_with_mask(
686 flat_indices.as_slice::<I>(),
687 self.offset(),
688 self.values(),
689 mask_indices,
690 )
691 })
692 }
693 }
694 }
695
696 pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
701 if mask.len() != self.array_len {
702 vortex_bail!(
703 "Filter mask length {} does not match array length {}",
704 mask.len(),
705 self.array_len
706 );
707 }
708
709 let filter_mask = match mask.bit_buffer() {
710 AllOr::All => return Ok(None),
711 AllOr::None => return Ok(Some(self.clone())),
712 AllOr::Some(masked) => {
713 let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
714 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
715 let patch_indices = patch_indices.as_slice::<P>();
716 Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
717 #[allow(clippy::cast_possible_truncation)]
718 let idx = (patch_indices[i] as usize) - self.offset;
719 !masked.value(idx)
720 }))
721 })
722 }
723 };
724
725 if filter_mask.all_false() {
726 return Ok(None);
727 }
728
729 let filtered_indices = self.indices.filter(filter_mask.clone())?;
731 let filtered_values = self.values.filter(filter_mask)?;
732
733 Ok(Some(Self {
734 array_len: self.array_len,
735 offset: self.offset,
736 indices: filtered_indices,
737 values: filtered_values,
738 chunk_offsets: None,
740 offset_within_chunk: self.offset_within_chunk,
741 }))
742 }
743
744 pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
746 let slice_start_idx = self.search_index(range.start)?.to_index();
747 let slice_end_idx = self.search_index(range.end)?.to_index();
748
749 if slice_start_idx == slice_end_idx {
750 return Ok(None);
751 }
752
753 let values = self.values().slice(slice_start_idx..slice_end_idx)?;
754 let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
755
756 let new_chunk_offsets = self
757 .chunk_offsets
758 .as_ref()
759 .map(|chunk_offsets| -> VortexResult<ArrayRef> {
760 let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
761 let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
762 let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
763 chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
764 })
765 .transpose()?;
766
767 let offset_within_chunk = new_chunk_offsets
768 .as_ref()
769 .map(|new_chunk_offsets| -> VortexResult<usize> {
770 let new_chunk_base = new_chunk_offsets
771 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
772 .as_primitive()
773 .as_::<usize>()
774 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
775 let parent_chunk_base = self.chunk_offset_at(0)?;
776 let parent_within = self.offset_within_chunk.unwrap_or(0);
777 Ok(parent_chunk_base + parent_within + slice_start_idx - new_chunk_base)
778 })
779 .transpose()?;
780
781 Ok(Some(Self {
782 array_len: range.len(),
783 offset: range.start + self.offset(),
784 indices,
785 values,
786 chunk_offsets: new_chunk_offsets,
787 offset_within_chunk,
788 }))
789 }
790
791 const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
793
794 fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
795 (self.num_patches() as f64 / take_indices.len() as f64)
796 < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
797 }
798
799 pub fn take_with_nulls(
803 &self,
804 take_indices: &ArrayRef,
805 ctx: &mut ExecutionCtx,
806 ) -> VortexResult<Option<Self>> {
807 if take_indices.is_empty() {
808 return Ok(None);
809 }
810
811 let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
812 if self.is_map_faster_than_search(&take_indices) {
813 self.take_map(take_indices, true, ctx)
814 } else {
815 self.take_search(take_indices, true, ctx)
816 }
817 }
818
819 pub fn take(
823 &self,
824 take_indices: &ArrayRef,
825 ctx: &mut ExecutionCtx,
826 ) -> VortexResult<Option<Self>> {
827 if take_indices.is_empty() {
828 return Ok(None);
829 }
830
831 let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
832 if self.is_map_faster_than_search(&take_indices) {
833 self.take_map(take_indices, false, ctx)
834 } else {
835 self.take_search(take_indices, false, ctx)
836 }
837 }
838
839 #[expect(
840 clippy::cognitive_complexity,
841 reason = "complexity is from nested match_each_* macros"
842 )]
843 pub fn take_search(
844 &self,
845 take_indices: PrimitiveArray,
846 include_nulls: bool,
847 ctx: &mut ExecutionCtx,
848 ) -> VortexResult<Option<Self>> {
849 let take_indices_validity = take_indices.validity()?;
850 let take_indices_unsigned =
853 take_indices.reinterpret_cast(take_indices.ptype().to_unsigned());
854 let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
855 let chunk_offsets = self
856 .chunk_offsets()
857 .as_ref()
858 .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
859 .transpose()?;
860
861 let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
862 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
863 let patch_indices_slice = patch_indices.as_slice::<PatchT>();
864 match_each_unsigned_integer_ptype!(take_indices_unsigned.ptype(), |TakeT| {
865 let take_slice = take_indices_unsigned.as_slice::<TakeT>();
866
867 if let Some(chunk_offsets) = chunk_offsets {
868 match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
869 let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
870 take_indices_with_search_fn(
871 patch_indices_slice,
872 take_slice,
873 take_indices
874 .as_ref()
875 .validity()?
876 .execute_mask(take_indices.as_ref().len(), ctx)?,
877 include_nulls,
878 |take_idx| {
879 self.search_index_chunked_batch(
880 patch_indices_slice,
881 chunk_offsets,
882 take_idx,
883 )
884 },
885 )?
886 })
887 } else {
888 take_indices_with_search_fn(
889 patch_indices_slice,
890 take_slice,
891 take_indices
892 .as_ref()
893 .validity()?
894 .execute_mask(take_indices.as_ref().len(), ctx)?,
895 include_nulls,
896 |take_idx| {
897 let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
898 return Ok(SearchResult::NotFound(patch_indices_slice.len()));
900 };
901
902 patch_indices_slice
903 .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
904 },
905 )?
906 }
907 })
908 });
909
910 if new_indices.is_empty() {
911 return Ok(None);
912 }
913
914 let new_indices = new_indices.into_array();
915 let new_array_len = take_indices.len();
916 let values_validity = take_indices_validity.take(&new_indices)?;
917
918 Ok(Some(Self {
919 array_len: new_array_len,
920 offset: 0,
921 indices: new_indices,
922 values: self
923 .values()
924 .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
925 chunk_offsets: None,
926 offset_within_chunk: Some(0), }))
928 }
929
930 pub fn take_map(
931 &self,
932 take_indices: PrimitiveArray,
933 include_nulls: bool,
934 ctx: &mut ExecutionCtx,
935 ) -> VortexResult<Option<Self>> {
936 let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
937 let new_length = take_indices.len();
938 let take_indices_unsigned =
940 take_indices.reinterpret_cast(take_indices.ptype().to_unsigned());
941
942 let min_index = self.min_index()?;
943 let max_index = self.max_index()?;
944
945 let Some((new_sparse_indices, value_indices)) =
946 match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
947 match_each_unsigned_integer_ptype!(take_indices_unsigned.ptype(), |TakeIndices| {
948 let take_validity = take_indices
949 .validity()?
950 .execute_mask(take_indices.len(), ctx)?;
951 let take_nullability = take_indices.validity()?.nullability();
952 let take_slice = take_indices_unsigned.as_slice::<TakeIndices>();
953 take_map::<_, TakeIndices>(
954 indices.as_slice::<Indices>(),
955 take_slice,
956 take_validity,
957 take_nullability,
958 self.offset(),
959 min_index,
960 max_index,
961 include_nulls,
962 )?
963 })
964 })
965 else {
966 return Ok(None);
967 };
968
969 let taken_values = self.values().take(value_indices)?;
970
971 Ok(Some(Patches {
972 array_len: new_length,
973 offset: 0,
974 indices: new_sparse_indices,
975 values: taken_values,
976 chunk_offsets: None,
978 offset_within_chunk: self.offset_within_chunk,
979 }))
980 }
981
982 pub fn map_values<F>(self, f: F) -> VortexResult<Self>
983 where
984 F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
985 {
986 let values = f(self.values)?;
987 if self.indices.len() != values.len() {
988 vortex_bail!(
989 "map_values must preserve length: expected {} received {}",
990 self.indices.len(),
991 values.len()
992 )
993 }
994
995 Ok(Self {
996 array_len: self.array_len,
997 offset: self.offset,
998 indices: self.indices,
999 values,
1000 chunk_offsets: self.chunk_offsets,
1001 offset_within_chunk: self.offset_within_chunk,
1002 })
1003 }
1004}
1005
1006#[expect(clippy::too_many_arguments)] fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
1008 indices: &[I],
1009 take_indices: &[T],
1010 take_validity: Mask,
1011 take_nullability: Nullability,
1012 indices_offset: usize,
1013 min_index: usize,
1014 max_index: usize,
1015 include_nulls: bool,
1016) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
1017where
1018 usize: TryFrom<T>,
1019 VortexError: From<<I as TryFrom<usize>>::Error>,
1020{
1021 let offset_i = I::try_from(indices_offset)?;
1022
1023 let sparse_index_to_value_index: HashMap<I, usize> = indices
1024 .iter()
1025 .copied()
1026 .map(|idx| idx - offset_i)
1027 .enumerate()
1028 .map(|(value_index, sparse_index)| (sparse_index, value_index))
1029 .collect();
1030
1031 let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1032 let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1033
1034 for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
1035 let ti = usize::try_from(take_idx)
1036 .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
1037
1038 let is_null = match take_validity.bit_buffer() {
1040 AllOr::All => false,
1041 AllOr::None => true,
1042 AllOr::Some(buf) => !buf.value(idx_in_take),
1043 };
1044 if is_null {
1045 if include_nulls {
1046 new_sparse_indices.push(idx_in_take as u64);
1047 value_indices.push(0);
1048 }
1049 } else if ti >= min_index && ti <= max_index {
1050 let ti_as_i = I::try_from(ti)
1051 .map_err(|_| vortex_err!("take index does not fit in index type"))?;
1052 if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
1053 new_sparse_indices.push(idx_in_take as u64);
1054 value_indices.push(value_index as u64);
1055 }
1056 }
1057 }
1058
1059 if new_sparse_indices.is_empty() {
1060 return Ok(None);
1061 }
1062
1063 let new_sparse_indices = new_sparse_indices.into_array();
1064 let values_validity =
1065 Validity::from_mask(take_validity, take_nullability).take(&new_sparse_indices)?;
1066 Ok(Some((
1067 new_sparse_indices,
1068 PrimitiveArray::new(value_indices, values_validity).into_array(),
1069 )))
1070}
1071
1072fn filter_patches_with_mask<T: IntegerPType>(
1078 patch_indices: &[T],
1079 offset: usize,
1080 patch_values: &ArrayRef,
1081 mask_indices: &[usize],
1082) -> VortexResult<Option<Patches>> {
1083 let true_count = mask_indices.len();
1084 let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
1085 let mut new_mask_indices = Vec::with_capacity(true_count);
1086
1087 const STRIDE: usize = 4;
1091
1092 let mut mask_idx = 0usize;
1093 let mut true_idx = 0usize;
1094
1095 while mask_idx < patch_indices.len() && true_idx < true_count {
1096 if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1103 let left_min = patch_indices[mask_idx]
1105 .to_usize()
1106 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1107 - offset;
1108 let left_max = patch_indices[mask_idx + STRIDE]
1109 .to_usize()
1110 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1111 - offset;
1112 let right_min = mask_indices[true_idx];
1113 let right_max = mask_indices[true_idx + STRIDE];
1114
1115 if left_min > right_max {
1116 true_idx += STRIDE;
1118 continue;
1119 } else if right_min > left_max {
1120 mask_idx += STRIDE;
1121 continue;
1122 } else {
1123 }
1125 }
1126
1127 let left = patch_indices[mask_idx]
1130 .to_usize()
1131 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1132 - offset;
1133 let right = mask_indices[true_idx];
1134
1135 match left.cmp(&right) {
1136 Ordering::Less => {
1137 mask_idx += 1;
1138 }
1139 Ordering::Greater => {
1140 true_idx += 1;
1141 }
1142 Ordering::Equal => {
1143 new_mask_indices.push(mask_idx);
1145 new_patch_indices.push(true_idx as u64);
1146
1147 mask_idx += 1;
1148 true_idx += 1;
1149 }
1150 }
1151 }
1152
1153 if new_mask_indices.is_empty() {
1154 return Ok(None);
1155 }
1156
1157 let new_patch_indices = new_patch_indices.into_array();
1158 let new_patch_values =
1159 patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1160
1161 Ok(Some(Patches::new(
1162 true_count,
1163 0,
1164 new_patch_indices,
1165 new_patch_values,
1166 None,
1168 )?))
1169}
1170
1171fn take_indices_with_search_fn<
1172 I: UnsignedPType,
1173 T: IntegerPType,
1174 F: Fn(I) -> VortexResult<SearchResult>,
1175>(
1176 indices: &[I],
1177 take_indices: &[T],
1178 take_validity: Mask,
1179 include_nulls: bool,
1180 search_fn: F,
1181) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1182 let mut values_indices = BufferMut::with_capacity(take_indices.len());
1183 let mut new_indices = BufferMut::with_capacity(take_indices.len());
1184
1185 for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1186 if !take_validity.value(new_patch_idx) {
1187 if include_nulls {
1188 values_indices.push(0u64);
1190 new_indices.push(new_patch_idx as u64);
1191 }
1192 continue;
1193 } else {
1194 let search_result = match I::from(take_idx) {
1195 Some(idx) => search_fn(idx)?,
1196 None => SearchResult::NotFound(indices.len()),
1197 };
1198
1199 if let Some(patch_idx) = search_result.to_found() {
1200 values_indices.push(patch_idx as u64);
1201 new_indices.push(new_patch_idx as u64);
1202 }
1203 }
1204 }
1205
1206 Ok((values_indices, new_indices))
1207}
1208
1209#[cfg(test)]
1210mod test {
1211 use vortex_buffer::BufferMut;
1212 use vortex_buffer::buffer;
1213 use vortex_mask::Mask;
1214
1215 use crate::IntoArray;
1216 use crate::LEGACY_SESSION;
1217 #[expect(deprecated)]
1218 use crate::ToCanonical as _;
1219 use crate::VortexSessionExecute;
1220 use crate::assert_arrays_eq;
1221 use crate::patches::Patches;
1222 use crate::patches::PrimitiveArray;
1223 use crate::search_sorted::SearchResult;
1224 use crate::validity::Validity;
1225
1226 #[test]
1227 fn test_filter() {
1228 let patches = Patches::new(
1229 100,
1230 0,
1231 buffer![10u32, 11, 20].into_array(),
1232 buffer![100, 110, 200].into_array(),
1233 None,
1234 )
1235 .unwrap();
1236
1237 let filtered = patches
1238 .filter(
1239 &Mask::from_indices(100, vec![10, 20, 30]),
1240 &mut LEGACY_SESSION.create_execution_ctx(),
1241 )
1242 .unwrap()
1243 .unwrap();
1244
1245 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1246 assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1247 }
1248
1249 #[test]
1250 fn take_with_nulls() {
1251 let patches = Patches::new(
1252 20,
1253 0,
1254 buffer![2u64, 9, 15].into_array(),
1255 PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1256 None,
1257 )
1258 .unwrap();
1259
1260 let taken = patches
1261 .take(
1262 &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1263 .into_array(),
1264 &mut LEGACY_SESSION.create_execution_ctx(),
1265 )
1266 .unwrap()
1267 .unwrap();
1268 #[expect(deprecated)]
1269 let primitive_values = taken.values().to_primitive();
1270 #[expect(deprecated)]
1271 let primitive_indices = taken.indices().to_primitive();
1272 assert_eq!(taken.array_len(), 2);
1273 assert_arrays_eq!(
1274 primitive_values,
1275 PrimitiveArray::from_option_iter([Some(44i32)])
1276 );
1277 assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1278 assert_eq!(
1279 primitive_values
1280 .as_ref()
1281 .validity()
1282 .unwrap()
1283 .execute_mask(
1284 primitive_values.as_ref().len(),
1285 &mut LEGACY_SESSION.create_execution_ctx()
1286 )
1287 .unwrap(),
1288 Mask::from_iter(vec![true])
1289 );
1290 }
1291
1292 #[test]
1293 fn take_search_with_nulls_chunked() {
1294 let patches = Patches::new(
1295 20,
1296 0,
1297 buffer![2u64, 9, 15].into_array(),
1298 buffer![33_i32, 44, 55].into_array(),
1299 Some(buffer![0u64].into_array()),
1300 )
1301 .unwrap();
1302
1303 let taken = patches
1304 .take_search(
1305 PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1306 true,
1307 &mut LEGACY_SESSION.create_execution_ctx(),
1308 )
1309 .unwrap()
1310 .unwrap();
1311
1312 #[expect(deprecated)]
1313 let primitive_values = taken.values().to_primitive();
1314 assert_eq!(taken.array_len(), 2);
1315 assert_arrays_eq!(
1316 primitive_values,
1317 PrimitiveArray::from_option_iter([Some(44i32), None])
1318 );
1319 assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1320
1321 assert_eq!(
1322 primitive_values
1323 .as_ref()
1324 .validity()
1325 .unwrap()
1326 .execute_mask(
1327 primitive_values.as_ref().len(),
1328 &mut LEGACY_SESSION.create_execution_ctx()
1329 )
1330 .unwrap(),
1331 Mask::from_iter([true, false])
1332 );
1333 }
1334
1335 #[test]
1336 fn take_search_chunked_multiple_chunks() {
1337 let patches = Patches::new(
1338 2048,
1339 0,
1340 buffer![100u64, 500, 1200, 1800].into_array(),
1341 buffer![10_i32, 20, 30, 40].into_array(),
1342 Some(buffer![0u64, 2].into_array()),
1343 )
1344 .unwrap();
1345
1346 let taken = patches
1347 .take_search(
1348 PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1349 true,
1350 &mut LEGACY_SESSION.create_execution_ctx(),
1351 )
1352 .unwrap()
1353 .unwrap();
1354
1355 assert_eq!(taken.array_len(), 3);
1356 assert_arrays_eq!(
1357 taken.values(),
1358 PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1359 );
1360 }
1361
1362 #[test]
1363 fn take_search_chunked_indices_with_no_patches() {
1364 let patches = Patches::new(
1365 20,
1366 0,
1367 buffer![2u64, 9, 15].into_array(),
1368 buffer![33_i32, 44, 55].into_array(),
1369 Some(buffer![0u64].into_array()),
1370 )
1371 .unwrap();
1372
1373 let taken = patches
1374 .take_search(
1375 PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1376 true,
1377 &mut LEGACY_SESSION.create_execution_ctx(),
1378 )
1379 .unwrap();
1380
1381 assert!(taken.is_none());
1382 }
1383
1384 #[test]
1385 fn take_search_chunked_interleaved() {
1386 let patches = Patches::new(
1387 30,
1388 0,
1389 buffer![5u64, 10, 20, 25].into_array(),
1390 buffer![100_i32, 200, 300, 400].into_array(),
1391 Some(buffer![0u64].into_array()),
1392 )
1393 .unwrap();
1394
1395 let taken = patches
1396 .take_search(
1397 PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1398 true,
1399 &mut LEGACY_SESSION.create_execution_ctx(),
1400 )
1401 .unwrap()
1402 .unwrap();
1403
1404 assert_eq!(taken.array_len(), 4);
1405 assert_arrays_eq!(
1406 taken.values(),
1407 PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1408 );
1409 }
1410
1411 #[test]
1412 fn test_take_search_multiple_chunk_offsets() {
1413 let patches = Patches::new(
1414 1500,
1415 0,
1416 BufferMut::from_iter(0..1500u64).into_array(),
1417 BufferMut::from_iter(0..1500i32).into_array(),
1418 Some(buffer![0u64, 1024u64].into_array()),
1419 )
1420 .unwrap();
1421
1422 let taken = patches
1423 .take_search(
1424 PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1425 false,
1426 &mut LEGACY_SESSION.create_execution_ctx(),
1427 )
1428 .unwrap()
1429 .unwrap();
1430
1431 assert_eq!(taken.array_len(), 1500);
1432 }
1433
1434 #[test]
1435 fn test_slice() {
1436 let values = buffer![15_u32, 135, 13531, 42].into_array();
1437 let indices = buffer![10_u64, 11, 50, 100].into_array();
1438
1439 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1440
1441 let sliced = patches.slice(15..100).unwrap().unwrap();
1442 assert_eq!(sliced.array_len(), 100 - 15);
1443 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1444 }
1445
1446 #[test]
1447 fn doubly_sliced() {
1448 let values = buffer![15_u32, 135, 13531, 42].into_array();
1449 let indices = buffer![10_u64, 11, 50, 100].into_array();
1450
1451 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1452
1453 let sliced = patches.slice(15..100).unwrap().unwrap();
1454 assert_eq!(sliced.array_len(), 100 - 15);
1455 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1456
1457 let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1458 assert_arrays_eq!(
1459 doubly_sliced.values(),
1460 PrimitiveArray::from_iter([13531u32])
1461 );
1462 }
1463
1464 #[test]
1465 fn test_mask_all_true() {
1466 let patches = Patches::new(
1467 10,
1468 0,
1469 buffer![2u64, 5, 8].into_array(),
1470 buffer![100i32, 200, 300].into_array(),
1471 None,
1472 )
1473 .unwrap();
1474
1475 let mask = Mask::new_true(10);
1476 let masked = patches
1477 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1478 .unwrap();
1479 assert!(masked.is_none());
1480 }
1481
1482 #[test]
1483 fn test_mask_all_false() {
1484 let patches = Patches::new(
1485 10,
1486 0,
1487 buffer![2u64, 5, 8].into_array(),
1488 buffer![100i32, 200, 300].into_array(),
1489 None,
1490 )
1491 .unwrap();
1492
1493 let mask = Mask::new_false(10);
1494 let masked = patches
1495 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1496 .unwrap()
1497 .unwrap();
1498
1499 assert_arrays_eq!(
1501 masked.values(),
1502 PrimitiveArray::from_iter([100i32, 200, 300])
1503 );
1504 assert!(
1505 masked
1506 .values()
1507 .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1508 .unwrap()
1509 );
1510 assert!(
1511 masked
1512 .values()
1513 .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1514 .unwrap()
1515 );
1516 assert!(
1517 masked
1518 .values()
1519 .is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())
1520 .unwrap()
1521 );
1522
1523 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1525 }
1526
1527 #[test]
1528 fn test_mask_partial() {
1529 let patches = Patches::new(
1530 10,
1531 0,
1532 buffer![2u64, 5, 8].into_array(),
1533 buffer![100i32, 200, 300].into_array(),
1534 None,
1535 )
1536 .unwrap();
1537
1538 let mask = Mask::from_iter([
1540 false, false, true, false, false, false, false, false, true, false,
1541 ]);
1542 let masked = patches
1543 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1544 .unwrap()
1545 .unwrap();
1546
1547 assert_eq!(masked.values().len(), 1);
1549 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1550
1551 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1553 }
1554
1555 #[test]
1556 fn test_mask_with_offset() {
1557 let patches = Patches::new(
1558 10,
1559 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1562 None,
1563 )
1564 .unwrap();
1565
1566 let mask = Mask::from_iter([
1568 false, false, true, false, false, false, false, false, false, false,
1569 ]);
1570
1571 let masked = patches
1572 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1573 .unwrap()
1574 .unwrap();
1575 assert_eq!(masked.array_len(), 10);
1576 assert_eq!(masked.offset(), 5);
1577 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1578 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1579 }
1580
1581 #[test]
1582 fn test_mask_nullable_values() {
1583 let patches = Patches::new(
1584 10,
1585 0,
1586 buffer![2u64, 5, 8].into_array(),
1587 PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1588 None,
1589 )
1590 .unwrap();
1591
1592 let mask = Mask::from_iter([
1594 false, false, true, false, false, false, false, false, false, false,
1595 ]);
1596 let masked = patches
1597 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1598 .unwrap()
1599 .unwrap();
1600
1601 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1603
1604 #[expect(deprecated)]
1606 let masked_values = masked.values().to_primitive();
1607 assert_eq!(masked_values.len(), 2);
1608 assert!(
1609 !masked_values
1610 .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1611 .unwrap()
1612 ); assert!(
1614 masked_values
1615 .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1616 .unwrap()
1617 ); assert_eq!(
1619 i32::try_from(
1620 &masked_values
1621 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1622 .unwrap()
1623 )
1624 .unwrap(),
1625 300i32
1626 );
1627 }
1628
1629 #[test]
1630 fn test_filter_keep_all() {
1631 let patches = Patches::new(
1632 10,
1633 0,
1634 buffer![2u64, 5, 8].into_array(),
1635 buffer![100i32, 200, 300].into_array(),
1636 None,
1637 )
1638 .unwrap();
1639
1640 let mask = Mask::from_indices(10, 0..10);
1642 let filtered = patches
1643 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1644 .unwrap()
1645 .unwrap();
1646
1647 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1648 assert_arrays_eq!(
1649 filtered.values(),
1650 PrimitiveArray::from_iter([100i32, 200, 300])
1651 );
1652 }
1653
1654 #[test]
1655 fn test_filter_none() {
1656 let patches = Patches::new(
1657 10,
1658 0,
1659 buffer![2u64, 5, 8].into_array(),
1660 buffer![100i32, 200, 300].into_array(),
1661 None,
1662 )
1663 .unwrap();
1664
1665 let mask = Mask::from_indices(10, vec![]);
1667 let filtered = patches
1668 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1669 .unwrap();
1670 assert!(filtered.is_none());
1671 }
1672
1673 #[test]
1674 fn test_filter_with_indices() {
1675 let patches = Patches::new(
1676 10,
1677 0,
1678 buffer![2u64, 5, 8].into_array(),
1679 buffer![100i32, 200, 300].into_array(),
1680 None,
1681 )
1682 .unwrap();
1683
1684 let mask = Mask::from_indices(10, vec![2, 5, 9]);
1686 let filtered = patches
1687 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1688 .unwrap()
1689 .unwrap();
1690
1691 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1693 }
1694
1695 #[test]
1696 fn test_slice_full_range() {
1697 let patches = Patches::new(
1698 10,
1699 0,
1700 buffer![2u64, 5, 8].into_array(),
1701 buffer![100i32, 200, 300].into_array(),
1702 None,
1703 )
1704 .unwrap();
1705
1706 let sliced = patches.slice(0..10).unwrap().unwrap();
1707
1708 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1709 assert_arrays_eq!(
1710 sliced.values(),
1711 PrimitiveArray::from_iter([100i32, 200, 300])
1712 );
1713 }
1714
1715 #[test]
1716 fn test_slice_partial() {
1717 let patches = Patches::new(
1718 10,
1719 0,
1720 buffer![2u64, 5, 8].into_array(),
1721 buffer![100i32, 200, 300].into_array(),
1722 None,
1723 )
1724 .unwrap();
1725
1726 let sliced = patches.slice(3..8).unwrap().unwrap();
1728
1729 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1731 assert_eq!(sliced.array_len(), 5); assert_eq!(sliced.offset(), 3); }
1734
1735 #[test]
1736 fn test_slice_no_patches() {
1737 let patches = Patches::new(
1738 10,
1739 0,
1740 buffer![2u64, 5, 8].into_array(),
1741 buffer![100i32, 200, 300].into_array(),
1742 None,
1743 )
1744 .unwrap();
1745
1746 let sliced = patches.slice(6..7).unwrap();
1748 assert!(sliced.is_none());
1749 }
1750
1751 #[test]
1752 fn test_slice_with_offset() {
1753 let patches = Patches::new(
1754 10,
1755 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1758 None,
1759 )
1760 .unwrap();
1761
1762 let sliced = patches.slice(3..8).unwrap().unwrap();
1764
1765 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1767 assert_eq!(sliced.offset(), 8); }
1769
1770 #[test]
1771 fn test_patch_values() {
1772 let patches = Patches::new(
1773 10,
1774 0,
1775 buffer![2u64, 5, 8].into_array(),
1776 buffer![100i32, 200, 300].into_array(),
1777 None,
1778 )
1779 .unwrap();
1780
1781 #[expect(deprecated)]
1782 let values = patches.values().to_primitive();
1783 assert_eq!(
1784 i32::try_from(
1785 &values
1786 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
1787 .unwrap()
1788 )
1789 .unwrap(),
1790 100i32
1791 );
1792 assert_eq!(
1793 i32::try_from(
1794 &values
1795 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1796 .unwrap()
1797 )
1798 .unwrap(),
1799 200i32
1800 );
1801 assert_eq!(
1802 i32::try_from(
1803 &values
1804 .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
1805 .unwrap()
1806 )
1807 .unwrap(),
1808 300i32
1809 );
1810 }
1811
1812 #[test]
1813 fn test_indices_range() {
1814 let patches = Patches::new(
1815 10,
1816 0,
1817 buffer![2u64, 5, 8].into_array(),
1818 buffer![100i32, 200, 300].into_array(),
1819 None,
1820 )
1821 .unwrap();
1822
1823 assert_eq!(patches.min_index().unwrap(), 2);
1824 assert_eq!(patches.max_index().unwrap(), 8);
1825 }
1826
1827 #[test]
1828 fn test_search_index() {
1829 let patches = Patches::new(
1830 10,
1831 0,
1832 buffer![2u64, 5, 8].into_array(),
1833 buffer![100i32, 200, 300].into_array(),
1834 None,
1835 )
1836 .unwrap();
1837
1838 assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1840 assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1841 assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1842
1843 assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1845 assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1846 assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1847 assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1848 }
1849
1850 #[test]
1851 fn test_mask_boundary_patches() {
1852 let patches = Patches::new(
1854 10,
1855 0,
1856 buffer![0u64, 9].into_array(),
1857 buffer![100i32, 200].into_array(),
1858 None,
1859 )
1860 .unwrap();
1861
1862 let mask = Mask::from_iter([
1863 true, false, false, false, false, false, false, false, false, false,
1864 ]);
1865 let masked = patches
1866 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1867 .unwrap();
1868 assert!(masked.is_some());
1869 let masked = masked.unwrap();
1870 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1871 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1872 }
1873
1874 #[test]
1875 fn test_mask_all_patches_removed() {
1876 let patches = Patches::new(
1878 10,
1879 0,
1880 buffer![2u64, 5, 8].into_array(),
1881 buffer![100i32, 200, 300].into_array(),
1882 None,
1883 )
1884 .unwrap();
1885
1886 let mask = Mask::from_iter([
1888 false, false, true, false, false, true, false, false, true, false,
1889 ]);
1890 let masked = patches
1891 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1892 .unwrap();
1893 assert!(masked.is_none());
1894 }
1895
1896 #[test]
1897 fn test_mask_no_patches_removed() {
1898 let patches = Patches::new(
1900 10,
1901 0,
1902 buffer![2u64, 5, 8].into_array(),
1903 buffer![100i32, 200, 300].into_array(),
1904 None,
1905 )
1906 .unwrap();
1907
1908 let mask = Mask::from_iter([
1910 true, false, false, true, false, false, true, false, false, true,
1911 ]);
1912 let masked = patches
1913 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1914 .unwrap()
1915 .unwrap();
1916
1917 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1918 assert_arrays_eq!(
1919 masked.values(),
1920 PrimitiveArray::from_iter([100i32, 200, 300])
1921 );
1922 }
1923
1924 #[test]
1925 fn test_mask_single_patch() {
1926 let patches = Patches::new(
1928 5,
1929 0,
1930 buffer![2u64].into_array(),
1931 buffer![42i32].into_array(),
1932 None,
1933 )
1934 .unwrap();
1935
1936 let mask = Mask::from_iter([false, false, true, false, false]);
1938 let masked = patches
1939 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1940 .unwrap();
1941 assert!(masked.is_none());
1942
1943 let mask = Mask::from_iter([true, false, false, true, false]);
1945 let masked = patches
1946 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1947 .unwrap()
1948 .unwrap();
1949 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1950 }
1951
1952 #[test]
1953 fn test_mask_contiguous_patches() {
1954 let patches = Patches::new(
1956 10,
1957 0,
1958 buffer![3u64, 4, 5, 6].into_array(),
1959 buffer![100i32, 200, 300, 400].into_array(),
1960 None,
1961 )
1962 .unwrap();
1963
1964 let mask = Mask::from_iter([
1966 false, false, false, false, true, true, false, false, false, false,
1967 ]);
1968 let masked = patches
1969 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1970 .unwrap()
1971 .unwrap();
1972
1973 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1974 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1975 }
1976
1977 #[test]
1978 fn test_mask_with_large_offset() {
1979 let patches = Patches::new(
1981 20,
1982 15,
1983 buffer![16u64, 17, 19].into_array(), buffer![100i32, 200, 300].into_array(),
1985 None,
1986 )
1987 .unwrap();
1988
1989 let mask = Mask::from_iter([
1991 false, false, true, false, false, false, false, false, false, false, false, false,
1992 false, false, false, false, false, false, false, false,
1993 ]);
1994 let masked = patches
1995 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1996 .unwrap()
1997 .unwrap();
1998
1999 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
2000 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
2001 }
2002
2003 #[test]
2004 #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
2005 fn test_mask_wrong_length() {
2006 let patches = Patches::new(
2007 10,
2008 0,
2009 buffer![2u64, 5, 8].into_array(),
2010 buffer![100i32, 200, 300].into_array(),
2011 None,
2012 )
2013 .unwrap();
2014
2015 let mask = Mask::from_iter([false, false, true, false, false]);
2017 patches
2018 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
2019 .unwrap();
2020 }
2021
2022 #[test]
2023 fn test_chunk_offsets_search() {
2024 let indices = buffer![100u64, 200, 3000, 3100].into_array();
2025 let values = buffer![10i32, 20, 30, 40].into_array();
2026 let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
2027 let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
2028
2029 assert!(patches.chunk_offsets.is_some());
2030
2031 assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
2033 assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
2034
2035 assert_eq!(
2037 patches.search_index(1500).unwrap(),
2038 SearchResult::NotFound(2)
2039 );
2040 assert_eq!(
2041 patches.search_index(2000).unwrap(),
2042 SearchResult::NotFound(2)
2043 );
2044
2045 assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
2047 assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
2048
2049 assert_eq!(
2050 patches.search_index(1024).unwrap(),
2051 SearchResult::NotFound(2)
2052 );
2053 }
2054
2055 #[test]
2056 fn test_chunk_offsets_with_slice() {
2057 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2058 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2059 let chunk_offsets = buffer![0u64, 2, 6].into_array();
2060 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2061
2062 let sliced = patches.slice(1000..2200).unwrap().unwrap();
2063
2064 assert!(sliced.chunk_offsets.is_some());
2065 assert_eq!(sliced.offset(), 1000);
2066
2067 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
2068 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2069 assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
2070
2071 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
2072 }
2073
2074 #[test]
2075 fn test_chunk_offsets_with_slice_after_first_chunk() {
2076 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2077 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2078 let chunk_offsets = buffer![0u64, 2, 6].into_array();
2079 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2080
2081 let sliced = patches.slice(1300..2200).unwrap().unwrap();
2082
2083 assert!(sliced.chunk_offsets.is_some());
2084 assert_eq!(sliced.offset(), 1300);
2085
2086 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2087 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
2088 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2089 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
2090 assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
2091 }
2092
2093 #[test]
2094 fn test_chunk_offsets_slice_empty_result() {
2095 let indices = buffer![100u64, 200, 3000, 3100].into_array();
2096 let values = buffer![10i32, 20, 30, 40].into_array();
2097 let chunk_offsets = buffer![0u64, 2].into_array();
2098 let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
2099
2100 let sliced = patches.slice(1000..2000).unwrap();
2101 assert!(sliced.is_none());
2102 }
2103
2104 #[test]
2105 fn test_chunk_offsets_slice_single_patch() {
2106 let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2107 let values = buffer![10i32, 20, 30, 40].into_array();
2108 let chunk_offsets = buffer![0u64, 1, 3].into_array();
2109 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2110
2111 let sliced = patches.slice(1100..1250).unwrap().unwrap();
2112
2113 assert_eq!(sliced.num_patches(), 1);
2114 assert_eq!(sliced.offset(), 1100);
2115 assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2117 assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2118 }
2119
2120 #[test]
2121 fn test_chunk_offsets_slice_across_chunks() {
2122 let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2123 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2124 let chunk_offsets = buffer![0u64, 2, 4].into_array();
2125 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2126
2127 let sliced = patches.slice(150..2150).unwrap().unwrap();
2128
2129 assert_eq!(sliced.num_patches(), 4);
2130 assert_eq!(sliced.offset(), 150);
2131
2132 assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); }
2137
2138 #[test]
2139 fn test_chunk_offsets_boundary_searches() {
2140 let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2141 let values = buffer![10i32, 20, 30, 40, 50].into_array();
2142 let chunk_offsets = buffer![0u64, 1, 4].into_array();
2143 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2144
2145 assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2146 assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2147 assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2148 assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2149 assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2150
2151 assert_eq!(
2152 patches.search_index(1022).unwrap(),
2153 SearchResult::NotFound(0)
2154 );
2155 assert_eq!(
2156 patches.search_index(2046).unwrap(),
2157 SearchResult::NotFound(3)
2158 );
2159 }
2160
2161 #[test]
2162 fn test_chunk_offsets_slice_edge_cases() {
2163 let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2164 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2165 let chunk_offsets = buffer![0u64, 3, 5].into_array();
2166 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2167
2168 let sliced = patches.slice(0..10).unwrap().unwrap();
2170 assert_eq!(sliced.num_patches(), 2);
2171 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2172 assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2173
2174 let sliced = patches.slice(2040..3000).unwrap().unwrap();
2176 assert_eq!(sliced.num_patches(), 2); assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); }
2180
2181 #[test]
2182 fn test_chunk_offsets_slice_nested() {
2183 let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2184 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2185 let chunk_offsets = buffer![0u64].into_array();
2186 let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2187
2188 let sliced1 = patches.slice(150..550).unwrap().unwrap();
2189 assert_eq!(sliced1.num_patches(), 4); let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2192 assert_eq!(sliced2.num_patches(), 1); assert_eq!(sliced2.offset(), 250);
2194
2195 assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(
2197 sliced2.search_index(150).unwrap(),
2198 SearchResult::NotFound(1)
2199 );
2200 }
2201
2202 #[test]
2203 fn test_nested_slice_with_dropped_first_chunk() {
2204 let indices = buffer![0u64, 1024].into_array();
2206 let values = buffer![1i32, 2].into_array();
2207 let chunk_offsets = buffer![0u64, 1].into_array();
2208 let patches = Patches::new(2048, 0, indices, values, Some(chunk_offsets)).unwrap();
2209
2210 let dropped_first = patches.slice(1024..2048).unwrap().unwrap();
2212 let resliced = dropped_first.slice(0..1024).unwrap().unwrap();
2213 assert_eq!(resliced.num_patches(), 1);
2214 }
2215
2216 #[test]
2217 fn test_index_larger_than_length() {
2218 let chunk_offsets = buffer![0u64].into_array();
2219 let indices = buffer![1023u64].into_array();
2220 let values = buffer![42i32].into_array();
2221 let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2222 assert_eq!(
2223 patches.search_index(2048).unwrap(),
2224 SearchResult::NotFound(1)
2225 );
2226 }
2227}