1use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use num_traits::NumCast;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::BufferMut;
12use vortex_error::VortexError;
13use vortex_error::VortexExpect as _;
14use vortex_error::VortexResult;
15use vortex_error::vortex_bail;
16use vortex_error::vortex_ensure;
17use vortex_error::vortex_err;
18use vortex_mask::AllOr;
19use vortex_mask::Mask;
20use vortex_utils::aliases::hash_map::HashMap;
21
22use crate::ArrayRef;
23use crate::ArraySlots;
24use crate::ExecutionCtx;
25use crate::IntoArray;
26use crate::LEGACY_SESSION;
27#[expect(deprecated)]
28use crate::ToCanonical as _;
29use crate::VortexSessionExecute;
30use crate::arrays::PrimitiveArray;
31use crate::builtins::ArrayBuiltins;
32use crate::dtype::DType;
33use crate::dtype::IntegerPType;
34use crate::dtype::NativePType;
35use crate::dtype::Nullability;
36use crate::dtype::Nullability::NonNullable;
37use crate::dtype::PType;
38use crate::dtype::UnsignedPType;
39use crate::match_each_integer_ptype;
40use crate::match_each_unsigned_integer_ptype;
41use crate::scalar::PValue;
42use crate::scalar::Scalar;
43use crate::search_sorted::SearchResult;
44use crate::search_sorted::SearchSorted;
45use crate::search_sorted::SearchSortedSide;
46use crate::validity::Validity;
47
48pub const PATCH_CHUNK_SIZE: usize = 1024;
51
52#[derive(Copy, Clone, prost::Message)]
53pub struct PatchesMetadata {
54 #[prost(uint64, tag = "1")]
55 len: u64,
56 #[prost(uint64, tag = "2")]
57 offset: u64,
58 #[prost(enumeration = "PType", tag = "3")]
59 indices_ptype: i32,
60 #[prost(uint64, optional, tag = "4")]
61 chunk_offsets_len: Option<u64>,
62 #[prost(enumeration = "PType", optional, tag = "5")]
63 chunk_offsets_ptype: Option<i32>,
64 #[prost(uint64, optional, tag = "6")]
65 offset_within_chunk: Option<u64>,
66}
67
68impl PatchesMetadata {
69 #[inline]
70 pub fn new(
71 len: usize,
72 offset: usize,
73 indices_ptype: PType,
74 chunk_offsets_len: Option<usize>,
75 chunk_offsets_ptype: Option<PType>,
76 offset_within_chunk: Option<usize>,
77 ) -> Self {
78 Self {
79 len: len as u64,
80 offset: offset as u64,
81 indices_ptype: indices_ptype as i32,
82 chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
83 chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
84 offset_within_chunk: offset_within_chunk.map(|len| len as u64),
85 }
86 }
87
88 #[inline]
89 pub fn len(&self) -> VortexResult<usize> {
90 usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
91 }
92
93 #[inline]
94 pub fn is_empty(&self) -> bool {
95 self.len == 0
96 }
97
98 #[inline]
99 pub fn offset(&self) -> VortexResult<usize> {
100 usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
101 }
102
103 #[inline]
104 pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
105 self.chunk_offsets_ptype
106 .map(|t| {
107 PType::try_from(t)
108 .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
109 .map(|ptype| DType::Primitive(ptype, NonNullable))
110 })
111 .transpose()
112 }
113
114 #[inline]
115 pub fn indices_dtype(&self) -> VortexResult<DType> {
116 let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
117 vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
118 })?;
119 vortex_ensure!(
120 ptype.is_unsigned_int(),
121 "Patch indices must be unsigned integers"
122 );
123 Ok(DType::Primitive(ptype, NonNullable))
124 }
125}
126
127#[derive(Clone, Debug, Hash, PartialEq, Eq)]
132pub struct PatchesData {
133 offset: usize,
134 offset_within_chunk: Option<usize>,
135}
136
137#[derive(Copy, Clone, Debug)]
139pub struct PatchSlotIndices {
140 pub indices: usize,
141 pub values: usize,
142 pub chunk_offsets: usize,
143}
144
145impl PatchesData {
146 pub fn from_patches(patches: &Patches) -> Self {
148 Self {
149 offset: patches.offset(),
150 offset_within_chunk: patches.offset_within_chunk(),
151 }
152 }
153
154 pub fn patches_from_slots(
159 patches_data: Option<&Self>,
160 len: usize,
161 slots: &[Option<ArrayRef>],
162 slot_idx: PatchSlotIndices,
163 ) -> Option<Patches> {
164 let data = patches_data?;
165 let indices = slots[slot_idx.indices]
166 .as_ref()
167 .vortex_expect("patches_data is set but patch_indices slot is missing");
168 let values = slots[slot_idx.values]
169 .as_ref()
170 .vortex_expect("patches_data is set but patch_values slot is missing");
171 Some(unsafe {
172 Patches::new_unchecked(
173 len,
174 data.offset,
175 indices.clone(),
176 values.clone(),
177 slots[slot_idx.chunk_offsets].clone(),
178 data.offset_within_chunk,
179 )
180 })
181 }
182
183 pub fn push_slots(slots: &mut ArraySlots, patches: Option<&Patches>) {
187 match patches {
188 Some(p) => {
189 slots.push(Some(p.indices().clone()));
190 slots.push(Some(p.values().clone()));
191 slots.push(p.chunk_offsets().clone());
192 }
193 None => {
194 slots.push(None);
195 slots.push(None);
196 slots.push(None);
197 }
198 }
199 }
200
201 #[inline]
203 pub fn offset(&self) -> usize {
204 self.offset
205 }
206
207 #[inline]
209 pub fn offset_within_chunk(&self) -> Option<usize> {
210 self.offset_within_chunk
211 }
212}
213
214#[derive(Debug, Clone)]
216pub struct Patches {
217 array_len: usize,
218 offset: usize,
219 indices: ArrayRef,
220 values: ArrayRef,
221 chunk_offsets: Option<ArrayRef>,
228 offset_within_chunk: Option<usize>,
238}
239
240impl Patches {
241 pub fn new(
242 array_len: usize,
243 offset: usize,
244 indices: ArrayRef,
245 values: ArrayRef,
246 chunk_offsets: Option<ArrayRef>,
247 ) -> VortexResult<Self> {
248 vortex_ensure!(
249 indices.len() == values.len(),
250 "Patch indices and values must have the same length"
251 );
252 vortex_ensure!(
253 indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
254 "Patch indices must be non-nullable unsigned integers, got {:?}",
255 indices.dtype()
256 );
257
258 vortex_ensure!(
259 indices.len() <= array_len,
260 "Patch indices must be shorter than the array length"
261 );
262 vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
263
264 if indices.is_host() && values.is_host() {
267 let max = usize::try_from(&indices.execute_scalar(
268 indices.len() - 1,
269 &mut LEGACY_SESSION.create_execution_ctx(),
270 )?)
271 .map_err(|_| vortex_err!("indices must be a number"))?;
272 vortex_ensure!(
273 max - offset < array_len,
274 "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
275 );
276
277 #[cfg(debug_assertions)]
278 {
279 use crate::VortexSessionExecute;
280 use crate::aggregate_fn::fns::is_sorted::is_sorted;
281 let mut ctx = LEGACY_SESSION.create_execution_ctx();
282 assert!(
283 is_sorted(&indices, &mut ctx).unwrap_or(false),
284 "Patch indices must be sorted"
285 );
286 }
287 }
288
289 Ok(Self {
290 array_len,
291 offset,
292 indices,
293 values,
294 chunk_offsets: chunk_offsets.clone(),
295 offset_within_chunk: chunk_offsets.map(|_| 0),
297 })
298 }
299
300 pub unsafe fn new_unchecked(
310 array_len: usize,
311 offset: usize,
312 indices: ArrayRef,
313 values: ArrayRef,
314 chunk_offsets: Option<ArrayRef>,
315 offset_within_chunk: Option<usize>,
316 ) -> Self {
317 Self {
318 array_len,
319 offset,
320 indices,
321 values,
322 chunk_offsets,
323 offset_within_chunk,
324 }
325 }
326
327 #[inline]
328 pub fn array_len(&self) -> usize {
329 self.array_len
330 }
331
332 #[inline]
333 pub fn num_patches(&self) -> usize {
334 self.indices.len()
335 }
336
337 #[inline]
338 pub fn dtype(&self) -> &DType {
339 self.values.dtype()
340 }
341
342 #[inline]
343 pub fn indices(&self) -> &ArrayRef {
344 &self.indices
345 }
346
347 #[inline]
348 pub fn into_indices(self) -> ArrayRef {
349 self.indices
350 }
351
352 #[inline]
353 pub fn indices_mut(&mut self) -> &mut ArrayRef {
354 &mut self.indices
355 }
356
357 #[inline]
358 pub fn values(&self) -> &ArrayRef {
359 &self.values
360 }
361
362 #[inline]
363 pub fn into_values(self) -> ArrayRef {
364 self.values
365 }
366
367 #[inline]
368 pub fn values_mut(&mut self) -> &mut ArrayRef {
369 &mut self.values
370 }
371
372 #[inline]
373 pub fn offset(&self) -> usize {
375 self.offset
376 }
377
378 #[inline]
379 pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
380 &self.chunk_offsets
381 }
382
383 #[inline]
384 pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
385 let Some(chunk_offsets) = &self.chunk_offsets else {
386 vortex_bail!("chunk_offsets must be set to retrieve offset at index")
387 };
388
389 chunk_offsets
390 .execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())?
391 .as_primitive()
392 .as_::<usize>()
393 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
394 }
395
396 #[inline]
405 pub fn offset_within_chunk(&self) -> Option<usize> {
406 self.offset_within_chunk
407 }
408
409 #[inline]
410 pub fn indices_ptype(&self) -> VortexResult<PType> {
411 PType::try_from(self.indices.dtype())
412 .map_err(|_| vortex_err!("indices dtype is not primitive"))
413 }
414
415 pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
416 if self.indices.len() > len {
417 vortex_bail!(
418 "Patch indices {} are longer than the array length {}",
419 self.indices.len(),
420 len
421 );
422 }
423 if self.values.dtype() != dtype {
424 vortex_bail!(
425 "Patch values dtype {} does not match array dtype {}",
426 self.values.dtype(),
427 dtype
428 );
429 }
430 let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
431 let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
432
433 Ok(PatchesMetadata::new(
434 self.indices.len(),
435 self.offset,
436 self.indices.dtype().as_ptype(),
437 chunk_offsets_len,
438 chunk_offsets_ptype,
439 self.offset_within_chunk,
440 ))
441 }
442
443 pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
444 unsafe {
446 Ok(Self::new_unchecked(
447 self.array_len,
448 self.offset,
449 self.indices,
450 self.values.cast(values_dtype.clone())?,
451 self.chunk_offsets,
452 self.offset_within_chunk,
453 ))
454 }
455 }
456
457 pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
459 self.search_index(index)?
460 .to_found()
461 .map(|patch_idx| {
462 self.values()
463 .execute_scalar(patch_idx, &mut LEGACY_SESSION.create_execution_ctx())
464 })
465 .transpose()
466 }
467
468 pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
486 if self.chunk_offsets.is_some() {
487 return self.search_index_chunked(index);
488 }
489
490 Self::search_index_binary_search(&self.indices, index + self.offset)
491 }
492
493 fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
499 if indices.is_canonical() {
500 #[expect(deprecated)]
501 let primitive = indices.to_primitive();
502 match_each_integer_ptype!(primitive.ptype(), |T| {
503 let Ok(needle) = T::try_from(needle) else {
504 return Ok(SearchResult::NotFound(primitive.len()));
509 };
510 return primitive
511 .as_slice::<T>()
512 .search_sorted(&needle, SearchSortedSide::Left);
513 });
514 }
515 indices
516 .as_primitive_typed()
517 .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
518 }
519
520 fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
530 let Some(chunk_offsets) = &self.chunk_offsets else {
531 vortex_bail!("chunk_offsets is required to be set")
532 };
533
534 let Some(offset_within_chunk) = self.offset_within_chunk else {
535 vortex_bail!("offset_within_chunk is required to be set")
536 };
537
538 if index >= self.array_len() {
539 return Ok(SearchResult::NotFound(self.indices().len()));
540 }
541
542 let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
543
544 let base_offset = self.chunk_offset_at(0)?;
546
547 let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
548 .saturating_sub(offset_within_chunk);
555
556 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
557 (self.chunk_offset_at(chunk_idx + 1)? - base_offset)
558 .saturating_sub(offset_within_chunk)
559 .min(self.indices.len())
560 } else {
561 self.indices.len()
562 };
563
564 let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
565 let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
566
567 Ok(match result {
568 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
569 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
570 })
571 }
572
573 fn search_index_chunked_batch<T, O>(
579 &self,
580 indices: &[T],
581 chunk_offsets: &[O],
582 index: T,
583 ) -> VortexResult<SearchResult>
584 where
585 T: UnsignedPType,
586 O: UnsignedPType,
587 usize: TryFrom<T>,
588 usize: TryFrom<O>,
589 {
590 let Some(offset_within_chunk) = self.offset_within_chunk else {
591 vortex_bail!("offset_within_chunk is required to be set")
592 };
593
594 let chunk_idx = {
595 let Ok(index) = usize::try_from(index) else {
596 return Ok(SearchResult::NotFound(indices.len()));
598 };
599
600 if index >= self.array_len() {
601 return Ok(SearchResult::NotFound(self.indices().len()));
602 }
603
604 (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
605 };
606
607 let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
609 .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
610
611 let patches_start_idx = chunk_offset
612 .saturating_sub(offset_within_chunk);
619
620 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
621 usize::try_from(chunk_offsets[chunk_idx + 1] - chunk_offsets[0])
622 .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
623 .saturating_sub(offset_within_chunk)
624 .min(indices.len())
625 } else {
626 self.indices.len()
627 };
628
629 let Some(offset) = T::from(self.offset) else {
630 return Ok(SearchResult::NotFound(indices.len()));
632 };
633
634 let chunk_indices = &indices[patches_start_idx..patches_end_idx];
635 let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
636
637 Ok(match result {
638 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
639 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
640 })
641 }
642
643 pub fn min_index(&self) -> VortexResult<usize> {
645 let first = self
646 .indices
647 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
648 .as_primitive()
649 .as_::<usize>()
650 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
651 Ok(first - self.offset)
652 }
653
654 pub fn max_index(&self) -> VortexResult<usize> {
656 let last = self
657 .indices
658 .execute_scalar(
659 self.indices.len() - 1,
660 &mut LEGACY_SESSION.create_execution_ctx(),
661 )?
662 .as_primitive()
663 .as_::<usize>()
664 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
665 Ok(last - self.offset)
666 }
667
668 pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
670 if mask.len() != self.array_len {
671 vortex_bail!(
672 "Filter mask length {} does not match array length {}",
673 mask.len(),
674 self.array_len
675 );
676 }
677
678 match mask.indices() {
679 AllOr::All => Ok(Some(self.clone())),
680 AllOr::None => Ok(None),
681 AllOr::Some(mask_indices) => {
682 let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
683 match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
684 filter_patches_with_mask(
685 flat_indices.as_slice::<I>(),
686 self.offset(),
687 self.values(),
688 mask_indices,
689 )
690 })
691 }
692 }
693 }
694
695 pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
700 if mask.len() != self.array_len {
701 vortex_bail!(
702 "Filter mask length {} does not match array length {}",
703 mask.len(),
704 self.array_len
705 );
706 }
707
708 let filter_mask = match mask.bit_buffer() {
709 AllOr::All => return Ok(None),
710 AllOr::None => return Ok(Some(self.clone())),
711 AllOr::Some(masked) => {
712 let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
713 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
714 let patch_indices = patch_indices.as_slice::<P>();
715 Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
716 #[allow(clippy::cast_possible_truncation)]
717 let idx = (patch_indices[i] as usize) - self.offset;
718 !masked.value(idx)
719 }))
720 })
721 }
722 };
723
724 if filter_mask.all_false() {
725 return Ok(None);
726 }
727
728 let filtered_indices = self.indices.filter(filter_mask.clone())?;
730 let filtered_values = self.values.filter(filter_mask)?;
731
732 Ok(Some(Self {
733 array_len: self.array_len,
734 offset: self.offset,
735 indices: filtered_indices,
736 values: filtered_values,
737 chunk_offsets: None,
739 offset_within_chunk: self.offset_within_chunk,
740 }))
741 }
742
743 pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
745 let slice_start_idx = self.search_index(range.start)?.to_index();
746 let slice_end_idx = self.search_index(range.end)?.to_index();
747
748 if slice_start_idx == slice_end_idx {
749 return Ok(None);
750 }
751
752 let values = self.values().slice(slice_start_idx..slice_end_idx)?;
753 let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
754
755 let new_chunk_offsets = self
756 .chunk_offsets
757 .as_ref()
758 .map(|chunk_offsets| -> VortexResult<ArrayRef> {
759 let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
760 let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
761 let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
762 chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
763 })
764 .transpose()?;
765
766 let offset_within_chunk = new_chunk_offsets
767 .as_ref()
768 .map(|new_chunk_offsets| -> VortexResult<usize> {
769 let new_chunk_base = new_chunk_offsets
770 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
771 .as_primitive()
772 .as_::<usize>()
773 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
774 let parent_chunk_base = self.chunk_offset_at(0)?;
775 let parent_within = self.offset_within_chunk.unwrap_or(0);
776 Ok(parent_chunk_base + parent_within + slice_start_idx - new_chunk_base)
777 })
778 .transpose()?;
779
780 Ok(Some(Self {
781 array_len: range.len(),
782 offset: range.start + self.offset(),
783 indices,
784 values,
785 chunk_offsets: new_chunk_offsets,
786 offset_within_chunk,
787 }))
788 }
789
790 const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
792
793 fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
794 (self.num_patches() as f64 / take_indices.len() as f64)
795 < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
796 }
797
798 pub fn take_with_nulls(
802 &self,
803 take_indices: &ArrayRef,
804 ctx: &mut ExecutionCtx,
805 ) -> VortexResult<Option<Self>> {
806 if take_indices.is_empty() {
807 return Ok(None);
808 }
809
810 let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
811 if self.is_map_faster_than_search(&take_indices) {
812 self.take_map(take_indices, true, ctx)
813 } else {
814 self.take_search(take_indices, true, ctx)
815 }
816 }
817
818 pub fn take(
822 &self,
823 take_indices: &ArrayRef,
824 ctx: &mut ExecutionCtx,
825 ) -> VortexResult<Option<Self>> {
826 if take_indices.is_empty() {
827 return Ok(None);
828 }
829
830 let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
831 if self.is_map_faster_than_search(&take_indices) {
832 self.take_map(take_indices, false, ctx)
833 } else {
834 self.take_search(take_indices, false, ctx)
835 }
836 }
837
838 #[expect(
839 clippy::cognitive_complexity,
840 reason = "complexity is from nested match_each_* macros"
841 )]
842 pub fn take_search(
843 &self,
844 take_indices: PrimitiveArray,
845 include_nulls: bool,
846 ctx: &mut ExecutionCtx,
847 ) -> VortexResult<Option<Self>> {
848 let take_indices_validity = take_indices.validity()?;
849 let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
850 let chunk_offsets = self
851 .chunk_offsets()
852 .as_ref()
853 .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
854 .transpose()?;
855
856 let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
857 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
858 let patch_indices_slice = patch_indices.as_slice::<PatchT>();
859 match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
860 let take_slice = take_indices.as_slice::<TakeT>();
861
862 if let Some(chunk_offsets) = chunk_offsets {
863 match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
864 let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
865 take_indices_with_search_fn(
866 patch_indices_slice,
867 take_slice,
868 take_indices
869 .as_ref()
870 .validity()?
871 .execute_mask(take_indices.as_ref().len(), ctx)?,
872 include_nulls,
873 |take_idx| {
874 self.search_index_chunked_batch(
875 patch_indices_slice,
876 chunk_offsets,
877 take_idx,
878 )
879 },
880 )?
881 })
882 } else {
883 take_indices_with_search_fn(
884 patch_indices_slice,
885 take_slice,
886 take_indices
887 .as_ref()
888 .validity()?
889 .execute_mask(take_indices.as_ref().len(), ctx)?,
890 include_nulls,
891 |take_idx| {
892 let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
893 return Ok(SearchResult::NotFound(patch_indices_slice.len()));
895 };
896
897 patch_indices_slice
898 .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
899 },
900 )?
901 }
902 })
903 });
904
905 if new_indices.is_empty() {
906 return Ok(None);
907 }
908
909 let new_indices = new_indices.into_array();
910 let new_array_len = take_indices.len();
911 let values_validity = take_indices_validity.take(&new_indices)?;
912
913 Ok(Some(Self {
914 array_len: new_array_len,
915 offset: 0,
916 indices: new_indices,
917 values: self
918 .values()
919 .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
920 chunk_offsets: None,
921 offset_within_chunk: Some(0), }))
923 }
924
925 pub fn take_map(
926 &self,
927 take_indices: PrimitiveArray,
928 include_nulls: bool,
929 ctx: &mut ExecutionCtx,
930 ) -> VortexResult<Option<Self>> {
931 let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
932 let new_length = take_indices.len();
933
934 let min_index = self.min_index()?;
935 let max_index = self.max_index()?;
936
937 let Some((new_sparse_indices, value_indices)) =
938 match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
939 match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
940 let take_validity = take_indices
941 .validity()?
942 .execute_mask(take_indices.len(), ctx)?;
943 let take_nullability = take_indices.validity()?.nullability();
944 let take_slice = take_indices.as_slice::<TakeIndices>();
945 take_map::<_, TakeIndices>(
946 indices.as_slice::<Indices>(),
947 take_slice,
948 take_validity,
949 take_nullability,
950 self.offset(),
951 min_index,
952 max_index,
953 include_nulls,
954 )?
955 })
956 })
957 else {
958 return Ok(None);
959 };
960
961 let taken_values = self.values().take(value_indices)?;
962
963 Ok(Some(Patches {
964 array_len: new_length,
965 offset: 0,
966 indices: new_sparse_indices,
967 values: taken_values,
968 chunk_offsets: None,
970 offset_within_chunk: self.offset_within_chunk,
971 }))
972 }
973
974 pub fn map_values<F>(self, f: F) -> VortexResult<Self>
975 where
976 F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
977 {
978 let values = f(self.values)?;
979 if self.indices.len() != values.len() {
980 vortex_bail!(
981 "map_values must preserve length: expected {} received {}",
982 self.indices.len(),
983 values.len()
984 )
985 }
986
987 Ok(Self {
988 array_len: self.array_len,
989 offset: self.offset,
990 indices: self.indices,
991 values,
992 chunk_offsets: self.chunk_offsets,
993 offset_within_chunk: self.offset_within_chunk,
994 })
995 }
996}
997
998#[expect(clippy::too_many_arguments)] fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
1000 indices: &[I],
1001 take_indices: &[T],
1002 take_validity: Mask,
1003 take_nullability: Nullability,
1004 indices_offset: usize,
1005 min_index: usize,
1006 max_index: usize,
1007 include_nulls: bool,
1008) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
1009where
1010 usize: TryFrom<T>,
1011 VortexError: From<<I as TryFrom<usize>>::Error>,
1012{
1013 let offset_i = I::try_from(indices_offset)?;
1014
1015 let sparse_index_to_value_index: HashMap<I, usize> = indices
1016 .iter()
1017 .copied()
1018 .map(|idx| idx - offset_i)
1019 .enumerate()
1020 .map(|(value_index, sparse_index)| (sparse_index, value_index))
1021 .collect();
1022
1023 let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1024 let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1025
1026 for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
1027 let ti = usize::try_from(take_idx)
1028 .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
1029
1030 let is_null = match take_validity.bit_buffer() {
1032 AllOr::All => false,
1033 AllOr::None => true,
1034 AllOr::Some(buf) => !buf.value(idx_in_take),
1035 };
1036 if is_null {
1037 if include_nulls {
1038 new_sparse_indices.push(idx_in_take as u64);
1039 value_indices.push(0);
1040 }
1041 } else if ti >= min_index && ti <= max_index {
1042 let ti_as_i = I::try_from(ti)
1043 .map_err(|_| vortex_err!("take index does not fit in index type"))?;
1044 if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
1045 new_sparse_indices.push(idx_in_take as u64);
1046 value_indices.push(value_index as u64);
1047 }
1048 }
1049 }
1050
1051 if new_sparse_indices.is_empty() {
1052 return Ok(None);
1053 }
1054
1055 let new_sparse_indices = new_sparse_indices.into_array();
1056 let values_validity =
1057 Validity::from_mask(take_validity, take_nullability).take(&new_sparse_indices)?;
1058 Ok(Some((
1059 new_sparse_indices,
1060 PrimitiveArray::new(value_indices, values_validity).into_array(),
1061 )))
1062}
1063
1064fn filter_patches_with_mask<T: IntegerPType>(
1070 patch_indices: &[T],
1071 offset: usize,
1072 patch_values: &ArrayRef,
1073 mask_indices: &[usize],
1074) -> VortexResult<Option<Patches>> {
1075 let true_count = mask_indices.len();
1076 let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
1077 let mut new_mask_indices = Vec::with_capacity(true_count);
1078
1079 const STRIDE: usize = 4;
1083
1084 let mut mask_idx = 0usize;
1085 let mut true_idx = 0usize;
1086
1087 while mask_idx < patch_indices.len() && true_idx < true_count {
1088 if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1095 let left_min = patch_indices[mask_idx]
1097 .to_usize()
1098 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1099 - offset;
1100 let left_max = patch_indices[mask_idx + STRIDE]
1101 .to_usize()
1102 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1103 - offset;
1104 let right_min = mask_indices[true_idx];
1105 let right_max = mask_indices[true_idx + STRIDE];
1106
1107 if left_min > right_max {
1108 true_idx += STRIDE;
1110 continue;
1111 } else if right_min > left_max {
1112 mask_idx += STRIDE;
1113 continue;
1114 } else {
1115 }
1117 }
1118
1119 let left = patch_indices[mask_idx]
1122 .to_usize()
1123 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1124 - offset;
1125 let right = mask_indices[true_idx];
1126
1127 match left.cmp(&right) {
1128 Ordering::Less => {
1129 mask_idx += 1;
1130 }
1131 Ordering::Greater => {
1132 true_idx += 1;
1133 }
1134 Ordering::Equal => {
1135 new_mask_indices.push(mask_idx);
1137 new_patch_indices.push(true_idx as u64);
1138
1139 mask_idx += 1;
1140 true_idx += 1;
1141 }
1142 }
1143 }
1144
1145 if new_mask_indices.is_empty() {
1146 return Ok(None);
1147 }
1148
1149 let new_patch_indices = new_patch_indices.into_array();
1150 let new_patch_values =
1151 patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1152
1153 Ok(Some(Patches::new(
1154 true_count,
1155 0,
1156 new_patch_indices,
1157 new_patch_values,
1158 None,
1160 )?))
1161}
1162
1163fn take_indices_with_search_fn<
1164 I: UnsignedPType,
1165 T: IntegerPType,
1166 F: Fn(I) -> VortexResult<SearchResult>,
1167>(
1168 indices: &[I],
1169 take_indices: &[T],
1170 take_validity: Mask,
1171 include_nulls: bool,
1172 search_fn: F,
1173) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1174 let mut values_indices = BufferMut::with_capacity(take_indices.len());
1175 let mut new_indices = BufferMut::with_capacity(take_indices.len());
1176
1177 for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1178 if !take_validity.value(new_patch_idx) {
1179 if include_nulls {
1180 values_indices.push(0u64);
1182 new_indices.push(new_patch_idx as u64);
1183 }
1184 continue;
1185 } else {
1186 let search_result = match I::from(take_idx) {
1187 Some(idx) => search_fn(idx)?,
1188 None => SearchResult::NotFound(indices.len()),
1189 };
1190
1191 if let Some(patch_idx) = search_result.to_found() {
1192 values_indices.push(patch_idx as u64);
1193 new_indices.push(new_patch_idx as u64);
1194 }
1195 }
1196 }
1197
1198 Ok((values_indices, new_indices))
1199}
1200
1201#[cfg(test)]
1202mod test {
1203 use vortex_buffer::BufferMut;
1204 use vortex_buffer::buffer;
1205 use vortex_mask::Mask;
1206
1207 use crate::IntoArray;
1208 use crate::LEGACY_SESSION;
1209 #[expect(deprecated)]
1210 use crate::ToCanonical as _;
1211 use crate::VortexSessionExecute;
1212 use crate::assert_arrays_eq;
1213 use crate::patches::Patches;
1214 use crate::patches::PrimitiveArray;
1215 use crate::search_sorted::SearchResult;
1216 use crate::validity::Validity;
1217
1218 #[test]
1219 fn test_filter() {
1220 let patches = Patches::new(
1221 100,
1222 0,
1223 buffer![10u32, 11, 20].into_array(),
1224 buffer![100, 110, 200].into_array(),
1225 None,
1226 )
1227 .unwrap();
1228
1229 let filtered = patches
1230 .filter(
1231 &Mask::from_indices(100, vec![10, 20, 30]),
1232 &mut LEGACY_SESSION.create_execution_ctx(),
1233 )
1234 .unwrap()
1235 .unwrap();
1236
1237 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1238 assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1239 }
1240
1241 #[test]
1242 fn take_with_nulls() {
1243 let patches = Patches::new(
1244 20,
1245 0,
1246 buffer![2u64, 9, 15].into_array(),
1247 PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1248 None,
1249 )
1250 .unwrap();
1251
1252 let taken = patches
1253 .take(
1254 &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1255 .into_array(),
1256 &mut LEGACY_SESSION.create_execution_ctx(),
1257 )
1258 .unwrap()
1259 .unwrap();
1260 #[expect(deprecated)]
1261 let primitive_values = taken.values().to_primitive();
1262 #[expect(deprecated)]
1263 let primitive_indices = taken.indices().to_primitive();
1264 assert_eq!(taken.array_len(), 2);
1265 assert_arrays_eq!(
1266 primitive_values,
1267 PrimitiveArray::from_option_iter([Some(44i32)])
1268 );
1269 assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1270 assert_eq!(
1271 primitive_values
1272 .as_ref()
1273 .validity()
1274 .unwrap()
1275 .execute_mask(
1276 primitive_values.as_ref().len(),
1277 &mut LEGACY_SESSION.create_execution_ctx()
1278 )
1279 .unwrap(),
1280 Mask::from_iter(vec![true])
1281 );
1282 }
1283
1284 #[test]
1285 fn take_search_with_nulls_chunked() {
1286 let patches = Patches::new(
1287 20,
1288 0,
1289 buffer![2u64, 9, 15].into_array(),
1290 buffer![33_i32, 44, 55].into_array(),
1291 Some(buffer![0u64].into_array()),
1292 )
1293 .unwrap();
1294
1295 let taken = patches
1296 .take_search(
1297 PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1298 true,
1299 &mut LEGACY_SESSION.create_execution_ctx(),
1300 )
1301 .unwrap()
1302 .unwrap();
1303
1304 #[expect(deprecated)]
1305 let primitive_values = taken.values().to_primitive();
1306 assert_eq!(taken.array_len(), 2);
1307 assert_arrays_eq!(
1308 primitive_values,
1309 PrimitiveArray::from_option_iter([Some(44i32), None])
1310 );
1311 assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1312
1313 assert_eq!(
1314 primitive_values
1315 .as_ref()
1316 .validity()
1317 .unwrap()
1318 .execute_mask(
1319 primitive_values.as_ref().len(),
1320 &mut LEGACY_SESSION.create_execution_ctx()
1321 )
1322 .unwrap(),
1323 Mask::from_iter([true, false])
1324 );
1325 }
1326
1327 #[test]
1328 fn take_search_chunked_multiple_chunks() {
1329 let patches = Patches::new(
1330 2048,
1331 0,
1332 buffer![100u64, 500, 1200, 1800].into_array(),
1333 buffer![10_i32, 20, 30, 40].into_array(),
1334 Some(buffer![0u64, 2].into_array()),
1335 )
1336 .unwrap();
1337
1338 let taken = patches
1339 .take_search(
1340 PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1341 true,
1342 &mut LEGACY_SESSION.create_execution_ctx(),
1343 )
1344 .unwrap()
1345 .unwrap();
1346
1347 assert_eq!(taken.array_len(), 3);
1348 assert_arrays_eq!(
1349 taken.values(),
1350 PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1351 );
1352 }
1353
1354 #[test]
1355 fn take_search_chunked_indices_with_no_patches() {
1356 let patches = Patches::new(
1357 20,
1358 0,
1359 buffer![2u64, 9, 15].into_array(),
1360 buffer![33_i32, 44, 55].into_array(),
1361 Some(buffer![0u64].into_array()),
1362 )
1363 .unwrap();
1364
1365 let taken = patches
1366 .take_search(
1367 PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1368 true,
1369 &mut LEGACY_SESSION.create_execution_ctx(),
1370 )
1371 .unwrap();
1372
1373 assert!(taken.is_none());
1374 }
1375
1376 #[test]
1377 fn take_search_chunked_interleaved() {
1378 let patches = Patches::new(
1379 30,
1380 0,
1381 buffer![5u64, 10, 20, 25].into_array(),
1382 buffer![100_i32, 200, 300, 400].into_array(),
1383 Some(buffer![0u64].into_array()),
1384 )
1385 .unwrap();
1386
1387 let taken = patches
1388 .take_search(
1389 PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1390 true,
1391 &mut LEGACY_SESSION.create_execution_ctx(),
1392 )
1393 .unwrap()
1394 .unwrap();
1395
1396 assert_eq!(taken.array_len(), 4);
1397 assert_arrays_eq!(
1398 taken.values(),
1399 PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1400 );
1401 }
1402
1403 #[test]
1404 fn test_take_search_multiple_chunk_offsets() {
1405 let patches = Patches::new(
1406 1500,
1407 0,
1408 BufferMut::from_iter(0..1500u64).into_array(),
1409 BufferMut::from_iter(0..1500i32).into_array(),
1410 Some(buffer![0u64, 1024u64].into_array()),
1411 )
1412 .unwrap();
1413
1414 let taken = patches
1415 .take_search(
1416 PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1417 false,
1418 &mut LEGACY_SESSION.create_execution_ctx(),
1419 )
1420 .unwrap()
1421 .unwrap();
1422
1423 assert_eq!(taken.array_len(), 1500);
1424 }
1425
1426 #[test]
1427 fn test_slice() {
1428 let values = buffer![15_u32, 135, 13531, 42].into_array();
1429 let indices = buffer![10_u64, 11, 50, 100].into_array();
1430
1431 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1432
1433 let sliced = patches.slice(15..100).unwrap().unwrap();
1434 assert_eq!(sliced.array_len(), 100 - 15);
1435 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1436 }
1437
1438 #[test]
1439 fn doubly_sliced() {
1440 let values = buffer![15_u32, 135, 13531, 42].into_array();
1441 let indices = buffer![10_u64, 11, 50, 100].into_array();
1442
1443 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1444
1445 let sliced = patches.slice(15..100).unwrap().unwrap();
1446 assert_eq!(sliced.array_len(), 100 - 15);
1447 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1448
1449 let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1450 assert_arrays_eq!(
1451 doubly_sliced.values(),
1452 PrimitiveArray::from_iter([13531u32])
1453 );
1454 }
1455
1456 #[test]
1457 fn test_mask_all_true() {
1458 let patches = Patches::new(
1459 10,
1460 0,
1461 buffer![2u64, 5, 8].into_array(),
1462 buffer![100i32, 200, 300].into_array(),
1463 None,
1464 )
1465 .unwrap();
1466
1467 let mask = Mask::new_true(10);
1468 let masked = patches
1469 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1470 .unwrap();
1471 assert!(masked.is_none());
1472 }
1473
1474 #[test]
1475 fn test_mask_all_false() {
1476 let patches = Patches::new(
1477 10,
1478 0,
1479 buffer![2u64, 5, 8].into_array(),
1480 buffer![100i32, 200, 300].into_array(),
1481 None,
1482 )
1483 .unwrap();
1484
1485 let mask = Mask::new_false(10);
1486 let masked = patches
1487 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1488 .unwrap()
1489 .unwrap();
1490
1491 assert_arrays_eq!(
1493 masked.values(),
1494 PrimitiveArray::from_iter([100i32, 200, 300])
1495 );
1496 assert!(
1497 masked
1498 .values()
1499 .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1500 .unwrap()
1501 );
1502 assert!(
1503 masked
1504 .values()
1505 .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1506 .unwrap()
1507 );
1508 assert!(
1509 masked
1510 .values()
1511 .is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())
1512 .unwrap()
1513 );
1514
1515 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1517 }
1518
1519 #[test]
1520 fn test_mask_partial() {
1521 let patches = Patches::new(
1522 10,
1523 0,
1524 buffer![2u64, 5, 8].into_array(),
1525 buffer![100i32, 200, 300].into_array(),
1526 None,
1527 )
1528 .unwrap();
1529
1530 let mask = Mask::from_iter([
1532 false, false, true, false, false, false, false, false, true, false,
1533 ]);
1534 let masked = patches
1535 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1536 .unwrap()
1537 .unwrap();
1538
1539 assert_eq!(masked.values().len(), 1);
1541 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1542
1543 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1545 }
1546
1547 #[test]
1548 fn test_mask_with_offset() {
1549 let patches = Patches::new(
1550 10,
1551 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1554 None,
1555 )
1556 .unwrap();
1557
1558 let mask = Mask::from_iter([
1560 false, false, true, false, false, false, false, false, false, false,
1561 ]);
1562
1563 let masked = patches
1564 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1565 .unwrap()
1566 .unwrap();
1567 assert_eq!(masked.array_len(), 10);
1568 assert_eq!(masked.offset(), 5);
1569 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1570 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1571 }
1572
1573 #[test]
1574 fn test_mask_nullable_values() {
1575 let patches = Patches::new(
1576 10,
1577 0,
1578 buffer![2u64, 5, 8].into_array(),
1579 PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1580 None,
1581 )
1582 .unwrap();
1583
1584 let mask = Mask::from_iter([
1586 false, false, true, false, false, false, false, false, false, false,
1587 ]);
1588 let masked = patches
1589 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1590 .unwrap()
1591 .unwrap();
1592
1593 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1595
1596 #[expect(deprecated)]
1598 let masked_values = masked.values().to_primitive();
1599 assert_eq!(masked_values.len(), 2);
1600 assert!(
1601 !masked_values
1602 .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1603 .unwrap()
1604 ); assert!(
1606 masked_values
1607 .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1608 .unwrap()
1609 ); assert_eq!(
1611 i32::try_from(
1612 &masked_values
1613 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1614 .unwrap()
1615 )
1616 .unwrap(),
1617 300i32
1618 );
1619 }
1620
1621 #[test]
1622 fn test_filter_keep_all() {
1623 let patches = Patches::new(
1624 10,
1625 0,
1626 buffer![2u64, 5, 8].into_array(),
1627 buffer![100i32, 200, 300].into_array(),
1628 None,
1629 )
1630 .unwrap();
1631
1632 let mask = Mask::from_indices(10, 0..10);
1634 let filtered = patches
1635 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1636 .unwrap()
1637 .unwrap();
1638
1639 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1640 assert_arrays_eq!(
1641 filtered.values(),
1642 PrimitiveArray::from_iter([100i32, 200, 300])
1643 );
1644 }
1645
1646 #[test]
1647 fn test_filter_none() {
1648 let patches = Patches::new(
1649 10,
1650 0,
1651 buffer![2u64, 5, 8].into_array(),
1652 buffer![100i32, 200, 300].into_array(),
1653 None,
1654 )
1655 .unwrap();
1656
1657 let mask = Mask::from_indices(10, vec![]);
1659 let filtered = patches
1660 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1661 .unwrap();
1662 assert!(filtered.is_none());
1663 }
1664
1665 #[test]
1666 fn test_filter_with_indices() {
1667 let patches = Patches::new(
1668 10,
1669 0,
1670 buffer![2u64, 5, 8].into_array(),
1671 buffer![100i32, 200, 300].into_array(),
1672 None,
1673 )
1674 .unwrap();
1675
1676 let mask = Mask::from_indices(10, vec![2, 5, 9]);
1678 let filtered = patches
1679 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1680 .unwrap()
1681 .unwrap();
1682
1683 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1685 }
1686
1687 #[test]
1688 fn test_slice_full_range() {
1689 let patches = Patches::new(
1690 10,
1691 0,
1692 buffer![2u64, 5, 8].into_array(),
1693 buffer![100i32, 200, 300].into_array(),
1694 None,
1695 )
1696 .unwrap();
1697
1698 let sliced = patches.slice(0..10).unwrap().unwrap();
1699
1700 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1701 assert_arrays_eq!(
1702 sliced.values(),
1703 PrimitiveArray::from_iter([100i32, 200, 300])
1704 );
1705 }
1706
1707 #[test]
1708 fn test_slice_partial() {
1709 let patches = Patches::new(
1710 10,
1711 0,
1712 buffer![2u64, 5, 8].into_array(),
1713 buffer![100i32, 200, 300].into_array(),
1714 None,
1715 )
1716 .unwrap();
1717
1718 let sliced = patches.slice(3..8).unwrap().unwrap();
1720
1721 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1723 assert_eq!(sliced.array_len(), 5); assert_eq!(sliced.offset(), 3); }
1726
1727 #[test]
1728 fn test_slice_no_patches() {
1729 let patches = Patches::new(
1730 10,
1731 0,
1732 buffer![2u64, 5, 8].into_array(),
1733 buffer![100i32, 200, 300].into_array(),
1734 None,
1735 )
1736 .unwrap();
1737
1738 let sliced = patches.slice(6..7).unwrap();
1740 assert!(sliced.is_none());
1741 }
1742
1743 #[test]
1744 fn test_slice_with_offset() {
1745 let patches = Patches::new(
1746 10,
1747 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1750 None,
1751 )
1752 .unwrap();
1753
1754 let sliced = patches.slice(3..8).unwrap().unwrap();
1756
1757 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1759 assert_eq!(sliced.offset(), 8); }
1761
1762 #[test]
1763 fn test_patch_values() {
1764 let patches = Patches::new(
1765 10,
1766 0,
1767 buffer![2u64, 5, 8].into_array(),
1768 buffer![100i32, 200, 300].into_array(),
1769 None,
1770 )
1771 .unwrap();
1772
1773 #[expect(deprecated)]
1774 let values = patches.values().to_primitive();
1775 assert_eq!(
1776 i32::try_from(
1777 &values
1778 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
1779 .unwrap()
1780 )
1781 .unwrap(),
1782 100i32
1783 );
1784 assert_eq!(
1785 i32::try_from(
1786 &values
1787 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1788 .unwrap()
1789 )
1790 .unwrap(),
1791 200i32
1792 );
1793 assert_eq!(
1794 i32::try_from(
1795 &values
1796 .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
1797 .unwrap()
1798 )
1799 .unwrap(),
1800 300i32
1801 );
1802 }
1803
1804 #[test]
1805 fn test_indices_range() {
1806 let patches = Patches::new(
1807 10,
1808 0,
1809 buffer![2u64, 5, 8].into_array(),
1810 buffer![100i32, 200, 300].into_array(),
1811 None,
1812 )
1813 .unwrap();
1814
1815 assert_eq!(patches.min_index().unwrap(), 2);
1816 assert_eq!(patches.max_index().unwrap(), 8);
1817 }
1818
1819 #[test]
1820 fn test_search_index() {
1821 let patches = Patches::new(
1822 10,
1823 0,
1824 buffer![2u64, 5, 8].into_array(),
1825 buffer![100i32, 200, 300].into_array(),
1826 None,
1827 )
1828 .unwrap();
1829
1830 assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1832 assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1833 assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1834
1835 assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1837 assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1838 assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1839 assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1840 }
1841
1842 #[test]
1843 fn test_mask_boundary_patches() {
1844 let patches = Patches::new(
1846 10,
1847 0,
1848 buffer![0u64, 9].into_array(),
1849 buffer![100i32, 200].into_array(),
1850 None,
1851 )
1852 .unwrap();
1853
1854 let mask = Mask::from_iter([
1855 true, false, false, false, false, false, false, false, false, false,
1856 ]);
1857 let masked = patches
1858 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1859 .unwrap();
1860 assert!(masked.is_some());
1861 let masked = masked.unwrap();
1862 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1863 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1864 }
1865
1866 #[test]
1867 fn test_mask_all_patches_removed() {
1868 let patches = Patches::new(
1870 10,
1871 0,
1872 buffer![2u64, 5, 8].into_array(),
1873 buffer![100i32, 200, 300].into_array(),
1874 None,
1875 )
1876 .unwrap();
1877
1878 let mask = Mask::from_iter([
1880 false, false, true, false, false, true, false, false, true, false,
1881 ]);
1882 let masked = patches
1883 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1884 .unwrap();
1885 assert!(masked.is_none());
1886 }
1887
1888 #[test]
1889 fn test_mask_no_patches_removed() {
1890 let patches = Patches::new(
1892 10,
1893 0,
1894 buffer![2u64, 5, 8].into_array(),
1895 buffer![100i32, 200, 300].into_array(),
1896 None,
1897 )
1898 .unwrap();
1899
1900 let mask = Mask::from_iter([
1902 true, false, false, true, false, false, true, false, false, true,
1903 ]);
1904 let masked = patches
1905 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1906 .unwrap()
1907 .unwrap();
1908
1909 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1910 assert_arrays_eq!(
1911 masked.values(),
1912 PrimitiveArray::from_iter([100i32, 200, 300])
1913 );
1914 }
1915
1916 #[test]
1917 fn test_mask_single_patch() {
1918 let patches = Patches::new(
1920 5,
1921 0,
1922 buffer![2u64].into_array(),
1923 buffer![42i32].into_array(),
1924 None,
1925 )
1926 .unwrap();
1927
1928 let mask = Mask::from_iter([false, false, true, false, false]);
1930 let masked = patches
1931 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1932 .unwrap();
1933 assert!(masked.is_none());
1934
1935 let mask = Mask::from_iter([true, false, false, true, false]);
1937 let masked = patches
1938 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1939 .unwrap()
1940 .unwrap();
1941 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1942 }
1943
1944 #[test]
1945 fn test_mask_contiguous_patches() {
1946 let patches = Patches::new(
1948 10,
1949 0,
1950 buffer![3u64, 4, 5, 6].into_array(),
1951 buffer![100i32, 200, 300, 400].into_array(),
1952 None,
1953 )
1954 .unwrap();
1955
1956 let mask = Mask::from_iter([
1958 false, false, false, false, true, true, false, false, false, false,
1959 ]);
1960 let masked = patches
1961 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1962 .unwrap()
1963 .unwrap();
1964
1965 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1966 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1967 }
1968
1969 #[test]
1970 fn test_mask_with_large_offset() {
1971 let patches = Patches::new(
1973 20,
1974 15,
1975 buffer![16u64, 17, 19].into_array(), buffer![100i32, 200, 300].into_array(),
1977 None,
1978 )
1979 .unwrap();
1980
1981 let mask = Mask::from_iter([
1983 false, false, true, false, false, false, false, false, false, false, false, false,
1984 false, false, false, false, false, false, false, false,
1985 ]);
1986 let masked = patches
1987 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1988 .unwrap()
1989 .unwrap();
1990
1991 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1992 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1993 }
1994
1995 #[test]
1996 #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1997 fn test_mask_wrong_length() {
1998 let patches = Patches::new(
1999 10,
2000 0,
2001 buffer![2u64, 5, 8].into_array(),
2002 buffer![100i32, 200, 300].into_array(),
2003 None,
2004 )
2005 .unwrap();
2006
2007 let mask = Mask::from_iter([false, false, true, false, false]);
2009 patches
2010 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
2011 .unwrap();
2012 }
2013
2014 #[test]
2015 fn test_chunk_offsets_search() {
2016 let indices = buffer![100u64, 200, 3000, 3100].into_array();
2017 let values = buffer![10i32, 20, 30, 40].into_array();
2018 let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
2019 let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
2020
2021 assert!(patches.chunk_offsets.is_some());
2022
2023 assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
2025 assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
2026
2027 assert_eq!(
2029 patches.search_index(1500).unwrap(),
2030 SearchResult::NotFound(2)
2031 );
2032 assert_eq!(
2033 patches.search_index(2000).unwrap(),
2034 SearchResult::NotFound(2)
2035 );
2036
2037 assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
2039 assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
2040
2041 assert_eq!(
2042 patches.search_index(1024).unwrap(),
2043 SearchResult::NotFound(2)
2044 );
2045 }
2046
2047 #[test]
2048 fn test_chunk_offsets_with_slice() {
2049 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2050 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2051 let chunk_offsets = buffer![0u64, 2, 6].into_array();
2052 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2053
2054 let sliced = patches.slice(1000..2200).unwrap().unwrap();
2055
2056 assert!(sliced.chunk_offsets.is_some());
2057 assert_eq!(sliced.offset(), 1000);
2058
2059 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
2060 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2061 assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
2062
2063 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
2064 }
2065
2066 #[test]
2067 fn test_chunk_offsets_with_slice_after_first_chunk() {
2068 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2069 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2070 let chunk_offsets = buffer![0u64, 2, 6].into_array();
2071 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2072
2073 let sliced = patches.slice(1300..2200).unwrap().unwrap();
2074
2075 assert!(sliced.chunk_offsets.is_some());
2076 assert_eq!(sliced.offset(), 1300);
2077
2078 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2079 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
2080 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2081 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
2082 assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
2083 }
2084
2085 #[test]
2086 fn test_chunk_offsets_slice_empty_result() {
2087 let indices = buffer![100u64, 200, 3000, 3100].into_array();
2088 let values = buffer![10i32, 20, 30, 40].into_array();
2089 let chunk_offsets = buffer![0u64, 2].into_array();
2090 let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
2091
2092 let sliced = patches.slice(1000..2000).unwrap();
2093 assert!(sliced.is_none());
2094 }
2095
2096 #[test]
2097 fn test_chunk_offsets_slice_single_patch() {
2098 let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2099 let values = buffer![10i32, 20, 30, 40].into_array();
2100 let chunk_offsets = buffer![0u64, 1, 3].into_array();
2101 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2102
2103 let sliced = patches.slice(1100..1250).unwrap().unwrap();
2104
2105 assert_eq!(sliced.num_patches(), 1);
2106 assert_eq!(sliced.offset(), 1100);
2107 assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2109 assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2110 }
2111
2112 #[test]
2113 fn test_chunk_offsets_slice_across_chunks() {
2114 let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2115 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2116 let chunk_offsets = buffer![0u64, 2, 4].into_array();
2117 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2118
2119 let sliced = patches.slice(150..2150).unwrap().unwrap();
2120
2121 assert_eq!(sliced.num_patches(), 4);
2122 assert_eq!(sliced.offset(), 150);
2123
2124 assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); }
2129
2130 #[test]
2131 fn test_chunk_offsets_boundary_searches() {
2132 let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2133 let values = buffer![10i32, 20, 30, 40, 50].into_array();
2134 let chunk_offsets = buffer![0u64, 1, 4].into_array();
2135 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2136
2137 assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2138 assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2139 assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2140 assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2141 assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2142
2143 assert_eq!(
2144 patches.search_index(1022).unwrap(),
2145 SearchResult::NotFound(0)
2146 );
2147 assert_eq!(
2148 patches.search_index(2046).unwrap(),
2149 SearchResult::NotFound(3)
2150 );
2151 }
2152
2153 #[test]
2154 fn test_chunk_offsets_slice_edge_cases() {
2155 let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2156 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2157 let chunk_offsets = buffer![0u64, 3, 5].into_array();
2158 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2159
2160 let sliced = patches.slice(0..10).unwrap().unwrap();
2162 assert_eq!(sliced.num_patches(), 2);
2163 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2164 assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2165
2166 let sliced = patches.slice(2040..3000).unwrap().unwrap();
2168 assert_eq!(sliced.num_patches(), 2); assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); }
2172
2173 #[test]
2174 fn test_chunk_offsets_slice_nested() {
2175 let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2176 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2177 let chunk_offsets = buffer![0u64].into_array();
2178 let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2179
2180 let sliced1 = patches.slice(150..550).unwrap().unwrap();
2181 assert_eq!(sliced1.num_patches(), 4); let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2184 assert_eq!(sliced2.num_patches(), 1); assert_eq!(sliced2.offset(), 250);
2186
2187 assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(
2189 sliced2.search_index(150).unwrap(),
2190 SearchResult::NotFound(1)
2191 );
2192 }
2193
2194 #[test]
2195 fn test_nested_slice_with_dropped_first_chunk() {
2196 let indices = buffer![0u64, 1024].into_array();
2198 let values = buffer![1i32, 2].into_array();
2199 let chunk_offsets = buffer![0u64, 1].into_array();
2200 let patches = Patches::new(2048, 0, indices, values, Some(chunk_offsets)).unwrap();
2201
2202 let dropped_first = patches.slice(1024..2048).unwrap().unwrap();
2204 let resliced = dropped_first.slice(0..1024).unwrap().unwrap();
2205 assert_eq!(resliced.num_patches(), 1);
2206 }
2207
2208 #[test]
2209 fn test_index_larger_than_length() {
2210 let chunk_offsets = buffer![0u64].into_array();
2211 let indices = buffer![1023u64].into_array();
2212 let values = buffer![42i32].into_array();
2213 let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2214 assert_eq!(
2215 patches.search_index(2048).unwrap(),
2216 SearchResult::NotFound(1)
2217 );
2218 }
2219}