1use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use num_traits::NumCast;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::BufferMut;
12use vortex_error::VortexError;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_err;
17use vortex_mask::AllOr;
18use vortex_mask::Mask;
19use vortex_utils::aliases::hash_map::HashMap;
20
21use crate::ArrayRef;
22use crate::ExecutionCtx;
23use crate::IntoArray;
24use crate::LEGACY_SESSION;
25#[expect(deprecated)]
26use crate::ToCanonical as _;
27use crate::VortexSessionExecute;
28use crate::arrays::PrimitiveArray;
29use crate::builtins::ArrayBuiltins;
30use crate::dtype::DType;
31use crate::dtype::IntegerPType;
32use crate::dtype::NativePType;
33use crate::dtype::Nullability;
34use crate::dtype::Nullability::NonNullable;
35use crate::dtype::PType;
36use crate::dtype::UnsignedPType;
37use crate::match_each_integer_ptype;
38use crate::match_each_unsigned_integer_ptype;
39use crate::scalar::PValue;
40use crate::scalar::Scalar;
41use crate::search_sorted::SearchResult;
42use crate::search_sorted::SearchSorted;
43use crate::search_sorted::SearchSortedSide;
44use crate::validity::Validity;
45
46pub const PATCH_CHUNK_SIZE: usize = 1024;
49
50#[derive(Copy, Clone, prost::Message)]
51pub struct PatchesMetadata {
52 #[prost(uint64, tag = "1")]
53 len: u64,
54 #[prost(uint64, tag = "2")]
55 offset: u64,
56 #[prost(enumeration = "PType", tag = "3")]
57 indices_ptype: i32,
58 #[prost(uint64, optional, tag = "4")]
59 chunk_offsets_len: Option<u64>,
60 #[prost(enumeration = "PType", optional, tag = "5")]
61 chunk_offsets_ptype: Option<i32>,
62 #[prost(uint64, optional, tag = "6")]
63 offset_within_chunk: Option<u64>,
64}
65
66impl PatchesMetadata {
67 #[inline]
68 pub fn new(
69 len: usize,
70 offset: usize,
71 indices_ptype: PType,
72 chunk_offsets_len: Option<usize>,
73 chunk_offsets_ptype: Option<PType>,
74 offset_within_chunk: Option<usize>,
75 ) -> Self {
76 Self {
77 len: len as u64,
78 offset: offset as u64,
79 indices_ptype: indices_ptype as i32,
80 chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
81 chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
82 offset_within_chunk: offset_within_chunk.map(|len| len as u64),
83 }
84 }
85
86 #[inline]
87 pub fn len(&self) -> VortexResult<usize> {
88 usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
89 }
90
91 #[inline]
92 pub fn is_empty(&self) -> bool {
93 self.len == 0
94 }
95
96 #[inline]
97 pub fn offset(&self) -> VortexResult<usize> {
98 usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
99 }
100
101 #[inline]
102 pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
103 self.chunk_offsets_ptype
104 .map(|t| {
105 PType::try_from(t)
106 .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
107 .map(|ptype| DType::Primitive(ptype, NonNullable))
108 })
109 .transpose()
110 }
111
112 #[inline]
113 pub fn indices_dtype(&self) -> VortexResult<DType> {
114 let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
115 vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
116 })?;
117 vortex_ensure!(
118 ptype.is_unsigned_int(),
119 "Patch indices must be unsigned integers"
120 );
121 Ok(DType::Primitive(ptype, NonNullable))
122 }
123}
124
125#[derive(Debug, Clone)]
127pub struct Patches {
128 array_len: usize,
129 offset: usize,
130 indices: ArrayRef,
131 values: ArrayRef,
132 chunk_offsets: Option<ArrayRef>,
139 offset_within_chunk: Option<usize>,
149}
150
151impl Patches {
152 pub fn new(
153 array_len: usize,
154 offset: usize,
155 indices: ArrayRef,
156 values: ArrayRef,
157 chunk_offsets: Option<ArrayRef>,
158 ) -> VortexResult<Self> {
159 vortex_ensure!(
160 indices.len() == values.len(),
161 "Patch indices and values must have the same length"
162 );
163 vortex_ensure!(
164 indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
165 "Patch indices must be non-nullable unsigned integers, got {:?}",
166 indices.dtype()
167 );
168
169 vortex_ensure!(
170 indices.len() <= array_len,
171 "Patch indices must be shorter than the array length"
172 );
173 vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
174
175 if indices.is_host() && values.is_host() {
178 let max = usize::try_from(&indices.execute_scalar(
179 indices.len() - 1,
180 &mut LEGACY_SESSION.create_execution_ctx(),
181 )?)
182 .map_err(|_| vortex_err!("indices must be a number"))?;
183 vortex_ensure!(
184 max - offset < array_len,
185 "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
186 );
187
188 #[cfg(debug_assertions)]
189 {
190 use crate::VortexSessionExecute;
191 let mut ctx = LEGACY_SESSION.create_execution_ctx();
192 assert!(
193 crate::aggregate_fn::fns::is_sorted::is_sorted(&indices, &mut ctx)
194 .unwrap_or(false),
195 "Patch indices must be sorted"
196 );
197 }
198 }
199
200 Ok(Self {
201 array_len,
202 offset,
203 indices,
204 values,
205 chunk_offsets: chunk_offsets.clone(),
206 offset_within_chunk: chunk_offsets.map(|_| 0),
208 })
209 }
210
211 pub unsafe fn new_unchecked(
221 array_len: usize,
222 offset: usize,
223 indices: ArrayRef,
224 values: ArrayRef,
225 chunk_offsets: Option<ArrayRef>,
226 offset_within_chunk: Option<usize>,
227 ) -> Self {
228 Self {
229 array_len,
230 offset,
231 indices,
232 values,
233 chunk_offsets,
234 offset_within_chunk,
235 }
236 }
237
238 #[inline]
239 pub fn array_len(&self) -> usize {
240 self.array_len
241 }
242
243 #[inline]
244 pub fn num_patches(&self) -> usize {
245 self.indices.len()
246 }
247
248 #[inline]
249 pub fn dtype(&self) -> &DType {
250 self.values.dtype()
251 }
252
253 #[inline]
254 pub fn indices(&self) -> &ArrayRef {
255 &self.indices
256 }
257
258 #[inline]
259 pub fn into_indices(self) -> ArrayRef {
260 self.indices
261 }
262
263 #[inline]
264 pub fn indices_mut(&mut self) -> &mut ArrayRef {
265 &mut self.indices
266 }
267
268 #[inline]
269 pub fn values(&self) -> &ArrayRef {
270 &self.values
271 }
272
273 #[inline]
274 pub fn into_values(self) -> ArrayRef {
275 self.values
276 }
277
278 #[inline]
279 pub fn values_mut(&mut self) -> &mut ArrayRef {
280 &mut self.values
281 }
282
283 #[inline]
284 pub fn offset(&self) -> usize {
286 self.offset
287 }
288
289 #[inline]
290 pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
291 &self.chunk_offsets
292 }
293
294 #[inline]
295 pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
296 let Some(chunk_offsets) = &self.chunk_offsets else {
297 vortex_bail!("chunk_offsets must be set to retrieve offset at index")
298 };
299
300 chunk_offsets
301 .execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())?
302 .as_primitive()
303 .as_::<usize>()
304 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
305 }
306
307 #[inline]
316 pub fn offset_within_chunk(&self) -> Option<usize> {
317 self.offset_within_chunk
318 }
319
320 #[inline]
321 pub fn indices_ptype(&self) -> VortexResult<PType> {
322 PType::try_from(self.indices.dtype())
323 .map_err(|_| vortex_err!("indices dtype is not primitive"))
324 }
325
326 pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
327 if self.indices.len() > len {
328 vortex_bail!(
329 "Patch indices {} are longer than the array length {}",
330 self.indices.len(),
331 len
332 );
333 }
334 if self.values.dtype() != dtype {
335 vortex_bail!(
336 "Patch values dtype {} does not match array dtype {}",
337 self.values.dtype(),
338 dtype
339 );
340 }
341 let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
342 let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
343
344 Ok(PatchesMetadata::new(
345 self.indices.len(),
346 self.offset,
347 self.indices.dtype().as_ptype(),
348 chunk_offsets_len,
349 chunk_offsets_ptype,
350 self.offset_within_chunk,
351 ))
352 }
353
354 pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
355 unsafe {
357 Ok(Self::new_unchecked(
358 self.array_len,
359 self.offset,
360 self.indices,
361 self.values.cast(values_dtype.clone())?,
362 self.chunk_offsets,
363 self.offset_within_chunk,
364 ))
365 }
366 }
367
368 pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
370 self.search_index(index)?
371 .to_found()
372 .map(|patch_idx| {
373 self.values()
374 .execute_scalar(patch_idx, &mut LEGACY_SESSION.create_execution_ctx())
375 })
376 .transpose()
377 }
378
379 pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
397 if self.chunk_offsets.is_some() {
398 return self.search_index_chunked(index);
399 }
400
401 Self::search_index_binary_search(&self.indices, index + self.offset)
402 }
403
404 fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
410 if indices.is_canonical() {
411 #[expect(deprecated)]
412 let primitive = indices.to_primitive();
413 match_each_integer_ptype!(primitive.ptype(), |T| {
414 let Ok(needle) = T::try_from(needle) else {
415 return Ok(SearchResult::NotFound(primitive.len()));
420 };
421 return primitive
422 .as_slice::<T>()
423 .search_sorted(&needle, SearchSortedSide::Left);
424 });
425 }
426 indices
427 .as_primitive_typed()
428 .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
429 }
430
431 fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
441 let Some(chunk_offsets) = &self.chunk_offsets else {
442 vortex_bail!("chunk_offsets is required to be set")
443 };
444
445 let Some(offset_within_chunk) = self.offset_within_chunk else {
446 vortex_bail!("offset_within_chunk is required to be set")
447 };
448
449 if index >= self.array_len() {
450 return Ok(SearchResult::NotFound(self.indices().len()));
451 }
452
453 let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
454
455 let base_offset = self.chunk_offset_at(0)?;
457
458 let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
459 .saturating_sub(offset_within_chunk);
466
467 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
468 (self.chunk_offset_at(chunk_idx + 1)? - base_offset)
469 .saturating_sub(offset_within_chunk)
470 .min(self.indices.len())
471 } else {
472 self.indices.len()
473 };
474
475 let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
476 let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
477
478 Ok(match result {
479 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
480 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
481 })
482 }
483
484 fn search_index_chunked_batch<T, O>(
490 &self,
491 indices: &[T],
492 chunk_offsets: &[O],
493 index: T,
494 ) -> VortexResult<SearchResult>
495 where
496 T: UnsignedPType,
497 O: UnsignedPType,
498 usize: TryFrom<T>,
499 usize: TryFrom<O>,
500 {
501 let Some(offset_within_chunk) = self.offset_within_chunk else {
502 vortex_bail!("offset_within_chunk is required to be set")
503 };
504
505 let chunk_idx = {
506 let Ok(index) = usize::try_from(index) else {
507 return Ok(SearchResult::NotFound(indices.len()));
509 };
510
511 if index >= self.array_len() {
512 return Ok(SearchResult::NotFound(self.indices().len()));
513 }
514
515 (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
516 };
517
518 let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
520 .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
521
522 let patches_start_idx = chunk_offset
523 .saturating_sub(offset_within_chunk);
530
531 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
532 usize::try_from(chunk_offsets[chunk_idx + 1] - chunk_offsets[0])
533 .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
534 .saturating_sub(offset_within_chunk)
535 .min(indices.len())
536 } else {
537 self.indices.len()
538 };
539
540 let Some(offset) = T::from(self.offset) else {
541 return Ok(SearchResult::NotFound(indices.len()));
543 };
544
545 let chunk_indices = &indices[patches_start_idx..patches_end_idx];
546 let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
547
548 Ok(match result {
549 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
550 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
551 })
552 }
553
554 pub fn min_index(&self) -> VortexResult<usize> {
556 let first = self
557 .indices
558 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
559 .as_primitive()
560 .as_::<usize>()
561 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
562 Ok(first - self.offset)
563 }
564
565 pub fn max_index(&self) -> VortexResult<usize> {
567 let last = self
568 .indices
569 .execute_scalar(
570 self.indices.len() - 1,
571 &mut LEGACY_SESSION.create_execution_ctx(),
572 )?
573 .as_primitive()
574 .as_::<usize>()
575 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
576 Ok(last - self.offset)
577 }
578
579 pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
581 if mask.len() != self.array_len {
582 vortex_bail!(
583 "Filter mask length {} does not match array length {}",
584 mask.len(),
585 self.array_len
586 );
587 }
588
589 match mask.indices() {
590 AllOr::All => Ok(Some(self.clone())),
591 AllOr::None => Ok(None),
592 AllOr::Some(mask_indices) => {
593 let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
594 match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
595 filter_patches_with_mask(
596 flat_indices.as_slice::<I>(),
597 self.offset(),
598 self.values(),
599 mask_indices,
600 )
601 })
602 }
603 }
604 }
605
606 pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
611 if mask.len() != self.array_len {
612 vortex_bail!(
613 "Filter mask length {} does not match array length {}",
614 mask.len(),
615 self.array_len
616 );
617 }
618
619 let filter_mask = match mask.bit_buffer() {
620 AllOr::All => return Ok(None),
621 AllOr::None => return Ok(Some(self.clone())),
622 AllOr::Some(masked) => {
623 let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
624 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
625 let patch_indices = patch_indices.as_slice::<P>();
626 Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
627 #[allow(clippy::cast_possible_truncation)]
628 let idx = (patch_indices[i] as usize) - self.offset;
629 !masked.value(idx)
630 }))
631 })
632 }
633 };
634
635 if filter_mask.all_false() {
636 return Ok(None);
637 }
638
639 let filtered_indices = self.indices.filter(filter_mask.clone())?;
641 let filtered_values = self.values.filter(filter_mask)?;
642
643 Ok(Some(Self {
644 array_len: self.array_len,
645 offset: self.offset,
646 indices: filtered_indices,
647 values: filtered_values,
648 chunk_offsets: None,
650 offset_within_chunk: self.offset_within_chunk,
651 }))
652 }
653
654 pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
656 let slice_start_idx = self.search_index(range.start)?.to_index();
657 let slice_end_idx = self.search_index(range.end)?.to_index();
658
659 if slice_start_idx == slice_end_idx {
660 return Ok(None);
661 }
662
663 let values = self.values().slice(slice_start_idx..slice_end_idx)?;
664 let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
665
666 let new_chunk_offsets = self
667 .chunk_offsets
668 .as_ref()
669 .map(|chunk_offsets| -> VortexResult<ArrayRef> {
670 let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
671 let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
672 let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
673 chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
674 })
675 .transpose()?;
676
677 let offset_within_chunk = new_chunk_offsets
678 .as_ref()
679 .map(|new_chunk_offsets| -> VortexResult<usize> {
680 let new_chunk_base = new_chunk_offsets
681 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
682 .as_primitive()
683 .as_::<usize>()
684 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
685 let parent_chunk_base = self.chunk_offset_at(0)?;
686 let parent_within = self.offset_within_chunk.unwrap_or(0);
687 Ok(parent_chunk_base + parent_within + slice_start_idx - new_chunk_base)
688 })
689 .transpose()?;
690
691 Ok(Some(Self {
692 array_len: range.len(),
693 offset: range.start + self.offset(),
694 indices,
695 values,
696 chunk_offsets: new_chunk_offsets,
697 offset_within_chunk,
698 }))
699 }
700
701 const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
703
704 fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
705 (self.num_patches() as f64 / take_indices.len() as f64)
706 < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
707 }
708
709 pub fn take_with_nulls(
713 &self,
714 take_indices: &ArrayRef,
715 ctx: &mut ExecutionCtx,
716 ) -> VortexResult<Option<Self>> {
717 if take_indices.is_empty() {
718 return Ok(None);
719 }
720
721 let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
722 if self.is_map_faster_than_search(&take_indices) {
723 self.take_map(take_indices, true, ctx)
724 } else {
725 self.take_search(take_indices, true, ctx)
726 }
727 }
728
729 pub fn take(
733 &self,
734 take_indices: &ArrayRef,
735 ctx: &mut ExecutionCtx,
736 ) -> VortexResult<Option<Self>> {
737 if take_indices.is_empty() {
738 return Ok(None);
739 }
740
741 let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
742 if self.is_map_faster_than_search(&take_indices) {
743 self.take_map(take_indices, false, ctx)
744 } else {
745 self.take_search(take_indices, false, ctx)
746 }
747 }
748
749 #[expect(
750 clippy::cognitive_complexity,
751 reason = "complexity is from nested match_each_* macros"
752 )]
753 pub fn take_search(
754 &self,
755 take_indices: PrimitiveArray,
756 include_nulls: bool,
757 ctx: &mut ExecutionCtx,
758 ) -> VortexResult<Option<Self>> {
759 let take_indices_validity = take_indices.validity()?;
760 let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
761 let chunk_offsets = self
762 .chunk_offsets()
763 .as_ref()
764 .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
765 .transpose()?;
766
767 let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
768 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
769 let patch_indices_slice = patch_indices.as_slice::<PatchT>();
770 match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
771 let take_slice = take_indices.as_slice::<TakeT>();
772
773 if let Some(chunk_offsets) = chunk_offsets {
774 match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
775 let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
776 take_indices_with_search_fn(
777 patch_indices_slice,
778 take_slice,
779 take_indices
780 .as_ref()
781 .validity()?
782 .execute_mask(take_indices.as_ref().len(), ctx)?,
783 include_nulls,
784 |take_idx| {
785 self.search_index_chunked_batch(
786 patch_indices_slice,
787 chunk_offsets,
788 take_idx,
789 )
790 },
791 )?
792 })
793 } else {
794 take_indices_with_search_fn(
795 patch_indices_slice,
796 take_slice,
797 take_indices
798 .as_ref()
799 .validity()?
800 .execute_mask(take_indices.as_ref().len(), ctx)?,
801 include_nulls,
802 |take_idx| {
803 let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
804 return Ok(SearchResult::NotFound(patch_indices_slice.len()));
806 };
807
808 patch_indices_slice
809 .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
810 },
811 )?
812 }
813 })
814 });
815
816 if new_indices.is_empty() {
817 return Ok(None);
818 }
819
820 let new_indices = new_indices.into_array();
821 let new_array_len = take_indices.len();
822 let values_validity = take_indices_validity.take(&new_indices)?;
823
824 Ok(Some(Self {
825 array_len: new_array_len,
826 offset: 0,
827 indices: new_indices,
828 values: self
829 .values()
830 .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
831 chunk_offsets: None,
832 offset_within_chunk: Some(0), }))
834 }
835
836 pub fn take_map(
837 &self,
838 take_indices: PrimitiveArray,
839 include_nulls: bool,
840 ctx: &mut ExecutionCtx,
841 ) -> VortexResult<Option<Self>> {
842 let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
843 let new_length = take_indices.len();
844
845 let min_index = self.min_index()?;
846 let max_index = self.max_index()?;
847
848 let Some((new_sparse_indices, value_indices)) =
849 match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
850 match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
851 let take_validity = take_indices
852 .validity()?
853 .execute_mask(take_indices.len(), ctx)?;
854 let take_nullability = take_indices.validity()?.nullability();
855 let take_slice = take_indices.as_slice::<TakeIndices>();
856 take_map::<_, TakeIndices>(
857 indices.as_slice::<Indices>(),
858 take_slice,
859 take_validity,
860 take_nullability,
861 self.offset(),
862 min_index,
863 max_index,
864 include_nulls,
865 )?
866 })
867 })
868 else {
869 return Ok(None);
870 };
871
872 let taken_values = self.values().take(value_indices)?;
873
874 Ok(Some(Patches {
875 array_len: new_length,
876 offset: 0,
877 indices: new_sparse_indices,
878 values: taken_values,
879 chunk_offsets: None,
881 offset_within_chunk: self.offset_within_chunk,
882 }))
883 }
884
885 pub fn map_values<F>(self, f: F) -> VortexResult<Self>
886 where
887 F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
888 {
889 let values = f(self.values)?;
890 if self.indices.len() != values.len() {
891 vortex_bail!(
892 "map_values must preserve length: expected {} received {}",
893 self.indices.len(),
894 values.len()
895 )
896 }
897
898 Ok(Self {
899 array_len: self.array_len,
900 offset: self.offset,
901 indices: self.indices,
902 values,
903 chunk_offsets: self.chunk_offsets,
904 offset_within_chunk: self.offset_within_chunk,
905 })
906 }
907}
908
909#[expect(clippy::too_many_arguments)] fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
911 indices: &[I],
912 take_indices: &[T],
913 take_validity: Mask,
914 take_nullability: Nullability,
915 indices_offset: usize,
916 min_index: usize,
917 max_index: usize,
918 include_nulls: bool,
919) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
920where
921 usize: TryFrom<T>,
922 VortexError: From<<I as TryFrom<usize>>::Error>,
923{
924 let offset_i = I::try_from(indices_offset)?;
925
926 let sparse_index_to_value_index: HashMap<I, usize> = indices
927 .iter()
928 .copied()
929 .map(|idx| idx - offset_i)
930 .enumerate()
931 .map(|(value_index, sparse_index)| (sparse_index, value_index))
932 .collect();
933
934 let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
935 let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
936
937 for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
938 let ti = usize::try_from(take_idx)
939 .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
940
941 let is_null = match take_validity.bit_buffer() {
943 AllOr::All => false,
944 AllOr::None => true,
945 AllOr::Some(buf) => !buf.value(idx_in_take),
946 };
947 if is_null {
948 if include_nulls {
949 new_sparse_indices.push(idx_in_take as u64);
950 value_indices.push(0);
951 }
952 } else if ti >= min_index && ti <= max_index {
953 let ti_as_i = I::try_from(ti)
954 .map_err(|_| vortex_err!("take index does not fit in index type"))?;
955 if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
956 new_sparse_indices.push(idx_in_take as u64);
957 value_indices.push(value_index as u64);
958 }
959 }
960 }
961
962 if new_sparse_indices.is_empty() {
963 return Ok(None);
964 }
965
966 let new_sparse_indices = new_sparse_indices.into_array();
967 let values_validity =
968 Validity::from_mask(take_validity, take_nullability).take(&new_sparse_indices)?;
969 Ok(Some((
970 new_sparse_indices,
971 PrimitiveArray::new(value_indices, values_validity).into_array(),
972 )))
973}
974
975fn filter_patches_with_mask<T: IntegerPType>(
981 patch_indices: &[T],
982 offset: usize,
983 patch_values: &ArrayRef,
984 mask_indices: &[usize],
985) -> VortexResult<Option<Patches>> {
986 let true_count = mask_indices.len();
987 let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
988 let mut new_mask_indices = Vec::with_capacity(true_count);
989
990 const STRIDE: usize = 4;
994
995 let mut mask_idx = 0usize;
996 let mut true_idx = 0usize;
997
998 while mask_idx < patch_indices.len() && true_idx < true_count {
999 if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1006 let left_min = patch_indices[mask_idx]
1008 .to_usize()
1009 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1010 - offset;
1011 let left_max = patch_indices[mask_idx + STRIDE]
1012 .to_usize()
1013 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1014 - offset;
1015 let right_min = mask_indices[true_idx];
1016 let right_max = mask_indices[true_idx + STRIDE];
1017
1018 if left_min > right_max {
1019 true_idx += STRIDE;
1021 continue;
1022 } else if right_min > left_max {
1023 mask_idx += STRIDE;
1024 continue;
1025 } else {
1026 }
1028 }
1029
1030 let left = patch_indices[mask_idx]
1033 .to_usize()
1034 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1035 - offset;
1036 let right = mask_indices[true_idx];
1037
1038 match left.cmp(&right) {
1039 Ordering::Less => {
1040 mask_idx += 1;
1041 }
1042 Ordering::Greater => {
1043 true_idx += 1;
1044 }
1045 Ordering::Equal => {
1046 new_mask_indices.push(mask_idx);
1048 new_patch_indices.push(true_idx as u64);
1049
1050 mask_idx += 1;
1051 true_idx += 1;
1052 }
1053 }
1054 }
1055
1056 if new_mask_indices.is_empty() {
1057 return Ok(None);
1058 }
1059
1060 let new_patch_indices = new_patch_indices.into_array();
1061 let new_patch_values =
1062 patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1063
1064 Ok(Some(Patches::new(
1065 true_count,
1066 0,
1067 new_patch_indices,
1068 new_patch_values,
1069 None,
1071 )?))
1072}
1073
1074fn take_indices_with_search_fn<
1075 I: UnsignedPType,
1076 T: IntegerPType,
1077 F: Fn(I) -> VortexResult<SearchResult>,
1078>(
1079 indices: &[I],
1080 take_indices: &[T],
1081 take_validity: Mask,
1082 include_nulls: bool,
1083 search_fn: F,
1084) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1085 let mut values_indices = BufferMut::with_capacity(take_indices.len());
1086 let mut new_indices = BufferMut::with_capacity(take_indices.len());
1087
1088 for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1089 if !take_validity.value(new_patch_idx) {
1090 if include_nulls {
1091 values_indices.push(0u64);
1093 new_indices.push(new_patch_idx as u64);
1094 }
1095 continue;
1096 } else {
1097 let search_result = match I::from(take_idx) {
1098 Some(idx) => search_fn(idx)?,
1099 None => SearchResult::NotFound(indices.len()),
1100 };
1101
1102 if let Some(patch_idx) = search_result.to_found() {
1103 values_indices.push(patch_idx as u64);
1104 new_indices.push(new_patch_idx as u64);
1105 }
1106 }
1107 }
1108
1109 Ok((values_indices, new_indices))
1110}
1111
1112#[cfg(test)]
1113mod test {
1114 use vortex_buffer::BufferMut;
1115 use vortex_buffer::buffer;
1116 use vortex_mask::Mask;
1117
1118 use crate::IntoArray;
1119 use crate::LEGACY_SESSION;
1120 #[expect(deprecated)]
1121 use crate::ToCanonical as _;
1122 use crate::VortexSessionExecute;
1123 use crate::assert_arrays_eq;
1124 use crate::patches::Patches;
1125 use crate::patches::PrimitiveArray;
1126 use crate::search_sorted::SearchResult;
1127 use crate::validity::Validity;
1128
1129 #[test]
1130 fn test_filter() {
1131 let patches = Patches::new(
1132 100,
1133 0,
1134 buffer![10u32, 11, 20].into_array(),
1135 buffer![100, 110, 200].into_array(),
1136 None,
1137 )
1138 .unwrap();
1139
1140 let filtered = patches
1141 .filter(
1142 &Mask::from_indices(100, vec![10, 20, 30]),
1143 &mut LEGACY_SESSION.create_execution_ctx(),
1144 )
1145 .unwrap()
1146 .unwrap();
1147
1148 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1149 assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1150 }
1151
1152 #[test]
1153 fn take_with_nulls() {
1154 let patches = Patches::new(
1155 20,
1156 0,
1157 buffer![2u64, 9, 15].into_array(),
1158 PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1159 None,
1160 )
1161 .unwrap();
1162
1163 let taken = patches
1164 .take(
1165 &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1166 .into_array(),
1167 &mut LEGACY_SESSION.create_execution_ctx(),
1168 )
1169 .unwrap()
1170 .unwrap();
1171 #[expect(deprecated)]
1172 let primitive_values = taken.values().to_primitive();
1173 #[expect(deprecated)]
1174 let primitive_indices = taken.indices().to_primitive();
1175 assert_eq!(taken.array_len(), 2);
1176 assert_arrays_eq!(
1177 primitive_values,
1178 PrimitiveArray::from_option_iter([Some(44i32)])
1179 );
1180 assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1181 assert_eq!(
1182 primitive_values
1183 .as_ref()
1184 .validity()
1185 .unwrap()
1186 .execute_mask(
1187 primitive_values.as_ref().len(),
1188 &mut LEGACY_SESSION.create_execution_ctx()
1189 )
1190 .unwrap(),
1191 Mask::from_iter(vec![true])
1192 );
1193 }
1194
1195 #[test]
1196 fn take_search_with_nulls_chunked() {
1197 let patches = Patches::new(
1198 20,
1199 0,
1200 buffer![2u64, 9, 15].into_array(),
1201 buffer![33_i32, 44, 55].into_array(),
1202 Some(buffer![0u64].into_array()),
1203 )
1204 .unwrap();
1205
1206 let taken = patches
1207 .take_search(
1208 PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1209 true,
1210 &mut LEGACY_SESSION.create_execution_ctx(),
1211 )
1212 .unwrap()
1213 .unwrap();
1214
1215 #[expect(deprecated)]
1216 let primitive_values = taken.values().to_primitive();
1217 assert_eq!(taken.array_len(), 2);
1218 assert_arrays_eq!(
1219 primitive_values,
1220 PrimitiveArray::from_option_iter([Some(44i32), None])
1221 );
1222 assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1223
1224 assert_eq!(
1225 primitive_values
1226 .as_ref()
1227 .validity()
1228 .unwrap()
1229 .execute_mask(
1230 primitive_values.as_ref().len(),
1231 &mut LEGACY_SESSION.create_execution_ctx()
1232 )
1233 .unwrap(),
1234 Mask::from_iter([true, false])
1235 );
1236 }
1237
1238 #[test]
1239 fn take_search_chunked_multiple_chunks() {
1240 let patches = Patches::new(
1241 2048,
1242 0,
1243 buffer![100u64, 500, 1200, 1800].into_array(),
1244 buffer![10_i32, 20, 30, 40].into_array(),
1245 Some(buffer![0u64, 2].into_array()),
1246 )
1247 .unwrap();
1248
1249 let taken = patches
1250 .take_search(
1251 PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1252 true,
1253 &mut LEGACY_SESSION.create_execution_ctx(),
1254 )
1255 .unwrap()
1256 .unwrap();
1257
1258 assert_eq!(taken.array_len(), 3);
1259 assert_arrays_eq!(
1260 taken.values(),
1261 PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1262 );
1263 }
1264
1265 #[test]
1266 fn take_search_chunked_indices_with_no_patches() {
1267 let patches = Patches::new(
1268 20,
1269 0,
1270 buffer![2u64, 9, 15].into_array(),
1271 buffer![33_i32, 44, 55].into_array(),
1272 Some(buffer![0u64].into_array()),
1273 )
1274 .unwrap();
1275
1276 let taken = patches
1277 .take_search(
1278 PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1279 true,
1280 &mut LEGACY_SESSION.create_execution_ctx(),
1281 )
1282 .unwrap();
1283
1284 assert!(taken.is_none());
1285 }
1286
1287 #[test]
1288 fn take_search_chunked_interleaved() {
1289 let patches = Patches::new(
1290 30,
1291 0,
1292 buffer![5u64, 10, 20, 25].into_array(),
1293 buffer![100_i32, 200, 300, 400].into_array(),
1294 Some(buffer![0u64].into_array()),
1295 )
1296 .unwrap();
1297
1298 let taken = patches
1299 .take_search(
1300 PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1301 true,
1302 &mut LEGACY_SESSION.create_execution_ctx(),
1303 )
1304 .unwrap()
1305 .unwrap();
1306
1307 assert_eq!(taken.array_len(), 4);
1308 assert_arrays_eq!(
1309 taken.values(),
1310 PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1311 );
1312 }
1313
1314 #[test]
1315 fn test_take_search_multiple_chunk_offsets() {
1316 let patches = Patches::new(
1317 1500,
1318 0,
1319 BufferMut::from_iter(0..1500u64).into_array(),
1320 BufferMut::from_iter(0..1500i32).into_array(),
1321 Some(buffer![0u64, 1024u64].into_array()),
1322 )
1323 .unwrap();
1324
1325 let taken = patches
1326 .take_search(
1327 PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1328 false,
1329 &mut LEGACY_SESSION.create_execution_ctx(),
1330 )
1331 .unwrap()
1332 .unwrap();
1333
1334 assert_eq!(taken.array_len(), 1500);
1335 }
1336
1337 #[test]
1338 fn test_slice() {
1339 let values = buffer![15_u32, 135, 13531, 42].into_array();
1340 let indices = buffer![10_u64, 11, 50, 100].into_array();
1341
1342 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1343
1344 let sliced = patches.slice(15..100).unwrap().unwrap();
1345 assert_eq!(sliced.array_len(), 100 - 15);
1346 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1347 }
1348
1349 #[test]
1350 fn doubly_sliced() {
1351 let values = buffer![15_u32, 135, 13531, 42].into_array();
1352 let indices = buffer![10_u64, 11, 50, 100].into_array();
1353
1354 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1355
1356 let sliced = patches.slice(15..100).unwrap().unwrap();
1357 assert_eq!(sliced.array_len(), 100 - 15);
1358 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1359
1360 let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1361 assert_arrays_eq!(
1362 doubly_sliced.values(),
1363 PrimitiveArray::from_iter([13531u32])
1364 );
1365 }
1366
1367 #[test]
1368 fn test_mask_all_true() {
1369 let patches = Patches::new(
1370 10,
1371 0,
1372 buffer![2u64, 5, 8].into_array(),
1373 buffer![100i32, 200, 300].into_array(),
1374 None,
1375 )
1376 .unwrap();
1377
1378 let mask = Mask::new_true(10);
1379 let masked = patches
1380 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1381 .unwrap();
1382 assert!(masked.is_none());
1383 }
1384
1385 #[test]
1386 fn test_mask_all_false() {
1387 let patches = Patches::new(
1388 10,
1389 0,
1390 buffer![2u64, 5, 8].into_array(),
1391 buffer![100i32, 200, 300].into_array(),
1392 None,
1393 )
1394 .unwrap();
1395
1396 let mask = Mask::new_false(10);
1397 let masked = patches
1398 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1399 .unwrap()
1400 .unwrap();
1401
1402 assert_arrays_eq!(
1404 masked.values(),
1405 PrimitiveArray::from_iter([100i32, 200, 300])
1406 );
1407 assert!(
1408 masked
1409 .values()
1410 .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1411 .unwrap()
1412 );
1413 assert!(
1414 masked
1415 .values()
1416 .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1417 .unwrap()
1418 );
1419 assert!(
1420 masked
1421 .values()
1422 .is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())
1423 .unwrap()
1424 );
1425
1426 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1428 }
1429
1430 #[test]
1431 fn test_mask_partial() {
1432 let patches = Patches::new(
1433 10,
1434 0,
1435 buffer![2u64, 5, 8].into_array(),
1436 buffer![100i32, 200, 300].into_array(),
1437 None,
1438 )
1439 .unwrap();
1440
1441 let mask = Mask::from_iter([
1443 false, false, true, false, false, false, false, false, true, false,
1444 ]);
1445 let masked = patches
1446 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1447 .unwrap()
1448 .unwrap();
1449
1450 assert_eq!(masked.values().len(), 1);
1452 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1453
1454 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1456 }
1457
1458 #[test]
1459 fn test_mask_with_offset() {
1460 let patches = Patches::new(
1461 10,
1462 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1465 None,
1466 )
1467 .unwrap();
1468
1469 let mask = Mask::from_iter([
1471 false, false, true, false, false, false, false, false, false, false,
1472 ]);
1473
1474 let masked = patches
1475 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1476 .unwrap()
1477 .unwrap();
1478 assert_eq!(masked.array_len(), 10);
1479 assert_eq!(masked.offset(), 5);
1480 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1481 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1482 }
1483
1484 #[test]
1485 fn test_mask_nullable_values() {
1486 let patches = Patches::new(
1487 10,
1488 0,
1489 buffer![2u64, 5, 8].into_array(),
1490 PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1491 None,
1492 )
1493 .unwrap();
1494
1495 let mask = Mask::from_iter([
1497 false, false, true, false, false, false, false, false, false, false,
1498 ]);
1499 let masked = patches
1500 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1501 .unwrap()
1502 .unwrap();
1503
1504 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1506
1507 #[expect(deprecated)]
1509 let masked_values = masked.values().to_primitive();
1510 assert_eq!(masked_values.len(), 2);
1511 assert!(
1512 !masked_values
1513 .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1514 .unwrap()
1515 ); assert!(
1517 masked_values
1518 .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1519 .unwrap()
1520 ); assert_eq!(
1522 i32::try_from(
1523 &masked_values
1524 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1525 .unwrap()
1526 )
1527 .unwrap(),
1528 300i32
1529 );
1530 }
1531
1532 #[test]
1533 fn test_filter_keep_all() {
1534 let patches = Patches::new(
1535 10,
1536 0,
1537 buffer![2u64, 5, 8].into_array(),
1538 buffer![100i32, 200, 300].into_array(),
1539 None,
1540 )
1541 .unwrap();
1542
1543 let mask = Mask::from_indices(10, (0..10).collect());
1545 let filtered = patches
1546 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1547 .unwrap()
1548 .unwrap();
1549
1550 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1551 assert_arrays_eq!(
1552 filtered.values(),
1553 PrimitiveArray::from_iter([100i32, 200, 300])
1554 );
1555 }
1556
1557 #[test]
1558 fn test_filter_none() {
1559 let patches = Patches::new(
1560 10,
1561 0,
1562 buffer![2u64, 5, 8].into_array(),
1563 buffer![100i32, 200, 300].into_array(),
1564 None,
1565 )
1566 .unwrap();
1567
1568 let mask = Mask::from_indices(10, vec![]);
1570 let filtered = patches
1571 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1572 .unwrap();
1573 assert!(filtered.is_none());
1574 }
1575
1576 #[test]
1577 fn test_filter_with_indices() {
1578 let patches = Patches::new(
1579 10,
1580 0,
1581 buffer![2u64, 5, 8].into_array(),
1582 buffer![100i32, 200, 300].into_array(),
1583 None,
1584 )
1585 .unwrap();
1586
1587 let mask = Mask::from_indices(10, vec![2, 5, 9]);
1589 let filtered = patches
1590 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1591 .unwrap()
1592 .unwrap();
1593
1594 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1596 }
1597
1598 #[test]
1599 fn test_slice_full_range() {
1600 let patches = Patches::new(
1601 10,
1602 0,
1603 buffer![2u64, 5, 8].into_array(),
1604 buffer![100i32, 200, 300].into_array(),
1605 None,
1606 )
1607 .unwrap();
1608
1609 let sliced = patches.slice(0..10).unwrap().unwrap();
1610
1611 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1612 assert_arrays_eq!(
1613 sliced.values(),
1614 PrimitiveArray::from_iter([100i32, 200, 300])
1615 );
1616 }
1617
1618 #[test]
1619 fn test_slice_partial() {
1620 let patches = Patches::new(
1621 10,
1622 0,
1623 buffer![2u64, 5, 8].into_array(),
1624 buffer![100i32, 200, 300].into_array(),
1625 None,
1626 )
1627 .unwrap();
1628
1629 let sliced = patches.slice(3..8).unwrap().unwrap();
1631
1632 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1634 assert_eq!(sliced.array_len(), 5); assert_eq!(sliced.offset(), 3); }
1637
1638 #[test]
1639 fn test_slice_no_patches() {
1640 let patches = Patches::new(
1641 10,
1642 0,
1643 buffer![2u64, 5, 8].into_array(),
1644 buffer![100i32, 200, 300].into_array(),
1645 None,
1646 )
1647 .unwrap();
1648
1649 let sliced = patches.slice(6..7).unwrap();
1651 assert!(sliced.is_none());
1652 }
1653
1654 #[test]
1655 fn test_slice_with_offset() {
1656 let patches = Patches::new(
1657 10,
1658 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1661 None,
1662 )
1663 .unwrap();
1664
1665 let sliced = patches.slice(3..8).unwrap().unwrap();
1667
1668 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1670 assert_eq!(sliced.offset(), 8); }
1672
1673 #[test]
1674 fn test_patch_values() {
1675 let patches = Patches::new(
1676 10,
1677 0,
1678 buffer![2u64, 5, 8].into_array(),
1679 buffer![100i32, 200, 300].into_array(),
1680 None,
1681 )
1682 .unwrap();
1683
1684 #[expect(deprecated)]
1685 let values = patches.values().to_primitive();
1686 assert_eq!(
1687 i32::try_from(
1688 &values
1689 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
1690 .unwrap()
1691 )
1692 .unwrap(),
1693 100i32
1694 );
1695 assert_eq!(
1696 i32::try_from(
1697 &values
1698 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1699 .unwrap()
1700 )
1701 .unwrap(),
1702 200i32
1703 );
1704 assert_eq!(
1705 i32::try_from(
1706 &values
1707 .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
1708 .unwrap()
1709 )
1710 .unwrap(),
1711 300i32
1712 );
1713 }
1714
1715 #[test]
1716 fn test_indices_range() {
1717 let patches = Patches::new(
1718 10,
1719 0,
1720 buffer![2u64, 5, 8].into_array(),
1721 buffer![100i32, 200, 300].into_array(),
1722 None,
1723 )
1724 .unwrap();
1725
1726 assert_eq!(patches.min_index().unwrap(), 2);
1727 assert_eq!(patches.max_index().unwrap(), 8);
1728 }
1729
1730 #[test]
1731 fn test_search_index() {
1732 let patches = Patches::new(
1733 10,
1734 0,
1735 buffer![2u64, 5, 8].into_array(),
1736 buffer![100i32, 200, 300].into_array(),
1737 None,
1738 )
1739 .unwrap();
1740
1741 assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1743 assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1744 assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1745
1746 assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1748 assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1749 assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1750 assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1751 }
1752
1753 #[test]
1754 fn test_mask_boundary_patches() {
1755 let patches = Patches::new(
1757 10,
1758 0,
1759 buffer![0u64, 9].into_array(),
1760 buffer![100i32, 200].into_array(),
1761 None,
1762 )
1763 .unwrap();
1764
1765 let mask = Mask::from_iter([
1766 true, false, false, false, false, false, false, false, false, false,
1767 ]);
1768 let masked = patches
1769 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1770 .unwrap();
1771 assert!(masked.is_some());
1772 let masked = masked.unwrap();
1773 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1774 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1775 }
1776
1777 #[test]
1778 fn test_mask_all_patches_removed() {
1779 let patches = Patches::new(
1781 10,
1782 0,
1783 buffer![2u64, 5, 8].into_array(),
1784 buffer![100i32, 200, 300].into_array(),
1785 None,
1786 )
1787 .unwrap();
1788
1789 let mask = Mask::from_iter([
1791 false, false, true, false, false, true, false, false, true, false,
1792 ]);
1793 let masked = patches
1794 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1795 .unwrap();
1796 assert!(masked.is_none());
1797 }
1798
1799 #[test]
1800 fn test_mask_no_patches_removed() {
1801 let patches = Patches::new(
1803 10,
1804 0,
1805 buffer![2u64, 5, 8].into_array(),
1806 buffer![100i32, 200, 300].into_array(),
1807 None,
1808 )
1809 .unwrap();
1810
1811 let mask = Mask::from_iter([
1813 true, false, false, true, false, false, true, false, false, true,
1814 ]);
1815 let masked = patches
1816 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1817 .unwrap()
1818 .unwrap();
1819
1820 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1821 assert_arrays_eq!(
1822 masked.values(),
1823 PrimitiveArray::from_iter([100i32, 200, 300])
1824 );
1825 }
1826
1827 #[test]
1828 fn test_mask_single_patch() {
1829 let patches = Patches::new(
1831 5,
1832 0,
1833 buffer![2u64].into_array(),
1834 buffer![42i32].into_array(),
1835 None,
1836 )
1837 .unwrap();
1838
1839 let mask = Mask::from_iter([false, false, true, false, false]);
1841 let masked = patches
1842 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1843 .unwrap();
1844 assert!(masked.is_none());
1845
1846 let mask = Mask::from_iter([true, false, false, true, false]);
1848 let masked = patches
1849 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1850 .unwrap()
1851 .unwrap();
1852 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1853 }
1854
1855 #[test]
1856 fn test_mask_contiguous_patches() {
1857 let patches = Patches::new(
1859 10,
1860 0,
1861 buffer![3u64, 4, 5, 6].into_array(),
1862 buffer![100i32, 200, 300, 400].into_array(),
1863 None,
1864 )
1865 .unwrap();
1866
1867 let mask = Mask::from_iter([
1869 false, false, false, false, true, true, false, false, false, false,
1870 ]);
1871 let masked = patches
1872 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1873 .unwrap()
1874 .unwrap();
1875
1876 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1877 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1878 }
1879
1880 #[test]
1881 fn test_mask_with_large_offset() {
1882 let patches = Patches::new(
1884 20,
1885 15,
1886 buffer![16u64, 17, 19].into_array(), buffer![100i32, 200, 300].into_array(),
1888 None,
1889 )
1890 .unwrap();
1891
1892 let mask = Mask::from_iter([
1894 false, false, true, false, false, false, false, false, false, false, false, false,
1895 false, false, false, false, false, false, false, false,
1896 ]);
1897 let masked = patches
1898 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1899 .unwrap()
1900 .unwrap();
1901
1902 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1903 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1904 }
1905
1906 #[test]
1907 #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1908 fn test_mask_wrong_length() {
1909 let patches = Patches::new(
1910 10,
1911 0,
1912 buffer![2u64, 5, 8].into_array(),
1913 buffer![100i32, 200, 300].into_array(),
1914 None,
1915 )
1916 .unwrap();
1917
1918 let mask = Mask::from_iter([false, false, true, false, false]);
1920 patches
1921 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1922 .unwrap();
1923 }
1924
1925 #[test]
1926 fn test_chunk_offsets_search() {
1927 let indices = buffer![100u64, 200, 3000, 3100].into_array();
1928 let values = buffer![10i32, 20, 30, 40].into_array();
1929 let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
1930 let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
1931
1932 assert!(patches.chunk_offsets.is_some());
1933
1934 assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
1936 assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
1937
1938 assert_eq!(
1940 patches.search_index(1500).unwrap(),
1941 SearchResult::NotFound(2)
1942 );
1943 assert_eq!(
1944 patches.search_index(2000).unwrap(),
1945 SearchResult::NotFound(2)
1946 );
1947
1948 assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
1950 assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
1951
1952 assert_eq!(
1953 patches.search_index(1024).unwrap(),
1954 SearchResult::NotFound(2)
1955 );
1956 }
1957
1958 #[test]
1959 fn test_chunk_offsets_with_slice() {
1960 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
1961 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
1962 let chunk_offsets = buffer![0u64, 2, 6].into_array();
1963 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
1964
1965 let sliced = patches.slice(1000..2200).unwrap().unwrap();
1966
1967 assert!(sliced.chunk_offsets.is_some());
1968 assert_eq!(sliced.offset(), 1000);
1969
1970 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
1971 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
1972 assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
1973
1974 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
1975 }
1976
1977 #[test]
1978 fn test_chunk_offsets_with_slice_after_first_chunk() {
1979 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
1980 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
1981 let chunk_offsets = buffer![0u64, 2, 6].into_array();
1982 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
1983
1984 let sliced = patches.slice(1300..2200).unwrap().unwrap();
1985
1986 assert!(sliced.chunk_offsets.is_some());
1987 assert_eq!(sliced.offset(), 1300);
1988
1989 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
1990 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
1991 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
1992 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
1993 assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
1994 }
1995
1996 #[test]
1997 fn test_chunk_offsets_slice_empty_result() {
1998 let indices = buffer![100u64, 200, 3000, 3100].into_array();
1999 let values = buffer![10i32, 20, 30, 40].into_array();
2000 let chunk_offsets = buffer![0u64, 2].into_array();
2001 let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
2002
2003 let sliced = patches.slice(1000..2000).unwrap();
2004 assert!(sliced.is_none());
2005 }
2006
2007 #[test]
2008 fn test_chunk_offsets_slice_single_patch() {
2009 let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2010 let values = buffer![10i32, 20, 30, 40].into_array();
2011 let chunk_offsets = buffer![0u64, 1, 3].into_array();
2012 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2013
2014 let sliced = patches.slice(1100..1250).unwrap().unwrap();
2015
2016 assert_eq!(sliced.num_patches(), 1);
2017 assert_eq!(sliced.offset(), 1100);
2018 assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2020 assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2021 }
2022
2023 #[test]
2024 fn test_chunk_offsets_slice_across_chunks() {
2025 let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2026 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2027 let chunk_offsets = buffer![0u64, 2, 4].into_array();
2028 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2029
2030 let sliced = patches.slice(150..2150).unwrap().unwrap();
2031
2032 assert_eq!(sliced.num_patches(), 4);
2033 assert_eq!(sliced.offset(), 150);
2034
2035 assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); }
2040
2041 #[test]
2042 fn test_chunk_offsets_boundary_searches() {
2043 let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2044 let values = buffer![10i32, 20, 30, 40, 50].into_array();
2045 let chunk_offsets = buffer![0u64, 1, 4].into_array();
2046 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2047
2048 assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2049 assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2050 assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2051 assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2052 assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2053
2054 assert_eq!(
2055 patches.search_index(1022).unwrap(),
2056 SearchResult::NotFound(0)
2057 );
2058 assert_eq!(
2059 patches.search_index(2046).unwrap(),
2060 SearchResult::NotFound(3)
2061 );
2062 }
2063
2064 #[test]
2065 fn test_chunk_offsets_slice_edge_cases() {
2066 let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2067 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2068 let chunk_offsets = buffer![0u64, 3, 5].into_array();
2069 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2070
2071 let sliced = patches.slice(0..10).unwrap().unwrap();
2073 assert_eq!(sliced.num_patches(), 2);
2074 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2075 assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2076
2077 let sliced = patches.slice(2040..3000).unwrap().unwrap();
2079 assert_eq!(sliced.num_patches(), 2); assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); }
2083
2084 #[test]
2085 fn test_chunk_offsets_slice_nested() {
2086 let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2087 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2088 let chunk_offsets = buffer![0u64].into_array();
2089 let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2090
2091 let sliced1 = patches.slice(150..550).unwrap().unwrap();
2092 assert_eq!(sliced1.num_patches(), 4); let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2095 assert_eq!(sliced2.num_patches(), 1); assert_eq!(sliced2.offset(), 250);
2097
2098 assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(
2100 sliced2.search_index(150).unwrap(),
2101 SearchResult::NotFound(1)
2102 );
2103 }
2104
2105 #[test]
2106 fn test_nested_slice_with_dropped_first_chunk() {
2107 let indices = buffer![0u64, 1024].into_array();
2109 let values = buffer![1i32, 2].into_array();
2110 let chunk_offsets = buffer![0u64, 1].into_array();
2111 let patches = Patches::new(2048, 0, indices, values, Some(chunk_offsets)).unwrap();
2112
2113 let dropped_first = patches.slice(1024..2048).unwrap().unwrap();
2115 let resliced = dropped_first.slice(0..1024).unwrap().unwrap();
2116 assert_eq!(resliced.num_patches(), 1);
2117 }
2118
2119 #[test]
2120 fn test_index_larger_than_length() {
2121 let chunk_offsets = buffer![0u64].into_array();
2122 let indices = buffer![1023u64].into_array();
2123 let values = buffer![42i32].into_array();
2124 let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2125 assert_eq!(
2126 patches.search_index(2048).unwrap(),
2127 SearchResult::NotFound(1)
2128 );
2129 }
2130}