1use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use num_traits::NumCast;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::BufferMut;
12use vortex_error::VortexError;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_err;
17use vortex_mask::AllOr;
18use vortex_mask::Mask;
19use vortex_utils::aliases::hash_map::HashMap;
20
21use crate::ArrayRef;
22use crate::ExecutionCtx;
23use crate::IntoArray;
24use crate::LEGACY_SESSION;
25use crate::ToCanonical;
26use crate::VortexSessionExecute;
27use crate::arrays::PrimitiveArray;
28use crate::builtins::ArrayBuiltins;
29use crate::dtype::DType;
30use crate::dtype::IntegerPType;
31use crate::dtype::NativePType;
32use crate::dtype::Nullability;
33use crate::dtype::Nullability::NonNullable;
34use crate::dtype::PType;
35use crate::dtype::UnsignedPType;
36use crate::match_each_integer_ptype;
37use crate::match_each_unsigned_integer_ptype;
38use crate::scalar::PValue;
39use crate::scalar::Scalar;
40use crate::search_sorted::SearchResult;
41use crate::search_sorted::SearchSorted;
42use crate::search_sorted::SearchSortedSide;
43use crate::validity::Validity;
44
45pub const PATCH_CHUNK_SIZE: usize = 1024;
48
49#[derive(Copy, Clone, prost::Message)]
50pub struct PatchesMetadata {
51 #[prost(uint64, tag = "1")]
52 len: u64,
53 #[prost(uint64, tag = "2")]
54 offset: u64,
55 #[prost(enumeration = "PType", tag = "3")]
56 indices_ptype: i32,
57 #[prost(uint64, optional, tag = "4")]
58 chunk_offsets_len: Option<u64>,
59 #[prost(enumeration = "PType", optional, tag = "5")]
60 chunk_offsets_ptype: Option<i32>,
61 #[prost(uint64, optional, tag = "6")]
62 offset_within_chunk: Option<u64>,
63}
64
65impl PatchesMetadata {
66 #[inline]
67 pub fn new(
68 len: usize,
69 offset: usize,
70 indices_ptype: PType,
71 chunk_offsets_len: Option<usize>,
72 chunk_offsets_ptype: Option<PType>,
73 offset_within_chunk: Option<usize>,
74 ) -> Self {
75 Self {
76 len: len as u64,
77 offset: offset as u64,
78 indices_ptype: indices_ptype as i32,
79 chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
80 chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
81 offset_within_chunk: offset_within_chunk.map(|len| len as u64),
82 }
83 }
84
85 #[inline]
86 pub fn len(&self) -> VortexResult<usize> {
87 usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
88 }
89
90 #[inline]
91 pub fn is_empty(&self) -> bool {
92 self.len == 0
93 }
94
95 #[inline]
96 pub fn offset(&self) -> VortexResult<usize> {
97 usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
98 }
99
100 #[inline]
101 pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
102 self.chunk_offsets_ptype
103 .map(|t| {
104 PType::try_from(t)
105 .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
106 .map(|ptype| DType::Primitive(ptype, NonNullable))
107 })
108 .transpose()
109 }
110
111 #[inline]
112 pub fn indices_dtype(&self) -> VortexResult<DType> {
113 let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
114 vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
115 })?;
116 vortex_ensure!(
117 ptype.is_unsigned_int(),
118 "Patch indices must be unsigned integers"
119 );
120 Ok(DType::Primitive(ptype, NonNullable))
121 }
122}
123
124#[derive(Debug, Clone)]
126pub struct Patches {
127 array_len: usize,
128 offset: usize,
129 indices: ArrayRef,
130 values: ArrayRef,
131 chunk_offsets: Option<ArrayRef>,
138 offset_within_chunk: Option<usize>,
148}
149
150impl Patches {
151 pub fn new(
152 array_len: usize,
153 offset: usize,
154 indices: ArrayRef,
155 values: ArrayRef,
156 chunk_offsets: Option<ArrayRef>,
157 ) -> VortexResult<Self> {
158 vortex_ensure!(
159 indices.len() == values.len(),
160 "Patch indices and values must have the same length"
161 );
162 vortex_ensure!(
163 indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
164 "Patch indices must be non-nullable unsigned integers, got {:?}",
165 indices.dtype()
166 );
167
168 vortex_ensure!(
169 indices.len() <= array_len,
170 "Patch indices must be shorter than the array length"
171 );
172 vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
173
174 if indices.is_host() && values.is_host() {
177 let max = usize::try_from(&indices.execute_scalar(
178 indices.len() - 1,
179 &mut LEGACY_SESSION.create_execution_ctx(),
180 )?)
181 .map_err(|_| vortex_err!("indices must be a number"))?;
182 vortex_ensure!(
183 max - offset < array_len,
184 "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
185 );
186
187 #[cfg(debug_assertions)]
188 {
189 use crate::VortexSessionExecute;
190 let mut ctx = LEGACY_SESSION.create_execution_ctx();
191 assert!(
192 crate::aggregate_fn::fns::is_sorted::is_sorted(&indices, &mut ctx)
193 .unwrap_or(false),
194 "Patch indices must be sorted"
195 );
196 }
197 }
198
199 Ok(Self {
200 array_len,
201 offset,
202 indices,
203 values,
204 chunk_offsets: chunk_offsets.clone(),
205 offset_within_chunk: chunk_offsets.map(|_| 0),
207 })
208 }
209
210 pub unsafe fn new_unchecked(
220 array_len: usize,
221 offset: usize,
222 indices: ArrayRef,
223 values: ArrayRef,
224 chunk_offsets: Option<ArrayRef>,
225 offset_within_chunk: Option<usize>,
226 ) -> Self {
227 Self {
228 array_len,
229 offset,
230 indices,
231 values,
232 chunk_offsets,
233 offset_within_chunk,
234 }
235 }
236
237 #[inline]
238 pub fn array_len(&self) -> usize {
239 self.array_len
240 }
241
242 #[inline]
243 pub fn num_patches(&self) -> usize {
244 self.indices.len()
245 }
246
247 #[inline]
248 pub fn dtype(&self) -> &DType {
249 self.values.dtype()
250 }
251
252 #[inline]
253 pub fn indices(&self) -> &ArrayRef {
254 &self.indices
255 }
256
257 #[inline]
258 pub fn into_indices(self) -> ArrayRef {
259 self.indices
260 }
261
262 #[inline]
263 pub fn indices_mut(&mut self) -> &mut ArrayRef {
264 &mut self.indices
265 }
266
267 #[inline]
268 pub fn values(&self) -> &ArrayRef {
269 &self.values
270 }
271
272 #[inline]
273 pub fn into_values(self) -> ArrayRef {
274 self.values
275 }
276
277 #[inline]
278 pub fn values_mut(&mut self) -> &mut ArrayRef {
279 &mut self.values
280 }
281
282 #[inline]
283 pub fn offset(&self) -> usize {
285 self.offset
286 }
287
288 #[inline]
289 pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
290 &self.chunk_offsets
291 }
292
293 #[inline]
294 pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
295 let Some(chunk_offsets) = &self.chunk_offsets else {
296 vortex_bail!("chunk_offsets must be set to retrieve offset at index")
297 };
298
299 chunk_offsets
300 .execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())?
301 .as_primitive()
302 .as_::<usize>()
303 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
304 }
305
306 #[inline]
315 pub fn offset_within_chunk(&self) -> Option<usize> {
316 self.offset_within_chunk
317 }
318
319 #[inline]
320 pub fn indices_ptype(&self) -> VortexResult<PType> {
321 PType::try_from(self.indices.dtype())
322 .map_err(|_| vortex_err!("indices dtype is not primitive"))
323 }
324
325 pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
326 if self.indices.len() > len {
327 vortex_bail!(
328 "Patch indices {} are longer than the array length {}",
329 self.indices.len(),
330 len
331 );
332 }
333 if self.values.dtype() != dtype {
334 vortex_bail!(
335 "Patch values dtype {} does not match array dtype {}",
336 self.values.dtype(),
337 dtype
338 );
339 }
340 let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
341 let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
342
343 Ok(PatchesMetadata::new(
344 self.indices.len(),
345 self.offset,
346 self.indices.dtype().as_ptype(),
347 chunk_offsets_len,
348 chunk_offsets_ptype,
349 self.offset_within_chunk,
350 ))
351 }
352
353 pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
354 unsafe {
356 Ok(Self::new_unchecked(
357 self.array_len,
358 self.offset,
359 self.indices,
360 self.values.cast(values_dtype.clone())?,
361 self.chunk_offsets,
362 self.offset_within_chunk,
363 ))
364 }
365 }
366
367 pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
369 self.search_index(index)?
370 .to_found()
371 .map(|patch_idx| {
372 self.values()
373 .execute_scalar(patch_idx, &mut LEGACY_SESSION.create_execution_ctx())
374 })
375 .transpose()
376 }
377
378 pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
396 if self.chunk_offsets.is_some() {
397 return self.search_index_chunked(index);
398 }
399
400 Self::search_index_binary_search(&self.indices, index + self.offset)
401 }
402
403 fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
409 if indices.is_canonical() {
410 let primitive = indices.to_primitive();
411 match_each_integer_ptype!(primitive.ptype(), |T| {
412 let Ok(needle) = T::try_from(needle) else {
413 return Ok(SearchResult::NotFound(primitive.len()));
418 };
419 return primitive
420 .as_slice::<T>()
421 .search_sorted(&needle, SearchSortedSide::Left);
422 });
423 }
424 indices
425 .as_primitive_typed()
426 .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
427 }
428
429 fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
439 let Some(chunk_offsets) = &self.chunk_offsets else {
440 vortex_bail!("chunk_offsets is required to be set")
441 };
442
443 let Some(offset_within_chunk) = self.offset_within_chunk else {
444 vortex_bail!("offset_within_chunk is required to be set")
445 };
446
447 if index >= self.array_len() {
448 return Ok(SearchResult::NotFound(self.indices().len()));
449 }
450
451 let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
452
453 let base_offset = self.chunk_offset_at(0)?;
455
456 let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
457 .saturating_sub(offset_within_chunk);
464
465 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
466 (self.chunk_offset_at(chunk_idx + 1)? - base_offset)
467 .saturating_sub(offset_within_chunk)
468 .min(self.indices.len())
469 } else {
470 self.indices.len()
471 };
472
473 let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
474 let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
475
476 Ok(match result {
477 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
478 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
479 })
480 }
481
482 fn search_index_chunked_batch<T, O>(
488 &self,
489 indices: &[T],
490 chunk_offsets: &[O],
491 index: T,
492 ) -> VortexResult<SearchResult>
493 where
494 T: UnsignedPType,
495 O: UnsignedPType,
496 usize: TryFrom<T>,
497 usize: TryFrom<O>,
498 {
499 let Some(offset_within_chunk) = self.offset_within_chunk else {
500 vortex_bail!("offset_within_chunk is required to be set")
501 };
502
503 let chunk_idx = {
504 let Ok(index) = usize::try_from(index) else {
505 return Ok(SearchResult::NotFound(indices.len()));
507 };
508
509 if index >= self.array_len() {
510 return Ok(SearchResult::NotFound(self.indices().len()));
511 }
512
513 (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
514 };
515
516 let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
518 .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
519
520 let patches_start_idx = chunk_offset
521 .saturating_sub(offset_within_chunk);
528
529 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
530 usize::try_from(chunk_offsets[chunk_idx + 1] - chunk_offsets[0])
531 .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
532 .saturating_sub(offset_within_chunk)
533 .min(indices.len())
534 } else {
535 self.indices.len()
536 };
537
538 let Some(offset) = T::from(self.offset) else {
539 return Ok(SearchResult::NotFound(indices.len()));
541 };
542
543 let chunk_indices = &indices[patches_start_idx..patches_end_idx];
544 let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
545
546 Ok(match result {
547 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
548 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
549 })
550 }
551
552 pub fn min_index(&self) -> VortexResult<usize> {
554 let first = self
555 .indices
556 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
557 .as_primitive()
558 .as_::<usize>()
559 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
560 Ok(first - self.offset)
561 }
562
563 pub fn max_index(&self) -> VortexResult<usize> {
565 let last = self
566 .indices
567 .execute_scalar(
568 self.indices.len() - 1,
569 &mut LEGACY_SESSION.create_execution_ctx(),
570 )?
571 .as_primitive()
572 .as_::<usize>()
573 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
574 Ok(last - self.offset)
575 }
576
577 pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
579 if mask.len() != self.array_len {
580 vortex_bail!(
581 "Filter mask length {} does not match array length {}",
582 mask.len(),
583 self.array_len
584 );
585 }
586
587 match mask.indices() {
588 AllOr::All => Ok(Some(self.clone())),
589 AllOr::None => Ok(None),
590 AllOr::Some(mask_indices) => {
591 let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
592 match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
593 filter_patches_with_mask(
594 flat_indices.as_slice::<I>(),
595 self.offset(),
596 self.values(),
597 mask_indices,
598 )
599 })
600 }
601 }
602 }
603
604 pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
609 if mask.len() != self.array_len {
610 vortex_bail!(
611 "Filter mask length {} does not match array length {}",
612 mask.len(),
613 self.array_len
614 );
615 }
616
617 let filter_mask = match mask.bit_buffer() {
618 AllOr::All => return Ok(None),
619 AllOr::None => return Ok(Some(self.clone())),
620 AllOr::Some(masked) => {
621 let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
622 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
623 let patch_indices = patch_indices.as_slice::<P>();
624 Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
625 #[allow(clippy::cast_possible_truncation)]
626 let idx = (patch_indices[i] as usize) - self.offset;
627 !masked.value(idx)
628 }))
629 })
630 }
631 };
632
633 if filter_mask.all_false() {
634 return Ok(None);
635 }
636
637 let filtered_indices = self.indices.filter(filter_mask.clone())?;
639 let filtered_values = self.values.filter(filter_mask)?;
640
641 Ok(Some(Self {
642 array_len: self.array_len,
643 offset: self.offset,
644 indices: filtered_indices,
645 values: filtered_values,
646 chunk_offsets: None,
648 offset_within_chunk: self.offset_within_chunk,
649 }))
650 }
651
652 pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
654 let slice_start_idx = self.search_index(range.start)?.to_index();
655 let slice_end_idx = self.search_index(range.end)?.to_index();
656
657 if slice_start_idx == slice_end_idx {
658 return Ok(None);
659 }
660
661 let values = self.values().slice(slice_start_idx..slice_end_idx)?;
662 let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
663
664 let new_chunk_offsets = self
665 .chunk_offsets
666 .as_ref()
667 .map(|chunk_offsets| -> VortexResult<ArrayRef> {
668 let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
669 let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
670 let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
671 chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
672 })
673 .transpose()?;
674
675 let offset_within_chunk = new_chunk_offsets
676 .as_ref()
677 .map(|new_chunk_offsets| -> VortexResult<usize> {
678 let new_chunk_base = new_chunk_offsets
679 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
680 .as_primitive()
681 .as_::<usize>()
682 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
683 let parent_chunk_base = self.chunk_offset_at(0)?;
684 let parent_within = self.offset_within_chunk.unwrap_or(0);
685 Ok(parent_chunk_base + parent_within + slice_start_idx - new_chunk_base)
686 })
687 .transpose()?;
688
689 Ok(Some(Self {
690 array_len: range.len(),
691 offset: range.start + self.offset(),
692 indices,
693 values,
694 chunk_offsets: new_chunk_offsets,
695 offset_within_chunk,
696 }))
697 }
698
699 const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
701
702 fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
703 (self.num_patches() as f64 / take_indices.len() as f64)
704 < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
705 }
706
707 pub fn take_with_nulls(
711 &self,
712 take_indices: &ArrayRef,
713 ctx: &mut ExecutionCtx,
714 ) -> VortexResult<Option<Self>> {
715 if take_indices.is_empty() {
716 return Ok(None);
717 }
718
719 let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
720 if self.is_map_faster_than_search(&take_indices) {
721 self.take_map(take_indices, true, ctx)
722 } else {
723 self.take_search(take_indices, true, ctx)
724 }
725 }
726
727 pub fn take(
731 &self,
732 take_indices: &ArrayRef,
733 ctx: &mut ExecutionCtx,
734 ) -> VortexResult<Option<Self>> {
735 if take_indices.is_empty() {
736 return Ok(None);
737 }
738
739 let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
740 if self.is_map_faster_than_search(&take_indices) {
741 self.take_map(take_indices, false, ctx)
742 } else {
743 self.take_search(take_indices, false, ctx)
744 }
745 }
746
747 #[expect(
748 clippy::cognitive_complexity,
749 reason = "complexity is from nested match_each_* macros"
750 )]
751 pub fn take_search(
752 &self,
753 take_indices: PrimitiveArray,
754 include_nulls: bool,
755 ctx: &mut ExecutionCtx,
756 ) -> VortexResult<Option<Self>> {
757 let take_indices_validity = take_indices.validity()?;
758 let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
759 let chunk_offsets = self
760 .chunk_offsets()
761 .as_ref()
762 .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
763 .transpose()?;
764
765 let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
766 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
767 let patch_indices_slice = patch_indices.as_slice::<PatchT>();
768 match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
769 let take_slice = take_indices.as_slice::<TakeT>();
770
771 if let Some(chunk_offsets) = chunk_offsets {
772 match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
773 let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
774 take_indices_with_search_fn(
775 patch_indices_slice,
776 take_slice,
777 take_indices
778 .as_ref()
779 .validity()?
780 .to_mask(take_indices.as_ref().len(), ctx)?,
781 include_nulls,
782 |take_idx| {
783 self.search_index_chunked_batch(
784 patch_indices_slice,
785 chunk_offsets,
786 take_idx,
787 )
788 },
789 )?
790 })
791 } else {
792 take_indices_with_search_fn(
793 patch_indices_slice,
794 take_slice,
795 take_indices
796 .as_ref()
797 .validity()?
798 .to_mask(take_indices.as_ref().len(), ctx)?,
799 include_nulls,
800 |take_idx| {
801 let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
802 return Ok(SearchResult::NotFound(patch_indices_slice.len()));
804 };
805
806 patch_indices_slice
807 .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
808 },
809 )?
810 }
811 })
812 });
813
814 if new_indices.is_empty() {
815 return Ok(None);
816 }
817
818 let new_indices = new_indices.into_array();
819 let new_array_len = take_indices.len();
820 let values_validity = take_indices_validity.take(&new_indices)?;
821
822 Ok(Some(Self {
823 array_len: new_array_len,
824 offset: 0,
825 indices: new_indices,
826 values: self
827 .values()
828 .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
829 chunk_offsets: None,
830 offset_within_chunk: Some(0), }))
832 }
833
834 pub fn take_map(
835 &self,
836 take_indices: PrimitiveArray,
837 include_nulls: bool,
838 ctx: &mut ExecutionCtx,
839 ) -> VortexResult<Option<Self>> {
840 let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
841 let new_length = take_indices.len();
842
843 let min_index = self.min_index()?;
844 let max_index = self.max_index()?;
845
846 let Some((new_sparse_indices, value_indices)) =
847 match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
848 match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
849 let take_validity = take_indices
850 .validity()?
851 .execute_mask(take_indices.len(), ctx)?;
852 let take_nullability = take_indices.validity()?.nullability();
853 let take_slice = take_indices.as_slice::<TakeIndices>();
854 take_map::<_, TakeIndices>(
855 indices.as_slice::<Indices>(),
856 take_slice,
857 take_validity,
858 take_nullability,
859 self.offset(),
860 min_index,
861 max_index,
862 include_nulls,
863 )?
864 })
865 })
866 else {
867 return Ok(None);
868 };
869
870 let taken_values = self.values().take(value_indices)?;
871
872 Ok(Some(Patches {
873 array_len: new_length,
874 offset: 0,
875 indices: new_sparse_indices,
876 values: taken_values,
877 chunk_offsets: None,
879 offset_within_chunk: self.offset_within_chunk,
880 }))
881 }
882
883 pub fn map_values<F>(self, f: F) -> VortexResult<Self>
884 where
885 F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
886 {
887 let values = f(self.values)?;
888 if self.indices.len() != values.len() {
889 vortex_bail!(
890 "map_values must preserve length: expected {} received {}",
891 self.indices.len(),
892 values.len()
893 )
894 }
895
896 Ok(Self {
897 array_len: self.array_len,
898 offset: self.offset,
899 indices: self.indices,
900 values,
901 chunk_offsets: self.chunk_offsets,
902 offset_within_chunk: self.offset_within_chunk,
903 })
904 }
905}
906
907#[expect(clippy::too_many_arguments)] fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
909 indices: &[I],
910 take_indices: &[T],
911 take_validity: Mask,
912 take_nullability: Nullability,
913 indices_offset: usize,
914 min_index: usize,
915 max_index: usize,
916 include_nulls: bool,
917) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
918where
919 usize: TryFrom<T>,
920 VortexError: From<<I as TryFrom<usize>>::Error>,
921{
922 let offset_i = I::try_from(indices_offset)?;
923
924 let sparse_index_to_value_index: HashMap<I, usize> = indices
925 .iter()
926 .copied()
927 .map(|idx| idx - offset_i)
928 .enumerate()
929 .map(|(value_index, sparse_index)| (sparse_index, value_index))
930 .collect();
931
932 let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
933 let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
934
935 for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
936 let ti = usize::try_from(take_idx)
937 .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
938
939 let is_null = match take_validity.bit_buffer() {
941 AllOr::All => false,
942 AllOr::None => true,
943 AllOr::Some(buf) => !buf.value(idx_in_take),
944 };
945 if is_null {
946 if include_nulls {
947 new_sparse_indices.push(idx_in_take as u64);
948 value_indices.push(0);
949 }
950 } else if ti >= min_index && ti <= max_index {
951 let ti_as_i = I::try_from(ti)
952 .map_err(|_| vortex_err!("take index does not fit in index type"))?;
953 if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
954 new_sparse_indices.push(idx_in_take as u64);
955 value_indices.push(value_index as u64);
956 }
957 }
958 }
959
960 if new_sparse_indices.is_empty() {
961 return Ok(None);
962 }
963
964 let new_sparse_indices = new_sparse_indices.into_array();
965 let values_validity =
966 Validity::from_mask(take_validity, take_nullability).take(&new_sparse_indices)?;
967 Ok(Some((
968 new_sparse_indices,
969 PrimitiveArray::new(value_indices, values_validity).into_array(),
970 )))
971}
972
973fn filter_patches_with_mask<T: IntegerPType>(
979 patch_indices: &[T],
980 offset: usize,
981 patch_values: &ArrayRef,
982 mask_indices: &[usize],
983) -> VortexResult<Option<Patches>> {
984 let true_count = mask_indices.len();
985 let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
986 let mut new_mask_indices = Vec::with_capacity(true_count);
987
988 const STRIDE: usize = 4;
992
993 let mut mask_idx = 0usize;
994 let mut true_idx = 0usize;
995
996 while mask_idx < patch_indices.len() && true_idx < true_count {
997 if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1004 let left_min = patch_indices[mask_idx]
1006 .to_usize()
1007 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1008 - offset;
1009 let left_max = patch_indices[mask_idx + STRIDE]
1010 .to_usize()
1011 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1012 - offset;
1013 let right_min = mask_indices[true_idx];
1014 let right_max = mask_indices[true_idx + STRIDE];
1015
1016 if left_min > right_max {
1017 true_idx += STRIDE;
1019 continue;
1020 } else if right_min > left_max {
1021 mask_idx += STRIDE;
1022 continue;
1023 } else {
1024 }
1026 }
1027
1028 let left = patch_indices[mask_idx]
1031 .to_usize()
1032 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1033 - offset;
1034 let right = mask_indices[true_idx];
1035
1036 match left.cmp(&right) {
1037 Ordering::Less => {
1038 mask_idx += 1;
1039 }
1040 Ordering::Greater => {
1041 true_idx += 1;
1042 }
1043 Ordering::Equal => {
1044 new_mask_indices.push(mask_idx);
1046 new_patch_indices.push(true_idx as u64);
1047
1048 mask_idx += 1;
1049 true_idx += 1;
1050 }
1051 }
1052 }
1053
1054 if new_mask_indices.is_empty() {
1055 return Ok(None);
1056 }
1057
1058 let new_patch_indices = new_patch_indices.into_array();
1059 let new_patch_values =
1060 patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1061
1062 Ok(Some(Patches::new(
1063 true_count,
1064 0,
1065 new_patch_indices,
1066 new_patch_values,
1067 None,
1069 )?))
1070}
1071
1072fn take_indices_with_search_fn<
1073 I: UnsignedPType,
1074 T: IntegerPType,
1075 F: Fn(I) -> VortexResult<SearchResult>,
1076>(
1077 indices: &[I],
1078 take_indices: &[T],
1079 take_validity: Mask,
1080 include_nulls: bool,
1081 search_fn: F,
1082) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1083 let mut values_indices = BufferMut::with_capacity(take_indices.len());
1084 let mut new_indices = BufferMut::with_capacity(take_indices.len());
1085
1086 for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1087 if !take_validity.value(new_patch_idx) {
1088 if include_nulls {
1089 values_indices.push(0u64);
1091 new_indices.push(new_patch_idx as u64);
1092 }
1093 continue;
1094 } else {
1095 let search_result = match I::from(take_idx) {
1096 Some(idx) => search_fn(idx)?,
1097 None => SearchResult::NotFound(indices.len()),
1098 };
1099
1100 if let Some(patch_idx) = search_result.to_found() {
1101 values_indices.push(patch_idx as u64);
1102 new_indices.push(new_patch_idx as u64);
1103 }
1104 }
1105 }
1106
1107 Ok((values_indices, new_indices))
1108}
1109
1110#[cfg(test)]
1111mod test {
1112 use vortex_buffer::BufferMut;
1113 use vortex_buffer::buffer;
1114 use vortex_mask::Mask;
1115
1116 use crate::IntoArray;
1117 use crate::LEGACY_SESSION;
1118 use crate::ToCanonical;
1119 use crate::VortexSessionExecute;
1120 use crate::assert_arrays_eq;
1121 use crate::patches::Patches;
1122 use crate::patches::PrimitiveArray;
1123 use crate::search_sorted::SearchResult;
1124 use crate::validity::Validity;
1125
1126 #[test]
1127 fn test_filter() {
1128 let patches = Patches::new(
1129 100,
1130 0,
1131 buffer![10u32, 11, 20].into_array(),
1132 buffer![100, 110, 200].into_array(),
1133 None,
1134 )
1135 .unwrap();
1136
1137 let filtered = patches
1138 .filter(
1139 &Mask::from_indices(100, vec![10, 20, 30]),
1140 &mut LEGACY_SESSION.create_execution_ctx(),
1141 )
1142 .unwrap()
1143 .unwrap();
1144
1145 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1146 assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1147 }
1148
1149 #[test]
1150 fn take_with_nulls() {
1151 let patches = Patches::new(
1152 20,
1153 0,
1154 buffer![2u64, 9, 15].into_array(),
1155 PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1156 None,
1157 )
1158 .unwrap();
1159
1160 let taken = patches
1161 .take(
1162 &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1163 .into_array(),
1164 &mut LEGACY_SESSION.create_execution_ctx(),
1165 )
1166 .unwrap()
1167 .unwrap();
1168 let primitive_values = taken.values().to_primitive();
1169 let primitive_indices = taken.indices().to_primitive();
1170 assert_eq!(taken.array_len(), 2);
1171 assert_arrays_eq!(
1172 primitive_values,
1173 PrimitiveArray::from_option_iter([Some(44i32)])
1174 );
1175 assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1176 assert_eq!(
1177 primitive_values
1178 .as_ref()
1179 .validity()
1180 .unwrap()
1181 .to_mask(
1182 primitive_values.as_ref().len(),
1183 &mut LEGACY_SESSION.create_execution_ctx()
1184 )
1185 .unwrap(),
1186 Mask::from_iter(vec![true])
1187 );
1188 }
1189
1190 #[test]
1191 fn take_search_with_nulls_chunked() {
1192 let patches = Patches::new(
1193 20,
1194 0,
1195 buffer![2u64, 9, 15].into_array(),
1196 buffer![33_i32, 44, 55].into_array(),
1197 Some(buffer![0u64].into_array()),
1198 )
1199 .unwrap();
1200
1201 let taken = patches
1202 .take_search(
1203 PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1204 true,
1205 &mut LEGACY_SESSION.create_execution_ctx(),
1206 )
1207 .unwrap()
1208 .unwrap();
1209
1210 let primitive_values = taken.values().to_primitive();
1211 assert_eq!(taken.array_len(), 2);
1212 assert_arrays_eq!(
1213 primitive_values,
1214 PrimitiveArray::from_option_iter([Some(44i32), None])
1215 );
1216 assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1217
1218 assert_eq!(
1219 primitive_values
1220 .as_ref()
1221 .validity()
1222 .unwrap()
1223 .to_mask(
1224 primitive_values.as_ref().len(),
1225 &mut LEGACY_SESSION.create_execution_ctx()
1226 )
1227 .unwrap(),
1228 Mask::from_iter([true, false])
1229 );
1230 }
1231
1232 #[test]
1233 fn take_search_chunked_multiple_chunks() {
1234 let patches = Patches::new(
1235 2048,
1236 0,
1237 buffer![100u64, 500, 1200, 1800].into_array(),
1238 buffer![10_i32, 20, 30, 40].into_array(),
1239 Some(buffer![0u64, 2].into_array()),
1240 )
1241 .unwrap();
1242
1243 let taken = patches
1244 .take_search(
1245 PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1246 true,
1247 &mut LEGACY_SESSION.create_execution_ctx(),
1248 )
1249 .unwrap()
1250 .unwrap();
1251
1252 assert_eq!(taken.array_len(), 3);
1253 assert_arrays_eq!(
1254 taken.values(),
1255 PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1256 );
1257 }
1258
1259 #[test]
1260 fn take_search_chunked_indices_with_no_patches() {
1261 let patches = Patches::new(
1262 20,
1263 0,
1264 buffer![2u64, 9, 15].into_array(),
1265 buffer![33_i32, 44, 55].into_array(),
1266 Some(buffer![0u64].into_array()),
1267 )
1268 .unwrap();
1269
1270 let taken = patches
1271 .take_search(
1272 PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1273 true,
1274 &mut LEGACY_SESSION.create_execution_ctx(),
1275 )
1276 .unwrap();
1277
1278 assert!(taken.is_none());
1279 }
1280
1281 #[test]
1282 fn take_search_chunked_interleaved() {
1283 let patches = Patches::new(
1284 30,
1285 0,
1286 buffer![5u64, 10, 20, 25].into_array(),
1287 buffer![100_i32, 200, 300, 400].into_array(),
1288 Some(buffer![0u64].into_array()),
1289 )
1290 .unwrap();
1291
1292 let taken = patches
1293 .take_search(
1294 PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1295 true,
1296 &mut LEGACY_SESSION.create_execution_ctx(),
1297 )
1298 .unwrap()
1299 .unwrap();
1300
1301 assert_eq!(taken.array_len(), 4);
1302 assert_arrays_eq!(
1303 taken.values(),
1304 PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1305 );
1306 }
1307
1308 #[test]
1309 fn test_take_search_multiple_chunk_offsets() {
1310 let patches = Patches::new(
1311 1500,
1312 0,
1313 BufferMut::from_iter(0..1500u64).into_array(),
1314 BufferMut::from_iter(0..1500i32).into_array(),
1315 Some(buffer![0u64, 1024u64].into_array()),
1316 )
1317 .unwrap();
1318
1319 let taken = patches
1320 .take_search(
1321 PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1322 false,
1323 &mut LEGACY_SESSION.create_execution_ctx(),
1324 )
1325 .unwrap()
1326 .unwrap();
1327
1328 assert_eq!(taken.array_len(), 1500);
1329 }
1330
1331 #[test]
1332 fn test_slice() {
1333 let values = buffer![15_u32, 135, 13531, 42].into_array();
1334 let indices = buffer![10_u64, 11, 50, 100].into_array();
1335
1336 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1337
1338 let sliced = patches.slice(15..100).unwrap().unwrap();
1339 assert_eq!(sliced.array_len(), 100 - 15);
1340 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1341 }
1342
1343 #[test]
1344 fn doubly_sliced() {
1345 let values = buffer![15_u32, 135, 13531, 42].into_array();
1346 let indices = buffer![10_u64, 11, 50, 100].into_array();
1347
1348 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1349
1350 let sliced = patches.slice(15..100).unwrap().unwrap();
1351 assert_eq!(sliced.array_len(), 100 - 15);
1352 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1353
1354 let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1355 assert_arrays_eq!(
1356 doubly_sliced.values(),
1357 PrimitiveArray::from_iter([13531u32])
1358 );
1359 }
1360
1361 #[test]
1362 fn test_mask_all_true() {
1363 let patches = Patches::new(
1364 10,
1365 0,
1366 buffer![2u64, 5, 8].into_array(),
1367 buffer![100i32, 200, 300].into_array(),
1368 None,
1369 )
1370 .unwrap();
1371
1372 let mask = Mask::new_true(10);
1373 let masked = patches
1374 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1375 .unwrap();
1376 assert!(masked.is_none());
1377 }
1378
1379 #[test]
1380 fn test_mask_all_false() {
1381 let patches = Patches::new(
1382 10,
1383 0,
1384 buffer![2u64, 5, 8].into_array(),
1385 buffer![100i32, 200, 300].into_array(),
1386 None,
1387 )
1388 .unwrap();
1389
1390 let mask = Mask::new_false(10);
1391 let masked = patches
1392 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1393 .unwrap()
1394 .unwrap();
1395
1396 assert_arrays_eq!(
1398 masked.values(),
1399 PrimitiveArray::from_iter([100i32, 200, 300])
1400 );
1401 assert!(
1402 masked
1403 .values()
1404 .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1405 .unwrap()
1406 );
1407 assert!(
1408 masked
1409 .values()
1410 .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1411 .unwrap()
1412 );
1413 assert!(
1414 masked
1415 .values()
1416 .is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())
1417 .unwrap()
1418 );
1419
1420 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1422 }
1423
1424 #[test]
1425 fn test_mask_partial() {
1426 let patches = Patches::new(
1427 10,
1428 0,
1429 buffer![2u64, 5, 8].into_array(),
1430 buffer![100i32, 200, 300].into_array(),
1431 None,
1432 )
1433 .unwrap();
1434
1435 let mask = Mask::from_iter([
1437 false, false, true, false, false, false, false, false, true, false,
1438 ]);
1439 let masked = patches
1440 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1441 .unwrap()
1442 .unwrap();
1443
1444 assert_eq!(masked.values().len(), 1);
1446 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1447
1448 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1450 }
1451
1452 #[test]
1453 fn test_mask_with_offset() {
1454 let patches = Patches::new(
1455 10,
1456 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1459 None,
1460 )
1461 .unwrap();
1462
1463 let mask = Mask::from_iter([
1465 false, false, true, false, false, false, false, false, false, false,
1466 ]);
1467
1468 let masked = patches
1469 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1470 .unwrap()
1471 .unwrap();
1472 assert_eq!(masked.array_len(), 10);
1473 assert_eq!(masked.offset(), 5);
1474 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1475 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1476 }
1477
1478 #[test]
1479 fn test_mask_nullable_values() {
1480 let patches = Patches::new(
1481 10,
1482 0,
1483 buffer![2u64, 5, 8].into_array(),
1484 PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1485 None,
1486 )
1487 .unwrap();
1488
1489 let mask = Mask::from_iter([
1491 false, false, true, false, false, false, false, false, false, false,
1492 ]);
1493 let masked = patches
1494 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1495 .unwrap()
1496 .unwrap();
1497
1498 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1500
1501 let masked_values = masked.values().to_primitive();
1503 assert_eq!(masked_values.len(), 2);
1504 assert!(
1505 !masked_values
1506 .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1507 .unwrap()
1508 ); assert!(
1510 masked_values
1511 .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1512 .unwrap()
1513 ); assert_eq!(
1515 i32::try_from(
1516 &masked_values
1517 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1518 .unwrap()
1519 )
1520 .unwrap(),
1521 300i32
1522 );
1523 }
1524
1525 #[test]
1526 fn test_filter_keep_all() {
1527 let patches = Patches::new(
1528 10,
1529 0,
1530 buffer![2u64, 5, 8].into_array(),
1531 buffer![100i32, 200, 300].into_array(),
1532 None,
1533 )
1534 .unwrap();
1535
1536 let mask = Mask::from_indices(10, (0..10).collect());
1538 let filtered = patches
1539 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1540 .unwrap()
1541 .unwrap();
1542
1543 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1544 assert_arrays_eq!(
1545 filtered.values(),
1546 PrimitiveArray::from_iter([100i32, 200, 300])
1547 );
1548 }
1549
1550 #[test]
1551 fn test_filter_none() {
1552 let patches = Patches::new(
1553 10,
1554 0,
1555 buffer![2u64, 5, 8].into_array(),
1556 buffer![100i32, 200, 300].into_array(),
1557 None,
1558 )
1559 .unwrap();
1560
1561 let mask = Mask::from_indices(10, vec![]);
1563 let filtered = patches
1564 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1565 .unwrap();
1566 assert!(filtered.is_none());
1567 }
1568
1569 #[test]
1570 fn test_filter_with_indices() {
1571 let patches = Patches::new(
1572 10,
1573 0,
1574 buffer![2u64, 5, 8].into_array(),
1575 buffer![100i32, 200, 300].into_array(),
1576 None,
1577 )
1578 .unwrap();
1579
1580 let mask = Mask::from_indices(10, vec![2, 5, 9]);
1582 let filtered = patches
1583 .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1584 .unwrap()
1585 .unwrap();
1586
1587 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1589 }
1590
1591 #[test]
1592 fn test_slice_full_range() {
1593 let patches = Patches::new(
1594 10,
1595 0,
1596 buffer![2u64, 5, 8].into_array(),
1597 buffer![100i32, 200, 300].into_array(),
1598 None,
1599 )
1600 .unwrap();
1601
1602 let sliced = patches.slice(0..10).unwrap().unwrap();
1603
1604 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1605 assert_arrays_eq!(
1606 sliced.values(),
1607 PrimitiveArray::from_iter([100i32, 200, 300])
1608 );
1609 }
1610
1611 #[test]
1612 fn test_slice_partial() {
1613 let patches = Patches::new(
1614 10,
1615 0,
1616 buffer![2u64, 5, 8].into_array(),
1617 buffer![100i32, 200, 300].into_array(),
1618 None,
1619 )
1620 .unwrap();
1621
1622 let sliced = patches.slice(3..8).unwrap().unwrap();
1624
1625 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1627 assert_eq!(sliced.array_len(), 5); assert_eq!(sliced.offset(), 3); }
1630
1631 #[test]
1632 fn test_slice_no_patches() {
1633 let patches = Patches::new(
1634 10,
1635 0,
1636 buffer![2u64, 5, 8].into_array(),
1637 buffer![100i32, 200, 300].into_array(),
1638 None,
1639 )
1640 .unwrap();
1641
1642 let sliced = patches.slice(6..7).unwrap();
1644 assert!(sliced.is_none());
1645 }
1646
1647 #[test]
1648 fn test_slice_with_offset() {
1649 let patches = Patches::new(
1650 10,
1651 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1654 None,
1655 )
1656 .unwrap();
1657
1658 let sliced = patches.slice(3..8).unwrap().unwrap();
1660
1661 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1663 assert_eq!(sliced.offset(), 8); }
1665
1666 #[test]
1667 fn test_patch_values() {
1668 let patches = Patches::new(
1669 10,
1670 0,
1671 buffer![2u64, 5, 8].into_array(),
1672 buffer![100i32, 200, 300].into_array(),
1673 None,
1674 )
1675 .unwrap();
1676
1677 let values = patches.values().to_primitive();
1678 assert_eq!(
1679 i32::try_from(
1680 &values
1681 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
1682 .unwrap()
1683 )
1684 .unwrap(),
1685 100i32
1686 );
1687 assert_eq!(
1688 i32::try_from(
1689 &values
1690 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1691 .unwrap()
1692 )
1693 .unwrap(),
1694 200i32
1695 );
1696 assert_eq!(
1697 i32::try_from(
1698 &values
1699 .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
1700 .unwrap()
1701 )
1702 .unwrap(),
1703 300i32
1704 );
1705 }
1706
1707 #[test]
1708 fn test_indices_range() {
1709 let patches = Patches::new(
1710 10,
1711 0,
1712 buffer![2u64, 5, 8].into_array(),
1713 buffer![100i32, 200, 300].into_array(),
1714 None,
1715 )
1716 .unwrap();
1717
1718 assert_eq!(patches.min_index().unwrap(), 2);
1719 assert_eq!(patches.max_index().unwrap(), 8);
1720 }
1721
1722 #[test]
1723 fn test_search_index() {
1724 let patches = Patches::new(
1725 10,
1726 0,
1727 buffer![2u64, 5, 8].into_array(),
1728 buffer![100i32, 200, 300].into_array(),
1729 None,
1730 )
1731 .unwrap();
1732
1733 assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1735 assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1736 assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1737
1738 assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1740 assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1741 assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1742 assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1743 }
1744
1745 #[test]
1746 fn test_mask_boundary_patches() {
1747 let patches = Patches::new(
1749 10,
1750 0,
1751 buffer![0u64, 9].into_array(),
1752 buffer![100i32, 200].into_array(),
1753 None,
1754 )
1755 .unwrap();
1756
1757 let mask = Mask::from_iter([
1758 true, false, false, false, false, false, false, false, false, false,
1759 ]);
1760 let masked = patches
1761 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1762 .unwrap();
1763 assert!(masked.is_some());
1764 let masked = masked.unwrap();
1765 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1766 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1767 }
1768
1769 #[test]
1770 fn test_mask_all_patches_removed() {
1771 let patches = Patches::new(
1773 10,
1774 0,
1775 buffer![2u64, 5, 8].into_array(),
1776 buffer![100i32, 200, 300].into_array(),
1777 None,
1778 )
1779 .unwrap();
1780
1781 let mask = Mask::from_iter([
1783 false, false, true, false, false, true, false, false, true, false,
1784 ]);
1785 let masked = patches
1786 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1787 .unwrap();
1788 assert!(masked.is_none());
1789 }
1790
1791 #[test]
1792 fn test_mask_no_patches_removed() {
1793 let patches = Patches::new(
1795 10,
1796 0,
1797 buffer![2u64, 5, 8].into_array(),
1798 buffer![100i32, 200, 300].into_array(),
1799 None,
1800 )
1801 .unwrap();
1802
1803 let mask = Mask::from_iter([
1805 true, false, false, true, false, false, true, false, false, true,
1806 ]);
1807 let masked = patches
1808 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1809 .unwrap()
1810 .unwrap();
1811
1812 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1813 assert_arrays_eq!(
1814 masked.values(),
1815 PrimitiveArray::from_iter([100i32, 200, 300])
1816 );
1817 }
1818
1819 #[test]
1820 fn test_mask_single_patch() {
1821 let patches = Patches::new(
1823 5,
1824 0,
1825 buffer![2u64].into_array(),
1826 buffer![42i32].into_array(),
1827 None,
1828 )
1829 .unwrap();
1830
1831 let mask = Mask::from_iter([false, false, true, false, false]);
1833 let masked = patches
1834 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1835 .unwrap();
1836 assert!(masked.is_none());
1837
1838 let mask = Mask::from_iter([true, false, false, true, false]);
1840 let masked = patches
1841 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1842 .unwrap()
1843 .unwrap();
1844 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1845 }
1846
1847 #[test]
1848 fn test_mask_contiguous_patches() {
1849 let patches = Patches::new(
1851 10,
1852 0,
1853 buffer![3u64, 4, 5, 6].into_array(),
1854 buffer![100i32, 200, 300, 400].into_array(),
1855 None,
1856 )
1857 .unwrap();
1858
1859 let mask = Mask::from_iter([
1861 false, false, false, false, true, true, false, false, false, false,
1862 ]);
1863 let masked = patches
1864 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1865 .unwrap()
1866 .unwrap();
1867
1868 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1869 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1870 }
1871
1872 #[test]
1873 fn test_mask_with_large_offset() {
1874 let patches = Patches::new(
1876 20,
1877 15,
1878 buffer![16u64, 17, 19].into_array(), buffer![100i32, 200, 300].into_array(),
1880 None,
1881 )
1882 .unwrap();
1883
1884 let mask = Mask::from_iter([
1886 false, false, true, false, false, false, false, false, false, false, false, false,
1887 false, false, false, false, false, false, false, false,
1888 ]);
1889 let masked = patches
1890 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1891 .unwrap()
1892 .unwrap();
1893
1894 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1895 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1896 }
1897
1898 #[test]
1899 #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1900 fn test_mask_wrong_length() {
1901 let patches = Patches::new(
1902 10,
1903 0,
1904 buffer![2u64, 5, 8].into_array(),
1905 buffer![100i32, 200, 300].into_array(),
1906 None,
1907 )
1908 .unwrap();
1909
1910 let mask = Mask::from_iter([false, false, true, false, false]);
1912 patches
1913 .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1914 .unwrap();
1915 }
1916
1917 #[test]
1918 fn test_chunk_offsets_search() {
1919 let indices = buffer![100u64, 200, 3000, 3100].into_array();
1920 let values = buffer![10i32, 20, 30, 40].into_array();
1921 let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
1922 let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
1923
1924 assert!(patches.chunk_offsets.is_some());
1925
1926 assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
1928 assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
1929
1930 assert_eq!(
1932 patches.search_index(1500).unwrap(),
1933 SearchResult::NotFound(2)
1934 );
1935 assert_eq!(
1936 patches.search_index(2000).unwrap(),
1937 SearchResult::NotFound(2)
1938 );
1939
1940 assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
1942 assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
1943
1944 assert_eq!(
1945 patches.search_index(1024).unwrap(),
1946 SearchResult::NotFound(2)
1947 );
1948 }
1949
1950 #[test]
1951 fn test_chunk_offsets_with_slice() {
1952 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
1953 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
1954 let chunk_offsets = buffer![0u64, 2, 6].into_array();
1955 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
1956
1957 let sliced = patches.slice(1000..2200).unwrap().unwrap();
1958
1959 assert!(sliced.chunk_offsets.is_some());
1960 assert_eq!(sliced.offset(), 1000);
1961
1962 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
1963 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
1964 assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
1965
1966 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
1967 }
1968
1969 #[test]
1970 fn test_chunk_offsets_with_slice_after_first_chunk() {
1971 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
1972 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
1973 let chunk_offsets = buffer![0u64, 2, 6].into_array();
1974 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
1975
1976 let sliced = patches.slice(1300..2200).unwrap().unwrap();
1977
1978 assert!(sliced.chunk_offsets.is_some());
1979 assert_eq!(sliced.offset(), 1300);
1980
1981 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
1982 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
1983 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
1984 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
1985 assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
1986 }
1987
1988 #[test]
1989 fn test_chunk_offsets_slice_empty_result() {
1990 let indices = buffer![100u64, 200, 3000, 3100].into_array();
1991 let values = buffer![10i32, 20, 30, 40].into_array();
1992 let chunk_offsets = buffer![0u64, 2].into_array();
1993 let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
1994
1995 let sliced = patches.slice(1000..2000).unwrap();
1996 assert!(sliced.is_none());
1997 }
1998
1999 #[test]
2000 fn test_chunk_offsets_slice_single_patch() {
2001 let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2002 let values = buffer![10i32, 20, 30, 40].into_array();
2003 let chunk_offsets = buffer![0u64, 1, 3].into_array();
2004 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2005
2006 let sliced = patches.slice(1100..1250).unwrap().unwrap();
2007
2008 assert_eq!(sliced.num_patches(), 1);
2009 assert_eq!(sliced.offset(), 1100);
2010 assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2012 assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2013 }
2014
2015 #[test]
2016 fn test_chunk_offsets_slice_across_chunks() {
2017 let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2018 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2019 let chunk_offsets = buffer![0u64, 2, 4].into_array();
2020 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2021
2022 let sliced = patches.slice(150..2150).unwrap().unwrap();
2023
2024 assert_eq!(sliced.num_patches(), 4);
2025 assert_eq!(sliced.offset(), 150);
2026
2027 assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); }
2032
2033 #[test]
2034 fn test_chunk_offsets_boundary_searches() {
2035 let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2036 let values = buffer![10i32, 20, 30, 40, 50].into_array();
2037 let chunk_offsets = buffer![0u64, 1, 4].into_array();
2038 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2039
2040 assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2041 assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2042 assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2043 assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2044 assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2045
2046 assert_eq!(
2047 patches.search_index(1022).unwrap(),
2048 SearchResult::NotFound(0)
2049 );
2050 assert_eq!(
2051 patches.search_index(2046).unwrap(),
2052 SearchResult::NotFound(3)
2053 );
2054 }
2055
2056 #[test]
2057 fn test_chunk_offsets_slice_edge_cases() {
2058 let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2059 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2060 let chunk_offsets = buffer![0u64, 3, 5].into_array();
2061 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2062
2063 let sliced = patches.slice(0..10).unwrap().unwrap();
2065 assert_eq!(sliced.num_patches(), 2);
2066 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2067 assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2068
2069 let sliced = patches.slice(2040..3000).unwrap().unwrap();
2071 assert_eq!(sliced.num_patches(), 2); assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); }
2075
2076 #[test]
2077 fn test_chunk_offsets_slice_nested() {
2078 let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2079 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2080 let chunk_offsets = buffer![0u64].into_array();
2081 let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2082
2083 let sliced1 = patches.slice(150..550).unwrap().unwrap();
2084 assert_eq!(sliced1.num_patches(), 4); let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2087 assert_eq!(sliced2.num_patches(), 1); assert_eq!(sliced2.offset(), 250);
2089
2090 assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(
2092 sliced2.search_index(150).unwrap(),
2093 SearchResult::NotFound(1)
2094 );
2095 }
2096
2097 #[test]
2098 fn test_nested_slice_with_dropped_first_chunk() {
2099 let indices = buffer![0u64, 1024].into_array();
2101 let values = buffer![1i32, 2].into_array();
2102 let chunk_offsets = buffer![0u64, 1].into_array();
2103 let patches = Patches::new(2048, 0, indices, values, Some(chunk_offsets)).unwrap();
2104
2105 let dropped_first = patches.slice(1024..2048).unwrap().unwrap();
2107 let resliced = dropped_first.slice(0..1024).unwrap().unwrap();
2108 assert_eq!(resliced.num_patches(), 1);
2109 }
2110
2111 #[test]
2112 fn test_index_larger_than_length() {
2113 let chunk_offsets = buffer![0u64].into_array();
2114 let indices = buffer![1023u64].into_array();
2115 let values = buffer![42i32].into_array();
2116 let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2117 assert_eq!(
2118 patches.search_index(2048).unwrap(),
2119 SearchResult::NotFound(1)
2120 );
2121 }
2122}