1use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use itertools::Itertools;
10use num_traits::NumCast;
11use vortex_buffer::BitBuffer;
12use vortex_buffer::BufferMut;
13use vortex_dtype::DType;
14use vortex_dtype::IntegerPType;
15use vortex_dtype::NativePType;
16use vortex_dtype::Nullability::NonNullable;
17use vortex_dtype::PType;
18use vortex_dtype::UnsignedPType;
19use vortex_dtype::match_each_integer_ptype;
20use vortex_dtype::match_each_unsigned_integer_ptype;
21use vortex_error::VortexError;
22use vortex_error::VortexResult;
23use vortex_error::vortex_bail;
24use vortex_error::vortex_ensure;
25use vortex_error::vortex_err;
26use vortex_mask::AllOr;
27use vortex_mask::Mask;
28use vortex_mask::MaskMut;
29use vortex_utils::aliases::hash_map::HashMap;
30
31use crate::Array;
32use crate::ArrayRef;
33use crate::ArrayVisitor;
34use crate::IntoArray;
35use crate::ToCanonical;
36use crate::arrays::PrimitiveArray;
37use crate::builtins::ArrayBuiltins;
38use crate::compute::filter;
39use crate::compute::is_sorted;
40use crate::scalar::PValue;
41use crate::scalar::Scalar;
42use crate::search_sorted::SearchResult;
43use crate::search_sorted::SearchSorted;
44use crate::search_sorted::SearchSortedSide;
45use crate::validity::Validity;
46use crate::vtable::ValidityHelper;
47
48pub const PATCH_CHUNK_SIZE: usize = 1024;
51
52#[derive(Copy, Clone, prost::Message)]
53pub struct PatchesMetadata {
54 #[prost(uint64, tag = "1")]
55 len: u64,
56 #[prost(uint64, tag = "2")]
57 offset: u64,
58 #[prost(enumeration = "PType", tag = "3")]
59 indices_ptype: i32,
60 #[prost(uint64, optional, tag = "4")]
61 chunk_offsets_len: Option<u64>,
62 #[prost(enumeration = "PType", optional, tag = "5")]
63 chunk_offsets_ptype: Option<i32>,
64 #[prost(uint64, optional, tag = "6")]
65 offset_within_chunk: Option<u64>,
66}
67
68impl PatchesMetadata {
69 #[inline]
70 pub fn new(
71 len: usize,
72 offset: usize,
73 indices_ptype: PType,
74 chunk_offsets_len: Option<usize>,
75 chunk_offsets_ptype: Option<PType>,
76 offset_within_chunk: Option<usize>,
77 ) -> Self {
78 Self {
79 len: len as u64,
80 offset: offset as u64,
81 indices_ptype: indices_ptype as i32,
82 chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
83 chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
84 offset_within_chunk: offset_within_chunk.map(|len| len as u64),
85 }
86 }
87
88 #[inline]
89 pub fn len(&self) -> VortexResult<usize> {
90 usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
91 }
92
93 #[inline]
94 pub fn is_empty(&self) -> bool {
95 self.len == 0
96 }
97
98 #[inline]
99 pub fn offset(&self) -> VortexResult<usize> {
100 usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
101 }
102
103 #[inline]
104 pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
105 self.chunk_offsets_ptype
106 .map(|t| {
107 PType::try_from(t)
108 .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
109 .map(|ptype| DType::Primitive(ptype, NonNullable))
110 })
111 .transpose()
112 }
113
114 #[inline]
115 pub fn indices_dtype(&self) -> VortexResult<DType> {
116 let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
117 vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
118 })?;
119 vortex_ensure!(
120 ptype.is_unsigned_int(),
121 "Patch indices must be unsigned integers"
122 );
123 Ok(DType::Primitive(ptype, NonNullable))
124 }
125}
126
127#[derive(Debug, Clone)]
129pub struct Patches {
130 array_len: usize,
131 offset: usize,
132 indices: ArrayRef,
133 values: ArrayRef,
134 chunk_offsets: Option<ArrayRef>,
141 offset_within_chunk: Option<usize>,
151}
152
153impl Patches {
154 pub fn new(
155 array_len: usize,
156 offset: usize,
157 indices: ArrayRef,
158 values: ArrayRef,
159 chunk_offsets: Option<ArrayRef>,
160 ) -> VortexResult<Self> {
161 vortex_ensure!(
162 indices.len() == values.len(),
163 "Patch indices and values must have the same length"
164 );
165 vortex_ensure!(
166 indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
167 "Patch indices must be non-nullable unsigned integers, got {:?}",
168 indices.dtype()
169 );
170
171 vortex_ensure!(
172 indices.len() <= array_len,
173 "Patch indices must be shorter than the array length"
174 );
175 vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
176
177 if indices.is_host() && values.is_host() {
180 let max = usize::try_from(&indices.scalar_at(indices.len() - 1)?)
181 .map_err(|_| vortex_err!("indices must be a number"))?;
182 vortex_ensure!(
183 max - offset < array_len,
184 "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
185 );
186
187 debug_assert!(
188 is_sorted(indices.as_ref())
189 .unwrap_or(Some(false))
190 .unwrap_or(false),
191 "Patch indices must be sorted"
192 );
193 }
194
195 Ok(Self {
196 array_len,
197 offset,
198 indices,
199 values,
200 chunk_offsets: chunk_offsets.clone(),
201 offset_within_chunk: chunk_offsets.map(|_| 0),
203 })
204 }
205
206 pub unsafe fn new_unchecked(
216 array_len: usize,
217 offset: usize,
218 indices: ArrayRef,
219 values: ArrayRef,
220 chunk_offsets: Option<ArrayRef>,
221 offset_within_chunk: Option<usize>,
222 ) -> Self {
223 Self {
224 array_len,
225 offset,
226 indices,
227 values,
228 chunk_offsets,
229 offset_within_chunk,
230 }
231 }
232
233 #[inline]
234 pub fn array_len(&self) -> usize {
235 self.array_len
236 }
237
238 #[inline]
239 pub fn num_patches(&self) -> usize {
240 self.indices.len()
241 }
242
243 #[inline]
244 pub fn dtype(&self) -> &DType {
245 self.values.dtype()
246 }
247
248 #[inline]
249 pub fn indices(&self) -> &ArrayRef {
250 &self.indices
251 }
252
253 #[inline]
254 pub fn into_indices(self) -> ArrayRef {
255 self.indices
256 }
257
258 #[inline]
259 pub fn indices_mut(&mut self) -> &mut ArrayRef {
260 &mut self.indices
261 }
262
263 #[inline]
264 pub fn values(&self) -> &ArrayRef {
265 &self.values
266 }
267
268 #[inline]
269 pub fn into_values(self) -> ArrayRef {
270 self.values
271 }
272
273 #[inline]
274 pub fn values_mut(&mut self) -> &mut ArrayRef {
275 &mut self.values
276 }
277
278 #[inline]
279 pub fn offset(&self) -> usize {
281 self.offset
282 }
283
284 #[inline]
285 pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
286 &self.chunk_offsets
287 }
288
289 #[inline]
290 pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
291 let Some(chunk_offsets) = &self.chunk_offsets else {
292 vortex_bail!("chunk_offsets must be set to retrieve offset at index")
293 };
294
295 chunk_offsets
296 .scalar_at(idx)?
297 .as_primitive()
298 .as_::<usize>()
299 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
300 }
301
302 #[inline]
311 pub fn offset_within_chunk(&self) -> Option<usize> {
312 self.offset_within_chunk
313 }
314
315 #[inline]
316 pub fn indices_ptype(&self) -> VortexResult<PType> {
317 PType::try_from(self.indices.dtype())
318 .map_err(|_| vortex_err!("indices dtype is not primitive"))
319 }
320
321 pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
322 if self.indices.len() > len {
323 vortex_bail!(
324 "Patch indices {} are longer than the array length {}",
325 self.indices.len(),
326 len
327 );
328 }
329 if self.values.dtype() != dtype {
330 vortex_bail!(
331 "Patch values dtype {} does not match array dtype {}",
332 self.values.dtype(),
333 dtype
334 );
335 }
336 let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
337 let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
338
339 Ok(PatchesMetadata::new(
340 self.indices.len(),
341 self.offset,
342 self.indices.dtype().as_ptype(),
343 chunk_offsets_len,
344 chunk_offsets_ptype,
345 self.offset_within_chunk,
346 ))
347 }
348
349 pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
350 unsafe {
352 Ok(Self::new_unchecked(
353 self.array_len,
354 self.offset,
355 self.indices,
356 self.values.cast(values_dtype.clone())?,
357 self.chunk_offsets,
358 self.offset_within_chunk,
359 ))
360 }
361 }
362
363 pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
365 self.search_index(index)?
366 .to_found()
367 .map(|patch_idx| self.values().scalar_at(patch_idx))
368 .transpose()
369 }
370
371 pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
389 if self.chunk_offsets.is_some() {
390 return self.search_index_chunked(index);
391 }
392
393 Self::search_index_binary_search(&self.indices, index + self.offset)
394 }
395
396 fn search_index_binary_search(
402 indices: &dyn Array,
403 needle: usize,
404 ) -> VortexResult<SearchResult> {
405 if indices.is_canonical() {
406 let primitive = indices.to_primitive();
407 match_each_integer_ptype!(primitive.ptype(), |T| {
408 let Ok(needle) = T::try_from(needle) else {
409 return Ok(SearchResult::NotFound(primitive.len()));
414 };
415 return primitive
416 .as_slice::<T>()
417 .search_sorted(&needle, SearchSortedSide::Left);
418 });
419 }
420 indices
421 .as_primitive_typed()
422 .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
423 }
424
425 fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
435 let Some(chunk_offsets) = &self.chunk_offsets else {
436 vortex_bail!("chunk_offsets is required to be set")
437 };
438
439 let Some(offset_within_chunk) = self.offset_within_chunk else {
440 vortex_bail!("offset_within_chunk is required to be set")
441 };
442
443 if index >= self.array_len() {
444 return Ok(SearchResult::NotFound(self.indices().len()));
445 }
446
447 let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
448
449 let base_offset = self.chunk_offset_at(0)?;
451
452 let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
453 .saturating_sub(offset_within_chunk);
460
461 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
462 self.chunk_offset_at(chunk_idx + 1)? - base_offset - offset_within_chunk
463 } else {
464 self.indices.len()
465 };
466
467 let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
468 let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
469
470 Ok(match result {
471 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
472 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
473 })
474 }
475
476 fn search_index_chunked_batch<T, O>(
482 &self,
483 indices: &[T],
484 chunk_offsets: &[O],
485 index: T,
486 ) -> VortexResult<SearchResult>
487 where
488 T: UnsignedPType,
489 O: UnsignedPType,
490 usize: TryFrom<T>,
491 usize: TryFrom<O>,
492 {
493 let Some(offset_within_chunk) = self.offset_within_chunk else {
494 vortex_bail!("offset_within_chunk is required to be set")
495 };
496
497 let chunk_idx = {
498 let Ok(index) = usize::try_from(index) else {
499 return Ok(SearchResult::NotFound(indices.len()));
501 };
502
503 if index >= self.array_len() {
504 return Ok(SearchResult::NotFound(self.indices().len()));
505 }
506
507 (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
508 };
509
510 let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
512 .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
513
514 let patches_start_idx = chunk_offset
515 .saturating_sub(offset_within_chunk);
522
523 let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
524 let base_offset_end = chunk_offsets[chunk_idx + 1];
525
526 let offset_within_chunk = O::from(offset_within_chunk)
527 .ok_or_else(|| vortex_err!("offset_within_chunk failed to convert to O"))?;
528
529 usize::try_from(base_offset_end - chunk_offsets[0] - offset_within_chunk)
530 .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
531 } else {
532 self.indices.len()
533 };
534
535 let Some(offset) = T::from(self.offset) else {
536 return Ok(SearchResult::NotFound(indices.len()));
538 };
539
540 let chunk_indices = &indices[patches_start_idx..patches_end_idx];
541 let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
542
543 Ok(match result {
544 SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
545 SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
546 })
547 }
548
549 pub fn min_index(&self) -> VortexResult<usize> {
551 let first = self
552 .indices
553 .scalar_at(0)?
554 .as_primitive()
555 .as_::<usize>()
556 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
557 Ok(first - self.offset)
558 }
559
560 pub fn max_index(&self) -> VortexResult<usize> {
562 let last = self
563 .indices
564 .scalar_at(self.indices.len() - 1)?
565 .as_primitive()
566 .as_::<usize>()
567 .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
568 Ok(last - self.offset)
569 }
570
571 pub fn filter(&self, mask: &Mask) -> VortexResult<Option<Self>> {
573 if mask.len() != self.array_len {
574 vortex_bail!(
575 "Filter mask length {} does not match array length {}",
576 mask.len(),
577 self.array_len
578 );
579 }
580
581 match mask.indices() {
582 AllOr::All => Ok(Some(self.clone())),
583 AllOr::None => Ok(None),
584 AllOr::Some(mask_indices) => {
585 let flat_indices = self.indices().to_primitive();
586 match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
587 filter_patches_with_mask(
588 flat_indices.as_slice::<I>(),
589 self.offset(),
590 self.values(),
591 mask_indices,
592 )
593 })
594 }
595 }
596 }
597
598 pub fn mask(&self, mask: &Mask) -> VortexResult<Option<Self>> {
602 if mask.len() != self.array_len {
603 vortex_bail!(
604 "Filter mask length {} does not match array length {}",
605 mask.len(),
606 self.array_len
607 );
608 }
609
610 let filter_mask = match mask.bit_buffer() {
611 AllOr::All => return Ok(None),
612 AllOr::None => return Ok(Some(self.clone())),
613 AllOr::Some(masked) => {
614 let patch_indices = self.indices().to_primitive();
615 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
616 let patch_indices = patch_indices.as_slice::<P>();
617 Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
618 #[allow(clippy::cast_possible_truncation)]
619 let idx = (patch_indices[i] as usize) - self.offset;
620 !masked.value(idx)
621 }))
622 })
623 }
624 };
625
626 if filter_mask.all_false() {
627 return Ok(None);
628 }
629
630 let filtered_indices = filter(&self.indices, &filter_mask)?;
632 let filtered_values = filter(&self.values, &filter_mask)?;
633
634 Ok(Some(Self {
635 array_len: self.array_len,
636 offset: self.offset,
637 indices: filtered_indices,
638 values: filtered_values,
639 chunk_offsets: None,
641 offset_within_chunk: self.offset_within_chunk,
642 }))
643 }
644
645 pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
647 let slice_start_idx = self.search_index(range.start)?.to_index();
648 let slice_end_idx = self.search_index(range.end)?.to_index();
649
650 if slice_start_idx == slice_end_idx {
651 return Ok(None);
652 }
653
654 let values = self.values().slice(slice_start_idx..slice_end_idx)?;
655 let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
656
657 let chunk_offsets = self
658 .chunk_offsets
659 .as_ref()
660 .map(|chunk_offsets| -> VortexResult<ArrayRef> {
661 let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
662 let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
663 let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
664 chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
665 })
666 .transpose()?;
667
668 let offset_within_chunk = chunk_offsets
669 .as_ref()
670 .map(|chunk_offsets| -> VortexResult<usize> {
671 let base_offset = chunk_offsets
672 .scalar_at(0)?
673 .as_primitive()
674 .as_::<usize>()
675 .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
676 Ok(slice_start_idx - base_offset)
677 })
678 .transpose()?;
679
680 Ok(Some(Self {
681 array_len: range.len(),
682 offset: range.start + self.offset(),
683 indices,
684 values,
685 chunk_offsets,
686 offset_within_chunk,
687 }))
688 }
689
690 const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
692
693 fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
694 (self.num_patches() as f64 / take_indices.len() as f64)
695 < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
696 }
697
698 pub fn take_with_nulls(&self, take_indices: &dyn Array) -> VortexResult<Option<Self>> {
702 if take_indices.is_empty() {
703 return Ok(None);
704 }
705
706 let take_indices = take_indices.to_primitive();
707 if self.is_map_faster_than_search(&take_indices) {
708 self.take_map(take_indices, true)
709 } else {
710 self.take_search(take_indices, true)
711 }
712 }
713
714 pub fn take(&self, take_indices: &dyn Array) -> VortexResult<Option<Self>> {
718 if take_indices.is_empty() {
719 return Ok(None);
720 }
721
722 let take_indices = take_indices.to_primitive();
723 if self.is_map_faster_than_search(&take_indices) {
724 self.take_map(take_indices, false)
725 } else {
726 self.take_search(take_indices, false)
727 }
728 }
729
730 #[expect(
731 clippy::cognitive_complexity,
732 reason = "complexity is from nested match_each_* macros"
733 )]
734 pub fn take_search(
735 &self,
736 take_indices: PrimitiveArray,
737 include_nulls: bool,
738 ) -> VortexResult<Option<Self>> {
739 let take_indices_validity = take_indices.validity();
740 let patch_indices = self.indices.to_primitive();
741 let chunk_offsets = self.chunk_offsets().as_ref().map(|co| co.to_primitive());
742
743 let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
744 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
745 let patch_indices_slice = patch_indices.as_slice::<PatchT>();
746 match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
747 let take_slice = take_indices.as_slice::<TakeT>();
748
749 if let Some(chunk_offsets) = chunk_offsets {
750 match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
751 let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
752 take_indices_with_search_fn(
753 patch_indices_slice,
754 take_slice,
755 take_indices.validity_mask()?,
756 include_nulls,
757 |take_idx| {
758 self.search_index_chunked_batch(
759 patch_indices_slice,
760 chunk_offsets,
761 take_idx,
762 )
763 },
764 )?
765 })
766 } else {
767 take_indices_with_search_fn(
768 patch_indices_slice,
769 take_slice,
770 take_indices.validity_mask()?,
771 include_nulls,
772 |take_idx| {
773 let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
774 return Ok(SearchResult::NotFound(patch_indices_slice.len()));
776 };
777
778 patch_indices_slice
779 .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
780 },
781 )?
782 }
783 })
784 });
785
786 if new_indices.is_empty() {
787 return Ok(None);
788 }
789
790 let new_indices = new_indices.into_array();
791 let new_array_len = take_indices.len();
792 let values_validity = take_indices_validity.take(&new_indices)?;
793
794 Ok(Some(Self {
795 array_len: new_array_len,
796 offset: 0,
797 indices: new_indices.clone(),
798 values: self
799 .values()
800 .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
801 chunk_offsets: None,
802 offset_within_chunk: Some(0), }))
804 }
805
806 pub fn take_map(
807 &self,
808 take_indices: PrimitiveArray,
809 include_nulls: bool,
810 ) -> VortexResult<Option<Self>> {
811 let indices = self.indices.to_primitive();
812 let new_length = take_indices.len();
813
814 let min_index = self.min_index()?;
815 let max_index = self.max_index()?;
816
817 let Some((new_sparse_indices, value_indices)) =
818 match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
819 match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
820 take_map::<_, TakeIndices>(
821 indices.as_slice::<Indices>(),
822 take_indices,
823 self.offset(),
824 min_index,
825 max_index,
826 include_nulls,
827 )?
828 })
829 })
830 else {
831 return Ok(None);
832 };
833
834 let taken_values = self.values().take(value_indices)?;
835
836 Ok(Some(Patches {
837 array_len: new_length,
838 offset: 0,
839 indices: new_sparse_indices,
840 values: taken_values,
841 chunk_offsets: None,
843 offset_within_chunk: self.offset_within_chunk,
844 }))
845 }
846
847 pub unsafe fn apply_to_buffer<P: NativePType>(&self, buffer: &mut [P], validity: &mut MaskMut) {
858 let patch_indices = self.indices.to_primitive();
859 let patch_values = self.values.to_primitive();
860 let patches_validity = patch_values.validity();
861
862 let patch_values_slice = patch_values.as_slice::<P>();
863 match_each_unsigned_integer_ptype!(patch_indices.ptype(), |I| {
864 let patch_indices_slice = patch_indices.as_slice::<I>();
865
866 unsafe {
872 apply_patches_to_buffer_inner(
873 buffer,
874 validity,
875 patch_indices_slice,
876 self.offset,
877 patch_values_slice,
878 patches_validity,
879 );
880 }
881 });
882 }
883
884 pub fn map_values<F>(self, f: F) -> VortexResult<Self>
885 where
886 F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
887 {
888 let values = f(self.values)?;
889 if self.indices.len() != values.len() {
890 vortex_bail!(
891 "map_values must preserve length: expected {} received {}",
892 self.indices.len(),
893 values.len()
894 )
895 }
896
897 Ok(Self {
898 array_len: self.array_len,
899 offset: self.offset,
900 indices: self.indices,
901 values,
902 chunk_offsets: self.chunk_offsets,
903 offset_within_chunk: self.offset_within_chunk,
904 })
905 }
906}
907
908unsafe fn apply_patches_to_buffer_inner<P, I>(
918 buffer: &mut [P],
919 validity: &mut MaskMut,
920 patch_indices: &[I],
921 patch_offset: usize,
922 patch_values: &[P],
923 patches_validity: &Validity,
924) where
925 P: NativePType,
926 I: UnsignedPType,
927{
928 debug_assert!(!patch_indices.is_empty());
929 debug_assert_eq!(patch_indices.len(), patch_values.len());
930 debug_assert_eq!(buffer.len(), validity.len());
931
932 match patches_validity {
933 Validity::NonNullable | Validity::AllValid => {
934 for (&i, &value) in patch_indices.iter().zip_eq(patch_values) {
936 let index = i.as_() - patch_offset;
937
938 unsafe {
941 validity.set_unchecked(index);
942 }
943 buffer[index] = value;
944 }
945 }
946 Validity::AllInvalid => {
947 for &i in patch_indices {
949 let index = i.as_() - patch_offset;
950
951 unsafe {
954 validity.unset_unchecked(index);
955 }
956 }
957 }
958 Validity::Array(array) => {
959 let bool_array = array.to_bool();
961 let mask = bool_array.to_bit_buffer();
962 for (patch_idx, (&i, &value)) in patch_indices.iter().zip_eq(patch_values).enumerate() {
963 let index = i.as_() - patch_offset;
964
965 unsafe {
968 if mask.value_unchecked(patch_idx) {
969 buffer[index] = value;
970 validity.set_unchecked(index);
971 } else {
972 validity.unset_unchecked(index);
973 }
974 }
975 }
976 }
977 }
978}
979
980fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
981 indices: &[I],
982 take_indices: PrimitiveArray,
983 indices_offset: usize,
984 min_index: usize,
985 max_index: usize,
986 include_nulls: bool,
987) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
988where
989 usize: TryFrom<T>,
990 VortexError: From<<I as TryFrom<usize>>::Error>,
991{
992 let take_indices_len = take_indices.len();
993 let take_indices_validity = take_indices.validity();
994 let take_indices_validity_mask = take_indices_validity.to_mask(take_indices_len);
995 let take_indices = take_indices.as_slice::<T>();
996 let offset_i = I::try_from(indices_offset)?;
997
998 let sparse_index_to_value_index: HashMap<I, usize> = indices
999 .iter()
1000 .copied()
1001 .map(|idx| idx - offset_i)
1002 .enumerate()
1003 .map(|(value_index, sparse_index)| (sparse_index, value_index))
1004 .collect();
1005
1006 let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1007 let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1008
1009 for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
1010 let ti = usize::try_from(take_idx)
1011 .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
1012
1013 let is_null = match take_indices_validity_mask.bit_buffer() {
1015 AllOr::All => false,
1016 AllOr::None => true,
1017 AllOr::Some(buf) => !buf.value(idx_in_take),
1018 };
1019 if is_null {
1020 if include_nulls {
1021 new_sparse_indices.push(idx_in_take as u64);
1022 value_indices.push(0);
1023 }
1024 } else if ti >= min_index && ti <= max_index {
1025 let ti_as_i = I::try_from(ti)
1026 .map_err(|_| vortex_err!("take index does not fit in index type"))?;
1027 if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
1028 new_sparse_indices.push(idx_in_take as u64);
1029 value_indices.push(value_index as u64);
1030 }
1031 }
1032 }
1033
1034 if new_sparse_indices.is_empty() {
1035 return Ok(None);
1036 }
1037
1038 let new_sparse_indices = new_sparse_indices.into_array();
1039 let values_validity = take_indices_validity.take(&new_sparse_indices)?;
1040 Ok(Some((
1041 new_sparse_indices,
1042 PrimitiveArray::new(value_indices, values_validity).into_array(),
1043 )))
1044}
1045
1046fn filter_patches_with_mask<T: IntegerPType>(
1052 patch_indices: &[T],
1053 offset: usize,
1054 patch_values: &dyn Array,
1055 mask_indices: &[usize],
1056) -> VortexResult<Option<Patches>> {
1057 let true_count = mask_indices.len();
1058 let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
1059 let mut new_mask_indices = Vec::with_capacity(true_count);
1060
1061 const STRIDE: usize = 4;
1065
1066 let mut mask_idx = 0usize;
1067 let mut true_idx = 0usize;
1068
1069 while mask_idx < patch_indices.len() && true_idx < true_count {
1070 if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1077 let left_min = patch_indices[mask_idx]
1079 .to_usize()
1080 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1081 - offset;
1082 let left_max = patch_indices[mask_idx + STRIDE]
1083 .to_usize()
1084 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1085 - offset;
1086 let right_min = mask_indices[true_idx];
1087 let right_max = mask_indices[true_idx + STRIDE];
1088
1089 if left_min > right_max {
1090 true_idx += STRIDE;
1092 continue;
1093 } else if right_min > left_max {
1094 mask_idx += STRIDE;
1095 continue;
1096 } else {
1097 }
1099 }
1100
1101 let left = patch_indices[mask_idx]
1104 .to_usize()
1105 .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1106 - offset;
1107 let right = mask_indices[true_idx];
1108
1109 match left.cmp(&right) {
1110 Ordering::Less => {
1111 mask_idx += 1;
1112 }
1113 Ordering::Greater => {
1114 true_idx += 1;
1115 }
1116 Ordering::Equal => {
1117 new_mask_indices.push(mask_idx);
1119 new_patch_indices.push(true_idx as u64);
1120
1121 mask_idx += 1;
1122 true_idx += 1;
1123 }
1124 }
1125 }
1126
1127 if new_mask_indices.is_empty() {
1128 return Ok(None);
1129 }
1130
1131 let new_patch_indices = new_patch_indices.into_array();
1132 let new_patch_values = filter(
1133 patch_values,
1134 &Mask::from_indices(patch_values.len(), new_mask_indices),
1135 )?;
1136
1137 Ok(Some(Patches::new(
1138 true_count,
1139 0,
1140 new_patch_indices,
1141 new_patch_values,
1142 None,
1144 )?))
1145}
1146
1147fn take_indices_with_search_fn<
1148 I: UnsignedPType,
1149 T: IntegerPType,
1150 F: Fn(I) -> VortexResult<SearchResult>,
1151>(
1152 indices: &[I],
1153 take_indices: &[T],
1154 take_validity: Mask,
1155 include_nulls: bool,
1156 search_fn: F,
1157) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1158 let mut values_indices = BufferMut::with_capacity(take_indices.len());
1159 let mut new_indices = BufferMut::with_capacity(take_indices.len());
1160
1161 for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1162 if !take_validity.value(new_patch_idx) {
1163 if include_nulls {
1164 values_indices.push(0u64);
1166 new_indices.push(new_patch_idx as u64);
1167 }
1168 continue;
1169 } else {
1170 let search_result = match I::from(take_idx) {
1171 Some(idx) => search_fn(idx)?,
1172 None => SearchResult::NotFound(indices.len()),
1173 };
1174
1175 if let Some(patch_idx) = search_result.to_found() {
1176 values_indices.push(patch_idx as u64);
1177 new_indices.push(new_patch_idx as u64);
1178 }
1179 }
1180 }
1181
1182 Ok((values_indices, new_indices))
1183}
1184
1185#[cfg(test)]
1186mod test {
1187 use vortex_buffer::BufferMut;
1188 use vortex_buffer::buffer;
1189 use vortex_mask::Mask;
1190
1191 use crate::IntoArray;
1192 use crate::ToCanonical;
1193 use crate::arrays::PrimitiveArray;
1194 use crate::assert_arrays_eq;
1195 use crate::patches::Patches;
1196 use crate::search_sorted::SearchResult;
1197 use crate::validity::Validity;
1198
1199 #[test]
1200 fn test_filter() {
1201 let patches = Patches::new(
1202 100,
1203 0,
1204 buffer![10u32, 11, 20].into_array(),
1205 buffer![100, 110, 200].into_array(),
1206 None,
1207 )
1208 .unwrap();
1209
1210 let filtered = patches
1211 .filter(&Mask::from_indices(100, vec![10, 20, 30]))
1212 .unwrap()
1213 .unwrap();
1214
1215 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1216 assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1217 }
1218
1219 #[test]
1220 fn take_with_nulls() {
1221 let patches = Patches::new(
1222 20,
1223 0,
1224 buffer![2u64, 9, 15].into_array(),
1225 PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1226 None,
1227 )
1228 .unwrap();
1229
1230 let taken = patches
1231 .take(
1232 &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1233 .into_array(),
1234 )
1235 .unwrap()
1236 .unwrap();
1237 let primitive_values = taken.values().to_primitive();
1238 let primitive_indices = taken.indices().to_primitive();
1239 assert_eq!(taken.array_len(), 2);
1240 assert_arrays_eq!(
1241 primitive_values,
1242 PrimitiveArray::from_option_iter([Some(44i32)])
1243 );
1244 assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1245 assert_eq!(
1246 primitive_values.validity_mask().unwrap(),
1247 Mask::from_iter(vec![true])
1248 );
1249 }
1250
1251 #[test]
1252 fn take_search_with_nulls_chunked() {
1253 let patches = Patches::new(
1254 20,
1255 0,
1256 buffer![2u64, 9, 15].into_array(),
1257 buffer![33_i32, 44, 55].into_array(),
1258 Some(buffer![0u64].into_array()),
1259 )
1260 .unwrap();
1261
1262 let taken = patches
1263 .take_search(
1264 PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1265 true,
1266 )
1267 .unwrap()
1268 .unwrap();
1269
1270 let primitive_values = taken.values().to_primitive();
1271 assert_eq!(taken.array_len(), 2);
1272 assert_arrays_eq!(
1273 primitive_values,
1274 PrimitiveArray::from_option_iter([Some(44i32), None])
1275 );
1276 assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1277
1278 assert_eq!(
1279 primitive_values.validity_mask().unwrap(),
1280 Mask::from_iter([true, false])
1281 );
1282 }
1283
1284 #[test]
1285 fn take_search_chunked_multiple_chunks() {
1286 let patches = Patches::new(
1287 2048,
1288 0,
1289 buffer![100u64, 500, 1200, 1800].into_array(),
1290 buffer![10_i32, 20, 30, 40].into_array(),
1291 Some(buffer![0u64, 2].into_array()),
1292 )
1293 .unwrap();
1294
1295 let taken = patches
1296 .take_search(
1297 PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1298 true,
1299 )
1300 .unwrap()
1301 .unwrap();
1302
1303 assert_eq!(taken.array_len(), 3);
1304 assert_arrays_eq!(
1305 taken.values(),
1306 PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1307 );
1308 }
1309
1310 #[test]
1311 fn take_search_chunked_indices_with_no_patches() {
1312 let patches = Patches::new(
1313 20,
1314 0,
1315 buffer![2u64, 9, 15].into_array(),
1316 buffer![33_i32, 44, 55].into_array(),
1317 Some(buffer![0u64].into_array()),
1318 )
1319 .unwrap();
1320
1321 let taken = patches
1322 .take_search(
1323 PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1324 true,
1325 )
1326 .unwrap();
1327
1328 assert!(taken.is_none());
1329 }
1330
1331 #[test]
1332 fn take_search_chunked_interleaved() {
1333 let patches = Patches::new(
1334 30,
1335 0,
1336 buffer![5u64, 10, 20, 25].into_array(),
1337 buffer![100_i32, 200, 300, 400].into_array(),
1338 Some(buffer![0u64].into_array()),
1339 )
1340 .unwrap();
1341
1342 let taken = patches
1343 .take_search(
1344 PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1345 true,
1346 )
1347 .unwrap()
1348 .unwrap();
1349
1350 assert_eq!(taken.array_len(), 4);
1351 assert_arrays_eq!(
1352 taken.values(),
1353 PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1354 );
1355 }
1356
1357 #[test]
1358 fn test_take_search_multiple_chunk_offsets() {
1359 let patches = Patches::new(
1360 1500,
1361 0,
1362 BufferMut::from_iter(0..1500u64).into_array(),
1363 BufferMut::from_iter(0..1500i32).into_array(),
1364 Some(buffer![0u64, 1024u64].into_array()),
1365 )
1366 .unwrap();
1367
1368 let taken = patches
1369 .take_search(
1370 PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1371 false,
1372 )
1373 .unwrap()
1374 .unwrap();
1375
1376 assert_eq!(taken.array_len(), 1500);
1377 }
1378
1379 #[test]
1380 fn test_slice() {
1381 let values = buffer![15_u32, 135, 13531, 42].into_array();
1382 let indices = buffer![10_u64, 11, 50, 100].into_array();
1383
1384 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1385
1386 let sliced = patches.slice(15..100).unwrap().unwrap();
1387 assert_eq!(sliced.array_len(), 100 - 15);
1388 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1389 }
1390
1391 #[test]
1392 fn doubly_sliced() {
1393 let values = buffer![15_u32, 135, 13531, 42].into_array();
1394 let indices = buffer![10_u64, 11, 50, 100].into_array();
1395
1396 let patches = Patches::new(101, 0, indices, values, None).unwrap();
1397
1398 let sliced = patches.slice(15..100).unwrap().unwrap();
1399 assert_eq!(sliced.array_len(), 100 - 15);
1400 assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1401
1402 let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1403 assert_arrays_eq!(
1404 doubly_sliced.values(),
1405 PrimitiveArray::from_iter([13531u32])
1406 );
1407 }
1408
1409 #[test]
1410 fn test_mask_all_true() {
1411 let patches = Patches::new(
1412 10,
1413 0,
1414 buffer![2u64, 5, 8].into_array(),
1415 buffer![100i32, 200, 300].into_array(),
1416 None,
1417 )
1418 .unwrap();
1419
1420 let mask = Mask::new_true(10);
1421 let masked = patches.mask(&mask).unwrap();
1422 assert!(masked.is_none());
1423 }
1424
1425 #[test]
1426 fn test_mask_all_false() {
1427 let patches = Patches::new(
1428 10,
1429 0,
1430 buffer![2u64, 5, 8].into_array(),
1431 buffer![100i32, 200, 300].into_array(),
1432 None,
1433 )
1434 .unwrap();
1435
1436 let mask = Mask::new_false(10);
1437 let masked = patches.mask(&mask).unwrap().unwrap();
1438
1439 assert_arrays_eq!(
1441 masked.values(),
1442 PrimitiveArray::from_iter([100i32, 200, 300])
1443 );
1444 assert!(masked.values().is_valid(0).unwrap());
1445 assert!(masked.values().is_valid(1).unwrap());
1446 assert!(masked.values().is_valid(2).unwrap());
1447
1448 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1450 }
1451
1452 #[test]
1453 fn test_mask_partial() {
1454 let patches = Patches::new(
1455 10,
1456 0,
1457 buffer![2u64, 5, 8].into_array(),
1458 buffer![100i32, 200, 300].into_array(),
1459 None,
1460 )
1461 .unwrap();
1462
1463 let mask = Mask::from_iter([
1465 false, false, true, false, false, false, false, false, true, false,
1466 ]);
1467 let masked = patches.mask(&mask).unwrap().unwrap();
1468
1469 assert_eq!(masked.values().len(), 1);
1471 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1472
1473 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1475 }
1476
1477 #[test]
1478 fn test_mask_with_offset() {
1479 let patches = Patches::new(
1480 10,
1481 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1484 None,
1485 )
1486 .unwrap();
1487
1488 let mask = Mask::from_iter([
1490 false, false, true, false, false, false, false, false, false, false,
1491 ]);
1492
1493 let masked = patches.mask(&mask).unwrap().unwrap();
1494 assert_eq!(masked.array_len(), 10);
1495 assert_eq!(masked.offset(), 5);
1496 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1497 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1498 }
1499
1500 #[test]
1501 fn test_mask_nullable_values() {
1502 let patches = Patches::new(
1503 10,
1504 0,
1505 buffer![2u64, 5, 8].into_array(),
1506 PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1507 None,
1508 )
1509 .unwrap();
1510
1511 let mask = Mask::from_iter([
1513 false, false, true, false, false, false, false, false, false, false,
1514 ]);
1515 let masked = patches.mask(&mask).unwrap().unwrap();
1516
1517 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1519
1520 let masked_values = masked.values().to_primitive();
1522 assert_eq!(masked_values.len(), 2);
1523 assert!(!masked_values.is_valid(0).unwrap()); assert!(masked_values.is_valid(1).unwrap()); assert_eq!(
1526 i32::try_from(&masked_values.scalar_at(1).unwrap()).unwrap(),
1527 300i32
1528 );
1529 }
1530
1531 #[test]
1532 fn test_filter_keep_all() {
1533 let patches = Patches::new(
1534 10,
1535 0,
1536 buffer![2u64, 5, 8].into_array(),
1537 buffer![100i32, 200, 300].into_array(),
1538 None,
1539 )
1540 .unwrap();
1541
1542 let mask = Mask::from_indices(10, (0..10).collect());
1544 let filtered = patches.filter(&mask).unwrap().unwrap();
1545
1546 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1547 assert_arrays_eq!(
1548 filtered.values(),
1549 PrimitiveArray::from_iter([100i32, 200, 300])
1550 );
1551 }
1552
1553 #[test]
1554 fn test_filter_none() {
1555 let patches = Patches::new(
1556 10,
1557 0,
1558 buffer![2u64, 5, 8].into_array(),
1559 buffer![100i32, 200, 300].into_array(),
1560 None,
1561 )
1562 .unwrap();
1563
1564 let mask = Mask::from_indices(10, vec![]);
1566 let filtered = patches.filter(&mask).unwrap();
1567 assert!(filtered.is_none());
1568 }
1569
1570 #[test]
1571 fn test_filter_with_indices() {
1572 let patches = Patches::new(
1573 10,
1574 0,
1575 buffer![2u64, 5, 8].into_array(),
1576 buffer![100i32, 200, 300].into_array(),
1577 None,
1578 )
1579 .unwrap();
1580
1581 let mask = Mask::from_indices(10, vec![2, 5, 9]);
1583 let filtered = patches.filter(&mask).unwrap().unwrap();
1584
1585 assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1587 }
1588
1589 #[test]
1590 fn test_slice_full_range() {
1591 let patches = Patches::new(
1592 10,
1593 0,
1594 buffer![2u64, 5, 8].into_array(),
1595 buffer![100i32, 200, 300].into_array(),
1596 None,
1597 )
1598 .unwrap();
1599
1600 let sliced = patches.slice(0..10).unwrap().unwrap();
1601
1602 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1603 assert_arrays_eq!(
1604 sliced.values(),
1605 PrimitiveArray::from_iter([100i32, 200, 300])
1606 );
1607 }
1608
1609 #[test]
1610 fn test_slice_partial() {
1611 let patches = Patches::new(
1612 10,
1613 0,
1614 buffer![2u64, 5, 8].into_array(),
1615 buffer![100i32, 200, 300].into_array(),
1616 None,
1617 )
1618 .unwrap();
1619
1620 let sliced = patches.slice(3..8).unwrap().unwrap();
1622
1623 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1625 assert_eq!(sliced.array_len(), 5); assert_eq!(sliced.offset(), 3); }
1628
1629 #[test]
1630 fn test_slice_no_patches() {
1631 let patches = Patches::new(
1632 10,
1633 0,
1634 buffer![2u64, 5, 8].into_array(),
1635 buffer![100i32, 200, 300].into_array(),
1636 None,
1637 )
1638 .unwrap();
1639
1640 let sliced = patches.slice(6..7).unwrap();
1642 assert!(sliced.is_none());
1643 }
1644
1645 #[test]
1646 fn test_slice_with_offset() {
1647 let patches = Patches::new(
1648 10,
1649 5, buffer![7u64, 10, 13].into_array(), buffer![100i32, 200, 300].into_array(),
1652 None,
1653 )
1654 .unwrap();
1655
1656 let sliced = patches.slice(3..8).unwrap().unwrap();
1658
1659 assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1661 assert_eq!(sliced.offset(), 8); }
1663
1664 #[test]
1665 fn test_patch_values() {
1666 let patches = Patches::new(
1667 10,
1668 0,
1669 buffer![2u64, 5, 8].into_array(),
1670 buffer![100i32, 200, 300].into_array(),
1671 None,
1672 )
1673 .unwrap();
1674
1675 let values = patches.values().to_primitive();
1676 assert_eq!(
1677 i32::try_from(&values.scalar_at(0).unwrap()).unwrap(),
1678 100i32
1679 );
1680 assert_eq!(
1681 i32::try_from(&values.scalar_at(1).unwrap()).unwrap(),
1682 200i32
1683 );
1684 assert_eq!(
1685 i32::try_from(&values.scalar_at(2).unwrap()).unwrap(),
1686 300i32
1687 );
1688 }
1689
1690 #[test]
1691 fn test_indices_range() {
1692 let patches = Patches::new(
1693 10,
1694 0,
1695 buffer![2u64, 5, 8].into_array(),
1696 buffer![100i32, 200, 300].into_array(),
1697 None,
1698 )
1699 .unwrap();
1700
1701 assert_eq!(patches.min_index().unwrap(), 2);
1702 assert_eq!(patches.max_index().unwrap(), 8);
1703 }
1704
1705 #[test]
1706 fn test_search_index() {
1707 let patches = Patches::new(
1708 10,
1709 0,
1710 buffer![2u64, 5, 8].into_array(),
1711 buffer![100i32, 200, 300].into_array(),
1712 None,
1713 )
1714 .unwrap();
1715
1716 assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1718 assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1719 assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1720
1721 assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1723 assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1724 assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1725 assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1726 }
1727
1728 #[test]
1729 fn test_mask_boundary_patches() {
1730 let patches = Patches::new(
1732 10,
1733 0,
1734 buffer![0u64, 9].into_array(),
1735 buffer![100i32, 200].into_array(),
1736 None,
1737 )
1738 .unwrap();
1739
1740 let mask = Mask::from_iter([
1741 true, false, false, false, false, false, false, false, false, false,
1742 ]);
1743 let masked = patches.mask(&mask).unwrap();
1744 assert!(masked.is_some());
1745 let masked = masked.unwrap();
1746 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1747 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1748 }
1749
1750 #[test]
1751 fn test_mask_all_patches_removed() {
1752 let patches = Patches::new(
1754 10,
1755 0,
1756 buffer![2u64, 5, 8].into_array(),
1757 buffer![100i32, 200, 300].into_array(),
1758 None,
1759 )
1760 .unwrap();
1761
1762 let mask = Mask::from_iter([
1764 false, false, true, false, false, true, false, false, true, false,
1765 ]);
1766 let masked = patches.mask(&mask).unwrap();
1767 assert!(masked.is_none());
1768 }
1769
1770 #[test]
1771 fn test_mask_no_patches_removed() {
1772 let patches = Patches::new(
1774 10,
1775 0,
1776 buffer![2u64, 5, 8].into_array(),
1777 buffer![100i32, 200, 300].into_array(),
1778 None,
1779 )
1780 .unwrap();
1781
1782 let mask = Mask::from_iter([
1784 true, false, false, true, false, false, true, false, false, true,
1785 ]);
1786 let masked = patches.mask(&mask).unwrap().unwrap();
1787
1788 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1789 assert_arrays_eq!(
1790 masked.values(),
1791 PrimitiveArray::from_iter([100i32, 200, 300])
1792 );
1793 }
1794
1795 #[test]
1796 fn test_mask_single_patch() {
1797 let patches = Patches::new(
1799 5,
1800 0,
1801 buffer![2u64].into_array(),
1802 buffer![42i32].into_array(),
1803 None,
1804 )
1805 .unwrap();
1806
1807 let mask = Mask::from_iter([false, false, true, false, false]);
1809 let masked = patches.mask(&mask).unwrap();
1810 assert!(masked.is_none());
1811
1812 let mask = Mask::from_iter([true, false, false, true, false]);
1814 let masked = patches.mask(&mask).unwrap().unwrap();
1815 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1816 }
1817
1818 #[test]
1819 fn test_mask_contiguous_patches() {
1820 let patches = Patches::new(
1822 10,
1823 0,
1824 buffer![3u64, 4, 5, 6].into_array(),
1825 buffer![100i32, 200, 300, 400].into_array(),
1826 None,
1827 )
1828 .unwrap();
1829
1830 let mask = Mask::from_iter([
1832 false, false, false, false, true, true, false, false, false, false,
1833 ]);
1834 let masked = patches.mask(&mask).unwrap().unwrap();
1835
1836 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1837 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1838 }
1839
1840 #[test]
1841 fn test_mask_with_large_offset() {
1842 let patches = Patches::new(
1844 20,
1845 15,
1846 buffer![16u64, 17, 19].into_array(), buffer![100i32, 200, 300].into_array(),
1848 None,
1849 )
1850 .unwrap();
1851
1852 let mask = Mask::from_iter([
1854 false, false, true, false, false, false, false, false, false, false, false, false,
1855 false, false, false, false, false, false, false, false,
1856 ]);
1857 let masked = patches.mask(&mask).unwrap().unwrap();
1858
1859 assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1860 assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1861 }
1862
1863 #[test]
1864 #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1865 fn test_mask_wrong_length() {
1866 let patches = Patches::new(
1867 10,
1868 0,
1869 buffer![2u64, 5, 8].into_array(),
1870 buffer![100i32, 200, 300].into_array(),
1871 None,
1872 )
1873 .unwrap();
1874
1875 let mask = Mask::from_iter([false, false, true, false, false]);
1877 patches.mask(&mask).unwrap();
1878 }
1879
1880 #[test]
1881 fn test_chunk_offsets_search() {
1882 let indices = buffer![100u64, 200, 3000, 3100].into_array();
1883 let values = buffer![10i32, 20, 30, 40].into_array();
1884 let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
1885 let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
1886
1887 assert!(patches.chunk_offsets.is_some());
1888
1889 assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
1891 assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
1892
1893 assert_eq!(
1895 patches.search_index(1500).unwrap(),
1896 SearchResult::NotFound(2)
1897 );
1898 assert_eq!(
1899 patches.search_index(2000).unwrap(),
1900 SearchResult::NotFound(2)
1901 );
1902
1903 assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
1905 assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
1906
1907 assert_eq!(
1908 patches.search_index(1024).unwrap(),
1909 SearchResult::NotFound(2)
1910 );
1911 }
1912
1913 #[test]
1914 fn test_chunk_offsets_with_slice() {
1915 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
1916 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
1917 let chunk_offsets = buffer![0u64, 2, 6].into_array();
1918 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
1919
1920 let sliced = patches.slice(1000..2200).unwrap().unwrap();
1921
1922 assert!(sliced.chunk_offsets.is_some());
1923 assert_eq!(sliced.offset(), 1000);
1924
1925 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
1926 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
1927 assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
1928
1929 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
1930 }
1931
1932 #[test]
1933 fn test_chunk_offsets_with_slice_after_first_chunk() {
1934 let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
1935 let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
1936 let chunk_offsets = buffer![0u64, 2, 6].into_array();
1937 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
1938
1939 let sliced = patches.slice(1300..2200).unwrap().unwrap();
1940
1941 assert!(sliced.chunk_offsets.is_some());
1942 assert_eq!(sliced.offset(), 1300);
1943
1944 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
1945 assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
1946 assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
1947 assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
1948 assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
1949 }
1950
1951 #[test]
1952 fn test_chunk_offsets_slice_empty_result() {
1953 let indices = buffer![100u64, 200, 3000, 3100].into_array();
1954 let values = buffer![10i32, 20, 30, 40].into_array();
1955 let chunk_offsets = buffer![0u64, 2].into_array();
1956 let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
1957
1958 let sliced = patches.slice(1000..2000).unwrap();
1959 assert!(sliced.is_none());
1960 }
1961
1962 #[test]
1963 fn test_chunk_offsets_slice_single_patch() {
1964 let indices = buffer![100u64, 1200, 1300, 2500].into_array();
1965 let values = buffer![10i32, 20, 30, 40].into_array();
1966 let chunk_offsets = buffer![0u64, 1, 3].into_array();
1967 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
1968
1969 let sliced = patches.slice(1100..1250).unwrap().unwrap();
1970
1971 assert_eq!(sliced.num_patches(), 1);
1972 assert_eq!(sliced.offset(), 1100);
1973 assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
1975 assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
1976 }
1977
1978 #[test]
1979 fn test_chunk_offsets_slice_across_chunks() {
1980 let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
1981 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
1982 let chunk_offsets = buffer![0u64, 2, 4].into_array();
1983 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
1984
1985 let sliced = patches.slice(150..2150).unwrap().unwrap();
1986
1987 assert_eq!(sliced.num_patches(), 4);
1988 assert_eq!(sliced.offset(), 150);
1989
1990 assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); }
1995
1996 #[test]
1997 fn test_chunk_offsets_boundary_searches() {
1998 let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
1999 let values = buffer![10i32, 20, 30, 40, 50].into_array();
2000 let chunk_offsets = buffer![0u64, 1, 4].into_array();
2001 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2002
2003 assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2004 assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2005 assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2006 assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2007 assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2008
2009 assert_eq!(
2010 patches.search_index(1022).unwrap(),
2011 SearchResult::NotFound(0)
2012 );
2013 assert_eq!(
2014 patches.search_index(2046).unwrap(),
2015 SearchResult::NotFound(3)
2016 );
2017 }
2018
2019 #[test]
2020 fn test_chunk_offsets_slice_edge_cases() {
2021 let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2022 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2023 let chunk_offsets = buffer![0u64, 3, 5].into_array();
2024 let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2025
2026 let sliced = patches.slice(0..10).unwrap().unwrap();
2028 assert_eq!(sliced.num_patches(), 2);
2029 assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2030 assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2031
2032 let sliced = patches.slice(2040..3000).unwrap().unwrap();
2034 assert_eq!(sliced.num_patches(), 2); assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); }
2038
2039 #[test]
2040 fn test_chunk_offsets_slice_nested() {
2041 let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2042 let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2043 let chunk_offsets = buffer![0u64].into_array();
2044 let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2045
2046 let sliced1 = patches.slice(150..550).unwrap().unwrap();
2047 assert_eq!(sliced1.num_patches(), 4); let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2050 assert_eq!(sliced2.num_patches(), 1); assert_eq!(sliced2.offset(), 250);
2052
2053 assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); assert_eq!(
2055 sliced2.search_index(150).unwrap(),
2056 SearchResult::NotFound(1)
2057 );
2058 }
2059
2060 #[test]
2061 fn test_index_larger_than_length() {
2062 let chunk_offsets = buffer![0u64].into_array();
2063 let indices = buffer![1023u64].into_array();
2064 let values = buffer![42i32].into_array();
2065 let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2066 assert_eq!(
2067 patches.search_index(2048).unwrap(),
2068 SearchResult::NotFound(1)
2069 );
2070 }
2071}