Skip to main content

vortex_array/
patches.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use num_traits::NumCast;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::BufferMut;
12use vortex_error::VortexError;
13use vortex_error::VortexExpect as _;
14use vortex_error::VortexResult;
15use vortex_error::vortex_bail;
16use vortex_error::vortex_ensure;
17use vortex_error::vortex_err;
18use vortex_mask::AllOr;
19use vortex_mask::Mask;
20use vortex_utils::aliases::hash_map::HashMap;
21
22use crate::ArrayRef;
23use crate::ArraySlots;
24use crate::ExecutionCtx;
25use crate::IntoArray;
26use crate::LEGACY_SESSION;
27#[expect(deprecated)]
28use crate::ToCanonical as _;
29use crate::VortexSessionExecute;
30use crate::arrays::PrimitiveArray;
31use crate::arrays::primitive::PrimitiveArrayExt;
32use crate::builtins::ArrayBuiltins;
33use crate::dtype::DType;
34use crate::dtype::IntegerPType;
35use crate::dtype::NativePType;
36use crate::dtype::Nullability;
37use crate::dtype::Nullability::NonNullable;
38use crate::dtype::PType;
39use crate::dtype::UnsignedPType;
40use crate::match_each_integer_ptype;
41use crate::match_each_unsigned_integer_ptype;
42use crate::scalar::PValue;
43use crate::scalar::Scalar;
44use crate::search_sorted::SearchResult;
45use crate::search_sorted::SearchSorted;
46use crate::search_sorted::SearchSortedSide;
47use crate::validity::Validity;
48
49/// One patch index offset is stored for each chunk.
50/// This allows for constant time patch index lookups.
51pub const PATCH_CHUNK_SIZE: usize = 1024;
52
53#[derive(Copy, Clone, prost::Message)]
54pub struct PatchesMetadata {
55    #[prost(uint64, tag = "1")]
56    len: u64,
57    #[prost(uint64, tag = "2")]
58    offset: u64,
59    #[prost(enumeration = "PType", tag = "3")]
60    indices_ptype: i32,
61    #[prost(uint64, optional, tag = "4")]
62    chunk_offsets_len: Option<u64>,
63    #[prost(enumeration = "PType", optional, tag = "5")]
64    chunk_offsets_ptype: Option<i32>,
65    #[prost(uint64, optional, tag = "6")]
66    offset_within_chunk: Option<u64>,
67}
68
69impl PatchesMetadata {
70    #[inline]
71    pub fn new(
72        len: usize,
73        offset: usize,
74        indices_ptype: PType,
75        chunk_offsets_len: Option<usize>,
76        chunk_offsets_ptype: Option<PType>,
77        offset_within_chunk: Option<usize>,
78    ) -> Self {
79        Self {
80            len: len as u64,
81            offset: offset as u64,
82            indices_ptype: indices_ptype as i32,
83            chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
84            chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
85            offset_within_chunk: offset_within_chunk.map(|len| len as u64),
86        }
87    }
88
89    #[inline]
90    pub fn len(&self) -> VortexResult<usize> {
91        usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
92    }
93
94    #[inline]
95    pub fn is_empty(&self) -> bool {
96        self.len == 0
97    }
98
99    #[inline]
100    pub fn offset(&self) -> VortexResult<usize> {
101        usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
102    }
103
104    #[inline]
105    pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
106        self.chunk_offsets_ptype
107            .map(|t| {
108                PType::try_from(t)
109                    .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
110                    .map(|ptype| DType::Primitive(ptype, NonNullable))
111            })
112            .transpose()
113    }
114
115    #[inline]
116    pub fn indices_dtype(&self) -> VortexResult<DType> {
117        let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
118            vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
119        })?;
120        vortex_ensure!(
121            ptype.is_unsigned_int(),
122            "Patch indices must be unsigned integers"
123        );
124        Ok(DType::Primitive(ptype, NonNullable))
125    }
126}
127
128/// Metadata stored in an array's data struct for reconstructing [`Patches`] from slots.
129///
130/// The actual patch arrays (indices, values, chunk_offsets) live in the array's
131/// slots. This struct stores only the scalar metadata needed to reassemble them.
132#[derive(Clone, Debug, Hash, PartialEq, Eq)]
133pub struct PatchesData {
134    offset: usize,
135    offset_within_chunk: Option<usize>,
136}
137
138/// Slot indices for the three patch components within a slot array.
139#[derive(Copy, Clone, Debug)]
140pub struct PatchSlotIndices {
141    pub indices: usize,
142    pub values: usize,
143    pub chunk_offsets: usize,
144}
145
146impl PatchesData {
147    /// Extract patch metadata from an existing [`Patches`].
148    pub fn from_patches(patches: &Patches) -> Self {
149        Self {
150            offset: patches.offset(),
151            offset_within_chunk: patches.offset_within_chunk(),
152        }
153    }
154
155    /// Reconstruct patches from the given slot positions.
156    ///
157    /// Returns `None` if `patches_data` is `None`.
158    /// Panics if `patches_data` is `Some` but the indices or values slots are missing.
159    pub fn patches_from_slots(
160        patches_data: Option<&Self>,
161        len: usize,
162        slots: &[Option<ArrayRef>],
163        slot_idx: PatchSlotIndices,
164    ) -> Option<Patches> {
165        let data = patches_data?;
166        let indices = slots[slot_idx.indices]
167            .as_ref()
168            .vortex_expect("patches_data is set but patch_indices slot is missing");
169        let values = slots[slot_idx.values]
170            .as_ref()
171            .vortex_expect("patches_data is set but patch_values slot is missing");
172        Some(unsafe {
173            Patches::new_unchecked(
174                len,
175                data.offset,
176                indices.clone(),
177                values.clone(),
178                slots[slot_idx.chunk_offsets].clone(),
179                data.offset_within_chunk,
180            )
181        })
182    }
183
184    /// Push 3 patch slots (indices, values, chunk_offsets) onto a slot vector.
185    ///
186    /// If `patches` is `None`, pushes three `None` entries.
187    pub fn push_slots(slots: &mut ArraySlots, patches: Option<&Patches>) {
188        match patches {
189            Some(p) => {
190                slots.push(Some(p.indices().clone()));
191                slots.push(Some(p.values().clone()));
192                slots.push(p.chunk_offsets().clone());
193            }
194            None => {
195                slots.push(None);
196                slots.push(None);
197                slots.push(None);
198            }
199        }
200    }
201
202    /// Returns the patch offset.
203    #[inline]
204    pub fn offset(&self) -> usize {
205        self.offset
206    }
207
208    /// Returns the offset within the first chunk, if chunk offsets are present.
209    #[inline]
210    pub fn offset_within_chunk(&self) -> Option<usize> {
211        self.offset_within_chunk
212    }
213}
214
215/// A helper for working with patched arrays.
216#[derive(Debug, Clone)]
217pub struct Patches {
218    array_len: usize,
219    offset: usize,
220    indices: ArrayRef,
221    values: ArrayRef,
222    /// Stores the patch index offset for each chunk.
223    ///
224    /// This allows us to lookup the patches for a given chunk in constant time via
225    /// `patch_indices[chunk_offsets[i]..chunk_offsets[i+1]]`.
226    ///
227    /// This is optional for compatibility reasons.
228    chunk_offsets: Option<ArrayRef>,
229    /// Chunk offsets are only sliced off in case the slice is fully
230    /// outside of the chunk range.
231    ///
232    /// Though the range for indices and values is sliced in terms of
233    /// individual elements, not chunks. To account for that we do a
234    /// saturating sub when adjusting the indices based on the chunk offset.
235    ///
236    /// `offset_within_chunk` is necessary in order to keep track of how many
237    /// elements were sliced off within the chunk.
238    offset_within_chunk: Option<usize>,
239}
240
241impl Patches {
242    pub fn new(
243        array_len: usize,
244        offset: usize,
245        indices: ArrayRef,
246        values: ArrayRef,
247        chunk_offsets: Option<ArrayRef>,
248    ) -> VortexResult<Self> {
249        vortex_ensure!(
250            indices.len() == values.len(),
251            "Patch indices and values must have the same length"
252        );
253        vortex_ensure!(
254            indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
255            "Patch indices must be non-nullable unsigned integers, got {:?}",
256            indices.dtype()
257        );
258
259        vortex_ensure!(
260            indices.len() <= array_len,
261            "Patch indices must be shorter than the array length"
262        );
263        vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
264
265        // Perform validation of components when they are host-resident.
266        // This is not possible to do eagerly when the data is on GPU memory.
267        if indices.is_host() && values.is_host() {
268            let max = usize::try_from(&indices.execute_scalar(
269                indices.len() - 1,
270                &mut LEGACY_SESSION.create_execution_ctx(),
271            )?)
272            .map_err(|_| vortex_err!("indices must be a number"))?;
273            vortex_ensure!(
274                max - offset < array_len,
275                "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
276            );
277
278            #[cfg(debug_assertions)]
279            {
280                use crate::VortexSessionExecute;
281                use crate::aggregate_fn::fns::is_sorted::is_sorted;
282                let mut ctx = LEGACY_SESSION.create_execution_ctx();
283                assert!(
284                    is_sorted(&indices, &mut ctx).unwrap_or(false),
285                    "Patch indices must be sorted"
286                );
287            }
288        }
289
290        Ok(Self {
291            array_len,
292            offset,
293            indices,
294            values,
295            chunk_offsets: chunk_offsets.clone(),
296            // Initialize with `Some(0)` only if `chunk_offsets` are set.
297            offset_within_chunk: chunk_offsets.map(|_| 0),
298        })
299    }
300
301    /// Construct new patches without validating any of the arguments
302    ///
303    /// # Safety
304    ///
305    /// Users have to assert that
306    /// * Indices and values have the same length
307    /// * Indices is an unsigned integer type
308    /// * Indices must be sorted
309    /// * Last value in indices is smaller than array_len
310    pub unsafe fn new_unchecked(
311        array_len: usize,
312        offset: usize,
313        indices: ArrayRef,
314        values: ArrayRef,
315        chunk_offsets: Option<ArrayRef>,
316        offset_within_chunk: Option<usize>,
317    ) -> Self {
318        Self {
319            array_len,
320            offset,
321            indices,
322            values,
323            chunk_offsets,
324            offset_within_chunk,
325        }
326    }
327
328    #[inline]
329    pub fn array_len(&self) -> usize {
330        self.array_len
331    }
332
333    #[inline]
334    pub fn num_patches(&self) -> usize {
335        self.indices.len()
336    }
337
338    #[inline]
339    pub fn dtype(&self) -> &DType {
340        self.values.dtype()
341    }
342
343    #[inline]
344    pub fn indices(&self) -> &ArrayRef {
345        &self.indices
346    }
347
348    #[inline]
349    pub fn into_indices(self) -> ArrayRef {
350        self.indices
351    }
352
353    #[inline]
354    pub fn indices_mut(&mut self) -> &mut ArrayRef {
355        &mut self.indices
356    }
357
358    #[inline]
359    pub fn values(&self) -> &ArrayRef {
360        &self.values
361    }
362
363    #[inline]
364    pub fn into_values(self) -> ArrayRef {
365        self.values
366    }
367
368    #[inline]
369    pub fn values_mut(&mut self) -> &mut ArrayRef {
370        &mut self.values
371    }
372
373    #[inline]
374    // Absolute offset: 0 if the array is unsliced.
375    pub fn offset(&self) -> usize {
376        self.offset
377    }
378
379    #[inline]
380    pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
381        &self.chunk_offsets
382    }
383
384    #[inline]
385    pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
386        let Some(chunk_offsets) = &self.chunk_offsets else {
387            vortex_bail!("chunk_offsets must be set to retrieve offset at index")
388        };
389
390        chunk_offsets
391            .execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())?
392            .as_primitive()
393            .as_::<usize>()
394            .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
395    }
396
397    /// Returns the number of patches sliced off from the current first chunk.
398    ///
399    /// When patches are sliced, the chunk offsets array is also sliced to only include
400    /// chunks that overlap with the slice range. However, the slice boundary may fall
401    /// in the middle of a chunk's patch range. This offset indicates how many patches
402    /// at the start of the first chunk should be skipped.
403    ///
404    /// Returns `None` if chunk offsets are not set.
405    #[inline]
406    pub fn offset_within_chunk(&self) -> Option<usize> {
407        self.offset_within_chunk
408    }
409
410    #[inline]
411    pub fn indices_ptype(&self) -> VortexResult<PType> {
412        PType::try_from(self.indices.dtype())
413            .map_err(|_| vortex_err!("indices dtype is not primitive"))
414    }
415
416    pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
417        if self.indices.len() > len {
418            vortex_bail!(
419                "Patch indices {} are longer than the array length {}",
420                self.indices.len(),
421                len
422            );
423        }
424        if self.values.dtype() != dtype {
425            vortex_bail!(
426                "Patch values dtype {} does not match array dtype {}",
427                self.values.dtype(),
428                dtype
429            );
430        }
431        let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
432        let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
433
434        Ok(PatchesMetadata::new(
435            self.indices.len(),
436            self.offset,
437            self.indices.dtype().as_ptype(),
438            chunk_offsets_len,
439            chunk_offsets_ptype,
440            self.offset_within_chunk,
441        ))
442    }
443
444    pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
445        // SAFETY: casting does not affect the relationship between the indices and values
446        unsafe {
447            Ok(Self::new_unchecked(
448                self.array_len,
449                self.offset,
450                self.indices,
451                self.values.cast(values_dtype.clone())?,
452                self.chunk_offsets,
453                self.offset_within_chunk,
454            ))
455        }
456    }
457
458    /// Get the patched value at a given index if it exists.
459    pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
460        self.search_index(index)?
461            .to_found()
462            .map(|patch_idx| {
463                self.values()
464                    .execute_scalar(patch_idx, &mut LEGACY_SESSION.create_execution_ctx())
465            })
466            .transpose()
467    }
468
469    /// Searches for `index` in the indices array.
470    ///
471    /// Chooses between chunked search when [`Self::chunk_offsets`] is
472    /// available, and binary search otherwise. The `index` parameter is
473    /// adjusted by [`Self::offset`] for both.
474    ///
475    /// # Arguments
476    /// * `index` - The index to search for
477    ///
478    /// # Returns
479    /// * [`SearchResult::Found(patch_idx)`] - If a patch exists at this index, returns the
480    ///   position in the patches array
481    /// * [`SearchResult::NotFound(insertion_point)`] - If no patch exists, returns where
482    ///   a patch at this index would be inserted to maintain sorted order
483    ///
484    /// [`SearchResult::Found(patch_idx)`]: SearchResult::Found
485    /// [`SearchResult::NotFound(insertion_point)`]: SearchResult::NotFound
486    pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
487        if self.chunk_offsets.is_some() {
488            return self.search_index_chunked(index);
489        }
490
491        Self::search_index_binary_search(&self.indices, index + self.offset)
492    }
493
494    /// Binary searches for `needle` in the indices array.
495    ///
496    /// # Returns
497    /// [`SearchResult::Found`] with the position if needle exists, or [`SearchResult::NotFound`]
498    /// with the insertion point if not found.
499    fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
500        if indices.is_canonical() {
501            #[expect(deprecated)]
502            let primitive = indices.to_primitive();
503            match_each_integer_ptype!(primitive.ptype(), |T| {
504                let Ok(needle) = T::try_from(needle) else {
505                    // If the needle is not of type T, then it cannot possibly be in this array.
506                    //
507                    // The needle is a non-negative integer (a usize); therefore, it must be larger
508                    // than all values in this array.
509                    return Ok(SearchResult::NotFound(primitive.len()));
510                };
511                return primitive
512                    .as_slice::<T>()
513                    .search_sorted(&needle, SearchSortedSide::Left);
514            });
515        }
516        indices
517            .as_primitive_typed()
518            .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
519    }
520
521    /// Constant time searches for `index` in the indices array.
522    ///
523    /// First determines which chunk the target index falls into, then performs
524    /// a binary search within that chunk's range.
525    ///
526    /// Returns a [`SearchResult`] indicating either the exact patch index if found,
527    /// or the insertion point if not found.
528    ///
529    /// Returns an error if `chunk_offsets` or `offset_within_chunk` are not set.
530    fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
531        let Some(chunk_offsets) = &self.chunk_offsets else {
532            vortex_bail!("chunk_offsets is required to be set")
533        };
534
535        let Some(offset_within_chunk) = self.offset_within_chunk else {
536            vortex_bail!("offset_within_chunk is required to be set")
537        };
538
539        if index >= self.array_len() {
540            return Ok(SearchResult::NotFound(self.indices().len()));
541        }
542
543        let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
544
545        // Patch index offsets are absolute and need to be offset by the first chunk of the current slice.
546        let base_offset = self.chunk_offset_at(0)?;
547
548        let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
549            // Chunk offsets are only sliced off in case the slice is fully
550            // outside of the chunk range.
551            //
552            // Though the range for indices and values is sliced in terms of
553            // individual elements, not chunks. To account for that we do a
554            // saturating sub when adjusting the indices based on the chunk offset.
555            .saturating_sub(offset_within_chunk);
556
557        let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
558            (self.chunk_offset_at(chunk_idx + 1)? - base_offset)
559                .saturating_sub(offset_within_chunk)
560                .min(self.indices.len())
561        } else {
562            self.indices.len()
563        };
564
565        let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
566        let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
567
568        Ok(match result {
569            SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
570            SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
571        })
572    }
573
574    /// Batch version of `search_index`.
575    ///
576    /// In contrast to `search_index`, this function requires `indices` as
577    /// well as `chunk_offsets` to be passed as slices. This is to avoid
578    /// redundant canonicalization and `scalar_at` lookups across calls.
579    fn search_index_chunked_batch<T, O>(
580        &self,
581        indices: &[T],
582        chunk_offsets: &[O],
583        index: T,
584    ) -> VortexResult<SearchResult>
585    where
586        T: UnsignedPType,
587        O: UnsignedPType,
588        usize: TryFrom<T>,
589        usize: TryFrom<O>,
590    {
591        let Some(offset_within_chunk) = self.offset_within_chunk else {
592            vortex_bail!("offset_within_chunk is required to be set")
593        };
594
595        let chunk_idx = {
596            let Ok(index) = usize::try_from(index) else {
597                // If the needle cannot be converted to usize, it's larger than all values in this array.
598                return Ok(SearchResult::NotFound(indices.len()));
599            };
600
601            if index >= self.array_len() {
602                return Ok(SearchResult::NotFound(self.indices().len()));
603            }
604
605            (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
606        };
607
608        // Patch index offsets are absolute and need to be offset by the first chunk of the current slice.
609        let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
610            .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
611
612        let patches_start_idx = chunk_offset
613            // Chunk offsets are only sliced off in case the slice is fully
614            // outside of the chunk range.
615            //
616            // Though the range for indices and values is sliced in terms of
617            // individual elements, not chunks. To account for that we do a
618            // saturating sub when adjusting the indices based on the chunk offset.
619            .saturating_sub(offset_within_chunk);
620
621        let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
622            usize::try_from(chunk_offsets[chunk_idx + 1] - chunk_offsets[0])
623                .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
624                .saturating_sub(offset_within_chunk)
625                .min(indices.len())
626        } else {
627            self.indices.len()
628        };
629
630        let Some(offset) = T::from(self.offset) else {
631            // If the offset cannot be converted to T, it's larger than all values in this array.
632            return Ok(SearchResult::NotFound(indices.len()));
633        };
634
635        let chunk_indices = &indices[patches_start_idx..patches_end_idx];
636        let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
637
638        Ok(match result {
639            SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
640            SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
641        })
642    }
643
644    /// Returns the minimum patch index
645    pub fn min_index(&self) -> VortexResult<usize> {
646        let first = self
647            .indices
648            .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
649            .as_primitive()
650            .as_::<usize>()
651            .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
652        Ok(first - self.offset)
653    }
654
655    /// Returns the maximum patch index
656    pub fn max_index(&self) -> VortexResult<usize> {
657        let last = self
658            .indices
659            .execute_scalar(
660                self.indices.len() - 1,
661                &mut LEGACY_SESSION.create_execution_ctx(),
662            )?
663            .as_primitive()
664            .as_::<usize>()
665            .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
666        Ok(last - self.offset)
667    }
668
669    /// Filter the patches by a mask, resulting in new patches for the filtered array.
670    pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
671        if mask.len() != self.array_len {
672            vortex_bail!(
673                "Filter mask length {} does not match array length {}",
674                mask.len(),
675                self.array_len
676            );
677        }
678
679        match mask.indices() {
680            AllOr::All => Ok(Some(self.clone())),
681            AllOr::None => Ok(None),
682            AllOr::Some(mask_indices) => {
683                let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
684                match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
685                    filter_patches_with_mask(
686                        flat_indices.as_slice::<I>(),
687                        self.offset(),
688                        self.values(),
689                        mask_indices,
690                    )
691                })
692            }
693        }
694    }
695
696    /// Mask the patches, REMOVING the patches where the mask is true.
697    /// Unlike filter, this preserves the patch indices.
698    /// Unlike mask on a single array, this does not set masked values to null.
699    // TODO(joe): make this lazy and remove the ctx.
700    pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
701        if mask.len() != self.array_len {
702            vortex_bail!(
703                "Filter mask length {} does not match array length {}",
704                mask.len(),
705                self.array_len
706            );
707        }
708
709        let filter_mask = match mask.bit_buffer() {
710            AllOr::All => return Ok(None),
711            AllOr::None => return Ok(Some(self.clone())),
712            AllOr::Some(masked) => {
713                let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
714                match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
715                    let patch_indices = patch_indices.as_slice::<P>();
716                    Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
717                        #[allow(clippy::cast_possible_truncation)]
718                        let idx = (patch_indices[i] as usize) - self.offset;
719                        !masked.value(idx)
720                    }))
721                })
722            }
723        };
724
725        if filter_mask.all_false() {
726            return Ok(None);
727        }
728
729        // SAFETY: filtering indices/values with same mask maintains their 1:1 relationship
730        let filtered_indices = self.indices.filter(filter_mask.clone())?;
731        let filtered_values = self.values.filter(filter_mask)?;
732
733        Ok(Some(Self {
734            array_len: self.array_len,
735            offset: self.offset,
736            indices: filtered_indices,
737            values: filtered_values,
738            // TODO(0ax1): Chunk offsets are invalid after a filter is applied.
739            chunk_offsets: None,
740            offset_within_chunk: self.offset_within_chunk,
741        }))
742    }
743
744    /// Slice the patches by a range of the patched array.
745    pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
746        let slice_start_idx = self.search_index(range.start)?.to_index();
747        let slice_end_idx = self.search_index(range.end)?.to_index();
748
749        if slice_start_idx == slice_end_idx {
750            return Ok(None);
751        }
752
753        let values = self.values().slice(slice_start_idx..slice_end_idx)?;
754        let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
755
756        let new_chunk_offsets = self
757            .chunk_offsets
758            .as_ref()
759            .map(|chunk_offsets| -> VortexResult<ArrayRef> {
760                let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
761                let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
762                let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
763                chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
764            })
765            .transpose()?;
766
767        let offset_within_chunk = new_chunk_offsets
768            .as_ref()
769            .map(|new_chunk_offsets| -> VortexResult<usize> {
770                let new_chunk_base = new_chunk_offsets
771                    .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
772                    .as_primitive()
773                    .as_::<usize>()
774                    .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
775                let parent_chunk_base = self.chunk_offset_at(0)?;
776                let parent_within = self.offset_within_chunk.unwrap_or(0);
777                Ok(parent_chunk_base + parent_within + slice_start_idx - new_chunk_base)
778            })
779            .transpose()?;
780
781        Ok(Some(Self {
782            array_len: range.len(),
783            offset: range.start + self.offset(),
784            indices,
785            values,
786            chunk_offsets: new_chunk_offsets,
787            offset_within_chunk,
788        }))
789    }
790
791    // https://docs.google.com/spreadsheets/d/1D9vBZ1QJ6mwcIvV5wIL0hjGgVchcEnAyhvitqWu2ugU
792    const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
793
794    fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
795        (self.num_patches() as f64 / take_indices.len() as f64)
796            < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
797    }
798
799    /// Take the indices from the patches
800    ///
801    /// Any nulls in take_indices are added to the resulting patches.
802    pub fn take_with_nulls(
803        &self,
804        take_indices: &ArrayRef,
805        ctx: &mut ExecutionCtx,
806    ) -> VortexResult<Option<Self>> {
807        if take_indices.is_empty() {
808            return Ok(None);
809        }
810
811        let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
812        if self.is_map_faster_than_search(&take_indices) {
813            self.take_map(take_indices, true, ctx)
814        } else {
815            self.take_search(take_indices, true, ctx)
816        }
817    }
818
819    /// Take the indices from the patches.
820    ///
821    /// Any nulls in take_indices are ignored.
822    pub fn take(
823        &self,
824        take_indices: &ArrayRef,
825        ctx: &mut ExecutionCtx,
826    ) -> VortexResult<Option<Self>> {
827        if take_indices.is_empty() {
828            return Ok(None);
829        }
830
831        let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
832        if self.is_map_faster_than_search(&take_indices) {
833            self.take_map(take_indices, false, ctx)
834        } else {
835            self.take_search(take_indices, false, ctx)
836        }
837    }
838
839    #[expect(
840        clippy::cognitive_complexity,
841        reason = "complexity is from nested match_each_* macros"
842    )]
843    pub fn take_search(
844        &self,
845        take_indices: PrimitiveArray,
846        include_nulls: bool,
847        ctx: &mut ExecutionCtx,
848    ) -> VortexResult<Option<Self>> {
849        let take_indices_validity = take_indices.validity()?;
850        // Take indices are non-negative; reinterpret to unsigned so the `TakeT` dimension
851        // dispatches over 4 widths instead of 8.
852        let take_indices_unsigned =
853            take_indices.reinterpret_cast(take_indices.ptype().to_unsigned());
854        let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
855        let chunk_offsets = self
856            .chunk_offsets()
857            .as_ref()
858            .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
859            .transpose()?;
860
861        let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
862            match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
863                let patch_indices_slice = patch_indices.as_slice::<PatchT>();
864                match_each_unsigned_integer_ptype!(take_indices_unsigned.ptype(), |TakeT| {
865                    let take_slice = take_indices_unsigned.as_slice::<TakeT>();
866
867                    if let Some(chunk_offsets) = chunk_offsets {
868                        match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
869                            let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
870                            take_indices_with_search_fn(
871                                patch_indices_slice,
872                                take_slice,
873                                take_indices
874                                    .as_ref()
875                                    .validity()?
876                                    .execute_mask(take_indices.as_ref().len(), ctx)?,
877                                include_nulls,
878                                |take_idx| {
879                                    self.search_index_chunked_batch(
880                                        patch_indices_slice,
881                                        chunk_offsets,
882                                        take_idx,
883                                    )
884                                },
885                            )?
886                        })
887                    } else {
888                        take_indices_with_search_fn(
889                            patch_indices_slice,
890                            take_slice,
891                            take_indices
892                                .as_ref()
893                                .validity()?
894                                .execute_mask(take_indices.as_ref().len(), ctx)?,
895                            include_nulls,
896                            |take_idx| {
897                                let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
898                                    // If the offset cannot be converted to T, it's larger than all values in this array.
899                                    return Ok(SearchResult::NotFound(patch_indices_slice.len()));
900                                };
901
902                                patch_indices_slice
903                                    .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
904                            },
905                        )?
906                    }
907                })
908            });
909
910        if new_indices.is_empty() {
911            return Ok(None);
912        }
913
914        let new_indices = new_indices.into_array();
915        let new_array_len = take_indices.len();
916        let values_validity = take_indices_validity.take(&new_indices)?;
917
918        Ok(Some(Self {
919            array_len: new_array_len,
920            offset: 0,
921            indices: new_indices,
922            values: self
923                .values()
924                .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
925            chunk_offsets: None,
926            offset_within_chunk: Some(0), // Reset when creating new Patches.
927        }))
928    }
929
930    pub fn take_map(
931        &self,
932        take_indices: PrimitiveArray,
933        include_nulls: bool,
934        ctx: &mut ExecutionCtx,
935    ) -> VortexResult<Option<Self>> {
936        let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
937        let new_length = take_indices.len();
938        // Take indices are non-negative; reinterpret to unsigned (4 widths instead of 8).
939        let take_indices_unsigned =
940            take_indices.reinterpret_cast(take_indices.ptype().to_unsigned());
941
942        let min_index = self.min_index()?;
943        let max_index = self.max_index()?;
944
945        let Some((new_sparse_indices, value_indices)) =
946            match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
947                match_each_unsigned_integer_ptype!(take_indices_unsigned.ptype(), |TakeIndices| {
948                    let take_validity = take_indices
949                        .validity()?
950                        .execute_mask(take_indices.len(), ctx)?;
951                    let take_nullability = take_indices.validity()?.nullability();
952                    let take_slice = take_indices_unsigned.as_slice::<TakeIndices>();
953                    take_map::<_, TakeIndices>(
954                        indices.as_slice::<Indices>(),
955                        take_slice,
956                        take_validity,
957                        take_nullability,
958                        self.offset(),
959                        min_index,
960                        max_index,
961                        include_nulls,
962                    )?
963                })
964            })
965        else {
966            return Ok(None);
967        };
968
969        let taken_values = self.values().take(value_indices)?;
970
971        Ok(Some(Patches {
972            array_len: new_length,
973            offset: 0,
974            indices: new_sparse_indices,
975            values: taken_values,
976            // TODO(0ax1): Chunk offsets are invalid after take is applied.
977            chunk_offsets: None,
978            offset_within_chunk: self.offset_within_chunk,
979        }))
980    }
981
982    pub fn map_values<F>(self, f: F) -> VortexResult<Self>
983    where
984        F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
985    {
986        let values = f(self.values)?;
987        if self.indices.len() != values.len() {
988            vortex_bail!(
989                "map_values must preserve length: expected {} received {}",
990                self.indices.len(),
991                values.len()
992            )
993        }
994
995        Ok(Self {
996            array_len: self.array_len,
997            offset: self.offset,
998            indices: self.indices,
999            values,
1000            chunk_offsets: self.chunk_offsets,
1001            offset_within_chunk: self.offset_within_chunk,
1002        })
1003    }
1004}
1005
1006#[expect(clippy::too_many_arguments)] // private function, can clean up one day
1007fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
1008    indices: &[I],
1009    take_indices: &[T],
1010    take_validity: Mask,
1011    take_nullability: Nullability,
1012    indices_offset: usize,
1013    min_index: usize,
1014    max_index: usize,
1015    include_nulls: bool,
1016) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
1017where
1018    usize: TryFrom<T>,
1019    VortexError: From<<I as TryFrom<usize>>::Error>,
1020{
1021    let offset_i = I::try_from(indices_offset)?;
1022
1023    let sparse_index_to_value_index: HashMap<I, usize> = indices
1024        .iter()
1025        .copied()
1026        .map(|idx| idx - offset_i)
1027        .enumerate()
1028        .map(|(value_index, sparse_index)| (sparse_index, value_index))
1029        .collect();
1030
1031    let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1032    let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1033
1034    for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
1035        let ti = usize::try_from(take_idx)
1036            .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
1037
1038        // If we have to take nulls the take index doesn't matter, make it 0 for consistency
1039        let is_null = match take_validity.bit_buffer() {
1040            AllOr::All => false,
1041            AllOr::None => true,
1042            AllOr::Some(buf) => !buf.value(idx_in_take),
1043        };
1044        if is_null {
1045            if include_nulls {
1046                new_sparse_indices.push(idx_in_take as u64);
1047                value_indices.push(0);
1048            }
1049        } else if ti >= min_index && ti <= max_index {
1050            let ti_as_i = I::try_from(ti)
1051                .map_err(|_| vortex_err!("take index does not fit in index type"))?;
1052            if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
1053                new_sparse_indices.push(idx_in_take as u64);
1054                value_indices.push(value_index as u64);
1055            }
1056        }
1057    }
1058
1059    if new_sparse_indices.is_empty() {
1060        return Ok(None);
1061    }
1062
1063    let new_sparse_indices = new_sparse_indices.into_array();
1064    let values_validity =
1065        Validity::from_mask(take_validity, take_nullability).take(&new_sparse_indices)?;
1066    Ok(Some((
1067        new_sparse_indices,
1068        PrimitiveArray::new(value_indices, values_validity).into_array(),
1069    )))
1070}
1071
1072/// Filter patches with the provided mask (in flattened space).
1073///
1074/// The filter mask may contain indices that are non-patched. The return value of this function
1075/// is a new set of `Patches` with the indices relative to the provided `mask` rank, and the
1076/// patch values.
1077fn filter_patches_with_mask<T: IntegerPType>(
1078    patch_indices: &[T],
1079    offset: usize,
1080    patch_values: &ArrayRef,
1081    mask_indices: &[usize],
1082) -> VortexResult<Option<Patches>> {
1083    let true_count = mask_indices.len();
1084    let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
1085    let mut new_mask_indices = Vec::with_capacity(true_count);
1086
1087    // Attempt to move the window by `STRIDE` elements on each iteration. This assumes that
1088    // the patches are relatively sparse compared to the overall mask, and so many indices in the
1089    // mask will end up being skipped.
1090    const STRIDE: usize = 4;
1091
1092    let mut mask_idx = 0usize;
1093    let mut true_idx = 0usize;
1094
1095    while mask_idx < patch_indices.len() && true_idx < true_count {
1096        // NOTE: we are searching for overlaps between sorted, unaligned indices in `patch_indices`
1097        //  and `mask_indices`. We assume that Patches are sparse relative to the global space of
1098        //  the mask (which covers both patch and non-patch values of the parent array), and so to
1099        //  quickly jump through regions with no overlap, we attempt to move our pointers by STRIDE
1100        //  elements on each iteration. If we cannot rule out overlap due to min/max values, we
1101        //  fallback to performing a two-way iterator merge.
1102        if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1103            // Load a vector of each into our registers.
1104            let left_min = patch_indices[mask_idx]
1105                .to_usize()
1106                .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1107                - offset;
1108            let left_max = patch_indices[mask_idx + STRIDE]
1109                .to_usize()
1110                .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1111                - offset;
1112            let right_min = mask_indices[true_idx];
1113            let right_max = mask_indices[true_idx + STRIDE];
1114
1115            if left_min > right_max {
1116                // Advance right side
1117                true_idx += STRIDE;
1118                continue;
1119            } else if right_min > left_max {
1120                mask_idx += STRIDE;
1121                continue;
1122            } else {
1123                // Fallthrough to direct comparison path.
1124            }
1125        }
1126
1127        // Two-way sorted iterator merge:
1128
1129        let left = patch_indices[mask_idx]
1130            .to_usize()
1131            .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1132            - offset;
1133        let right = mask_indices[true_idx];
1134
1135        match left.cmp(&right) {
1136            Ordering::Less => {
1137                mask_idx += 1;
1138            }
1139            Ordering::Greater => {
1140                true_idx += 1;
1141            }
1142            Ordering::Equal => {
1143                // Save the mask index as well as the positional index.
1144                new_mask_indices.push(mask_idx);
1145                new_patch_indices.push(true_idx as u64);
1146
1147                mask_idx += 1;
1148                true_idx += 1;
1149            }
1150        }
1151    }
1152
1153    if new_mask_indices.is_empty() {
1154        return Ok(None);
1155    }
1156
1157    let new_patch_indices = new_patch_indices.into_array();
1158    let new_patch_values =
1159        patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1160
1161    Ok(Some(Patches::new(
1162        true_count,
1163        0,
1164        new_patch_indices,
1165        new_patch_values,
1166        // TODO(0ax1): Chunk offsets are invalid after a filter is applied.
1167        None,
1168    )?))
1169}
1170
1171fn take_indices_with_search_fn<
1172    I: UnsignedPType,
1173    T: IntegerPType,
1174    F: Fn(I) -> VortexResult<SearchResult>,
1175>(
1176    indices: &[I],
1177    take_indices: &[T],
1178    take_validity: Mask,
1179    include_nulls: bool,
1180    search_fn: F,
1181) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1182    let mut values_indices = BufferMut::with_capacity(take_indices.len());
1183    let mut new_indices = BufferMut::with_capacity(take_indices.len());
1184
1185    for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1186        if !take_validity.value(new_patch_idx) {
1187            if include_nulls {
1188                // For nulls, patch index doesn't matter - use 0 for consistency
1189                values_indices.push(0u64);
1190                new_indices.push(new_patch_idx as u64);
1191            }
1192            continue;
1193        } else {
1194            let search_result = match I::from(take_idx) {
1195                Some(idx) => search_fn(idx)?,
1196                None => SearchResult::NotFound(indices.len()),
1197            };
1198
1199            if let Some(patch_idx) = search_result.to_found() {
1200                values_indices.push(patch_idx as u64);
1201                new_indices.push(new_patch_idx as u64);
1202            }
1203        }
1204    }
1205
1206    Ok((values_indices, new_indices))
1207}
1208
1209#[cfg(test)]
1210mod test {
1211    use vortex_buffer::BufferMut;
1212    use vortex_buffer::buffer;
1213    use vortex_mask::Mask;
1214
1215    use crate::IntoArray;
1216    use crate::LEGACY_SESSION;
1217    #[expect(deprecated)]
1218    use crate::ToCanonical as _;
1219    use crate::VortexSessionExecute;
1220    use crate::assert_arrays_eq;
1221    use crate::patches::Patches;
1222    use crate::patches::PrimitiveArray;
1223    use crate::search_sorted::SearchResult;
1224    use crate::validity::Validity;
1225
1226    #[test]
1227    fn test_filter() {
1228        let patches = Patches::new(
1229            100,
1230            0,
1231            buffer![10u32, 11, 20].into_array(),
1232            buffer![100, 110, 200].into_array(),
1233            None,
1234        )
1235        .unwrap();
1236
1237        let filtered = patches
1238            .filter(
1239                &Mask::from_indices(100, vec![10, 20, 30]),
1240                &mut LEGACY_SESSION.create_execution_ctx(),
1241            )
1242            .unwrap()
1243            .unwrap();
1244
1245        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1246        assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1247    }
1248
1249    #[test]
1250    fn take_with_nulls() {
1251        let patches = Patches::new(
1252            20,
1253            0,
1254            buffer![2u64, 9, 15].into_array(),
1255            PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1256            None,
1257        )
1258        .unwrap();
1259
1260        let taken = patches
1261            .take(
1262                &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1263                    .into_array(),
1264                &mut LEGACY_SESSION.create_execution_ctx(),
1265            )
1266            .unwrap()
1267            .unwrap();
1268        #[expect(deprecated)]
1269        let primitive_values = taken.values().to_primitive();
1270        #[expect(deprecated)]
1271        let primitive_indices = taken.indices().to_primitive();
1272        assert_eq!(taken.array_len(), 2);
1273        assert_arrays_eq!(
1274            primitive_values,
1275            PrimitiveArray::from_option_iter([Some(44i32)])
1276        );
1277        assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1278        assert_eq!(
1279            primitive_values
1280                .as_ref()
1281                .validity()
1282                .unwrap()
1283                .execute_mask(
1284                    primitive_values.as_ref().len(),
1285                    &mut LEGACY_SESSION.create_execution_ctx()
1286                )
1287                .unwrap(),
1288            Mask::from_iter(vec![true])
1289        );
1290    }
1291
1292    #[test]
1293    fn take_search_with_nulls_chunked() {
1294        let patches = Patches::new(
1295            20,
1296            0,
1297            buffer![2u64, 9, 15].into_array(),
1298            buffer![33_i32, 44, 55].into_array(),
1299            Some(buffer![0u64].into_array()),
1300        )
1301        .unwrap();
1302
1303        let taken = patches
1304            .take_search(
1305                PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1306                true,
1307                &mut LEGACY_SESSION.create_execution_ctx(),
1308            )
1309            .unwrap()
1310            .unwrap();
1311
1312        #[expect(deprecated)]
1313        let primitive_values = taken.values().to_primitive();
1314        assert_eq!(taken.array_len(), 2);
1315        assert_arrays_eq!(
1316            primitive_values,
1317            PrimitiveArray::from_option_iter([Some(44i32), None])
1318        );
1319        assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1320
1321        assert_eq!(
1322            primitive_values
1323                .as_ref()
1324                .validity()
1325                .unwrap()
1326                .execute_mask(
1327                    primitive_values.as_ref().len(),
1328                    &mut LEGACY_SESSION.create_execution_ctx()
1329                )
1330                .unwrap(),
1331            Mask::from_iter([true, false])
1332        );
1333    }
1334
1335    #[test]
1336    fn take_search_chunked_multiple_chunks() {
1337        let patches = Patches::new(
1338            2048,
1339            0,
1340            buffer![100u64, 500, 1200, 1800].into_array(),
1341            buffer![10_i32, 20, 30, 40].into_array(),
1342            Some(buffer![0u64, 2].into_array()),
1343        )
1344        .unwrap();
1345
1346        let taken = patches
1347            .take_search(
1348                PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1349                true,
1350                &mut LEGACY_SESSION.create_execution_ctx(),
1351            )
1352            .unwrap()
1353            .unwrap();
1354
1355        assert_eq!(taken.array_len(), 3);
1356        assert_arrays_eq!(
1357            taken.values(),
1358            PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1359        );
1360    }
1361
1362    #[test]
1363    fn take_search_chunked_indices_with_no_patches() {
1364        let patches = Patches::new(
1365            20,
1366            0,
1367            buffer![2u64, 9, 15].into_array(),
1368            buffer![33_i32, 44, 55].into_array(),
1369            Some(buffer![0u64].into_array()),
1370        )
1371        .unwrap();
1372
1373        let taken = patches
1374            .take_search(
1375                PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1376                true,
1377                &mut LEGACY_SESSION.create_execution_ctx(),
1378            )
1379            .unwrap();
1380
1381        assert!(taken.is_none());
1382    }
1383
1384    #[test]
1385    fn take_search_chunked_interleaved() {
1386        let patches = Patches::new(
1387            30,
1388            0,
1389            buffer![5u64, 10, 20, 25].into_array(),
1390            buffer![100_i32, 200, 300, 400].into_array(),
1391            Some(buffer![0u64].into_array()),
1392        )
1393        .unwrap();
1394
1395        let taken = patches
1396            .take_search(
1397                PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1398                true,
1399                &mut LEGACY_SESSION.create_execution_ctx(),
1400            )
1401            .unwrap()
1402            .unwrap();
1403
1404        assert_eq!(taken.array_len(), 4);
1405        assert_arrays_eq!(
1406            taken.values(),
1407            PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1408        );
1409    }
1410
1411    #[test]
1412    fn test_take_search_multiple_chunk_offsets() {
1413        let patches = Patches::new(
1414            1500,
1415            0,
1416            BufferMut::from_iter(0..1500u64).into_array(),
1417            BufferMut::from_iter(0..1500i32).into_array(),
1418            Some(buffer![0u64, 1024u64].into_array()),
1419        )
1420        .unwrap();
1421
1422        let taken = patches
1423            .take_search(
1424                PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1425                false,
1426                &mut LEGACY_SESSION.create_execution_ctx(),
1427            )
1428            .unwrap()
1429            .unwrap();
1430
1431        assert_eq!(taken.array_len(), 1500);
1432    }
1433
1434    #[test]
1435    fn test_slice() {
1436        let values = buffer![15_u32, 135, 13531, 42].into_array();
1437        let indices = buffer![10_u64, 11, 50, 100].into_array();
1438
1439        let patches = Patches::new(101, 0, indices, values, None).unwrap();
1440
1441        let sliced = patches.slice(15..100).unwrap().unwrap();
1442        assert_eq!(sliced.array_len(), 100 - 15);
1443        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1444    }
1445
1446    #[test]
1447    fn doubly_sliced() {
1448        let values = buffer![15_u32, 135, 13531, 42].into_array();
1449        let indices = buffer![10_u64, 11, 50, 100].into_array();
1450
1451        let patches = Patches::new(101, 0, indices, values, None).unwrap();
1452
1453        let sliced = patches.slice(15..100).unwrap().unwrap();
1454        assert_eq!(sliced.array_len(), 100 - 15);
1455        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1456
1457        let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1458        assert_arrays_eq!(
1459            doubly_sliced.values(),
1460            PrimitiveArray::from_iter([13531u32])
1461        );
1462    }
1463
1464    #[test]
1465    fn test_mask_all_true() {
1466        let patches = Patches::new(
1467            10,
1468            0,
1469            buffer![2u64, 5, 8].into_array(),
1470            buffer![100i32, 200, 300].into_array(),
1471            None,
1472        )
1473        .unwrap();
1474
1475        let mask = Mask::new_true(10);
1476        let masked = patches
1477            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1478            .unwrap();
1479        assert!(masked.is_none());
1480    }
1481
1482    #[test]
1483    fn test_mask_all_false() {
1484        let patches = Patches::new(
1485            10,
1486            0,
1487            buffer![2u64, 5, 8].into_array(),
1488            buffer![100i32, 200, 300].into_array(),
1489            None,
1490        )
1491        .unwrap();
1492
1493        let mask = Mask::new_false(10);
1494        let masked = patches
1495            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1496            .unwrap()
1497            .unwrap();
1498
1499        // No patch values should be masked
1500        assert_arrays_eq!(
1501            masked.values(),
1502            PrimitiveArray::from_iter([100i32, 200, 300])
1503        );
1504        assert!(
1505            masked
1506                .values()
1507                .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1508                .unwrap()
1509        );
1510        assert!(
1511            masked
1512                .values()
1513                .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1514                .unwrap()
1515        );
1516        assert!(
1517            masked
1518                .values()
1519                .is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())
1520                .unwrap()
1521        );
1522
1523        // Indices should remain unchanged
1524        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1525    }
1526
1527    #[test]
1528    fn test_mask_partial() {
1529        let patches = Patches::new(
1530            10,
1531            0,
1532            buffer![2u64, 5, 8].into_array(),
1533            buffer![100i32, 200, 300].into_array(),
1534            None,
1535        )
1536        .unwrap();
1537
1538        // Mask that removes patches at indices 2 and 8 (but not 5)
1539        let mask = Mask::from_iter([
1540            false, false, true, false, false, false, false, false, true, false,
1541        ]);
1542        let masked = patches
1543            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1544            .unwrap()
1545            .unwrap();
1546
1547        // Only the patch at index 5 should remain
1548        assert_eq!(masked.values().len(), 1);
1549        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1550
1551        // Only index 5 should remain
1552        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1553    }
1554
1555    #[test]
1556    fn test_mask_with_offset() {
1557        let patches = Patches::new(
1558            10,
1559            5,                                  // offset
1560            buffer![7u64, 10, 13].into_array(), // actual indices are 2, 5, 8
1561            buffer![100i32, 200, 300].into_array(),
1562            None,
1563        )
1564        .unwrap();
1565
1566        // Mask that sets actual index 2 to null
1567        let mask = Mask::from_iter([
1568            false, false, true, false, false, false, false, false, false, false,
1569        ]);
1570
1571        let masked = patches
1572            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1573            .unwrap()
1574            .unwrap();
1575        assert_eq!(masked.array_len(), 10);
1576        assert_eq!(masked.offset(), 5);
1577        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1578        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1579    }
1580
1581    #[test]
1582    fn test_mask_nullable_values() {
1583        let patches = Patches::new(
1584            10,
1585            0,
1586            buffer![2u64, 5, 8].into_array(),
1587            PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1588            None,
1589        )
1590        .unwrap();
1591
1592        // Test masking removes patch at index 2
1593        let mask = Mask::from_iter([
1594            false, false, true, false, false, false, false, false, false, false,
1595        ]);
1596        let masked = patches
1597            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1598            .unwrap()
1599            .unwrap();
1600
1601        // Patches at indices 5 and 8 should remain
1602        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1603
1604        // Values should be the null and 300
1605        #[expect(deprecated)]
1606        let masked_values = masked.values().to_primitive();
1607        assert_eq!(masked_values.len(), 2);
1608        assert!(
1609            !masked_values
1610                .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1611                .unwrap()
1612        ); // the null value at index 5
1613        assert!(
1614            masked_values
1615                .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1616                .unwrap()
1617        ); // the 300 value at index 8
1618        assert_eq!(
1619            i32::try_from(
1620                &masked_values
1621                    .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1622                    .unwrap()
1623            )
1624            .unwrap(),
1625            300i32
1626        );
1627    }
1628
1629    #[test]
1630    fn test_filter_keep_all() {
1631        let patches = Patches::new(
1632            10,
1633            0,
1634            buffer![2u64, 5, 8].into_array(),
1635            buffer![100i32, 200, 300].into_array(),
1636            None,
1637        )
1638        .unwrap();
1639
1640        // Keep all indices (mask with indices 0-9)
1641        let mask = Mask::from_indices(10, 0..10);
1642        let filtered = patches
1643            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1644            .unwrap()
1645            .unwrap();
1646
1647        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1648        assert_arrays_eq!(
1649            filtered.values(),
1650            PrimitiveArray::from_iter([100i32, 200, 300])
1651        );
1652    }
1653
1654    #[test]
1655    fn test_filter_none() {
1656        let patches = Patches::new(
1657            10,
1658            0,
1659            buffer![2u64, 5, 8].into_array(),
1660            buffer![100i32, 200, 300].into_array(),
1661            None,
1662        )
1663        .unwrap();
1664
1665        // Filter out all (empty mask means keep nothing)
1666        let mask = Mask::from_indices(10, vec![]);
1667        let filtered = patches
1668            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1669            .unwrap();
1670        assert!(filtered.is_none());
1671    }
1672
1673    #[test]
1674    fn test_filter_with_indices() {
1675        let patches = Patches::new(
1676            10,
1677            0,
1678            buffer![2u64, 5, 8].into_array(),
1679            buffer![100i32, 200, 300].into_array(),
1680            None,
1681        )
1682        .unwrap();
1683
1684        // Keep indices 2, 5, 9 (so patches at 2 and 5 remain)
1685        let mask = Mask::from_indices(10, vec![2, 5, 9]);
1686        let filtered = patches
1687            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1688            .unwrap()
1689            .unwrap();
1690
1691        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); // Adjusted indices
1692        assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1693    }
1694
1695    #[test]
1696    fn test_slice_full_range() {
1697        let patches = Patches::new(
1698            10,
1699            0,
1700            buffer![2u64, 5, 8].into_array(),
1701            buffer![100i32, 200, 300].into_array(),
1702            None,
1703        )
1704        .unwrap();
1705
1706        let sliced = patches.slice(0..10).unwrap().unwrap();
1707
1708        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1709        assert_arrays_eq!(
1710            sliced.values(),
1711            PrimitiveArray::from_iter([100i32, 200, 300])
1712        );
1713    }
1714
1715    #[test]
1716    fn test_slice_partial() {
1717        let patches = Patches::new(
1718            10,
1719            0,
1720            buffer![2u64, 5, 8].into_array(),
1721            buffer![100i32, 200, 300].into_array(),
1722            None,
1723        )
1724        .unwrap();
1725
1726        // Slice from 3 to 8 (includes patch at 5)
1727        let sliced = patches.slice(3..8).unwrap().unwrap();
1728
1729        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); // Index stays the same
1730        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1731        assert_eq!(sliced.array_len(), 5); // 8 - 3 = 5
1732        assert_eq!(sliced.offset(), 3); // New offset
1733    }
1734
1735    #[test]
1736    fn test_slice_no_patches() {
1737        let patches = Patches::new(
1738            10,
1739            0,
1740            buffer![2u64, 5, 8].into_array(),
1741            buffer![100i32, 200, 300].into_array(),
1742            None,
1743        )
1744        .unwrap();
1745
1746        // Slice from 6 to 7 (no patches in this range)
1747        let sliced = patches.slice(6..7).unwrap();
1748        assert!(sliced.is_none());
1749    }
1750
1751    #[test]
1752    fn test_slice_with_offset() {
1753        let patches = Patches::new(
1754            10,
1755            5,                                  // offset
1756            buffer![7u64, 10, 13].into_array(), // actual indices are 2, 5, 8
1757            buffer![100i32, 200, 300].into_array(),
1758            None,
1759        )
1760        .unwrap();
1761
1762        // Slice from 3 to 8 (includes patch at actual index 5)
1763        let sliced = patches.slice(3..8).unwrap().unwrap();
1764
1765        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); // Index stays the same (offset + 5 = 10)
1766        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1767        assert_eq!(sliced.offset(), 8); // New offset = 5 + 3
1768    }
1769
1770    #[test]
1771    fn test_patch_values() {
1772        let patches = Patches::new(
1773            10,
1774            0,
1775            buffer![2u64, 5, 8].into_array(),
1776            buffer![100i32, 200, 300].into_array(),
1777            None,
1778        )
1779        .unwrap();
1780
1781        #[expect(deprecated)]
1782        let values = patches.values().to_primitive();
1783        assert_eq!(
1784            i32::try_from(
1785                &values
1786                    .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
1787                    .unwrap()
1788            )
1789            .unwrap(),
1790            100i32
1791        );
1792        assert_eq!(
1793            i32::try_from(
1794                &values
1795                    .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1796                    .unwrap()
1797            )
1798            .unwrap(),
1799            200i32
1800        );
1801        assert_eq!(
1802            i32::try_from(
1803                &values
1804                    .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
1805                    .unwrap()
1806            )
1807            .unwrap(),
1808            300i32
1809        );
1810    }
1811
1812    #[test]
1813    fn test_indices_range() {
1814        let patches = Patches::new(
1815            10,
1816            0,
1817            buffer![2u64, 5, 8].into_array(),
1818            buffer![100i32, 200, 300].into_array(),
1819            None,
1820        )
1821        .unwrap();
1822
1823        assert_eq!(patches.min_index().unwrap(), 2);
1824        assert_eq!(patches.max_index().unwrap(), 8);
1825    }
1826
1827    #[test]
1828    fn test_search_index() {
1829        let patches = Patches::new(
1830            10,
1831            0,
1832            buffer![2u64, 5, 8].into_array(),
1833            buffer![100i32, 200, 300].into_array(),
1834            None,
1835        )
1836        .unwrap();
1837
1838        // Search for exact indices
1839        assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1840        assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1841        assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1842
1843        // Search for non-patch indices
1844        assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1845        assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1846        assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1847        assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1848    }
1849
1850    #[test]
1851    fn test_mask_boundary_patches() {
1852        // Test masking patches at array boundaries
1853        let patches = Patches::new(
1854            10,
1855            0,
1856            buffer![0u64, 9].into_array(),
1857            buffer![100i32, 200].into_array(),
1858            None,
1859        )
1860        .unwrap();
1861
1862        let mask = Mask::from_iter([
1863            true, false, false, false, false, false, false, false, false, false,
1864        ]);
1865        let masked = patches
1866            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1867            .unwrap();
1868        assert!(masked.is_some());
1869        let masked = masked.unwrap();
1870        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1871        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1872    }
1873
1874    #[test]
1875    fn test_mask_all_patches_removed() {
1876        // Test when all patches are masked out
1877        let patches = Patches::new(
1878            10,
1879            0,
1880            buffer![2u64, 5, 8].into_array(),
1881            buffer![100i32, 200, 300].into_array(),
1882            None,
1883        )
1884        .unwrap();
1885
1886        // Mask that removes all patches
1887        let mask = Mask::from_iter([
1888            false, false, true, false, false, true, false, false, true, false,
1889        ]);
1890        let masked = patches
1891            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1892            .unwrap();
1893        assert!(masked.is_none());
1894    }
1895
1896    #[test]
1897    fn test_mask_no_patches_removed() {
1898        // Test when no patches are masked
1899        let patches = Patches::new(
1900            10,
1901            0,
1902            buffer![2u64, 5, 8].into_array(),
1903            buffer![100i32, 200, 300].into_array(),
1904            None,
1905        )
1906        .unwrap();
1907
1908        // Mask that doesn't affect any patches
1909        let mask = Mask::from_iter([
1910            true, false, false, true, false, false, true, false, false, true,
1911        ]);
1912        let masked = patches
1913            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1914            .unwrap()
1915            .unwrap();
1916
1917        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1918        assert_arrays_eq!(
1919            masked.values(),
1920            PrimitiveArray::from_iter([100i32, 200, 300])
1921        );
1922    }
1923
1924    #[test]
1925    fn test_mask_single_patch() {
1926        // Test with a single patch
1927        let patches = Patches::new(
1928            5,
1929            0,
1930            buffer![2u64].into_array(),
1931            buffer![42i32].into_array(),
1932            None,
1933        )
1934        .unwrap();
1935
1936        // Mask that removes the single patch
1937        let mask = Mask::from_iter([false, false, true, false, false]);
1938        let masked = patches
1939            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1940            .unwrap();
1941        assert!(masked.is_none());
1942
1943        // Mask that keeps the single patch
1944        let mask = Mask::from_iter([true, false, false, true, false]);
1945        let masked = patches
1946            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1947            .unwrap()
1948            .unwrap();
1949        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1950    }
1951
1952    #[test]
1953    fn test_mask_contiguous_patches() {
1954        // Test with contiguous patches
1955        let patches = Patches::new(
1956            10,
1957            0,
1958            buffer![3u64, 4, 5, 6].into_array(),
1959            buffer![100i32, 200, 300, 400].into_array(),
1960            None,
1961        )
1962        .unwrap();
1963
1964        // Mask that removes middle patches
1965        let mask = Mask::from_iter([
1966            false, false, false, false, true, true, false, false, false, false,
1967        ]);
1968        let masked = patches
1969            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1970            .unwrap()
1971            .unwrap();
1972
1973        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1974        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1975    }
1976
1977    #[test]
1978    fn test_mask_with_large_offset() {
1979        // Test with a large offset that shifts all indices
1980        let patches = Patches::new(
1981            20,
1982            15,
1983            buffer![16u64, 17, 19].into_array(), // actual indices are 1, 2, 4
1984            buffer![100i32, 200, 300].into_array(),
1985            None,
1986        )
1987        .unwrap();
1988
1989        // Mask that removes the patch at actual index 2
1990        let mask = Mask::from_iter([
1991            false, false, true, false, false, false, false, false, false, false, false, false,
1992            false, false, false, false, false, false, false, false,
1993        ]);
1994        let masked = patches
1995            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1996            .unwrap()
1997            .unwrap();
1998
1999        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
2000        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
2001    }
2002
2003    #[test]
2004    #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
2005    fn test_mask_wrong_length() {
2006        let patches = Patches::new(
2007            10,
2008            0,
2009            buffer![2u64, 5, 8].into_array(),
2010            buffer![100i32, 200, 300].into_array(),
2011            None,
2012        )
2013        .unwrap();
2014
2015        // Mask with wrong length
2016        let mask = Mask::from_iter([false, false, true, false, false]);
2017        patches
2018            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
2019            .unwrap();
2020    }
2021
2022    #[test]
2023    fn test_chunk_offsets_search() {
2024        let indices = buffer![100u64, 200, 3000, 3100].into_array();
2025        let values = buffer![10i32, 20, 30, 40].into_array();
2026        let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
2027        let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
2028
2029        assert!(patches.chunk_offsets.is_some());
2030
2031        // chunk 0: patches at 100, 200
2032        assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
2033        assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
2034
2035        // chunks 1, 2: no patches
2036        assert_eq!(
2037            patches.search_index(1500).unwrap(),
2038            SearchResult::NotFound(2)
2039        );
2040        assert_eq!(
2041            patches.search_index(2000).unwrap(),
2042            SearchResult::NotFound(2)
2043        );
2044
2045        // chunk 3: patches at 3000, 3100
2046        assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
2047        assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
2048
2049        assert_eq!(
2050            patches.search_index(1024).unwrap(),
2051            SearchResult::NotFound(2)
2052        );
2053    }
2054
2055    #[test]
2056    fn test_chunk_offsets_with_slice() {
2057        let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2058        let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2059        let chunk_offsets = buffer![0u64, 2, 6].into_array();
2060        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2061
2062        let sliced = patches.slice(1000..2200).unwrap().unwrap();
2063
2064        assert!(sliced.chunk_offsets.is_some());
2065        assert_eq!(sliced.offset(), 1000);
2066
2067        assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
2068        assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2069        assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
2070
2071        assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
2072    }
2073
2074    #[test]
2075    fn test_chunk_offsets_with_slice_after_first_chunk() {
2076        let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2077        let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2078        let chunk_offsets = buffer![0u64, 2, 6].into_array();
2079        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2080
2081        let sliced = patches.slice(1300..2200).unwrap().unwrap();
2082
2083        assert!(sliced.chunk_offsets.is_some());
2084        assert_eq!(sliced.offset(), 1300);
2085
2086        assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2087        assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
2088        assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2089        assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
2090        assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
2091    }
2092
2093    #[test]
2094    fn test_chunk_offsets_slice_empty_result() {
2095        let indices = buffer![100u64, 200, 3000, 3100].into_array();
2096        let values = buffer![10i32, 20, 30, 40].into_array();
2097        let chunk_offsets = buffer![0u64, 2].into_array();
2098        let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
2099
2100        let sliced = patches.slice(1000..2000).unwrap();
2101        assert!(sliced.is_none());
2102    }
2103
2104    #[test]
2105    fn test_chunk_offsets_slice_single_patch() {
2106        let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2107        let values = buffer![10i32, 20, 30, 40].into_array();
2108        let chunk_offsets = buffer![0u64, 1, 3].into_array();
2109        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2110
2111        let sliced = patches.slice(1100..1250).unwrap().unwrap();
2112
2113        assert_eq!(sliced.num_patches(), 1);
2114        assert_eq!(sliced.offset(), 1100);
2115        assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); // 1200 - 1100 = 100
2116        assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2117        assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2118    }
2119
2120    #[test]
2121    fn test_chunk_offsets_slice_across_chunks() {
2122        let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2123        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2124        let chunk_offsets = buffer![0u64, 2, 4].into_array();
2125        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2126
2127        let sliced = patches.slice(150..2150).unwrap().unwrap();
2128
2129        assert_eq!(sliced.num_patches(), 4);
2130        assert_eq!(sliced.offset(), 150);
2131
2132        assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); // 200 - 150 = 50
2133        assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); // 1100 - 150 = 950
2134        assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); // 1200 - 150 = 1050
2135        assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); // 2100 - 150 = 1950
2136    }
2137
2138    #[test]
2139    fn test_chunk_offsets_boundary_searches() {
2140        let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2141        let values = buffer![10i32, 20, 30, 40, 50].into_array();
2142        let chunk_offsets = buffer![0u64, 1, 4].into_array();
2143        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2144
2145        assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2146        assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2147        assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2148        assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2149        assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2150
2151        assert_eq!(
2152            patches.search_index(1022).unwrap(),
2153            SearchResult::NotFound(0)
2154        );
2155        assert_eq!(
2156            patches.search_index(2046).unwrap(),
2157            SearchResult::NotFound(3)
2158        );
2159    }
2160
2161    #[test]
2162    fn test_chunk_offsets_slice_edge_cases() {
2163        let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2164        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2165        let chunk_offsets = buffer![0u64, 3, 5].into_array();
2166        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2167
2168        // Slice at the very beginning
2169        let sliced = patches.slice(0..10).unwrap().unwrap();
2170        assert_eq!(sliced.num_patches(), 2);
2171        assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2172        assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2173
2174        // Slice at the very end
2175        let sliced = patches.slice(2040..3000).unwrap().unwrap();
2176        assert_eq!(sliced.num_patches(), 2); // patches at 2047 and 2048
2177        assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); // 2047 - 2040
2178        assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); // 2048 - 2040
2179    }
2180
2181    #[test]
2182    fn test_chunk_offsets_slice_nested() {
2183        let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2184        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2185        let chunk_offsets = buffer![0u64].into_array();
2186        let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2187
2188        let sliced1 = patches.slice(150..550).unwrap().unwrap();
2189        assert_eq!(sliced1.num_patches(), 4); // 200, 300, 400, 500
2190
2191        let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2192        assert_eq!(sliced2.num_patches(), 1); // 300
2193        assert_eq!(sliced2.offset(), 250);
2194
2195        assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); // 300 - 250
2196        assert_eq!(
2197            sliced2.search_index(150).unwrap(),
2198            SearchResult::NotFound(1)
2199        );
2200    }
2201
2202    #[test]
2203    fn test_nested_slice_with_dropped_first_chunk() {
2204        // PATCH_CHUNK_SIZE = 1024, so the two patches land in different chunks.
2205        let indices = buffer![0u64, 1024].into_array();
2206        let values = buffer![1i32, 2].into_array();
2207        let chunk_offsets = buffer![0u64, 1].into_array();
2208        let patches = Patches::new(2048, 0, indices, values, Some(chunk_offsets)).unwrap();
2209
2210        // Drop chunk 0, then re-slice the result.
2211        let dropped_first = patches.slice(1024..2048).unwrap().unwrap();
2212        let resliced = dropped_first.slice(0..1024).unwrap().unwrap();
2213        assert_eq!(resliced.num_patches(), 1);
2214    }
2215
2216    #[test]
2217    fn test_index_larger_than_length() {
2218        let chunk_offsets = buffer![0u64].into_array();
2219        let indices = buffer![1023u64].into_array();
2220        let values = buffer![42i32].into_array();
2221        let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2222        assert_eq!(
2223            patches.search_index(2048).unwrap(),
2224            SearchResult::NotFound(1)
2225        );
2226    }
2227}