Skip to main content

vortex_array/
patches.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use num_traits::NumCast;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::BufferMut;
12use vortex_error::VortexError;
13use vortex_error::VortexExpect as _;
14use vortex_error::VortexResult;
15use vortex_error::vortex_bail;
16use vortex_error::vortex_ensure;
17use vortex_error::vortex_err;
18use vortex_mask::AllOr;
19use vortex_mask::Mask;
20use vortex_utils::aliases::hash_map::HashMap;
21
22use crate::ArrayRef;
23use crate::ArraySlots;
24use crate::ExecutionCtx;
25use crate::IntoArray;
26use crate::LEGACY_SESSION;
27#[expect(deprecated)]
28use crate::ToCanonical as _;
29use crate::VortexSessionExecute;
30use crate::arrays::PrimitiveArray;
31use crate::builtins::ArrayBuiltins;
32use crate::dtype::DType;
33use crate::dtype::IntegerPType;
34use crate::dtype::NativePType;
35use crate::dtype::Nullability;
36use crate::dtype::Nullability::NonNullable;
37use crate::dtype::PType;
38use crate::dtype::UnsignedPType;
39use crate::match_each_integer_ptype;
40use crate::match_each_unsigned_integer_ptype;
41use crate::scalar::PValue;
42use crate::scalar::Scalar;
43use crate::search_sorted::SearchResult;
44use crate::search_sorted::SearchSorted;
45use crate::search_sorted::SearchSortedSide;
46use crate::validity::Validity;
47
48/// One patch index offset is stored for each chunk.
49/// This allows for constant time patch index lookups.
50pub const PATCH_CHUNK_SIZE: usize = 1024;
51
52#[derive(Copy, Clone, prost::Message)]
53pub struct PatchesMetadata {
54    #[prost(uint64, tag = "1")]
55    len: u64,
56    #[prost(uint64, tag = "2")]
57    offset: u64,
58    #[prost(enumeration = "PType", tag = "3")]
59    indices_ptype: i32,
60    #[prost(uint64, optional, tag = "4")]
61    chunk_offsets_len: Option<u64>,
62    #[prost(enumeration = "PType", optional, tag = "5")]
63    chunk_offsets_ptype: Option<i32>,
64    #[prost(uint64, optional, tag = "6")]
65    offset_within_chunk: Option<u64>,
66}
67
68impl PatchesMetadata {
69    #[inline]
70    pub fn new(
71        len: usize,
72        offset: usize,
73        indices_ptype: PType,
74        chunk_offsets_len: Option<usize>,
75        chunk_offsets_ptype: Option<PType>,
76        offset_within_chunk: Option<usize>,
77    ) -> Self {
78        Self {
79            len: len as u64,
80            offset: offset as u64,
81            indices_ptype: indices_ptype as i32,
82            chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
83            chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
84            offset_within_chunk: offset_within_chunk.map(|len| len as u64),
85        }
86    }
87
88    #[inline]
89    pub fn len(&self) -> VortexResult<usize> {
90        usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
91    }
92
93    #[inline]
94    pub fn is_empty(&self) -> bool {
95        self.len == 0
96    }
97
98    #[inline]
99    pub fn offset(&self) -> VortexResult<usize> {
100        usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
101    }
102
103    #[inline]
104    pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
105        self.chunk_offsets_ptype
106            .map(|t| {
107                PType::try_from(t)
108                    .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
109                    .map(|ptype| DType::Primitive(ptype, NonNullable))
110            })
111            .transpose()
112    }
113
114    #[inline]
115    pub fn indices_dtype(&self) -> VortexResult<DType> {
116        let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
117            vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
118        })?;
119        vortex_ensure!(
120            ptype.is_unsigned_int(),
121            "Patch indices must be unsigned integers"
122        );
123        Ok(DType::Primitive(ptype, NonNullable))
124    }
125}
126
127/// Metadata stored in an array's data struct for reconstructing [`Patches`] from slots.
128///
129/// The actual patch arrays (indices, values, chunk_offsets) live in the array's
130/// slots. This struct stores only the scalar metadata needed to reassemble them.
131#[derive(Clone, Debug, Hash, PartialEq, Eq)]
132pub struct PatchesData {
133    offset: usize,
134    offset_within_chunk: Option<usize>,
135}
136
137/// Slot indices for the three patch components within a slot array.
138#[derive(Copy, Clone, Debug)]
139pub struct PatchSlotIndices {
140    pub indices: usize,
141    pub values: usize,
142    pub chunk_offsets: usize,
143}
144
145impl PatchesData {
146    /// Extract patch metadata from an existing [`Patches`].
147    pub fn from_patches(patches: &Patches) -> Self {
148        Self {
149            offset: patches.offset(),
150            offset_within_chunk: patches.offset_within_chunk(),
151        }
152    }
153
154    /// Reconstruct patches from the given slot positions.
155    ///
156    /// Returns `None` if `patches_data` is `None`.
157    /// Panics if `patches_data` is `Some` but the indices or values slots are missing.
158    pub fn patches_from_slots(
159        patches_data: Option<&Self>,
160        len: usize,
161        slots: &[Option<ArrayRef>],
162        slot_idx: PatchSlotIndices,
163    ) -> Option<Patches> {
164        let data = patches_data?;
165        let indices = slots[slot_idx.indices]
166            .as_ref()
167            .vortex_expect("patches_data is set but patch_indices slot is missing");
168        let values = slots[slot_idx.values]
169            .as_ref()
170            .vortex_expect("patches_data is set but patch_values slot is missing");
171        Some(unsafe {
172            Patches::new_unchecked(
173                len,
174                data.offset,
175                indices.clone(),
176                values.clone(),
177                slots[slot_idx.chunk_offsets].clone(),
178                data.offset_within_chunk,
179            )
180        })
181    }
182
183    /// Push 3 patch slots (indices, values, chunk_offsets) onto a slot vector.
184    ///
185    /// If `patches` is `None`, pushes three `None` entries.
186    pub fn push_slots(slots: &mut ArraySlots, patches: Option<&Patches>) {
187        match patches {
188            Some(p) => {
189                slots.push(Some(p.indices().clone()));
190                slots.push(Some(p.values().clone()));
191                slots.push(p.chunk_offsets().clone());
192            }
193            None => {
194                slots.push(None);
195                slots.push(None);
196                slots.push(None);
197            }
198        }
199    }
200
201    /// Returns the patch offset.
202    #[inline]
203    pub fn offset(&self) -> usize {
204        self.offset
205    }
206
207    /// Returns the offset within the first chunk, if chunk offsets are present.
208    #[inline]
209    pub fn offset_within_chunk(&self) -> Option<usize> {
210        self.offset_within_chunk
211    }
212}
213
214/// A helper for working with patched arrays.
215#[derive(Debug, Clone)]
216pub struct Patches {
217    array_len: usize,
218    offset: usize,
219    indices: ArrayRef,
220    values: ArrayRef,
221    /// Stores the patch index offset for each chunk.
222    ///
223    /// This allows us to lookup the patches for a given chunk in constant time via
224    /// `patch_indices[chunk_offsets[i]..chunk_offsets[i+1]]`.
225    ///
226    /// This is optional for compatibility reasons.
227    chunk_offsets: Option<ArrayRef>,
228    /// Chunk offsets are only sliced off in case the slice is fully
229    /// outside of the chunk range.
230    ///
231    /// Though the range for indices and values is sliced in terms of
232    /// individual elements, not chunks. To account for that we do a
233    /// saturating sub when adjusting the indices based on the chunk offset.
234    ///
235    /// `offset_within_chunk` is necessary in order to keep track of how many
236    /// elements were sliced off within the chunk.
237    offset_within_chunk: Option<usize>,
238}
239
240impl Patches {
241    pub fn new(
242        array_len: usize,
243        offset: usize,
244        indices: ArrayRef,
245        values: ArrayRef,
246        chunk_offsets: Option<ArrayRef>,
247    ) -> VortexResult<Self> {
248        vortex_ensure!(
249            indices.len() == values.len(),
250            "Patch indices and values must have the same length"
251        );
252        vortex_ensure!(
253            indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
254            "Patch indices must be non-nullable unsigned integers, got {:?}",
255            indices.dtype()
256        );
257
258        vortex_ensure!(
259            indices.len() <= array_len,
260            "Patch indices must be shorter than the array length"
261        );
262        vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
263
264        // Perform validation of components when they are host-resident.
265        // This is not possible to do eagerly when the data is on GPU memory.
266        if indices.is_host() && values.is_host() {
267            let max = usize::try_from(&indices.execute_scalar(
268                indices.len() - 1,
269                &mut LEGACY_SESSION.create_execution_ctx(),
270            )?)
271            .map_err(|_| vortex_err!("indices must be a number"))?;
272            vortex_ensure!(
273                max - offset < array_len,
274                "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
275            );
276
277            #[cfg(debug_assertions)]
278            {
279                use crate::VortexSessionExecute;
280                use crate::aggregate_fn::fns::is_sorted::is_sorted;
281                let mut ctx = LEGACY_SESSION.create_execution_ctx();
282                assert!(
283                    is_sorted(&indices, &mut ctx).unwrap_or(false),
284                    "Patch indices must be sorted"
285                );
286            }
287        }
288
289        Ok(Self {
290            array_len,
291            offset,
292            indices,
293            values,
294            chunk_offsets: chunk_offsets.clone(),
295            // Initialize with `Some(0)` only if `chunk_offsets` are set.
296            offset_within_chunk: chunk_offsets.map(|_| 0),
297        })
298    }
299
300    /// Construct new patches without validating any of the arguments
301    ///
302    /// # Safety
303    ///
304    /// Users have to assert that
305    /// * Indices and values have the same length
306    /// * Indices is an unsigned integer type
307    /// * Indices must be sorted
308    /// * Last value in indices is smaller than array_len
309    pub unsafe fn new_unchecked(
310        array_len: usize,
311        offset: usize,
312        indices: ArrayRef,
313        values: ArrayRef,
314        chunk_offsets: Option<ArrayRef>,
315        offset_within_chunk: Option<usize>,
316    ) -> Self {
317        Self {
318            array_len,
319            offset,
320            indices,
321            values,
322            chunk_offsets,
323            offset_within_chunk,
324        }
325    }
326
327    #[inline]
328    pub fn array_len(&self) -> usize {
329        self.array_len
330    }
331
332    #[inline]
333    pub fn num_patches(&self) -> usize {
334        self.indices.len()
335    }
336
337    #[inline]
338    pub fn dtype(&self) -> &DType {
339        self.values.dtype()
340    }
341
342    #[inline]
343    pub fn indices(&self) -> &ArrayRef {
344        &self.indices
345    }
346
347    #[inline]
348    pub fn into_indices(self) -> ArrayRef {
349        self.indices
350    }
351
352    #[inline]
353    pub fn indices_mut(&mut self) -> &mut ArrayRef {
354        &mut self.indices
355    }
356
357    #[inline]
358    pub fn values(&self) -> &ArrayRef {
359        &self.values
360    }
361
362    #[inline]
363    pub fn into_values(self) -> ArrayRef {
364        self.values
365    }
366
367    #[inline]
368    pub fn values_mut(&mut self) -> &mut ArrayRef {
369        &mut self.values
370    }
371
372    #[inline]
373    // Absolute offset: 0 if the array is unsliced.
374    pub fn offset(&self) -> usize {
375        self.offset
376    }
377
378    #[inline]
379    pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
380        &self.chunk_offsets
381    }
382
383    #[inline]
384    pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
385        let Some(chunk_offsets) = &self.chunk_offsets else {
386            vortex_bail!("chunk_offsets must be set to retrieve offset at index")
387        };
388
389        chunk_offsets
390            .execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())?
391            .as_primitive()
392            .as_::<usize>()
393            .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
394    }
395
396    /// Returns the number of patches sliced off from the current first chunk.
397    ///
398    /// When patches are sliced, the chunk offsets array is also sliced to only include
399    /// chunks that overlap with the slice range. However, the slice boundary may fall
400    /// in the middle of a chunk's patch range. This offset indicates how many patches
401    /// at the start of the first chunk should be skipped.
402    ///
403    /// Returns `None` if chunk offsets are not set.
404    #[inline]
405    pub fn offset_within_chunk(&self) -> Option<usize> {
406        self.offset_within_chunk
407    }
408
409    #[inline]
410    pub fn indices_ptype(&self) -> VortexResult<PType> {
411        PType::try_from(self.indices.dtype())
412            .map_err(|_| vortex_err!("indices dtype is not primitive"))
413    }
414
415    pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
416        if self.indices.len() > len {
417            vortex_bail!(
418                "Patch indices {} are longer than the array length {}",
419                self.indices.len(),
420                len
421            );
422        }
423        if self.values.dtype() != dtype {
424            vortex_bail!(
425                "Patch values dtype {} does not match array dtype {}",
426                self.values.dtype(),
427                dtype
428            );
429        }
430        let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
431        let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
432
433        Ok(PatchesMetadata::new(
434            self.indices.len(),
435            self.offset,
436            self.indices.dtype().as_ptype(),
437            chunk_offsets_len,
438            chunk_offsets_ptype,
439            self.offset_within_chunk,
440        ))
441    }
442
443    pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
444        // SAFETY: casting does not affect the relationship between the indices and values
445        unsafe {
446            Ok(Self::new_unchecked(
447                self.array_len,
448                self.offset,
449                self.indices,
450                self.values.cast(values_dtype.clone())?,
451                self.chunk_offsets,
452                self.offset_within_chunk,
453            ))
454        }
455    }
456
457    /// Get the patched value at a given index if it exists.
458    pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
459        self.search_index(index)?
460            .to_found()
461            .map(|patch_idx| {
462                self.values()
463                    .execute_scalar(patch_idx, &mut LEGACY_SESSION.create_execution_ctx())
464            })
465            .transpose()
466    }
467
468    /// Searches for `index` in the indices array.
469    ///
470    /// Chooses between chunked search when [`Self::chunk_offsets`] is
471    /// available, and binary search otherwise. The `index` parameter is
472    /// adjusted by [`Self::offset`] for both.
473    ///
474    /// # Arguments
475    /// * `index` - The index to search for
476    ///
477    /// # Returns
478    /// * [`SearchResult::Found(patch_idx)`] - If a patch exists at this index, returns the
479    ///   position in the patches array
480    /// * [`SearchResult::NotFound(insertion_point)`] - If no patch exists, returns where
481    ///   a patch at this index would be inserted to maintain sorted order
482    ///
483    /// [`SearchResult::Found(patch_idx)`]: SearchResult::Found
484    /// [`SearchResult::NotFound(insertion_point)`]: SearchResult::NotFound
485    pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
486        if self.chunk_offsets.is_some() {
487            return self.search_index_chunked(index);
488        }
489
490        Self::search_index_binary_search(&self.indices, index + self.offset)
491    }
492
493    /// Binary searches for `needle` in the indices array.
494    ///
495    /// # Returns
496    /// [`SearchResult::Found`] with the position if needle exists, or [`SearchResult::NotFound`]
497    /// with the insertion point if not found.
498    fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
499        if indices.is_canonical() {
500            #[expect(deprecated)]
501            let primitive = indices.to_primitive();
502            match_each_integer_ptype!(primitive.ptype(), |T| {
503                let Ok(needle) = T::try_from(needle) else {
504                    // If the needle is not of type T, then it cannot possibly be in this array.
505                    //
506                    // The needle is a non-negative integer (a usize); therefore, it must be larger
507                    // than all values in this array.
508                    return Ok(SearchResult::NotFound(primitive.len()));
509                };
510                return primitive
511                    .as_slice::<T>()
512                    .search_sorted(&needle, SearchSortedSide::Left);
513            });
514        }
515        indices
516            .as_primitive_typed()
517            .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
518    }
519
520    /// Constant time searches for `index` in the indices array.
521    ///
522    /// First determines which chunk the target index falls into, then performs
523    /// a binary search within that chunk's range.
524    ///
525    /// Returns a [`SearchResult`] indicating either the exact patch index if found,
526    /// or the insertion point if not found.
527    ///
528    /// Returns an error if `chunk_offsets` or `offset_within_chunk` are not set.
529    fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
530        let Some(chunk_offsets) = &self.chunk_offsets else {
531            vortex_bail!("chunk_offsets is required to be set")
532        };
533
534        let Some(offset_within_chunk) = self.offset_within_chunk else {
535            vortex_bail!("offset_within_chunk is required to be set")
536        };
537
538        if index >= self.array_len() {
539            return Ok(SearchResult::NotFound(self.indices().len()));
540        }
541
542        let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
543
544        // Patch index offsets are absolute and need to be offset by the first chunk of the current slice.
545        let base_offset = self.chunk_offset_at(0)?;
546
547        let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
548            // Chunk offsets are only sliced off in case the slice is fully
549            // outside of the chunk range.
550            //
551            // Though the range for indices and values is sliced in terms of
552            // individual elements, not chunks. To account for that we do a
553            // saturating sub when adjusting the indices based on the chunk offset.
554            .saturating_sub(offset_within_chunk);
555
556        let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
557            (self.chunk_offset_at(chunk_idx + 1)? - base_offset)
558                .saturating_sub(offset_within_chunk)
559                .min(self.indices.len())
560        } else {
561            self.indices.len()
562        };
563
564        let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
565        let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
566
567        Ok(match result {
568            SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
569            SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
570        })
571    }
572
573    /// Batch version of `search_index`.
574    ///
575    /// In contrast to `search_index`, this function requires `indices` as
576    /// well as `chunk_offsets` to be passed as slices. This is to avoid
577    /// redundant canonicalization and `scalar_at` lookups across calls.
578    fn search_index_chunked_batch<T, O>(
579        &self,
580        indices: &[T],
581        chunk_offsets: &[O],
582        index: T,
583    ) -> VortexResult<SearchResult>
584    where
585        T: UnsignedPType,
586        O: UnsignedPType,
587        usize: TryFrom<T>,
588        usize: TryFrom<O>,
589    {
590        let Some(offset_within_chunk) = self.offset_within_chunk else {
591            vortex_bail!("offset_within_chunk is required to be set")
592        };
593
594        let chunk_idx = {
595            let Ok(index) = usize::try_from(index) else {
596                // If the needle cannot be converted to usize, it's larger than all values in this array.
597                return Ok(SearchResult::NotFound(indices.len()));
598            };
599
600            if index >= self.array_len() {
601                return Ok(SearchResult::NotFound(self.indices().len()));
602            }
603
604            (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
605        };
606
607        // Patch index offsets are absolute and need to be offset by the first chunk of the current slice.
608        let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
609            .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
610
611        let patches_start_idx = chunk_offset
612            // Chunk offsets are only sliced off in case the slice is fully
613            // outside of the chunk range.
614            //
615            // Though the range for indices and values is sliced in terms of
616            // individual elements, not chunks. To account for that we do a
617            // saturating sub when adjusting the indices based on the chunk offset.
618            .saturating_sub(offset_within_chunk);
619
620        let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
621            usize::try_from(chunk_offsets[chunk_idx + 1] - chunk_offsets[0])
622                .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
623                .saturating_sub(offset_within_chunk)
624                .min(indices.len())
625        } else {
626            self.indices.len()
627        };
628
629        let Some(offset) = T::from(self.offset) else {
630            // If the offset cannot be converted to T, it's larger than all values in this array.
631            return Ok(SearchResult::NotFound(indices.len()));
632        };
633
634        let chunk_indices = &indices[patches_start_idx..patches_end_idx];
635        let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
636
637        Ok(match result {
638            SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
639            SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
640        })
641    }
642
643    /// Returns the minimum patch index
644    pub fn min_index(&self) -> VortexResult<usize> {
645        let first = self
646            .indices
647            .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
648            .as_primitive()
649            .as_::<usize>()
650            .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
651        Ok(first - self.offset)
652    }
653
654    /// Returns the maximum patch index
655    pub fn max_index(&self) -> VortexResult<usize> {
656        let last = self
657            .indices
658            .execute_scalar(
659                self.indices.len() - 1,
660                &mut LEGACY_SESSION.create_execution_ctx(),
661            )?
662            .as_primitive()
663            .as_::<usize>()
664            .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
665        Ok(last - self.offset)
666    }
667
668    /// Filter the patches by a mask, resulting in new patches for the filtered array.
669    pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
670        if mask.len() != self.array_len {
671            vortex_bail!(
672                "Filter mask length {} does not match array length {}",
673                mask.len(),
674                self.array_len
675            );
676        }
677
678        match mask.indices() {
679            AllOr::All => Ok(Some(self.clone())),
680            AllOr::None => Ok(None),
681            AllOr::Some(mask_indices) => {
682                let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
683                match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
684                    filter_patches_with_mask(
685                        flat_indices.as_slice::<I>(),
686                        self.offset(),
687                        self.values(),
688                        mask_indices,
689                    )
690                })
691            }
692        }
693    }
694
695    /// Mask the patches, REMOVING the patches where the mask is true.
696    /// Unlike filter, this preserves the patch indices.
697    /// Unlike mask on a single array, this does not set masked values to null.
698    // TODO(joe): make this lazy and remove the ctx.
699    pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
700        if mask.len() != self.array_len {
701            vortex_bail!(
702                "Filter mask length {} does not match array length {}",
703                mask.len(),
704                self.array_len
705            );
706        }
707
708        let filter_mask = match mask.bit_buffer() {
709            AllOr::All => return Ok(None),
710            AllOr::None => return Ok(Some(self.clone())),
711            AllOr::Some(masked) => {
712                let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
713                match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
714                    let patch_indices = patch_indices.as_slice::<P>();
715                    Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
716                        #[allow(clippy::cast_possible_truncation)]
717                        let idx = (patch_indices[i] as usize) - self.offset;
718                        !masked.value(idx)
719                    }))
720                })
721            }
722        };
723
724        if filter_mask.all_false() {
725            return Ok(None);
726        }
727
728        // SAFETY: filtering indices/values with same mask maintains their 1:1 relationship
729        let filtered_indices = self.indices.filter(filter_mask.clone())?;
730        let filtered_values = self.values.filter(filter_mask)?;
731
732        Ok(Some(Self {
733            array_len: self.array_len,
734            offset: self.offset,
735            indices: filtered_indices,
736            values: filtered_values,
737            // TODO(0ax1): Chunk offsets are invalid after a filter is applied.
738            chunk_offsets: None,
739            offset_within_chunk: self.offset_within_chunk,
740        }))
741    }
742
743    /// Slice the patches by a range of the patched array.
744    pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
745        let slice_start_idx = self.search_index(range.start)?.to_index();
746        let slice_end_idx = self.search_index(range.end)?.to_index();
747
748        if slice_start_idx == slice_end_idx {
749            return Ok(None);
750        }
751
752        let values = self.values().slice(slice_start_idx..slice_end_idx)?;
753        let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
754
755        let new_chunk_offsets = self
756            .chunk_offsets
757            .as_ref()
758            .map(|chunk_offsets| -> VortexResult<ArrayRef> {
759                let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
760                let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
761                let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
762                chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
763            })
764            .transpose()?;
765
766        let offset_within_chunk = new_chunk_offsets
767            .as_ref()
768            .map(|new_chunk_offsets| -> VortexResult<usize> {
769                let new_chunk_base = new_chunk_offsets
770                    .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
771                    .as_primitive()
772                    .as_::<usize>()
773                    .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
774                let parent_chunk_base = self.chunk_offset_at(0)?;
775                let parent_within = self.offset_within_chunk.unwrap_or(0);
776                Ok(parent_chunk_base + parent_within + slice_start_idx - new_chunk_base)
777            })
778            .transpose()?;
779
780        Ok(Some(Self {
781            array_len: range.len(),
782            offset: range.start + self.offset(),
783            indices,
784            values,
785            chunk_offsets: new_chunk_offsets,
786            offset_within_chunk,
787        }))
788    }
789
790    // https://docs.google.com/spreadsheets/d/1D9vBZ1QJ6mwcIvV5wIL0hjGgVchcEnAyhvitqWu2ugU
791    const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
792
793    fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
794        (self.num_patches() as f64 / take_indices.len() as f64)
795            < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
796    }
797
798    /// Take the indices from the patches
799    ///
800    /// Any nulls in take_indices are added to the resulting patches.
801    pub fn take_with_nulls(
802        &self,
803        take_indices: &ArrayRef,
804        ctx: &mut ExecutionCtx,
805    ) -> VortexResult<Option<Self>> {
806        if take_indices.is_empty() {
807            return Ok(None);
808        }
809
810        let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
811        if self.is_map_faster_than_search(&take_indices) {
812            self.take_map(take_indices, true, ctx)
813        } else {
814            self.take_search(take_indices, true, ctx)
815        }
816    }
817
818    /// Take the indices from the patches.
819    ///
820    /// Any nulls in take_indices are ignored.
821    pub fn take(
822        &self,
823        take_indices: &ArrayRef,
824        ctx: &mut ExecutionCtx,
825    ) -> VortexResult<Option<Self>> {
826        if take_indices.is_empty() {
827            return Ok(None);
828        }
829
830        let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
831        if self.is_map_faster_than_search(&take_indices) {
832            self.take_map(take_indices, false, ctx)
833        } else {
834            self.take_search(take_indices, false, ctx)
835        }
836    }
837
838    #[expect(
839        clippy::cognitive_complexity,
840        reason = "complexity is from nested match_each_* macros"
841    )]
842    pub fn take_search(
843        &self,
844        take_indices: PrimitiveArray,
845        include_nulls: bool,
846        ctx: &mut ExecutionCtx,
847    ) -> VortexResult<Option<Self>> {
848        let take_indices_validity = take_indices.validity()?;
849        let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
850        let chunk_offsets = self
851            .chunk_offsets()
852            .as_ref()
853            .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
854            .transpose()?;
855
856        let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
857            match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
858                let patch_indices_slice = patch_indices.as_slice::<PatchT>();
859                match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
860                    let take_slice = take_indices.as_slice::<TakeT>();
861
862                    if let Some(chunk_offsets) = chunk_offsets {
863                        match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
864                            let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
865                            take_indices_with_search_fn(
866                                patch_indices_slice,
867                                take_slice,
868                                take_indices
869                                    .as_ref()
870                                    .validity()?
871                                    .execute_mask(take_indices.as_ref().len(), ctx)?,
872                                include_nulls,
873                                |take_idx| {
874                                    self.search_index_chunked_batch(
875                                        patch_indices_slice,
876                                        chunk_offsets,
877                                        take_idx,
878                                    )
879                                },
880                            )?
881                        })
882                    } else {
883                        take_indices_with_search_fn(
884                            patch_indices_slice,
885                            take_slice,
886                            take_indices
887                                .as_ref()
888                                .validity()?
889                                .execute_mask(take_indices.as_ref().len(), ctx)?,
890                            include_nulls,
891                            |take_idx| {
892                                let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
893                                    // If the offset cannot be converted to T, it's larger than all values in this array.
894                                    return Ok(SearchResult::NotFound(patch_indices_slice.len()));
895                                };
896
897                                patch_indices_slice
898                                    .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
899                            },
900                        )?
901                    }
902                })
903            });
904
905        if new_indices.is_empty() {
906            return Ok(None);
907        }
908
909        let new_indices = new_indices.into_array();
910        let new_array_len = take_indices.len();
911        let values_validity = take_indices_validity.take(&new_indices)?;
912
913        Ok(Some(Self {
914            array_len: new_array_len,
915            offset: 0,
916            indices: new_indices,
917            values: self
918                .values()
919                .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
920            chunk_offsets: None,
921            offset_within_chunk: Some(0), // Reset when creating new Patches.
922        }))
923    }
924
925    pub fn take_map(
926        &self,
927        take_indices: PrimitiveArray,
928        include_nulls: bool,
929        ctx: &mut ExecutionCtx,
930    ) -> VortexResult<Option<Self>> {
931        let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
932        let new_length = take_indices.len();
933
934        let min_index = self.min_index()?;
935        let max_index = self.max_index()?;
936
937        let Some((new_sparse_indices, value_indices)) =
938            match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
939                match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
940                    let take_validity = take_indices
941                        .validity()?
942                        .execute_mask(take_indices.len(), ctx)?;
943                    let take_nullability = take_indices.validity()?.nullability();
944                    let take_slice = take_indices.as_slice::<TakeIndices>();
945                    take_map::<_, TakeIndices>(
946                        indices.as_slice::<Indices>(),
947                        take_slice,
948                        take_validity,
949                        take_nullability,
950                        self.offset(),
951                        min_index,
952                        max_index,
953                        include_nulls,
954                    )?
955                })
956            })
957        else {
958            return Ok(None);
959        };
960
961        let taken_values = self.values().take(value_indices)?;
962
963        Ok(Some(Patches {
964            array_len: new_length,
965            offset: 0,
966            indices: new_sparse_indices,
967            values: taken_values,
968            // TODO(0ax1): Chunk offsets are invalid after take is applied.
969            chunk_offsets: None,
970            offset_within_chunk: self.offset_within_chunk,
971        }))
972    }
973
974    pub fn map_values<F>(self, f: F) -> VortexResult<Self>
975    where
976        F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
977    {
978        let values = f(self.values)?;
979        if self.indices.len() != values.len() {
980            vortex_bail!(
981                "map_values must preserve length: expected {} received {}",
982                self.indices.len(),
983                values.len()
984            )
985        }
986
987        Ok(Self {
988            array_len: self.array_len,
989            offset: self.offset,
990            indices: self.indices,
991            values,
992            chunk_offsets: self.chunk_offsets,
993            offset_within_chunk: self.offset_within_chunk,
994        })
995    }
996}
997
998#[expect(clippy::too_many_arguments)] // private function, can clean up one day
999fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
1000    indices: &[I],
1001    take_indices: &[T],
1002    take_validity: Mask,
1003    take_nullability: Nullability,
1004    indices_offset: usize,
1005    min_index: usize,
1006    max_index: usize,
1007    include_nulls: bool,
1008) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
1009where
1010    usize: TryFrom<T>,
1011    VortexError: From<<I as TryFrom<usize>>::Error>,
1012{
1013    let offset_i = I::try_from(indices_offset)?;
1014
1015    let sparse_index_to_value_index: HashMap<I, usize> = indices
1016        .iter()
1017        .copied()
1018        .map(|idx| idx - offset_i)
1019        .enumerate()
1020        .map(|(value_index, sparse_index)| (sparse_index, value_index))
1021        .collect();
1022
1023    let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1024    let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1025
1026    for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
1027        let ti = usize::try_from(take_idx)
1028            .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
1029
1030        // If we have to take nulls the take index doesn't matter, make it 0 for consistency
1031        let is_null = match take_validity.bit_buffer() {
1032            AllOr::All => false,
1033            AllOr::None => true,
1034            AllOr::Some(buf) => !buf.value(idx_in_take),
1035        };
1036        if is_null {
1037            if include_nulls {
1038                new_sparse_indices.push(idx_in_take as u64);
1039                value_indices.push(0);
1040            }
1041        } else if ti >= min_index && ti <= max_index {
1042            let ti_as_i = I::try_from(ti)
1043                .map_err(|_| vortex_err!("take index does not fit in index type"))?;
1044            if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
1045                new_sparse_indices.push(idx_in_take as u64);
1046                value_indices.push(value_index as u64);
1047            }
1048        }
1049    }
1050
1051    if new_sparse_indices.is_empty() {
1052        return Ok(None);
1053    }
1054
1055    let new_sparse_indices = new_sparse_indices.into_array();
1056    let values_validity =
1057        Validity::from_mask(take_validity, take_nullability).take(&new_sparse_indices)?;
1058    Ok(Some((
1059        new_sparse_indices,
1060        PrimitiveArray::new(value_indices, values_validity).into_array(),
1061    )))
1062}
1063
1064/// Filter patches with the provided mask (in flattened space).
1065///
1066/// The filter mask may contain indices that are non-patched. The return value of this function
1067/// is a new set of `Patches` with the indices relative to the provided `mask` rank, and the
1068/// patch values.
1069fn filter_patches_with_mask<T: IntegerPType>(
1070    patch_indices: &[T],
1071    offset: usize,
1072    patch_values: &ArrayRef,
1073    mask_indices: &[usize],
1074) -> VortexResult<Option<Patches>> {
1075    let true_count = mask_indices.len();
1076    let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
1077    let mut new_mask_indices = Vec::with_capacity(true_count);
1078
1079    // Attempt to move the window by `STRIDE` elements on each iteration. This assumes that
1080    // the patches are relatively sparse compared to the overall mask, and so many indices in the
1081    // mask will end up being skipped.
1082    const STRIDE: usize = 4;
1083
1084    let mut mask_idx = 0usize;
1085    let mut true_idx = 0usize;
1086
1087    while mask_idx < patch_indices.len() && true_idx < true_count {
1088        // NOTE: we are searching for overlaps between sorted, unaligned indices in `patch_indices`
1089        //  and `mask_indices`. We assume that Patches are sparse relative to the global space of
1090        //  the mask (which covers both patch and non-patch values of the parent array), and so to
1091        //  quickly jump through regions with no overlap, we attempt to move our pointers by STRIDE
1092        //  elements on each iteration. If we cannot rule out overlap due to min/max values, we
1093        //  fallback to performing a two-way iterator merge.
1094        if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1095            // Load a vector of each into our registers.
1096            let left_min = patch_indices[mask_idx]
1097                .to_usize()
1098                .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1099                - offset;
1100            let left_max = patch_indices[mask_idx + STRIDE]
1101                .to_usize()
1102                .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1103                - offset;
1104            let right_min = mask_indices[true_idx];
1105            let right_max = mask_indices[true_idx + STRIDE];
1106
1107            if left_min > right_max {
1108                // Advance right side
1109                true_idx += STRIDE;
1110                continue;
1111            } else if right_min > left_max {
1112                mask_idx += STRIDE;
1113                continue;
1114            } else {
1115                // Fallthrough to direct comparison path.
1116            }
1117        }
1118
1119        // Two-way sorted iterator merge:
1120
1121        let left = patch_indices[mask_idx]
1122            .to_usize()
1123            .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1124            - offset;
1125        let right = mask_indices[true_idx];
1126
1127        match left.cmp(&right) {
1128            Ordering::Less => {
1129                mask_idx += 1;
1130            }
1131            Ordering::Greater => {
1132                true_idx += 1;
1133            }
1134            Ordering::Equal => {
1135                // Save the mask index as well as the positional index.
1136                new_mask_indices.push(mask_idx);
1137                new_patch_indices.push(true_idx as u64);
1138
1139                mask_idx += 1;
1140                true_idx += 1;
1141            }
1142        }
1143    }
1144
1145    if new_mask_indices.is_empty() {
1146        return Ok(None);
1147    }
1148
1149    let new_patch_indices = new_patch_indices.into_array();
1150    let new_patch_values =
1151        patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1152
1153    Ok(Some(Patches::new(
1154        true_count,
1155        0,
1156        new_patch_indices,
1157        new_patch_values,
1158        // TODO(0ax1): Chunk offsets are invalid after a filter is applied.
1159        None,
1160    )?))
1161}
1162
1163fn take_indices_with_search_fn<
1164    I: UnsignedPType,
1165    T: IntegerPType,
1166    F: Fn(I) -> VortexResult<SearchResult>,
1167>(
1168    indices: &[I],
1169    take_indices: &[T],
1170    take_validity: Mask,
1171    include_nulls: bool,
1172    search_fn: F,
1173) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1174    let mut values_indices = BufferMut::with_capacity(take_indices.len());
1175    let mut new_indices = BufferMut::with_capacity(take_indices.len());
1176
1177    for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1178        if !take_validity.value(new_patch_idx) {
1179            if include_nulls {
1180                // For nulls, patch index doesn't matter - use 0 for consistency
1181                values_indices.push(0u64);
1182                new_indices.push(new_patch_idx as u64);
1183            }
1184            continue;
1185        } else {
1186            let search_result = match I::from(take_idx) {
1187                Some(idx) => search_fn(idx)?,
1188                None => SearchResult::NotFound(indices.len()),
1189            };
1190
1191            if let Some(patch_idx) = search_result.to_found() {
1192                values_indices.push(patch_idx as u64);
1193                new_indices.push(new_patch_idx as u64);
1194            }
1195        }
1196    }
1197
1198    Ok((values_indices, new_indices))
1199}
1200
1201#[cfg(test)]
1202mod test {
1203    use vortex_buffer::BufferMut;
1204    use vortex_buffer::buffer;
1205    use vortex_mask::Mask;
1206
1207    use crate::IntoArray;
1208    use crate::LEGACY_SESSION;
1209    #[expect(deprecated)]
1210    use crate::ToCanonical as _;
1211    use crate::VortexSessionExecute;
1212    use crate::assert_arrays_eq;
1213    use crate::patches::Patches;
1214    use crate::patches::PrimitiveArray;
1215    use crate::search_sorted::SearchResult;
1216    use crate::validity::Validity;
1217
1218    #[test]
1219    fn test_filter() {
1220        let patches = Patches::new(
1221            100,
1222            0,
1223            buffer![10u32, 11, 20].into_array(),
1224            buffer![100, 110, 200].into_array(),
1225            None,
1226        )
1227        .unwrap();
1228
1229        let filtered = patches
1230            .filter(
1231                &Mask::from_indices(100, vec![10, 20, 30]),
1232                &mut LEGACY_SESSION.create_execution_ctx(),
1233            )
1234            .unwrap()
1235            .unwrap();
1236
1237        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1238        assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1239    }
1240
1241    #[test]
1242    fn take_with_nulls() {
1243        let patches = Patches::new(
1244            20,
1245            0,
1246            buffer![2u64, 9, 15].into_array(),
1247            PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1248            None,
1249        )
1250        .unwrap();
1251
1252        let taken = patches
1253            .take(
1254                &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1255                    .into_array(),
1256                &mut LEGACY_SESSION.create_execution_ctx(),
1257            )
1258            .unwrap()
1259            .unwrap();
1260        #[expect(deprecated)]
1261        let primitive_values = taken.values().to_primitive();
1262        #[expect(deprecated)]
1263        let primitive_indices = taken.indices().to_primitive();
1264        assert_eq!(taken.array_len(), 2);
1265        assert_arrays_eq!(
1266            primitive_values,
1267            PrimitiveArray::from_option_iter([Some(44i32)])
1268        );
1269        assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1270        assert_eq!(
1271            primitive_values
1272                .as_ref()
1273                .validity()
1274                .unwrap()
1275                .execute_mask(
1276                    primitive_values.as_ref().len(),
1277                    &mut LEGACY_SESSION.create_execution_ctx()
1278                )
1279                .unwrap(),
1280            Mask::from_iter(vec![true])
1281        );
1282    }
1283
1284    #[test]
1285    fn take_search_with_nulls_chunked() {
1286        let patches = Patches::new(
1287            20,
1288            0,
1289            buffer![2u64, 9, 15].into_array(),
1290            buffer![33_i32, 44, 55].into_array(),
1291            Some(buffer![0u64].into_array()),
1292        )
1293        .unwrap();
1294
1295        let taken = patches
1296            .take_search(
1297                PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1298                true,
1299                &mut LEGACY_SESSION.create_execution_ctx(),
1300            )
1301            .unwrap()
1302            .unwrap();
1303
1304        #[expect(deprecated)]
1305        let primitive_values = taken.values().to_primitive();
1306        assert_eq!(taken.array_len(), 2);
1307        assert_arrays_eq!(
1308            primitive_values,
1309            PrimitiveArray::from_option_iter([Some(44i32), None])
1310        );
1311        assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1312
1313        assert_eq!(
1314            primitive_values
1315                .as_ref()
1316                .validity()
1317                .unwrap()
1318                .execute_mask(
1319                    primitive_values.as_ref().len(),
1320                    &mut LEGACY_SESSION.create_execution_ctx()
1321                )
1322                .unwrap(),
1323            Mask::from_iter([true, false])
1324        );
1325    }
1326
1327    #[test]
1328    fn take_search_chunked_multiple_chunks() {
1329        let patches = Patches::new(
1330            2048,
1331            0,
1332            buffer![100u64, 500, 1200, 1800].into_array(),
1333            buffer![10_i32, 20, 30, 40].into_array(),
1334            Some(buffer![0u64, 2].into_array()),
1335        )
1336        .unwrap();
1337
1338        let taken = patches
1339            .take_search(
1340                PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1341                true,
1342                &mut LEGACY_SESSION.create_execution_ctx(),
1343            )
1344            .unwrap()
1345            .unwrap();
1346
1347        assert_eq!(taken.array_len(), 3);
1348        assert_arrays_eq!(
1349            taken.values(),
1350            PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1351        );
1352    }
1353
1354    #[test]
1355    fn take_search_chunked_indices_with_no_patches() {
1356        let patches = Patches::new(
1357            20,
1358            0,
1359            buffer![2u64, 9, 15].into_array(),
1360            buffer![33_i32, 44, 55].into_array(),
1361            Some(buffer![0u64].into_array()),
1362        )
1363        .unwrap();
1364
1365        let taken = patches
1366            .take_search(
1367                PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1368                true,
1369                &mut LEGACY_SESSION.create_execution_ctx(),
1370            )
1371            .unwrap();
1372
1373        assert!(taken.is_none());
1374    }
1375
1376    #[test]
1377    fn take_search_chunked_interleaved() {
1378        let patches = Patches::new(
1379            30,
1380            0,
1381            buffer![5u64, 10, 20, 25].into_array(),
1382            buffer![100_i32, 200, 300, 400].into_array(),
1383            Some(buffer![0u64].into_array()),
1384        )
1385        .unwrap();
1386
1387        let taken = patches
1388            .take_search(
1389                PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1390                true,
1391                &mut LEGACY_SESSION.create_execution_ctx(),
1392            )
1393            .unwrap()
1394            .unwrap();
1395
1396        assert_eq!(taken.array_len(), 4);
1397        assert_arrays_eq!(
1398            taken.values(),
1399            PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1400        );
1401    }
1402
1403    #[test]
1404    fn test_take_search_multiple_chunk_offsets() {
1405        let patches = Patches::new(
1406            1500,
1407            0,
1408            BufferMut::from_iter(0..1500u64).into_array(),
1409            BufferMut::from_iter(0..1500i32).into_array(),
1410            Some(buffer![0u64, 1024u64].into_array()),
1411        )
1412        .unwrap();
1413
1414        let taken = patches
1415            .take_search(
1416                PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1417                false,
1418                &mut LEGACY_SESSION.create_execution_ctx(),
1419            )
1420            .unwrap()
1421            .unwrap();
1422
1423        assert_eq!(taken.array_len(), 1500);
1424    }
1425
1426    #[test]
1427    fn test_slice() {
1428        let values = buffer![15_u32, 135, 13531, 42].into_array();
1429        let indices = buffer![10_u64, 11, 50, 100].into_array();
1430
1431        let patches = Patches::new(101, 0, indices, values, None).unwrap();
1432
1433        let sliced = patches.slice(15..100).unwrap().unwrap();
1434        assert_eq!(sliced.array_len(), 100 - 15);
1435        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1436    }
1437
1438    #[test]
1439    fn doubly_sliced() {
1440        let values = buffer![15_u32, 135, 13531, 42].into_array();
1441        let indices = buffer![10_u64, 11, 50, 100].into_array();
1442
1443        let patches = Patches::new(101, 0, indices, values, None).unwrap();
1444
1445        let sliced = patches.slice(15..100).unwrap().unwrap();
1446        assert_eq!(sliced.array_len(), 100 - 15);
1447        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1448
1449        let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1450        assert_arrays_eq!(
1451            doubly_sliced.values(),
1452            PrimitiveArray::from_iter([13531u32])
1453        );
1454    }
1455
1456    #[test]
1457    fn test_mask_all_true() {
1458        let patches = Patches::new(
1459            10,
1460            0,
1461            buffer![2u64, 5, 8].into_array(),
1462            buffer![100i32, 200, 300].into_array(),
1463            None,
1464        )
1465        .unwrap();
1466
1467        let mask = Mask::new_true(10);
1468        let masked = patches
1469            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1470            .unwrap();
1471        assert!(masked.is_none());
1472    }
1473
1474    #[test]
1475    fn test_mask_all_false() {
1476        let patches = Patches::new(
1477            10,
1478            0,
1479            buffer![2u64, 5, 8].into_array(),
1480            buffer![100i32, 200, 300].into_array(),
1481            None,
1482        )
1483        .unwrap();
1484
1485        let mask = Mask::new_false(10);
1486        let masked = patches
1487            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1488            .unwrap()
1489            .unwrap();
1490
1491        // No patch values should be masked
1492        assert_arrays_eq!(
1493            masked.values(),
1494            PrimitiveArray::from_iter([100i32, 200, 300])
1495        );
1496        assert!(
1497            masked
1498                .values()
1499                .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1500                .unwrap()
1501        );
1502        assert!(
1503            masked
1504                .values()
1505                .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1506                .unwrap()
1507        );
1508        assert!(
1509            masked
1510                .values()
1511                .is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())
1512                .unwrap()
1513        );
1514
1515        // Indices should remain unchanged
1516        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1517    }
1518
1519    #[test]
1520    fn test_mask_partial() {
1521        let patches = Patches::new(
1522            10,
1523            0,
1524            buffer![2u64, 5, 8].into_array(),
1525            buffer![100i32, 200, 300].into_array(),
1526            None,
1527        )
1528        .unwrap();
1529
1530        // Mask that removes patches at indices 2 and 8 (but not 5)
1531        let mask = Mask::from_iter([
1532            false, false, true, false, false, false, false, false, true, false,
1533        ]);
1534        let masked = patches
1535            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1536            .unwrap()
1537            .unwrap();
1538
1539        // Only the patch at index 5 should remain
1540        assert_eq!(masked.values().len(), 1);
1541        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1542
1543        // Only index 5 should remain
1544        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1545    }
1546
1547    #[test]
1548    fn test_mask_with_offset() {
1549        let patches = Patches::new(
1550            10,
1551            5,                                  // offset
1552            buffer![7u64, 10, 13].into_array(), // actual indices are 2, 5, 8
1553            buffer![100i32, 200, 300].into_array(),
1554            None,
1555        )
1556        .unwrap();
1557
1558        // Mask that sets actual index 2 to null
1559        let mask = Mask::from_iter([
1560            false, false, true, false, false, false, false, false, false, false,
1561        ]);
1562
1563        let masked = patches
1564            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1565            .unwrap()
1566            .unwrap();
1567        assert_eq!(masked.array_len(), 10);
1568        assert_eq!(masked.offset(), 5);
1569        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1570        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1571    }
1572
1573    #[test]
1574    fn test_mask_nullable_values() {
1575        let patches = Patches::new(
1576            10,
1577            0,
1578            buffer![2u64, 5, 8].into_array(),
1579            PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1580            None,
1581        )
1582        .unwrap();
1583
1584        // Test masking removes patch at index 2
1585        let mask = Mask::from_iter([
1586            false, false, true, false, false, false, false, false, false, false,
1587        ]);
1588        let masked = patches
1589            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1590            .unwrap()
1591            .unwrap();
1592
1593        // Patches at indices 5 and 8 should remain
1594        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1595
1596        // Values should be the null and 300
1597        #[expect(deprecated)]
1598        let masked_values = masked.values().to_primitive();
1599        assert_eq!(masked_values.len(), 2);
1600        assert!(
1601            !masked_values
1602                .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1603                .unwrap()
1604        ); // the null value at index 5
1605        assert!(
1606            masked_values
1607                .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1608                .unwrap()
1609        ); // the 300 value at index 8
1610        assert_eq!(
1611            i32::try_from(
1612                &masked_values
1613                    .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1614                    .unwrap()
1615            )
1616            .unwrap(),
1617            300i32
1618        );
1619    }
1620
1621    #[test]
1622    fn test_filter_keep_all() {
1623        let patches = Patches::new(
1624            10,
1625            0,
1626            buffer![2u64, 5, 8].into_array(),
1627            buffer![100i32, 200, 300].into_array(),
1628            None,
1629        )
1630        .unwrap();
1631
1632        // Keep all indices (mask with indices 0-9)
1633        let mask = Mask::from_indices(10, 0..10);
1634        let filtered = patches
1635            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1636            .unwrap()
1637            .unwrap();
1638
1639        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1640        assert_arrays_eq!(
1641            filtered.values(),
1642            PrimitiveArray::from_iter([100i32, 200, 300])
1643        );
1644    }
1645
1646    #[test]
1647    fn test_filter_none() {
1648        let patches = Patches::new(
1649            10,
1650            0,
1651            buffer![2u64, 5, 8].into_array(),
1652            buffer![100i32, 200, 300].into_array(),
1653            None,
1654        )
1655        .unwrap();
1656
1657        // Filter out all (empty mask means keep nothing)
1658        let mask = Mask::from_indices(10, vec![]);
1659        let filtered = patches
1660            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1661            .unwrap();
1662        assert!(filtered.is_none());
1663    }
1664
1665    #[test]
1666    fn test_filter_with_indices() {
1667        let patches = Patches::new(
1668            10,
1669            0,
1670            buffer![2u64, 5, 8].into_array(),
1671            buffer![100i32, 200, 300].into_array(),
1672            None,
1673        )
1674        .unwrap();
1675
1676        // Keep indices 2, 5, 9 (so patches at 2 and 5 remain)
1677        let mask = Mask::from_indices(10, vec![2, 5, 9]);
1678        let filtered = patches
1679            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1680            .unwrap()
1681            .unwrap();
1682
1683        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); // Adjusted indices
1684        assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1685    }
1686
1687    #[test]
1688    fn test_slice_full_range() {
1689        let patches = Patches::new(
1690            10,
1691            0,
1692            buffer![2u64, 5, 8].into_array(),
1693            buffer![100i32, 200, 300].into_array(),
1694            None,
1695        )
1696        .unwrap();
1697
1698        let sliced = patches.slice(0..10).unwrap().unwrap();
1699
1700        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1701        assert_arrays_eq!(
1702            sliced.values(),
1703            PrimitiveArray::from_iter([100i32, 200, 300])
1704        );
1705    }
1706
1707    #[test]
1708    fn test_slice_partial() {
1709        let patches = Patches::new(
1710            10,
1711            0,
1712            buffer![2u64, 5, 8].into_array(),
1713            buffer![100i32, 200, 300].into_array(),
1714            None,
1715        )
1716        .unwrap();
1717
1718        // Slice from 3 to 8 (includes patch at 5)
1719        let sliced = patches.slice(3..8).unwrap().unwrap();
1720
1721        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); // Index stays the same
1722        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1723        assert_eq!(sliced.array_len(), 5); // 8 - 3 = 5
1724        assert_eq!(sliced.offset(), 3); // New offset
1725    }
1726
1727    #[test]
1728    fn test_slice_no_patches() {
1729        let patches = Patches::new(
1730            10,
1731            0,
1732            buffer![2u64, 5, 8].into_array(),
1733            buffer![100i32, 200, 300].into_array(),
1734            None,
1735        )
1736        .unwrap();
1737
1738        // Slice from 6 to 7 (no patches in this range)
1739        let sliced = patches.slice(6..7).unwrap();
1740        assert!(sliced.is_none());
1741    }
1742
1743    #[test]
1744    fn test_slice_with_offset() {
1745        let patches = Patches::new(
1746            10,
1747            5,                                  // offset
1748            buffer![7u64, 10, 13].into_array(), // actual indices are 2, 5, 8
1749            buffer![100i32, 200, 300].into_array(),
1750            None,
1751        )
1752        .unwrap();
1753
1754        // Slice from 3 to 8 (includes patch at actual index 5)
1755        let sliced = patches.slice(3..8).unwrap().unwrap();
1756
1757        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); // Index stays the same (offset + 5 = 10)
1758        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1759        assert_eq!(sliced.offset(), 8); // New offset = 5 + 3
1760    }
1761
1762    #[test]
1763    fn test_patch_values() {
1764        let patches = Patches::new(
1765            10,
1766            0,
1767            buffer![2u64, 5, 8].into_array(),
1768            buffer![100i32, 200, 300].into_array(),
1769            None,
1770        )
1771        .unwrap();
1772
1773        #[expect(deprecated)]
1774        let values = patches.values().to_primitive();
1775        assert_eq!(
1776            i32::try_from(
1777                &values
1778                    .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
1779                    .unwrap()
1780            )
1781            .unwrap(),
1782            100i32
1783        );
1784        assert_eq!(
1785            i32::try_from(
1786                &values
1787                    .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1788                    .unwrap()
1789            )
1790            .unwrap(),
1791            200i32
1792        );
1793        assert_eq!(
1794            i32::try_from(
1795                &values
1796                    .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
1797                    .unwrap()
1798            )
1799            .unwrap(),
1800            300i32
1801        );
1802    }
1803
1804    #[test]
1805    fn test_indices_range() {
1806        let patches = Patches::new(
1807            10,
1808            0,
1809            buffer![2u64, 5, 8].into_array(),
1810            buffer![100i32, 200, 300].into_array(),
1811            None,
1812        )
1813        .unwrap();
1814
1815        assert_eq!(patches.min_index().unwrap(), 2);
1816        assert_eq!(patches.max_index().unwrap(), 8);
1817    }
1818
1819    #[test]
1820    fn test_search_index() {
1821        let patches = Patches::new(
1822            10,
1823            0,
1824            buffer![2u64, 5, 8].into_array(),
1825            buffer![100i32, 200, 300].into_array(),
1826            None,
1827        )
1828        .unwrap();
1829
1830        // Search for exact indices
1831        assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1832        assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1833        assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1834
1835        // Search for non-patch indices
1836        assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1837        assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1838        assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1839        assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1840    }
1841
1842    #[test]
1843    fn test_mask_boundary_patches() {
1844        // Test masking patches at array boundaries
1845        let patches = Patches::new(
1846            10,
1847            0,
1848            buffer![0u64, 9].into_array(),
1849            buffer![100i32, 200].into_array(),
1850            None,
1851        )
1852        .unwrap();
1853
1854        let mask = Mask::from_iter([
1855            true, false, false, false, false, false, false, false, false, false,
1856        ]);
1857        let masked = patches
1858            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1859            .unwrap();
1860        assert!(masked.is_some());
1861        let masked = masked.unwrap();
1862        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1863        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1864    }
1865
1866    #[test]
1867    fn test_mask_all_patches_removed() {
1868        // Test when all patches are masked out
1869        let patches = Patches::new(
1870            10,
1871            0,
1872            buffer![2u64, 5, 8].into_array(),
1873            buffer![100i32, 200, 300].into_array(),
1874            None,
1875        )
1876        .unwrap();
1877
1878        // Mask that removes all patches
1879        let mask = Mask::from_iter([
1880            false, false, true, false, false, true, false, false, true, false,
1881        ]);
1882        let masked = patches
1883            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1884            .unwrap();
1885        assert!(masked.is_none());
1886    }
1887
1888    #[test]
1889    fn test_mask_no_patches_removed() {
1890        // Test when no patches are masked
1891        let patches = Patches::new(
1892            10,
1893            0,
1894            buffer![2u64, 5, 8].into_array(),
1895            buffer![100i32, 200, 300].into_array(),
1896            None,
1897        )
1898        .unwrap();
1899
1900        // Mask that doesn't affect any patches
1901        let mask = Mask::from_iter([
1902            true, false, false, true, false, false, true, false, false, true,
1903        ]);
1904        let masked = patches
1905            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1906            .unwrap()
1907            .unwrap();
1908
1909        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1910        assert_arrays_eq!(
1911            masked.values(),
1912            PrimitiveArray::from_iter([100i32, 200, 300])
1913        );
1914    }
1915
1916    #[test]
1917    fn test_mask_single_patch() {
1918        // Test with a single patch
1919        let patches = Patches::new(
1920            5,
1921            0,
1922            buffer![2u64].into_array(),
1923            buffer![42i32].into_array(),
1924            None,
1925        )
1926        .unwrap();
1927
1928        // Mask that removes the single patch
1929        let mask = Mask::from_iter([false, false, true, false, false]);
1930        let masked = patches
1931            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1932            .unwrap();
1933        assert!(masked.is_none());
1934
1935        // Mask that keeps the single patch
1936        let mask = Mask::from_iter([true, false, false, true, false]);
1937        let masked = patches
1938            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1939            .unwrap()
1940            .unwrap();
1941        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1942    }
1943
1944    #[test]
1945    fn test_mask_contiguous_patches() {
1946        // Test with contiguous patches
1947        let patches = Patches::new(
1948            10,
1949            0,
1950            buffer![3u64, 4, 5, 6].into_array(),
1951            buffer![100i32, 200, 300, 400].into_array(),
1952            None,
1953        )
1954        .unwrap();
1955
1956        // Mask that removes middle patches
1957        let mask = Mask::from_iter([
1958            false, false, false, false, true, true, false, false, false, false,
1959        ]);
1960        let masked = patches
1961            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1962            .unwrap()
1963            .unwrap();
1964
1965        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1966        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1967    }
1968
1969    #[test]
1970    fn test_mask_with_large_offset() {
1971        // Test with a large offset that shifts all indices
1972        let patches = Patches::new(
1973            20,
1974            15,
1975            buffer![16u64, 17, 19].into_array(), // actual indices are 1, 2, 4
1976            buffer![100i32, 200, 300].into_array(),
1977            None,
1978        )
1979        .unwrap();
1980
1981        // Mask that removes the patch at actual index 2
1982        let mask = Mask::from_iter([
1983            false, false, true, false, false, false, false, false, false, false, false, false,
1984            false, false, false, false, false, false, false, false,
1985        ]);
1986        let masked = patches
1987            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1988            .unwrap()
1989            .unwrap();
1990
1991        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1992        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1993    }
1994
1995    #[test]
1996    #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1997    fn test_mask_wrong_length() {
1998        let patches = Patches::new(
1999            10,
2000            0,
2001            buffer![2u64, 5, 8].into_array(),
2002            buffer![100i32, 200, 300].into_array(),
2003            None,
2004        )
2005        .unwrap();
2006
2007        // Mask with wrong length
2008        let mask = Mask::from_iter([false, false, true, false, false]);
2009        patches
2010            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
2011            .unwrap();
2012    }
2013
2014    #[test]
2015    fn test_chunk_offsets_search() {
2016        let indices = buffer![100u64, 200, 3000, 3100].into_array();
2017        let values = buffer![10i32, 20, 30, 40].into_array();
2018        let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
2019        let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
2020
2021        assert!(patches.chunk_offsets.is_some());
2022
2023        // chunk 0: patches at 100, 200
2024        assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
2025        assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
2026
2027        // chunks 1, 2: no patches
2028        assert_eq!(
2029            patches.search_index(1500).unwrap(),
2030            SearchResult::NotFound(2)
2031        );
2032        assert_eq!(
2033            patches.search_index(2000).unwrap(),
2034            SearchResult::NotFound(2)
2035        );
2036
2037        // chunk 3: patches at 3000, 3100
2038        assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
2039        assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
2040
2041        assert_eq!(
2042            patches.search_index(1024).unwrap(),
2043            SearchResult::NotFound(2)
2044        );
2045    }
2046
2047    #[test]
2048    fn test_chunk_offsets_with_slice() {
2049        let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2050        let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2051        let chunk_offsets = buffer![0u64, 2, 6].into_array();
2052        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2053
2054        let sliced = patches.slice(1000..2200).unwrap().unwrap();
2055
2056        assert!(sliced.chunk_offsets.is_some());
2057        assert_eq!(sliced.offset(), 1000);
2058
2059        assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
2060        assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2061        assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
2062
2063        assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
2064    }
2065
2066    #[test]
2067    fn test_chunk_offsets_with_slice_after_first_chunk() {
2068        let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2069        let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2070        let chunk_offsets = buffer![0u64, 2, 6].into_array();
2071        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2072
2073        let sliced = patches.slice(1300..2200).unwrap().unwrap();
2074
2075        assert!(sliced.chunk_offsets.is_some());
2076        assert_eq!(sliced.offset(), 1300);
2077
2078        assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2079        assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
2080        assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2081        assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
2082        assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
2083    }
2084
2085    #[test]
2086    fn test_chunk_offsets_slice_empty_result() {
2087        let indices = buffer![100u64, 200, 3000, 3100].into_array();
2088        let values = buffer![10i32, 20, 30, 40].into_array();
2089        let chunk_offsets = buffer![0u64, 2].into_array();
2090        let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
2091
2092        let sliced = patches.slice(1000..2000).unwrap();
2093        assert!(sliced.is_none());
2094    }
2095
2096    #[test]
2097    fn test_chunk_offsets_slice_single_patch() {
2098        let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2099        let values = buffer![10i32, 20, 30, 40].into_array();
2100        let chunk_offsets = buffer![0u64, 1, 3].into_array();
2101        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2102
2103        let sliced = patches.slice(1100..1250).unwrap().unwrap();
2104
2105        assert_eq!(sliced.num_patches(), 1);
2106        assert_eq!(sliced.offset(), 1100);
2107        assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); // 1200 - 1100 = 100
2108        assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2109        assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2110    }
2111
2112    #[test]
2113    fn test_chunk_offsets_slice_across_chunks() {
2114        let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2115        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2116        let chunk_offsets = buffer![0u64, 2, 4].into_array();
2117        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2118
2119        let sliced = patches.slice(150..2150).unwrap().unwrap();
2120
2121        assert_eq!(sliced.num_patches(), 4);
2122        assert_eq!(sliced.offset(), 150);
2123
2124        assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); // 200 - 150 = 50
2125        assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); // 1100 - 150 = 950
2126        assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); // 1200 - 150 = 1050
2127        assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); // 2100 - 150 = 1950
2128    }
2129
2130    #[test]
2131    fn test_chunk_offsets_boundary_searches() {
2132        let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2133        let values = buffer![10i32, 20, 30, 40, 50].into_array();
2134        let chunk_offsets = buffer![0u64, 1, 4].into_array();
2135        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2136
2137        assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2138        assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2139        assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2140        assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2141        assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2142
2143        assert_eq!(
2144            patches.search_index(1022).unwrap(),
2145            SearchResult::NotFound(0)
2146        );
2147        assert_eq!(
2148            patches.search_index(2046).unwrap(),
2149            SearchResult::NotFound(3)
2150        );
2151    }
2152
2153    #[test]
2154    fn test_chunk_offsets_slice_edge_cases() {
2155        let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2156        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2157        let chunk_offsets = buffer![0u64, 3, 5].into_array();
2158        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2159
2160        // Slice at the very beginning
2161        let sliced = patches.slice(0..10).unwrap().unwrap();
2162        assert_eq!(sliced.num_patches(), 2);
2163        assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2164        assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2165
2166        // Slice at the very end
2167        let sliced = patches.slice(2040..3000).unwrap().unwrap();
2168        assert_eq!(sliced.num_patches(), 2); // patches at 2047 and 2048
2169        assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); // 2047 - 2040
2170        assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); // 2048 - 2040
2171    }
2172
2173    #[test]
2174    fn test_chunk_offsets_slice_nested() {
2175        let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2176        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2177        let chunk_offsets = buffer![0u64].into_array();
2178        let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2179
2180        let sliced1 = patches.slice(150..550).unwrap().unwrap();
2181        assert_eq!(sliced1.num_patches(), 4); // 200, 300, 400, 500
2182
2183        let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2184        assert_eq!(sliced2.num_patches(), 1); // 300
2185        assert_eq!(sliced2.offset(), 250);
2186
2187        assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); // 300 - 250
2188        assert_eq!(
2189            sliced2.search_index(150).unwrap(),
2190            SearchResult::NotFound(1)
2191        );
2192    }
2193
2194    #[test]
2195    fn test_nested_slice_with_dropped_first_chunk() {
2196        // PATCH_CHUNK_SIZE = 1024, so the two patches land in different chunks.
2197        let indices = buffer![0u64, 1024].into_array();
2198        let values = buffer![1i32, 2].into_array();
2199        let chunk_offsets = buffer![0u64, 1].into_array();
2200        let patches = Patches::new(2048, 0, indices, values, Some(chunk_offsets)).unwrap();
2201
2202        // Drop chunk 0, then re-slice the result.
2203        let dropped_first = patches.slice(1024..2048).unwrap().unwrap();
2204        let resliced = dropped_first.slice(0..1024).unwrap().unwrap();
2205        assert_eq!(resliced.num_patches(), 1);
2206    }
2207
2208    #[test]
2209    fn test_index_larger_than_length() {
2210        let chunk_offsets = buffer![0u64].into_array();
2211        let indices = buffer![1023u64].into_array();
2212        let values = buffer![42i32].into_array();
2213        let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2214        assert_eq!(
2215            patches.search_index(2048).unwrap(),
2216            SearchResult::NotFound(1)
2217        );
2218    }
2219}