Skip to main content

vortex_array/
patches.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use itertools::Itertools;
10use num_traits::NumCast;
11use vortex_buffer::BitBuffer;
12use vortex_buffer::BufferMut;
13use vortex_error::VortexError;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_bail;
17use vortex_error::vortex_ensure;
18use vortex_error::vortex_err;
19use vortex_mask::AllOr;
20use vortex_mask::Mask;
21use vortex_mask::MaskMut;
22use vortex_utils::aliases::hash_map::HashMap;
23
24use crate::ArrayRef;
25use crate::ArrayVisitor;
26use crate::DynArray;
27use crate::ExecutionCtx;
28use crate::IntoArray;
29use crate::ToCanonical;
30use crate::arrays::BoolArray;
31use crate::arrays::PrimitiveArray;
32use crate::builtins::ArrayBuiltins;
33use crate::compute::is_sorted;
34use crate::dtype::DType;
35use crate::dtype::IntegerPType;
36use crate::dtype::NativePType;
37use crate::dtype::Nullability::NonNullable;
38use crate::dtype::PType;
39use crate::dtype::UnsignedPType;
40use crate::match_each_integer_ptype;
41use crate::match_each_unsigned_integer_ptype;
42use crate::scalar::PValue;
43use crate::scalar::Scalar;
44use crate::search_sorted::SearchResult;
45use crate::search_sorted::SearchSorted;
46use crate::search_sorted::SearchSortedSide;
47use crate::validity::Validity;
48use crate::vtable::ValidityHelper;
49
50/// One patch index offset is stored for each chunk.
51/// This allows for constant time patch index lookups.
52pub const PATCH_CHUNK_SIZE: usize = 1024;
53
54#[derive(Copy, Clone, prost::Message)]
55pub struct PatchesMetadata {
56    #[prost(uint64, tag = "1")]
57    len: u64,
58    #[prost(uint64, tag = "2")]
59    offset: u64,
60    #[prost(enumeration = "PType", tag = "3")]
61    indices_ptype: i32,
62    #[prost(uint64, optional, tag = "4")]
63    chunk_offsets_len: Option<u64>,
64    #[prost(enumeration = "PType", optional, tag = "5")]
65    chunk_offsets_ptype: Option<i32>,
66    #[prost(uint64, optional, tag = "6")]
67    offset_within_chunk: Option<u64>,
68}
69
70impl PatchesMetadata {
71    #[inline]
72    pub fn new(
73        len: usize,
74        offset: usize,
75        indices_ptype: PType,
76        chunk_offsets_len: Option<usize>,
77        chunk_offsets_ptype: Option<PType>,
78        offset_within_chunk: Option<usize>,
79    ) -> Self {
80        Self {
81            len: len as u64,
82            offset: offset as u64,
83            indices_ptype: indices_ptype as i32,
84            chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
85            chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
86            offset_within_chunk: offset_within_chunk.map(|len| len as u64),
87        }
88    }
89
90    #[inline]
91    pub fn len(&self) -> VortexResult<usize> {
92        usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
93    }
94
95    #[inline]
96    pub fn is_empty(&self) -> bool {
97        self.len == 0
98    }
99
100    #[inline]
101    pub fn offset(&self) -> VortexResult<usize> {
102        usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
103    }
104
105    #[inline]
106    pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
107        self.chunk_offsets_ptype
108            .map(|t| {
109                PType::try_from(t)
110                    .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
111                    .map(|ptype| DType::Primitive(ptype, NonNullable))
112            })
113            .transpose()
114    }
115
116    #[inline]
117    pub fn indices_dtype(&self) -> VortexResult<DType> {
118        let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
119            vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
120        })?;
121        vortex_ensure!(
122            ptype.is_unsigned_int(),
123            "Patch indices must be unsigned integers"
124        );
125        Ok(DType::Primitive(ptype, NonNullable))
126    }
127}
128
129/// A helper for working with patched arrays.
130#[derive(Debug, Clone)]
131pub struct Patches {
132    array_len: usize,
133    offset: usize,
134    indices: ArrayRef,
135    values: ArrayRef,
136    /// Stores the patch index offset for each chunk.
137    ///
138    /// This allows us to lookup the patches for a given chunk in constant time via
139    /// `patch_indices[chunk_offsets[i]..chunk_offsets[i+1]]`.
140    ///
141    /// This is optional for compatibility reasons.
142    chunk_offsets: Option<ArrayRef>,
143    /// Chunk offsets are only sliced off in case the slice is fully
144    /// outside of the chunk range.
145    ///
146    /// Though the range for indices and values is sliced in terms of
147    /// individual elements, not chunks. To account for that we do a
148    /// saturating sub when adjusting the indices based on the chunk offset.
149    ///
150    /// `offset_within_chunk` is necessary in order to keep track of how many
151    /// elements were sliced off within the chunk.
152    offset_within_chunk: Option<usize>,
153}
154
155impl Patches {
156    pub fn new(
157        array_len: usize,
158        offset: usize,
159        indices: ArrayRef,
160        values: ArrayRef,
161        chunk_offsets: Option<ArrayRef>,
162    ) -> VortexResult<Self> {
163        vortex_ensure!(
164            indices.len() == values.len(),
165            "Patch indices and values must have the same length"
166        );
167        vortex_ensure!(
168            indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
169            "Patch indices must be non-nullable unsigned integers, got {:?}",
170            indices.dtype()
171        );
172
173        vortex_ensure!(
174            indices.len() <= array_len,
175            "Patch indices must be shorter than the array length"
176        );
177        vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
178
179        // Perform validation of components when they are host-resident.
180        // This is not possible to do eagerly when the data is on GPU memory.
181        if indices.is_host() && values.is_host() {
182            let max = usize::try_from(&indices.scalar_at(indices.len() - 1)?)
183                .map_err(|_| vortex_err!("indices must be a number"))?;
184            vortex_ensure!(
185                max - offset < array_len,
186                "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
187            );
188
189            debug_assert!(
190                is_sorted(&indices).unwrap_or(Some(false)).unwrap_or(false),
191                "Patch indices must be sorted"
192            );
193        }
194
195        Ok(Self {
196            array_len,
197            offset,
198            indices,
199            values,
200            chunk_offsets: chunk_offsets.clone(),
201            // Initialize with `Some(0)` only if `chunk_offsets` are set.
202            offset_within_chunk: chunk_offsets.map(|_| 0),
203        })
204    }
205
206    /// Construct new patches without validating any of the arguments
207    ///
208    /// # Safety
209    ///
210    /// Users have to assert that
211    /// * Indices and values have the same length
212    /// * Indices is an unsigned integer type
213    /// * Indices must be sorted
214    /// * Last value in indices is smaller than array_len
215    pub unsafe fn new_unchecked(
216        array_len: usize,
217        offset: usize,
218        indices: ArrayRef,
219        values: ArrayRef,
220        chunk_offsets: Option<ArrayRef>,
221        offset_within_chunk: Option<usize>,
222    ) -> Self {
223        Self {
224            array_len,
225            offset,
226            indices,
227            values,
228            chunk_offsets,
229            offset_within_chunk,
230        }
231    }
232
233    #[inline]
234    pub fn array_len(&self) -> usize {
235        self.array_len
236    }
237
238    #[inline]
239    pub fn num_patches(&self) -> usize {
240        self.indices.len()
241    }
242
243    #[inline]
244    pub fn dtype(&self) -> &DType {
245        self.values.dtype()
246    }
247
248    #[inline]
249    pub fn indices(&self) -> &ArrayRef {
250        &self.indices
251    }
252
253    #[inline]
254    pub fn into_indices(self) -> ArrayRef {
255        self.indices
256    }
257
258    #[inline]
259    pub fn indices_mut(&mut self) -> &mut ArrayRef {
260        &mut self.indices
261    }
262
263    #[inline]
264    pub fn values(&self) -> &ArrayRef {
265        &self.values
266    }
267
268    #[inline]
269    pub fn into_values(self) -> ArrayRef {
270        self.values
271    }
272
273    #[inline]
274    pub fn values_mut(&mut self) -> &mut ArrayRef {
275        &mut self.values
276    }
277
278    #[inline]
279    // Absolute offset: 0 if the array is unsliced.
280    pub fn offset(&self) -> usize {
281        self.offset
282    }
283
284    #[inline]
285    pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
286        &self.chunk_offsets
287    }
288
289    #[inline]
290    pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
291        let Some(chunk_offsets) = &self.chunk_offsets else {
292            vortex_bail!("chunk_offsets must be set to retrieve offset at index")
293        };
294
295        chunk_offsets
296            .scalar_at(idx)?
297            .as_primitive()
298            .as_::<usize>()
299            .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
300    }
301
302    /// Returns the number of patches sliced off from the current first chunk.
303    ///
304    /// When patches are sliced, the chunk offsets array is also sliced to only include
305    /// chunks that overlap with the slice range. However, the slice boundary may fall
306    /// in the middle of a chunk's patch range. This offset indicates how many patches
307    /// at the start of the first chunk should be skipped.
308    ///
309    /// Returns `None` if chunk offsets are not set.
310    #[inline]
311    pub fn offset_within_chunk(&self) -> Option<usize> {
312        self.offset_within_chunk
313    }
314
315    #[inline]
316    pub fn indices_ptype(&self) -> VortexResult<PType> {
317        PType::try_from(self.indices.dtype())
318            .map_err(|_| vortex_err!("indices dtype is not primitive"))
319    }
320
321    pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
322        if self.indices.len() > len {
323            vortex_bail!(
324                "Patch indices {} are longer than the array length {}",
325                self.indices.len(),
326                len
327            );
328        }
329        if self.values.dtype() != dtype {
330            vortex_bail!(
331                "Patch values dtype {} does not match array dtype {}",
332                self.values.dtype(),
333                dtype
334            );
335        }
336        let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
337        let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
338
339        Ok(PatchesMetadata::new(
340            self.indices.len(),
341            self.offset,
342            self.indices.dtype().as_ptype(),
343            chunk_offsets_len,
344            chunk_offsets_ptype,
345            self.offset_within_chunk,
346        ))
347    }
348
349    pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
350        // SAFETY: casting does not affect the relationship between the indices and values
351        unsafe {
352            Ok(Self::new_unchecked(
353                self.array_len,
354                self.offset,
355                self.indices,
356                self.values.cast(values_dtype.clone())?,
357                self.chunk_offsets,
358                self.offset_within_chunk,
359            ))
360        }
361    }
362
363    /// Get the patched value at a given index if it exists.
364    pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
365        self.search_index(index)?
366            .to_found()
367            .map(|patch_idx| self.values().scalar_at(patch_idx))
368            .transpose()
369    }
370
371    /// Searches for `index` in the indices array.
372    ///
373    /// Chooses between chunked search when [`Self::chunk_offsets`] is
374    /// available, and binary search otherwise. The `index` parameter is
375    /// adjusted by [`Self::offset`] for both.
376    ///
377    /// # Arguments
378    /// * `index` - The index to search for
379    ///
380    /// # Returns
381    /// * [`SearchResult::Found(patch_idx)`] - If a patch exists at this index, returns the
382    ///   position in the patches array
383    /// * [`SearchResult::NotFound(insertion_point)`] - If no patch exists, returns where
384    ///   a patch at this index would be inserted to maintain sorted order
385    ///
386    /// [`SearchResult::Found(patch_idx)`]: SearchResult::Found
387    /// [`SearchResult::NotFound(insertion_point)`]: SearchResult::NotFound
388    pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
389        if self.chunk_offsets.is_some() {
390            return self.search_index_chunked(index);
391        }
392
393        Self::search_index_binary_search(&self.indices, index + self.offset)
394    }
395
396    /// Binary searches for `needle` in the indices array.
397    ///
398    /// # Returns
399    /// [`SearchResult::Found`] with the position if needle exists, or [`SearchResult::NotFound`]
400    /// with the insertion point if not found.
401    fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
402        if indices.is_canonical() {
403            let primitive = indices.to_primitive();
404            match_each_integer_ptype!(primitive.ptype(), |T| {
405                let Ok(needle) = T::try_from(needle) else {
406                    // If the needle is not of type T, then it cannot possibly be in this array.
407                    //
408                    // The needle is a non-negative integer (a usize); therefore, it must be larger
409                    // than all values in this array.
410                    return Ok(SearchResult::NotFound(primitive.len()));
411                };
412                return primitive
413                    .as_slice::<T>()
414                    .search_sorted(&needle, SearchSortedSide::Left);
415            });
416        }
417        indices
418            .as_primitive_typed()
419            .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
420    }
421
422    /// Constant time searches for `index` in the indices array.
423    ///
424    /// First determines which chunk the target index falls into, then performs
425    /// a binary search within that chunk's range.
426    ///
427    /// Returns a [`SearchResult`] indicating either the exact patch index if found,
428    /// or the insertion point if not found.
429    ///
430    /// Returns an error if `chunk_offsets` or `offset_within_chunk` are not set.
431    fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
432        let Some(chunk_offsets) = &self.chunk_offsets else {
433            vortex_bail!("chunk_offsets is required to be set")
434        };
435
436        let Some(offset_within_chunk) = self.offset_within_chunk else {
437            vortex_bail!("offset_within_chunk is required to be set")
438        };
439
440        if index >= self.array_len() {
441            return Ok(SearchResult::NotFound(self.indices().len()));
442        }
443
444        let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
445
446        // Patch index offsets are absolute and need to be offset by the first chunk of the current slice.
447        let base_offset = self.chunk_offset_at(0)?;
448
449        let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
450            // Chunk offsets are only sliced off in case the slice is fully
451            // outside of the chunk range.
452            //
453            // Though the range for indices and values is sliced in terms of
454            // individual elements, not chunks. To account for that we do a
455            // saturating sub when adjusting the indices based on the chunk offset.
456            .saturating_sub(offset_within_chunk);
457
458        let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
459            self.chunk_offset_at(chunk_idx + 1)? - base_offset - offset_within_chunk
460        } else {
461            self.indices.len()
462        };
463
464        let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
465        let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
466
467        Ok(match result {
468            SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
469            SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
470        })
471    }
472
473    /// Batch version of `search_index`.
474    ///
475    /// In contrast to `search_index`, this function requires `indices` as
476    /// well as `chunk_offsets` to be passed as slices. This is to avoid
477    /// redundant canonicalization and `scalar_at` lookups across calls.
478    fn search_index_chunked_batch<T, O>(
479        &self,
480        indices: &[T],
481        chunk_offsets: &[O],
482        index: T,
483    ) -> VortexResult<SearchResult>
484    where
485        T: UnsignedPType,
486        O: UnsignedPType,
487        usize: TryFrom<T>,
488        usize: TryFrom<O>,
489    {
490        let Some(offset_within_chunk) = self.offset_within_chunk else {
491            vortex_bail!("offset_within_chunk is required to be set")
492        };
493
494        let chunk_idx = {
495            let Ok(index) = usize::try_from(index) else {
496                // If the needle cannot be converted to usize, it's larger than all values in this array.
497                return Ok(SearchResult::NotFound(indices.len()));
498            };
499
500            if index >= self.array_len() {
501                return Ok(SearchResult::NotFound(self.indices().len()));
502            }
503
504            (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
505        };
506
507        // Patch index offsets are absolute and need to be offset by the first chunk of the current slice.
508        let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
509            .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
510
511        let patches_start_idx = chunk_offset
512            // Chunk offsets are only sliced off in case the slice is fully
513            // outside of the chunk range.
514            //
515            // Though the range for indices and values is sliced in terms of
516            // individual elements, not chunks. To account for that we do a
517            // saturating sub when adjusting the indices based on the chunk offset.
518            .saturating_sub(offset_within_chunk);
519
520        let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
521            let base_offset_end = chunk_offsets[chunk_idx + 1];
522
523            let offset_within_chunk = O::from(offset_within_chunk)
524                .ok_or_else(|| vortex_err!("offset_within_chunk failed to convert to O"))?;
525
526            usize::try_from(base_offset_end - chunk_offsets[0] - offset_within_chunk)
527                .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
528        } else {
529            self.indices.len()
530        };
531
532        let Some(offset) = T::from(self.offset) else {
533            // If the offset cannot be converted to T, it's larger than all values in this array.
534            return Ok(SearchResult::NotFound(indices.len()));
535        };
536
537        let chunk_indices = &indices[patches_start_idx..patches_end_idx];
538        let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
539
540        Ok(match result {
541            SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
542            SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
543        })
544    }
545
546    /// Returns the minimum patch index
547    pub fn min_index(&self) -> VortexResult<usize> {
548        let first = self
549            .indices
550            .scalar_at(0)?
551            .as_primitive()
552            .as_::<usize>()
553            .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
554        Ok(first - self.offset)
555    }
556
557    /// Returns the maximum patch index
558    pub fn max_index(&self) -> VortexResult<usize> {
559        let last = self
560            .indices
561            .scalar_at(self.indices.len() - 1)?
562            .as_primitive()
563            .as_::<usize>()
564            .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
565        Ok(last - self.offset)
566    }
567
568    /// Filter the patches by a mask, resulting in new patches for the filtered array.
569    pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
570        if mask.len() != self.array_len {
571            vortex_bail!(
572                "Filter mask length {} does not match array length {}",
573                mask.len(),
574                self.array_len
575            );
576        }
577
578        match mask.indices() {
579            AllOr::All => Ok(Some(self.clone())),
580            AllOr::None => Ok(None),
581            AllOr::Some(mask_indices) => {
582                let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
583                match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
584                    filter_patches_with_mask(
585                        flat_indices.as_slice::<I>(),
586                        self.offset(),
587                        self.values(),
588                        mask_indices,
589                    )
590                })
591            }
592        }
593    }
594
595    /// Mask the patches, REMOVING the patches where the mask is true.
596    /// Unlike filter, this preserves the patch indices.
597    /// Unlike mask on a single array, this does not set masked values to null.
598    // TODO(joe): make this lazy and remove the ctx.
599    pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
600        if mask.len() != self.array_len {
601            vortex_bail!(
602                "Filter mask length {} does not match array length {}",
603                mask.len(),
604                self.array_len
605            );
606        }
607
608        let filter_mask = match mask.bit_buffer() {
609            AllOr::All => return Ok(None),
610            AllOr::None => return Ok(Some(self.clone())),
611            AllOr::Some(masked) => {
612                let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
613                match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
614                    let patch_indices = patch_indices.as_slice::<P>();
615                    Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
616                        #[allow(clippy::cast_possible_truncation)]
617                        let idx = (patch_indices[i] as usize) - self.offset;
618                        !masked.value(idx)
619                    }))
620                })
621            }
622        };
623
624        if filter_mask.all_false() {
625            return Ok(None);
626        }
627
628        // SAFETY: filtering indices/values with same mask maintains their 1:1 relationship
629        let filtered_indices = self.indices.filter(filter_mask.clone())?;
630        let filtered_values = self.values.filter(filter_mask)?;
631
632        Ok(Some(Self {
633            array_len: self.array_len,
634            offset: self.offset,
635            indices: filtered_indices,
636            values: filtered_values,
637            // TODO(0ax1): Chunk offsets are invalid after a filter is applied.
638            chunk_offsets: None,
639            offset_within_chunk: self.offset_within_chunk,
640        }))
641    }
642
643    /// Slice the patches by a range of the patched array.
644    pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
645        let slice_start_idx = self.search_index(range.start)?.to_index();
646        let slice_end_idx = self.search_index(range.end)?.to_index();
647
648        if slice_start_idx == slice_end_idx {
649            return Ok(None);
650        }
651
652        let values = self.values().slice(slice_start_idx..slice_end_idx)?;
653        let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
654
655        let chunk_offsets = self
656            .chunk_offsets
657            .as_ref()
658            .map(|chunk_offsets| -> VortexResult<ArrayRef> {
659                let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
660                let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
661                let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
662                chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
663            })
664            .transpose()?;
665
666        let offset_within_chunk = chunk_offsets
667            .as_ref()
668            .map(|chunk_offsets| -> VortexResult<usize> {
669                let base_offset = chunk_offsets
670                    .scalar_at(0)?
671                    .as_primitive()
672                    .as_::<usize>()
673                    .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
674                Ok(slice_start_idx - base_offset)
675            })
676            .transpose()?;
677
678        Ok(Some(Self {
679            array_len: range.len(),
680            offset: range.start + self.offset(),
681            indices,
682            values,
683            chunk_offsets,
684            offset_within_chunk,
685        }))
686    }
687
688    // https://docs.google.com/spreadsheets/d/1D9vBZ1QJ6mwcIvV5wIL0hjGgVchcEnAyhvitqWu2ugU
689    const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
690
691    fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
692        (self.num_patches() as f64 / take_indices.len() as f64)
693            < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
694    }
695
696    /// Take the indices from the patches
697    ///
698    /// Any nulls in take_indices are added to the resulting patches.
699    pub fn take_with_nulls(
700        &self,
701        take_indices: &ArrayRef,
702        ctx: &mut ExecutionCtx,
703    ) -> VortexResult<Option<Self>> {
704        if take_indices.is_empty() {
705            return Ok(None);
706        }
707
708        let take_indices = take_indices.to_array().execute::<PrimitiveArray>(ctx)?;
709        if self.is_map_faster_than_search(&take_indices) {
710            self.take_map(take_indices, true, ctx)
711        } else {
712            self.take_search(take_indices, true, ctx)
713        }
714    }
715
716    /// Take the indices from the patches.
717    ///
718    /// Any nulls in take_indices are ignored.
719    pub fn take(
720        &self,
721        take_indices: &ArrayRef,
722        ctx: &mut ExecutionCtx,
723    ) -> VortexResult<Option<Self>> {
724        if take_indices.is_empty() {
725            return Ok(None);
726        }
727
728        let take_indices = take_indices.to_array().execute::<PrimitiveArray>(ctx)?;
729        if self.is_map_faster_than_search(&take_indices) {
730            self.take_map(take_indices, false, ctx)
731        } else {
732            self.take_search(take_indices, false, ctx)
733        }
734    }
735
736    #[expect(
737        clippy::cognitive_complexity,
738        reason = "complexity is from nested match_each_* macros"
739    )]
740    pub fn take_search(
741        &self,
742        take_indices: PrimitiveArray,
743        include_nulls: bool,
744        ctx: &mut ExecutionCtx,
745    ) -> VortexResult<Option<Self>> {
746        let take_indices_validity = take_indices.validity();
747        let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
748        let chunk_offsets = self
749            .chunk_offsets()
750            .as_ref()
751            .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
752            .transpose()?;
753
754        let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
755            match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
756                let patch_indices_slice = patch_indices.as_slice::<PatchT>();
757                match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
758                    let take_slice = take_indices.as_slice::<TakeT>();
759
760                    if let Some(chunk_offsets) = chunk_offsets {
761                        match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
762                            let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
763                            take_indices_with_search_fn(
764                                patch_indices_slice,
765                                take_slice,
766                                take_indices.validity_mask()?,
767                                include_nulls,
768                                |take_idx| {
769                                    self.search_index_chunked_batch(
770                                        patch_indices_slice,
771                                        chunk_offsets,
772                                        take_idx,
773                                    )
774                                },
775                            )?
776                        })
777                    } else {
778                        take_indices_with_search_fn(
779                            patch_indices_slice,
780                            take_slice,
781                            take_indices.validity_mask()?,
782                            include_nulls,
783                            |take_idx| {
784                                let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
785                                    // If the offset cannot be converted to T, it's larger than all values in this array.
786                                    return Ok(SearchResult::NotFound(patch_indices_slice.len()));
787                                };
788
789                                patch_indices_slice
790                                    .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
791                            },
792                        )?
793                    }
794                })
795            });
796
797        if new_indices.is_empty() {
798            return Ok(None);
799        }
800
801        let new_indices = new_indices.into_array();
802        let new_array_len = take_indices.len();
803        let values_validity = take_indices_validity.take(&new_indices)?;
804
805        Ok(Some(Self {
806            array_len: new_array_len,
807            offset: 0,
808            indices: new_indices.clone(),
809            values: self
810                .values()
811                .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
812            chunk_offsets: None,
813            offset_within_chunk: Some(0), // Reset when creating new Patches.
814        }))
815    }
816
817    pub fn take_map(
818        &self,
819        take_indices: PrimitiveArray,
820        include_nulls: bool,
821        ctx: &mut ExecutionCtx,
822    ) -> VortexResult<Option<Self>> {
823        let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
824        let new_length = take_indices.len();
825
826        let min_index = self.min_index()?;
827        let max_index = self.max_index()?;
828
829        let Some((new_sparse_indices, value_indices)) =
830            match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
831                match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
832                    take_map::<_, TakeIndices>(
833                        indices.as_slice::<Indices>(),
834                        take_indices,
835                        self.offset(),
836                        min_index,
837                        max_index,
838                        include_nulls,
839                    )?
840                })
841            })
842        else {
843            return Ok(None);
844        };
845
846        let taken_values = self.values().take(value_indices)?;
847
848        Ok(Some(Patches {
849            array_len: new_length,
850            offset: 0,
851            indices: new_sparse_indices,
852            values: taken_values,
853            // TODO(0ax1): Chunk offsets are invalid after take is applied.
854            chunk_offsets: None,
855            offset_within_chunk: self.offset_within_chunk,
856        }))
857    }
858
859    /// Apply patches to a mutable buffer and validity mask.
860    ///
861    /// This method applies the patch values to the given buffer at the positions specified by the
862    /// patch indices. For non-null patch values, it updates the buffer and marks the position as
863    /// valid. For null patch values, it marks the position as invalid.
864    ///
865    /// # Safety
866    ///
867    /// - All patch indices after offset adjustment must be valid indices into the buffer.
868    /// - The buffer and validity mask must have the same length.
869    pub unsafe fn apply_to_buffer<P: NativePType>(
870        &self,
871        buffer: &mut [P],
872        validity: &mut MaskMut,
873        ctx: &mut ExecutionCtx,
874    ) {
875        let patch_indices = self
876            .indices
877            .clone()
878            .execute::<PrimitiveArray>(ctx)
879            .vortex_expect("patch indices must be convertible to PrimitiveArray");
880        let patch_values = self
881            .values
882            .clone()
883            .execute::<PrimitiveArray>(ctx)
884            .vortex_expect("patch values must be convertible to PrimitiveArray");
885        let patches_validity = patch_values.validity();
886
887        let patch_values_slice = patch_values.as_slice::<P>();
888        match_each_unsigned_integer_ptype!(patch_indices.ptype(), |I| {
889            let patch_indices_slice = patch_indices.as_slice::<I>();
890
891            // SAFETY:
892            // - `Patches` invariant guarantees indices are sorted and within array bounds.
893            // - `patch_indices` and `patch_values` have equal length (from `Patches` invariant).
894            // - `buffer` and `validity` have equal length (precondition).
895            // - All patch indices are valid after offset adjustment (precondition).
896            unsafe {
897                apply_patches_to_buffer_inner(
898                    buffer,
899                    validity,
900                    patch_indices_slice,
901                    self.offset,
902                    patch_values_slice,
903                    patches_validity,
904                    ctx,
905                );
906            }
907        });
908    }
909
910    pub fn map_values<F>(self, f: F) -> VortexResult<Self>
911    where
912        F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
913    {
914        let values = f(self.values)?;
915        if self.indices.len() != values.len() {
916            vortex_bail!(
917                "map_values must preserve length: expected {} received {}",
918                self.indices.len(),
919                values.len()
920            )
921        }
922
923        Ok(Self {
924            array_len: self.array_len,
925            offset: self.offset,
926            indices: self.indices,
927            values,
928            chunk_offsets: self.chunk_offsets,
929            offset_within_chunk: self.offset_within_chunk,
930        })
931    }
932}
933
934/// Helper function to apply patches to a buffer.
935///
936/// # Safety
937///
938/// - All indices in `patch_indices` after subtracting `patch_offset` must be valid indices
939///   into both `buffer` and `validity`.
940/// - `patch_indices` must be sorted in ascending order.
941/// - `patch_indices` and `patch_values` must have the same length.
942/// - `buffer` and `validity` must have the same length.
943unsafe fn apply_patches_to_buffer_inner<P, I>(
944    buffer: &mut [P],
945    validity: &mut MaskMut,
946    patch_indices: &[I],
947    patch_offset: usize,
948    patch_values: &[P],
949    patches_validity: &Validity,
950    ctx: &mut ExecutionCtx,
951) where
952    P: NativePType,
953    I: UnsignedPType,
954{
955    debug_assert!(!patch_indices.is_empty());
956    debug_assert_eq!(patch_indices.len(), patch_values.len());
957    debug_assert_eq!(buffer.len(), validity.len());
958
959    match patches_validity {
960        Validity::NonNullable | Validity::AllValid => {
961            // All patch values are valid, apply them all.
962            for (&i, &value) in patch_indices.iter().zip_eq(patch_values) {
963                let index = i.as_() - patch_offset;
964
965                // SAFETY: `index` is valid because caller guarantees all patch indices are within
966                // bounds after offset adjustment.
967                unsafe {
968                    validity.set_unchecked(index);
969                }
970                buffer[index] = value;
971            }
972        }
973        Validity::AllInvalid => {
974            // All patch values are null, just mark positions as invalid.
975            for &i in patch_indices {
976                let index = i.as_() - patch_offset;
977
978                // SAFETY: `index` is valid because caller guarantees all patch indices are within
979                // bounds after offset adjustment.
980                unsafe {
981                    validity.unset_unchecked(index);
982                }
983            }
984        }
985        Validity::Array(array) => {
986            // Some patch values may be null, check each one.
987            let bool_array = array
988                .clone()
989                .execute::<BoolArray>(ctx)
990                .vortex_expect("validity array must be convertible to BoolArray");
991            let mask = bool_array.to_bit_buffer();
992            for (patch_idx, (&i, &value)) in patch_indices.iter().zip_eq(patch_values).enumerate() {
993                let index = i.as_() - patch_offset;
994
995                // SAFETY: `index` and `patch_idx` are valid because caller guarantees all patch
996                // indices are within bounds after offset adjustment.
997                unsafe {
998                    if mask.value_unchecked(patch_idx) {
999                        buffer[index] = value;
1000                        validity.set_unchecked(index);
1001                    } else {
1002                        validity.unset_unchecked(index);
1003                    }
1004                }
1005            }
1006        }
1007    }
1008}
1009
1010fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
1011    indices: &[I],
1012    take_indices: PrimitiveArray,
1013    indices_offset: usize,
1014    min_index: usize,
1015    max_index: usize,
1016    include_nulls: bool,
1017) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
1018where
1019    usize: TryFrom<T>,
1020    VortexError: From<<I as TryFrom<usize>>::Error>,
1021{
1022    let take_indices_len = take_indices.len();
1023    let take_indices_validity = take_indices.validity();
1024    let take_indices_validity_mask = take_indices_validity.to_mask(take_indices_len);
1025    let take_indices = take_indices.as_slice::<T>();
1026    let offset_i = I::try_from(indices_offset)?;
1027
1028    let sparse_index_to_value_index: HashMap<I, usize> = indices
1029        .iter()
1030        .copied()
1031        .map(|idx| idx - offset_i)
1032        .enumerate()
1033        .map(|(value_index, sparse_index)| (sparse_index, value_index))
1034        .collect();
1035
1036    let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1037    let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1038
1039    for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
1040        let ti = usize::try_from(take_idx)
1041            .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
1042
1043        // If we have to take nulls the take index doesn't matter, make it 0 for consistency
1044        let is_null = match take_indices_validity_mask.bit_buffer() {
1045            AllOr::All => false,
1046            AllOr::None => true,
1047            AllOr::Some(buf) => !buf.value(idx_in_take),
1048        };
1049        if is_null {
1050            if include_nulls {
1051                new_sparse_indices.push(idx_in_take as u64);
1052                value_indices.push(0);
1053            }
1054        } else if ti >= min_index && ti <= max_index {
1055            let ti_as_i = I::try_from(ti)
1056                .map_err(|_| vortex_err!("take index does not fit in index type"))?;
1057            if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
1058                new_sparse_indices.push(idx_in_take as u64);
1059                value_indices.push(value_index as u64);
1060            }
1061        }
1062    }
1063
1064    if new_sparse_indices.is_empty() {
1065        return Ok(None);
1066    }
1067
1068    let new_sparse_indices = new_sparse_indices.into_array();
1069    let values_validity = take_indices_validity.take(&new_sparse_indices)?;
1070    Ok(Some((
1071        new_sparse_indices,
1072        PrimitiveArray::new(value_indices, values_validity).into_array(),
1073    )))
1074}
1075
1076/// Filter patches with the provided mask (in flattened space).
1077///
1078/// The filter mask may contain indices that are non-patched. The return value of this function
1079/// is a new set of `Patches` with the indices relative to the provided `mask` rank, and the
1080/// patch values.
1081fn filter_patches_with_mask<T: IntegerPType>(
1082    patch_indices: &[T],
1083    offset: usize,
1084    patch_values: &ArrayRef,
1085    mask_indices: &[usize],
1086) -> VortexResult<Option<Patches>> {
1087    let true_count = mask_indices.len();
1088    let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
1089    let mut new_mask_indices = Vec::with_capacity(true_count);
1090
1091    // Attempt to move the window by `STRIDE` elements on each iteration. This assumes that
1092    // the patches are relatively sparse compared to the overall mask, and so many indices in the
1093    // mask will end up being skipped.
1094    const STRIDE: usize = 4;
1095
1096    let mut mask_idx = 0usize;
1097    let mut true_idx = 0usize;
1098
1099    while mask_idx < patch_indices.len() && true_idx < true_count {
1100        // NOTE: we are searching for overlaps between sorted, unaligned indices in `patch_indices`
1101        //  and `mask_indices`. We assume that Patches are sparse relative to the global space of
1102        //  the mask (which covers both patch and non-patch values of the parent array), and so to
1103        //  quickly jump through regions with no overlap, we attempt to move our pointers by STRIDE
1104        //  elements on each iteration. If we cannot rule out overlap due to min/max values, we
1105        //  fallback to performing a two-way iterator merge.
1106        if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1107            // Load a vector of each into our registers.
1108            let left_min = patch_indices[mask_idx]
1109                .to_usize()
1110                .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1111                - offset;
1112            let left_max = patch_indices[mask_idx + STRIDE]
1113                .to_usize()
1114                .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1115                - offset;
1116            let right_min = mask_indices[true_idx];
1117            let right_max = mask_indices[true_idx + STRIDE];
1118
1119            if left_min > right_max {
1120                // Advance right side
1121                true_idx += STRIDE;
1122                continue;
1123            } else if right_min > left_max {
1124                mask_idx += STRIDE;
1125                continue;
1126            } else {
1127                // Fallthrough to direct comparison path.
1128            }
1129        }
1130
1131        // Two-way sorted iterator merge:
1132
1133        let left = patch_indices[mask_idx]
1134            .to_usize()
1135            .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1136            - offset;
1137        let right = mask_indices[true_idx];
1138
1139        match left.cmp(&right) {
1140            Ordering::Less => {
1141                mask_idx += 1;
1142            }
1143            Ordering::Greater => {
1144                true_idx += 1;
1145            }
1146            Ordering::Equal => {
1147                // Save the mask index as well as the positional index.
1148                new_mask_indices.push(mask_idx);
1149                new_patch_indices.push(true_idx as u64);
1150
1151                mask_idx += 1;
1152                true_idx += 1;
1153            }
1154        }
1155    }
1156
1157    if new_mask_indices.is_empty() {
1158        return Ok(None);
1159    }
1160
1161    let new_patch_indices = new_patch_indices.into_array();
1162    let new_patch_values =
1163        patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1164
1165    Ok(Some(Patches::new(
1166        true_count,
1167        0,
1168        new_patch_indices,
1169        new_patch_values,
1170        // TODO(0ax1): Chunk offsets are invalid after a filter is applied.
1171        None,
1172    )?))
1173}
1174
1175fn take_indices_with_search_fn<
1176    I: UnsignedPType,
1177    T: IntegerPType,
1178    F: Fn(I) -> VortexResult<SearchResult>,
1179>(
1180    indices: &[I],
1181    take_indices: &[T],
1182    take_validity: Mask,
1183    include_nulls: bool,
1184    search_fn: F,
1185) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1186    let mut values_indices = BufferMut::with_capacity(take_indices.len());
1187    let mut new_indices = BufferMut::with_capacity(take_indices.len());
1188
1189    for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1190        if !take_validity.value(new_patch_idx) {
1191            if include_nulls {
1192                // For nulls, patch index doesn't matter - use 0 for consistency
1193                values_indices.push(0u64);
1194                new_indices.push(new_patch_idx as u64);
1195            }
1196            continue;
1197        } else {
1198            let search_result = match I::from(take_idx) {
1199                Some(idx) => search_fn(idx)?,
1200                None => SearchResult::NotFound(indices.len()),
1201            };
1202
1203            if let Some(patch_idx) = search_result.to_found() {
1204                values_indices.push(patch_idx as u64);
1205                new_indices.push(new_patch_idx as u64);
1206            }
1207        }
1208    }
1209
1210    Ok((values_indices, new_indices))
1211}
1212
1213#[cfg(test)]
1214mod test {
1215    use vortex_buffer::BufferMut;
1216    use vortex_buffer::buffer;
1217    use vortex_mask::Mask;
1218
1219    use crate::IntoArray;
1220    use crate::LEGACY_SESSION;
1221    use crate::ToCanonical;
1222    use crate::VortexSessionExecute;
1223    use crate::assert_arrays_eq;
1224    use crate::patches::Patches;
1225    use crate::patches::PrimitiveArray;
1226    use crate::search_sorted::SearchResult;
1227    use crate::validity::Validity;
1228
1229    #[test]
1230    fn test_filter() {
1231        let patches = Patches::new(
1232            100,
1233            0,
1234            buffer![10u32, 11, 20].into_array(),
1235            buffer![100, 110, 200].into_array(),
1236            None,
1237        )
1238        .unwrap();
1239
1240        let filtered = patches
1241            .filter(
1242                &Mask::from_indices(100, vec![10, 20, 30]),
1243                &mut LEGACY_SESSION.create_execution_ctx(),
1244            )
1245            .unwrap()
1246            .unwrap();
1247
1248        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1249        assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1250    }
1251
1252    #[test]
1253    fn take_with_nulls() {
1254        let patches = Patches::new(
1255            20,
1256            0,
1257            buffer![2u64, 9, 15].into_array(),
1258            PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1259            None,
1260        )
1261        .unwrap();
1262
1263        let taken = patches
1264            .take(
1265                &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1266                    .into_array(),
1267                &mut LEGACY_SESSION.create_execution_ctx(),
1268            )
1269            .unwrap()
1270            .unwrap();
1271        let primitive_values = taken.values().to_primitive();
1272        let primitive_indices = taken.indices().to_primitive();
1273        assert_eq!(taken.array_len(), 2);
1274        assert_arrays_eq!(
1275            primitive_values,
1276            PrimitiveArray::from_option_iter([Some(44i32)])
1277        );
1278        assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1279        assert_eq!(
1280            primitive_values.validity_mask().unwrap(),
1281            Mask::from_iter(vec![true])
1282        );
1283    }
1284
1285    #[test]
1286    fn take_search_with_nulls_chunked() {
1287        let patches = Patches::new(
1288            20,
1289            0,
1290            buffer![2u64, 9, 15].into_array(),
1291            buffer![33_i32, 44, 55].into_array(),
1292            Some(buffer![0u64].into_array()),
1293        )
1294        .unwrap();
1295
1296        let taken = patches
1297            .take_search(
1298                PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1299                true,
1300                &mut LEGACY_SESSION.create_execution_ctx(),
1301            )
1302            .unwrap()
1303            .unwrap();
1304
1305        let primitive_values = taken.values().to_primitive();
1306        assert_eq!(taken.array_len(), 2);
1307        assert_arrays_eq!(
1308            primitive_values,
1309            PrimitiveArray::from_option_iter([Some(44i32), None])
1310        );
1311        assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1312
1313        assert_eq!(
1314            primitive_values.validity_mask().unwrap(),
1315            Mask::from_iter([true, false])
1316        );
1317    }
1318
1319    #[test]
1320    fn take_search_chunked_multiple_chunks() {
1321        let patches = Patches::new(
1322            2048,
1323            0,
1324            buffer![100u64, 500, 1200, 1800].into_array(),
1325            buffer![10_i32, 20, 30, 40].into_array(),
1326            Some(buffer![0u64, 2].into_array()),
1327        )
1328        .unwrap();
1329
1330        let taken = patches
1331            .take_search(
1332                PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1333                true,
1334                &mut LEGACY_SESSION.create_execution_ctx(),
1335            )
1336            .unwrap()
1337            .unwrap();
1338
1339        assert_eq!(taken.array_len(), 3);
1340        assert_arrays_eq!(
1341            taken.values(),
1342            PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1343        );
1344    }
1345
1346    #[test]
1347    fn take_search_chunked_indices_with_no_patches() {
1348        let patches = Patches::new(
1349            20,
1350            0,
1351            buffer![2u64, 9, 15].into_array(),
1352            buffer![33_i32, 44, 55].into_array(),
1353            Some(buffer![0u64].into_array()),
1354        )
1355        .unwrap();
1356
1357        let taken = patches
1358            .take_search(
1359                PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1360                true,
1361                &mut LEGACY_SESSION.create_execution_ctx(),
1362            )
1363            .unwrap();
1364
1365        assert!(taken.is_none());
1366    }
1367
1368    #[test]
1369    fn take_search_chunked_interleaved() {
1370        let patches = Patches::new(
1371            30,
1372            0,
1373            buffer![5u64, 10, 20, 25].into_array(),
1374            buffer![100_i32, 200, 300, 400].into_array(),
1375            Some(buffer![0u64].into_array()),
1376        )
1377        .unwrap();
1378
1379        let taken = patches
1380            .take_search(
1381                PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1382                true,
1383                &mut LEGACY_SESSION.create_execution_ctx(),
1384            )
1385            .unwrap()
1386            .unwrap();
1387
1388        assert_eq!(taken.array_len(), 4);
1389        assert_arrays_eq!(
1390            taken.values(),
1391            PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1392        );
1393    }
1394
1395    #[test]
1396    fn test_take_search_multiple_chunk_offsets() {
1397        let patches = Patches::new(
1398            1500,
1399            0,
1400            BufferMut::from_iter(0..1500u64).into_array(),
1401            BufferMut::from_iter(0..1500i32).into_array(),
1402            Some(buffer![0u64, 1024u64].into_array()),
1403        )
1404        .unwrap();
1405
1406        let taken = patches
1407            .take_search(
1408                PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1409                false,
1410                &mut LEGACY_SESSION.create_execution_ctx(),
1411            )
1412            .unwrap()
1413            .unwrap();
1414
1415        assert_eq!(taken.array_len(), 1500);
1416    }
1417
1418    #[test]
1419    fn test_slice() {
1420        let values = buffer![15_u32, 135, 13531, 42].into_array();
1421        let indices = buffer![10_u64, 11, 50, 100].into_array();
1422
1423        let patches = Patches::new(101, 0, indices, values, None).unwrap();
1424
1425        let sliced = patches.slice(15..100).unwrap().unwrap();
1426        assert_eq!(sliced.array_len(), 100 - 15);
1427        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1428    }
1429
1430    #[test]
1431    fn doubly_sliced() {
1432        let values = buffer![15_u32, 135, 13531, 42].into_array();
1433        let indices = buffer![10_u64, 11, 50, 100].into_array();
1434
1435        let patches = Patches::new(101, 0, indices, values, None).unwrap();
1436
1437        let sliced = patches.slice(15..100).unwrap().unwrap();
1438        assert_eq!(sliced.array_len(), 100 - 15);
1439        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1440
1441        let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1442        assert_arrays_eq!(
1443            doubly_sliced.values(),
1444            PrimitiveArray::from_iter([13531u32])
1445        );
1446    }
1447
1448    #[test]
1449    fn test_mask_all_true() {
1450        let patches = Patches::new(
1451            10,
1452            0,
1453            buffer![2u64, 5, 8].into_array(),
1454            buffer![100i32, 200, 300].into_array(),
1455            None,
1456        )
1457        .unwrap();
1458
1459        let mask = Mask::new_true(10);
1460        let masked = patches
1461            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1462            .unwrap();
1463        assert!(masked.is_none());
1464    }
1465
1466    #[test]
1467    fn test_mask_all_false() {
1468        let patches = Patches::new(
1469            10,
1470            0,
1471            buffer![2u64, 5, 8].into_array(),
1472            buffer![100i32, 200, 300].into_array(),
1473            None,
1474        )
1475        .unwrap();
1476
1477        let mask = Mask::new_false(10);
1478        let masked = patches
1479            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1480            .unwrap()
1481            .unwrap();
1482
1483        // No patch values should be masked
1484        assert_arrays_eq!(
1485            masked.values(),
1486            PrimitiveArray::from_iter([100i32, 200, 300])
1487        );
1488        assert!(masked.values().is_valid(0).unwrap());
1489        assert!(masked.values().is_valid(1).unwrap());
1490        assert!(masked.values().is_valid(2).unwrap());
1491
1492        // Indices should remain unchanged
1493        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1494    }
1495
1496    #[test]
1497    fn test_mask_partial() {
1498        let patches = Patches::new(
1499            10,
1500            0,
1501            buffer![2u64, 5, 8].into_array(),
1502            buffer![100i32, 200, 300].into_array(),
1503            None,
1504        )
1505        .unwrap();
1506
1507        // Mask that removes patches at indices 2 and 8 (but not 5)
1508        let mask = Mask::from_iter([
1509            false, false, true, false, false, false, false, false, true, false,
1510        ]);
1511        let masked = patches
1512            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1513            .unwrap()
1514            .unwrap();
1515
1516        // Only the patch at index 5 should remain
1517        assert_eq!(masked.values().len(), 1);
1518        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1519
1520        // Only index 5 should remain
1521        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1522    }
1523
1524    #[test]
1525    fn test_mask_with_offset() {
1526        let patches = Patches::new(
1527            10,
1528            5,                                  // offset
1529            buffer![7u64, 10, 13].into_array(), // actual indices are 2, 5, 8
1530            buffer![100i32, 200, 300].into_array(),
1531            None,
1532        )
1533        .unwrap();
1534
1535        // Mask that sets actual index 2 to null
1536        let mask = Mask::from_iter([
1537            false, false, true, false, false, false, false, false, false, false,
1538        ]);
1539
1540        let masked = patches
1541            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1542            .unwrap()
1543            .unwrap();
1544        assert_eq!(masked.array_len(), 10);
1545        assert_eq!(masked.offset(), 5);
1546        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1547        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1548    }
1549
1550    #[test]
1551    fn test_mask_nullable_values() {
1552        let patches = Patches::new(
1553            10,
1554            0,
1555            buffer![2u64, 5, 8].into_array(),
1556            PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1557            None,
1558        )
1559        .unwrap();
1560
1561        // Test masking removes patch at index 2
1562        let mask = Mask::from_iter([
1563            false, false, true, false, false, false, false, false, false, false,
1564        ]);
1565        let masked = patches
1566            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1567            .unwrap()
1568            .unwrap();
1569
1570        // Patches at indices 5 and 8 should remain
1571        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1572
1573        // Values should be the null and 300
1574        let masked_values = masked.values().to_primitive();
1575        assert_eq!(masked_values.len(), 2);
1576        assert!(!masked_values.is_valid(0).unwrap()); // the null value at index 5
1577        assert!(masked_values.is_valid(1).unwrap()); // the 300 value at index 8
1578        assert_eq!(
1579            i32::try_from(&masked_values.scalar_at(1).unwrap()).unwrap(),
1580            300i32
1581        );
1582    }
1583
1584    #[test]
1585    fn test_filter_keep_all() {
1586        let patches = Patches::new(
1587            10,
1588            0,
1589            buffer![2u64, 5, 8].into_array(),
1590            buffer![100i32, 200, 300].into_array(),
1591            None,
1592        )
1593        .unwrap();
1594
1595        // Keep all indices (mask with indices 0-9)
1596        let mask = Mask::from_indices(10, (0..10).collect());
1597        let filtered = patches
1598            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1599            .unwrap()
1600            .unwrap();
1601
1602        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1603        assert_arrays_eq!(
1604            filtered.values(),
1605            PrimitiveArray::from_iter([100i32, 200, 300])
1606        );
1607    }
1608
1609    #[test]
1610    fn test_filter_none() {
1611        let patches = Patches::new(
1612            10,
1613            0,
1614            buffer![2u64, 5, 8].into_array(),
1615            buffer![100i32, 200, 300].into_array(),
1616            None,
1617        )
1618        .unwrap();
1619
1620        // Filter out all (empty mask means keep nothing)
1621        let mask = Mask::from_indices(10, vec![]);
1622        let filtered = patches
1623            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1624            .unwrap();
1625        assert!(filtered.is_none());
1626    }
1627
1628    #[test]
1629    fn test_filter_with_indices() {
1630        let patches = Patches::new(
1631            10,
1632            0,
1633            buffer![2u64, 5, 8].into_array(),
1634            buffer![100i32, 200, 300].into_array(),
1635            None,
1636        )
1637        .unwrap();
1638
1639        // Keep indices 2, 5, 9 (so patches at 2 and 5 remain)
1640        let mask = Mask::from_indices(10, vec![2, 5, 9]);
1641        let filtered = patches
1642            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1643            .unwrap()
1644            .unwrap();
1645
1646        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); // Adjusted indices
1647        assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1648    }
1649
1650    #[test]
1651    fn test_slice_full_range() {
1652        let patches = Patches::new(
1653            10,
1654            0,
1655            buffer![2u64, 5, 8].into_array(),
1656            buffer![100i32, 200, 300].into_array(),
1657            None,
1658        )
1659        .unwrap();
1660
1661        let sliced = patches.slice(0..10).unwrap().unwrap();
1662
1663        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1664        assert_arrays_eq!(
1665            sliced.values(),
1666            PrimitiveArray::from_iter([100i32, 200, 300])
1667        );
1668    }
1669
1670    #[test]
1671    fn test_slice_partial() {
1672        let patches = Patches::new(
1673            10,
1674            0,
1675            buffer![2u64, 5, 8].into_array(),
1676            buffer![100i32, 200, 300].into_array(),
1677            None,
1678        )
1679        .unwrap();
1680
1681        // Slice from 3 to 8 (includes patch at 5)
1682        let sliced = patches.slice(3..8).unwrap().unwrap();
1683
1684        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); // Index stays the same
1685        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1686        assert_eq!(sliced.array_len(), 5); // 8 - 3 = 5
1687        assert_eq!(sliced.offset(), 3); // New offset
1688    }
1689
1690    #[test]
1691    fn test_slice_no_patches() {
1692        let patches = Patches::new(
1693            10,
1694            0,
1695            buffer![2u64, 5, 8].into_array(),
1696            buffer![100i32, 200, 300].into_array(),
1697            None,
1698        )
1699        .unwrap();
1700
1701        // Slice from 6 to 7 (no patches in this range)
1702        let sliced = patches.slice(6..7).unwrap();
1703        assert!(sliced.is_none());
1704    }
1705
1706    #[test]
1707    fn test_slice_with_offset() {
1708        let patches = Patches::new(
1709            10,
1710            5,                                  // offset
1711            buffer![7u64, 10, 13].into_array(), // actual indices are 2, 5, 8
1712            buffer![100i32, 200, 300].into_array(),
1713            None,
1714        )
1715        .unwrap();
1716
1717        // Slice from 3 to 8 (includes patch at actual index 5)
1718        let sliced = patches.slice(3..8).unwrap().unwrap();
1719
1720        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); // Index stays the same (offset + 5 = 10)
1721        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1722        assert_eq!(sliced.offset(), 8); // New offset = 5 + 3
1723    }
1724
1725    #[test]
1726    fn test_patch_values() {
1727        let patches = Patches::new(
1728            10,
1729            0,
1730            buffer![2u64, 5, 8].into_array(),
1731            buffer![100i32, 200, 300].into_array(),
1732            None,
1733        )
1734        .unwrap();
1735
1736        let values = patches.values().to_primitive();
1737        assert_eq!(
1738            i32::try_from(&values.scalar_at(0).unwrap()).unwrap(),
1739            100i32
1740        );
1741        assert_eq!(
1742            i32::try_from(&values.scalar_at(1).unwrap()).unwrap(),
1743            200i32
1744        );
1745        assert_eq!(
1746            i32::try_from(&values.scalar_at(2).unwrap()).unwrap(),
1747            300i32
1748        );
1749    }
1750
1751    #[test]
1752    fn test_indices_range() {
1753        let patches = Patches::new(
1754            10,
1755            0,
1756            buffer![2u64, 5, 8].into_array(),
1757            buffer![100i32, 200, 300].into_array(),
1758            None,
1759        )
1760        .unwrap();
1761
1762        assert_eq!(patches.min_index().unwrap(), 2);
1763        assert_eq!(patches.max_index().unwrap(), 8);
1764    }
1765
1766    #[test]
1767    fn test_search_index() {
1768        let patches = Patches::new(
1769            10,
1770            0,
1771            buffer![2u64, 5, 8].into_array(),
1772            buffer![100i32, 200, 300].into_array(),
1773            None,
1774        )
1775        .unwrap();
1776
1777        // Search for exact indices
1778        assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1779        assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1780        assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1781
1782        // Search for non-patch indices
1783        assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1784        assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1785        assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1786        assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1787    }
1788
1789    #[test]
1790    fn test_mask_boundary_patches() {
1791        // Test masking patches at array boundaries
1792        let patches = Patches::new(
1793            10,
1794            0,
1795            buffer![0u64, 9].into_array(),
1796            buffer![100i32, 200].into_array(),
1797            None,
1798        )
1799        .unwrap();
1800
1801        let mask = Mask::from_iter([
1802            true, false, false, false, false, false, false, false, false, false,
1803        ]);
1804        let masked = patches
1805            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1806            .unwrap();
1807        assert!(masked.is_some());
1808        let masked = masked.unwrap();
1809        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1810        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1811    }
1812
1813    #[test]
1814    fn test_mask_all_patches_removed() {
1815        // Test when all patches are masked out
1816        let patches = Patches::new(
1817            10,
1818            0,
1819            buffer![2u64, 5, 8].into_array(),
1820            buffer![100i32, 200, 300].into_array(),
1821            None,
1822        )
1823        .unwrap();
1824
1825        // Mask that removes all patches
1826        let mask = Mask::from_iter([
1827            false, false, true, false, false, true, false, false, true, false,
1828        ]);
1829        let masked = patches
1830            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1831            .unwrap();
1832        assert!(masked.is_none());
1833    }
1834
1835    #[test]
1836    fn test_mask_no_patches_removed() {
1837        // Test when no patches are masked
1838        let patches = Patches::new(
1839            10,
1840            0,
1841            buffer![2u64, 5, 8].into_array(),
1842            buffer![100i32, 200, 300].into_array(),
1843            None,
1844        )
1845        .unwrap();
1846
1847        // Mask that doesn't affect any patches
1848        let mask = Mask::from_iter([
1849            true, false, false, true, false, false, true, false, false, true,
1850        ]);
1851        let masked = patches
1852            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1853            .unwrap()
1854            .unwrap();
1855
1856        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1857        assert_arrays_eq!(
1858            masked.values(),
1859            PrimitiveArray::from_iter([100i32, 200, 300])
1860        );
1861    }
1862
1863    #[test]
1864    fn test_mask_single_patch() {
1865        // Test with a single patch
1866        let patches = Patches::new(
1867            5,
1868            0,
1869            buffer![2u64].into_array(),
1870            buffer![42i32].into_array(),
1871            None,
1872        )
1873        .unwrap();
1874
1875        // Mask that removes the single patch
1876        let mask = Mask::from_iter([false, false, true, false, false]);
1877        let masked = patches
1878            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1879            .unwrap();
1880        assert!(masked.is_none());
1881
1882        // Mask that keeps the single patch
1883        let mask = Mask::from_iter([true, false, false, true, false]);
1884        let masked = patches
1885            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1886            .unwrap()
1887            .unwrap();
1888        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1889    }
1890
1891    #[test]
1892    fn test_mask_contiguous_patches() {
1893        // Test with contiguous patches
1894        let patches = Patches::new(
1895            10,
1896            0,
1897            buffer![3u64, 4, 5, 6].into_array(),
1898            buffer![100i32, 200, 300, 400].into_array(),
1899            None,
1900        )
1901        .unwrap();
1902
1903        // Mask that removes middle patches
1904        let mask = Mask::from_iter([
1905            false, false, false, false, true, true, false, false, false, false,
1906        ]);
1907        let masked = patches
1908            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1909            .unwrap()
1910            .unwrap();
1911
1912        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1913        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1914    }
1915
1916    #[test]
1917    fn test_mask_with_large_offset() {
1918        // Test with a large offset that shifts all indices
1919        let patches = Patches::new(
1920            20,
1921            15,
1922            buffer![16u64, 17, 19].into_array(), // actual indices are 1, 2, 4
1923            buffer![100i32, 200, 300].into_array(),
1924            None,
1925        )
1926        .unwrap();
1927
1928        // Mask that removes the patch at actual index 2
1929        let mask = Mask::from_iter([
1930            false, false, true, false, false, false, false, false, false, false, false, false,
1931            false, false, false, false, false, false, false, false,
1932        ]);
1933        let masked = patches
1934            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1935            .unwrap()
1936            .unwrap();
1937
1938        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1939        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1940    }
1941
1942    #[test]
1943    #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1944    fn test_mask_wrong_length() {
1945        let patches = Patches::new(
1946            10,
1947            0,
1948            buffer![2u64, 5, 8].into_array(),
1949            buffer![100i32, 200, 300].into_array(),
1950            None,
1951        )
1952        .unwrap();
1953
1954        // Mask with wrong length
1955        let mask = Mask::from_iter([false, false, true, false, false]);
1956        patches
1957            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1958            .unwrap();
1959    }
1960
1961    #[test]
1962    fn test_chunk_offsets_search() {
1963        let indices = buffer![100u64, 200, 3000, 3100].into_array();
1964        let values = buffer![10i32, 20, 30, 40].into_array();
1965        let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
1966        let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
1967
1968        assert!(patches.chunk_offsets.is_some());
1969
1970        // chunk 0: patches at 100, 200
1971        assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
1972        assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
1973
1974        // chunks 1, 2: no patches
1975        assert_eq!(
1976            patches.search_index(1500).unwrap(),
1977            SearchResult::NotFound(2)
1978        );
1979        assert_eq!(
1980            patches.search_index(2000).unwrap(),
1981            SearchResult::NotFound(2)
1982        );
1983
1984        // chunk 3: patches at 3000, 3100
1985        assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
1986        assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
1987
1988        assert_eq!(
1989            patches.search_index(1024).unwrap(),
1990            SearchResult::NotFound(2)
1991        );
1992    }
1993
1994    #[test]
1995    fn test_chunk_offsets_with_slice() {
1996        let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
1997        let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
1998        let chunk_offsets = buffer![0u64, 2, 6].into_array();
1999        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2000
2001        let sliced = patches.slice(1000..2200).unwrap().unwrap();
2002
2003        assert!(sliced.chunk_offsets.is_some());
2004        assert_eq!(sliced.offset(), 1000);
2005
2006        assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
2007        assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2008        assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
2009
2010        assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
2011    }
2012
2013    #[test]
2014    fn test_chunk_offsets_with_slice_after_first_chunk() {
2015        let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2016        let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2017        let chunk_offsets = buffer![0u64, 2, 6].into_array();
2018        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2019
2020        let sliced = patches.slice(1300..2200).unwrap().unwrap();
2021
2022        assert!(sliced.chunk_offsets.is_some());
2023        assert_eq!(sliced.offset(), 1300);
2024
2025        assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2026        assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
2027        assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2028        assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
2029        assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
2030    }
2031
2032    #[test]
2033    fn test_chunk_offsets_slice_empty_result() {
2034        let indices = buffer![100u64, 200, 3000, 3100].into_array();
2035        let values = buffer![10i32, 20, 30, 40].into_array();
2036        let chunk_offsets = buffer![0u64, 2].into_array();
2037        let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
2038
2039        let sliced = patches.slice(1000..2000).unwrap();
2040        assert!(sliced.is_none());
2041    }
2042
2043    #[test]
2044    fn test_chunk_offsets_slice_single_patch() {
2045        let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2046        let values = buffer![10i32, 20, 30, 40].into_array();
2047        let chunk_offsets = buffer![0u64, 1, 3].into_array();
2048        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2049
2050        let sliced = patches.slice(1100..1250).unwrap().unwrap();
2051
2052        assert_eq!(sliced.num_patches(), 1);
2053        assert_eq!(sliced.offset(), 1100);
2054        assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); // 1200 - 1100 = 100
2055        assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2056        assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2057    }
2058
2059    #[test]
2060    fn test_chunk_offsets_slice_across_chunks() {
2061        let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2062        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2063        let chunk_offsets = buffer![0u64, 2, 4].into_array();
2064        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2065
2066        let sliced = patches.slice(150..2150).unwrap().unwrap();
2067
2068        assert_eq!(sliced.num_patches(), 4);
2069        assert_eq!(sliced.offset(), 150);
2070
2071        assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); // 200 - 150 = 50
2072        assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); // 1100 - 150 = 950
2073        assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); // 1200 - 150 = 1050
2074        assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); // 2100 - 150 = 1950
2075    }
2076
2077    #[test]
2078    fn test_chunk_offsets_boundary_searches() {
2079        let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2080        let values = buffer![10i32, 20, 30, 40, 50].into_array();
2081        let chunk_offsets = buffer![0u64, 1, 4].into_array();
2082        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2083
2084        assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2085        assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2086        assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2087        assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2088        assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2089
2090        assert_eq!(
2091            patches.search_index(1022).unwrap(),
2092            SearchResult::NotFound(0)
2093        );
2094        assert_eq!(
2095            patches.search_index(2046).unwrap(),
2096            SearchResult::NotFound(3)
2097        );
2098    }
2099
2100    #[test]
2101    fn test_chunk_offsets_slice_edge_cases() {
2102        let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2103        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2104        let chunk_offsets = buffer![0u64, 3, 5].into_array();
2105        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2106
2107        // Slice at the very beginning
2108        let sliced = patches.slice(0..10).unwrap().unwrap();
2109        assert_eq!(sliced.num_patches(), 2);
2110        assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2111        assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2112
2113        // Slice at the very end
2114        let sliced = patches.slice(2040..3000).unwrap().unwrap();
2115        assert_eq!(sliced.num_patches(), 2); // patches at 2047 and 2048
2116        assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); // 2047 - 2040
2117        assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); // 2048 - 2040
2118    }
2119
2120    #[test]
2121    fn test_chunk_offsets_slice_nested() {
2122        let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2123        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2124        let chunk_offsets = buffer![0u64].into_array();
2125        let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2126
2127        let sliced1 = patches.slice(150..550).unwrap().unwrap();
2128        assert_eq!(sliced1.num_patches(), 4); // 200, 300, 400, 500
2129
2130        let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2131        assert_eq!(sliced2.num_patches(), 1); // 300
2132        assert_eq!(sliced2.offset(), 250);
2133
2134        assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); // 300 - 250
2135        assert_eq!(
2136            sliced2.search_index(150).unwrap(),
2137            SearchResult::NotFound(1)
2138        );
2139    }
2140
2141    #[test]
2142    fn test_index_larger_than_length() {
2143        let chunk_offsets = buffer![0u64].into_array();
2144        let indices = buffer![1023u64].into_array();
2145        let values = buffer![42i32].into_array();
2146        let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2147        assert_eq!(
2148            patches.search_index(2048).unwrap(),
2149            SearchResult::NotFound(1)
2150        );
2151    }
2152}