Skip to main content

vortex_array/
patches.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use itertools::Itertools;
10use num_traits::NumCast;
11use vortex_buffer::BitBuffer;
12use vortex_buffer::BufferMut;
13use vortex_error::VortexError;
14use vortex_error::VortexExpect;
15use vortex_error::VortexResult;
16use vortex_error::vortex_bail;
17use vortex_error::vortex_ensure;
18use vortex_error::vortex_err;
19use vortex_mask::AllOr;
20use vortex_mask::Mask;
21use vortex_mask::MaskMut;
22use vortex_utils::aliases::hash_map::HashMap;
23
24use crate::Array;
25use crate::ArrayRef;
26use crate::ArrayVisitor;
27use crate::ExecutionCtx;
28use crate::IntoArray;
29use crate::ToCanonical;
30use crate::arrays::BoolArray;
31use crate::arrays::PrimitiveArray;
32use crate::builtins::ArrayBuiltins;
33use crate::compute::is_sorted;
34use crate::dtype::DType;
35use crate::dtype::IntegerPType;
36use crate::dtype::NativePType;
37use crate::dtype::Nullability::NonNullable;
38use crate::dtype::PType;
39use crate::dtype::UnsignedPType;
40use crate::match_each_integer_ptype;
41use crate::match_each_unsigned_integer_ptype;
42use crate::scalar::PValue;
43use crate::scalar::Scalar;
44use crate::search_sorted::SearchResult;
45use crate::search_sorted::SearchSorted;
46use crate::search_sorted::SearchSortedSide;
47use crate::validity::Validity;
48use crate::vtable::ValidityHelper;
49
50/// One patch index offset is stored for each chunk.
51/// This allows for constant time patch index lookups.
52pub const PATCH_CHUNK_SIZE: usize = 1024;
53
54#[derive(Copy, Clone, prost::Message)]
55pub struct PatchesMetadata {
56    #[prost(uint64, tag = "1")]
57    len: u64,
58    #[prost(uint64, tag = "2")]
59    offset: u64,
60    #[prost(enumeration = "PType", tag = "3")]
61    indices_ptype: i32,
62    #[prost(uint64, optional, tag = "4")]
63    chunk_offsets_len: Option<u64>,
64    #[prost(enumeration = "PType", optional, tag = "5")]
65    chunk_offsets_ptype: Option<i32>,
66    #[prost(uint64, optional, tag = "6")]
67    offset_within_chunk: Option<u64>,
68}
69
70impl PatchesMetadata {
71    #[inline]
72    pub fn new(
73        len: usize,
74        offset: usize,
75        indices_ptype: PType,
76        chunk_offsets_len: Option<usize>,
77        chunk_offsets_ptype: Option<PType>,
78        offset_within_chunk: Option<usize>,
79    ) -> Self {
80        Self {
81            len: len as u64,
82            offset: offset as u64,
83            indices_ptype: indices_ptype as i32,
84            chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
85            chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
86            offset_within_chunk: offset_within_chunk.map(|len| len as u64),
87        }
88    }
89
90    #[inline]
91    pub fn len(&self) -> VortexResult<usize> {
92        usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
93    }
94
95    #[inline]
96    pub fn is_empty(&self) -> bool {
97        self.len == 0
98    }
99
100    #[inline]
101    pub fn offset(&self) -> VortexResult<usize> {
102        usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
103    }
104
105    #[inline]
106    pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
107        self.chunk_offsets_ptype
108            .map(|t| {
109                PType::try_from(t)
110                    .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
111                    .map(|ptype| DType::Primitive(ptype, NonNullable))
112            })
113            .transpose()
114    }
115
116    #[inline]
117    pub fn indices_dtype(&self) -> VortexResult<DType> {
118        let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
119            vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
120        })?;
121        vortex_ensure!(
122            ptype.is_unsigned_int(),
123            "Patch indices must be unsigned integers"
124        );
125        Ok(DType::Primitive(ptype, NonNullable))
126    }
127}
128
129/// A helper for working with patched arrays.
130#[derive(Debug, Clone)]
131pub struct Patches {
132    array_len: usize,
133    offset: usize,
134    indices: ArrayRef,
135    values: ArrayRef,
136    /// Stores the patch index offset for each chunk.
137    ///
138    /// This allows us to lookup the patches for a given chunk in constant time via
139    /// `patch_indices[chunk_offsets[i]..chunk_offsets[i+1]]`.
140    ///
141    /// This is optional for compatibility reasons.
142    chunk_offsets: Option<ArrayRef>,
143    /// Chunk offsets are only sliced off in case the slice is fully
144    /// outside of the chunk range.
145    ///
146    /// Though the range for indices and values is sliced in terms of
147    /// individual elements, not chunks. To account for that we do a
148    /// saturating sub when adjusting the indices based on the chunk offset.
149    ///
150    /// `offset_within_chunk` is necessary in order to keep track of how many
151    /// elements were sliced off within the chunk.
152    offset_within_chunk: Option<usize>,
153}
154
155impl Patches {
156    pub fn new(
157        array_len: usize,
158        offset: usize,
159        indices: ArrayRef,
160        values: ArrayRef,
161        chunk_offsets: Option<ArrayRef>,
162    ) -> VortexResult<Self> {
163        vortex_ensure!(
164            indices.len() == values.len(),
165            "Patch indices and values must have the same length"
166        );
167        vortex_ensure!(
168            indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
169            "Patch indices must be non-nullable unsigned integers, got {:?}",
170            indices.dtype()
171        );
172
173        vortex_ensure!(
174            indices.len() <= array_len,
175            "Patch indices must be shorter than the array length"
176        );
177        vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
178
179        // Perform validation of components when they are host-resident.
180        // This is not possible to do eagerly when the data is on GPU memory.
181        if indices.is_host() && values.is_host() {
182            let max = usize::try_from(&indices.scalar_at(indices.len() - 1)?)
183                .map_err(|_| vortex_err!("indices must be a number"))?;
184            vortex_ensure!(
185                max - offset < array_len,
186                "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
187            );
188
189            debug_assert!(
190                is_sorted(&indices).unwrap_or(Some(false)).unwrap_or(false),
191                "Patch indices must be sorted"
192            );
193        }
194
195        Ok(Self {
196            array_len,
197            offset,
198            indices,
199            values,
200            chunk_offsets: chunk_offsets.clone(),
201            // Initialize with `Some(0)` only if `chunk_offsets` are set.
202            offset_within_chunk: chunk_offsets.map(|_| 0),
203        })
204    }
205
206    /// Construct new patches without validating any of the arguments
207    ///
208    /// # Safety
209    ///
210    /// Users have to assert that
211    /// * Indices and values have the same length
212    /// * Indices is an unsigned integer type
213    /// * Indices must be sorted
214    /// * Last value in indices is smaller than array_len
215    pub unsafe fn new_unchecked(
216        array_len: usize,
217        offset: usize,
218        indices: ArrayRef,
219        values: ArrayRef,
220        chunk_offsets: Option<ArrayRef>,
221        offset_within_chunk: Option<usize>,
222    ) -> Self {
223        Self {
224            array_len,
225            offset,
226            indices,
227            values,
228            chunk_offsets,
229            offset_within_chunk,
230        }
231    }
232
233    #[inline]
234    pub fn array_len(&self) -> usize {
235        self.array_len
236    }
237
238    #[inline]
239    pub fn num_patches(&self) -> usize {
240        self.indices.len()
241    }
242
243    #[inline]
244    pub fn dtype(&self) -> &DType {
245        self.values.dtype()
246    }
247
248    #[inline]
249    pub fn indices(&self) -> &ArrayRef {
250        &self.indices
251    }
252
253    #[inline]
254    pub fn into_indices(self) -> ArrayRef {
255        self.indices
256    }
257
258    #[inline]
259    pub fn indices_mut(&mut self) -> &mut ArrayRef {
260        &mut self.indices
261    }
262
263    #[inline]
264    pub fn values(&self) -> &ArrayRef {
265        &self.values
266    }
267
268    #[inline]
269    pub fn into_values(self) -> ArrayRef {
270        self.values
271    }
272
273    #[inline]
274    pub fn values_mut(&mut self) -> &mut ArrayRef {
275        &mut self.values
276    }
277
278    #[inline]
279    // Absolute offset: 0 if the array is unsliced.
280    pub fn offset(&self) -> usize {
281        self.offset
282    }
283
284    #[inline]
285    pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
286        &self.chunk_offsets
287    }
288
289    #[inline]
290    pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
291        let Some(chunk_offsets) = &self.chunk_offsets else {
292            vortex_bail!("chunk_offsets must be set to retrieve offset at index")
293        };
294
295        chunk_offsets
296            .scalar_at(idx)?
297            .as_primitive()
298            .as_::<usize>()
299            .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
300    }
301
302    /// Returns the number of patches sliced off from the current first chunk.
303    ///
304    /// When patches are sliced, the chunk offsets array is also sliced to only include
305    /// chunks that overlap with the slice range. However, the slice boundary may fall
306    /// in the middle of a chunk's patch range. This offset indicates how many patches
307    /// at the start of the first chunk should be skipped.
308    ///
309    /// Returns `None` if chunk offsets are not set.
310    #[inline]
311    pub fn offset_within_chunk(&self) -> Option<usize> {
312        self.offset_within_chunk
313    }
314
315    #[inline]
316    pub fn indices_ptype(&self) -> VortexResult<PType> {
317        PType::try_from(self.indices.dtype())
318            .map_err(|_| vortex_err!("indices dtype is not primitive"))
319    }
320
321    pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
322        if self.indices.len() > len {
323            vortex_bail!(
324                "Patch indices {} are longer than the array length {}",
325                self.indices.len(),
326                len
327            );
328        }
329        if self.values.dtype() != dtype {
330            vortex_bail!(
331                "Patch values dtype {} does not match array dtype {}",
332                self.values.dtype(),
333                dtype
334            );
335        }
336        let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
337        let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
338
339        Ok(PatchesMetadata::new(
340            self.indices.len(),
341            self.offset,
342            self.indices.dtype().as_ptype(),
343            chunk_offsets_len,
344            chunk_offsets_ptype,
345            self.offset_within_chunk,
346        ))
347    }
348
349    pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
350        // SAFETY: casting does not affect the relationship between the indices and values
351        unsafe {
352            Ok(Self::new_unchecked(
353                self.array_len,
354                self.offset,
355                self.indices,
356                self.values.cast(values_dtype.clone())?,
357                self.chunk_offsets,
358                self.offset_within_chunk,
359            ))
360        }
361    }
362
363    /// Get the patched value at a given index if it exists.
364    pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
365        self.search_index(index)?
366            .to_found()
367            .map(|patch_idx| self.values().scalar_at(patch_idx))
368            .transpose()
369    }
370
371    /// Searches for `index` in the indices array.
372    ///
373    /// Chooses between chunked search when [`Self::chunk_offsets`] is
374    /// available, and binary search otherwise. The `index` parameter is
375    /// adjusted by [`Self::offset`] for both.
376    ///
377    /// # Arguments
378    /// * `index` - The index to search for
379    ///
380    /// # Returns
381    /// * [`SearchResult::Found(patch_idx)`] - If a patch exists at this index, returns the
382    ///   position in the patches array
383    /// * [`SearchResult::NotFound(insertion_point)`] - If no patch exists, returns where
384    ///   a patch at this index would be inserted to maintain sorted order
385    ///
386    /// [`SearchResult::Found(patch_idx)`]: SearchResult::Found
387    /// [`SearchResult::NotFound(insertion_point)`]: SearchResult::NotFound
388    pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
389        if self.chunk_offsets.is_some() {
390            return self.search_index_chunked(index);
391        }
392
393        Self::search_index_binary_search(&self.indices, index + self.offset)
394    }
395
396    /// Binary searches for `needle` in the indices array.
397    ///
398    /// # Returns
399    /// [`SearchResult::Found`] with the position if needle exists, or [`SearchResult::NotFound`]
400    /// with the insertion point if not found.
401    fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
402        if indices.is_canonical() {
403            let primitive = indices.to_primitive();
404            match_each_integer_ptype!(primitive.ptype(), |T| {
405                let Ok(needle) = T::try_from(needle) else {
406                    // If the needle is not of type T, then it cannot possibly be in this array.
407                    //
408                    // The needle is a non-negative integer (a usize); therefore, it must be larger
409                    // than all values in this array.
410                    return Ok(SearchResult::NotFound(primitive.len()));
411                };
412                return primitive
413                    .as_slice::<T>()
414                    .search_sorted(&needle, SearchSortedSide::Left);
415            });
416        }
417        indices
418            .as_primitive_typed()
419            .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
420    }
421
422    /// Constant time searches for `index` in the indices array.
423    ///
424    /// First determines which chunk the target index falls into, then performs
425    /// a binary search within that chunk's range.
426    ///
427    /// Returns a [`SearchResult`] indicating either the exact patch index if found,
428    /// or the insertion point if not found.
429    ///
430    /// Returns an error if `chunk_offsets` or `offset_within_chunk` are not set.
431    fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
432        let Some(chunk_offsets) = &self.chunk_offsets else {
433            vortex_bail!("chunk_offsets is required to be set")
434        };
435
436        let Some(offset_within_chunk) = self.offset_within_chunk else {
437            vortex_bail!("offset_within_chunk is required to be set")
438        };
439
440        if index >= self.array_len() {
441            return Ok(SearchResult::NotFound(self.indices().len()));
442        }
443
444        let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
445
446        // Patch index offsets are absolute and need to be offset by the first chunk of the current slice.
447        let base_offset = self.chunk_offset_at(0)?;
448
449        let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
450            // Chunk offsets are only sliced off in case the slice is fully
451            // outside of the chunk range.
452            //
453            // Though the range for indices and values is sliced in terms of
454            // individual elements, not chunks. To account for that we do a
455            // saturating sub when adjusting the indices based on the chunk offset.
456            .saturating_sub(offset_within_chunk);
457
458        let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
459            self.chunk_offset_at(chunk_idx + 1)? - base_offset - offset_within_chunk
460        } else {
461            self.indices.len()
462        };
463
464        let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
465        let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
466
467        Ok(match result {
468            SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
469            SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
470        })
471    }
472
473    /// Batch version of `search_index`.
474    ///
475    /// In contrast to `search_index`, this function requires `indices` as
476    /// well as `chunk_offsets` to be passed as slices. This is to avoid
477    /// redundant canonicalization and `scalar_at` lookups across calls.
478    fn search_index_chunked_batch<T, O>(
479        &self,
480        indices: &[T],
481        chunk_offsets: &[O],
482        index: T,
483    ) -> VortexResult<SearchResult>
484    where
485        T: UnsignedPType,
486        O: UnsignedPType,
487        usize: TryFrom<T>,
488        usize: TryFrom<O>,
489    {
490        let Some(offset_within_chunk) = self.offset_within_chunk else {
491            vortex_bail!("offset_within_chunk is required to be set")
492        };
493
494        let chunk_idx = {
495            let Ok(index) = usize::try_from(index) else {
496                // If the needle cannot be converted to usize, it's larger than all values in this array.
497                return Ok(SearchResult::NotFound(indices.len()));
498            };
499
500            if index >= self.array_len() {
501                return Ok(SearchResult::NotFound(self.indices().len()));
502            }
503
504            (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
505        };
506
507        // Patch index offsets are absolute and need to be offset by the first chunk of the current slice.
508        let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
509            .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
510
511        let patches_start_idx = chunk_offset
512            // Chunk offsets are only sliced off in case the slice is fully
513            // outside of the chunk range.
514            //
515            // Though the range for indices and values is sliced in terms of
516            // individual elements, not chunks. To account for that we do a
517            // saturating sub when adjusting the indices based on the chunk offset.
518            .saturating_sub(offset_within_chunk);
519
520        let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
521            let base_offset_end = chunk_offsets[chunk_idx + 1];
522
523            let offset_within_chunk = O::from(offset_within_chunk)
524                .ok_or_else(|| vortex_err!("offset_within_chunk failed to convert to O"))?;
525
526            usize::try_from(base_offset_end - chunk_offsets[0] - offset_within_chunk)
527                .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
528        } else {
529            self.indices.len()
530        };
531
532        let Some(offset) = T::from(self.offset) else {
533            // If the offset cannot be converted to T, it's larger than all values in this array.
534            return Ok(SearchResult::NotFound(indices.len()));
535        };
536
537        let chunk_indices = &indices[patches_start_idx..patches_end_idx];
538        let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
539
540        Ok(match result {
541            SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
542            SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
543        })
544    }
545
546    /// Returns the minimum patch index
547    pub fn min_index(&self) -> VortexResult<usize> {
548        let first = self
549            .indices
550            .scalar_at(0)?
551            .as_primitive()
552            .as_::<usize>()
553            .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
554        Ok(first - self.offset)
555    }
556
557    /// Returns the maximum patch index
558    pub fn max_index(&self) -> VortexResult<usize> {
559        let last = self
560            .indices
561            .scalar_at(self.indices.len() - 1)?
562            .as_primitive()
563            .as_::<usize>()
564            .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
565        Ok(last - self.offset)
566    }
567
568    /// Filter the patches by a mask, resulting in new patches for the filtered array.
569    pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
570        if mask.len() != self.array_len {
571            vortex_bail!(
572                "Filter mask length {} does not match array length {}",
573                mask.len(),
574                self.array_len
575            );
576        }
577
578        match mask.indices() {
579            AllOr::All => Ok(Some(self.clone())),
580            AllOr::None => Ok(None),
581            AllOr::Some(mask_indices) => {
582                let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
583                match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
584                    filter_patches_with_mask(
585                        flat_indices.as_slice::<I>(),
586                        self.offset(),
587                        self.values(),
588                        mask_indices,
589                    )
590                })
591            }
592        }
593    }
594
595    /// Mask the patches, REMOVING the patches where the mask is true.
596    /// Unlike filter, this preserves the patch indices.
597    /// Unlike mask on a single array, this does not set masked values to null.
598    // TODO(joe): make this lazy and remove the ctx.
599    pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
600        if mask.len() != self.array_len {
601            vortex_bail!(
602                "Filter mask length {} does not match array length {}",
603                mask.len(),
604                self.array_len
605            );
606        }
607
608        let filter_mask = match mask.bit_buffer() {
609            AllOr::All => return Ok(None),
610            AllOr::None => return Ok(Some(self.clone())),
611            AllOr::Some(masked) => {
612                let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
613                match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
614                    let patch_indices = patch_indices.as_slice::<P>();
615                    Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
616                        #[allow(clippy::cast_possible_truncation)]
617                        let idx = (patch_indices[i] as usize) - self.offset;
618                        !masked.value(idx)
619                    }))
620                })
621            }
622        };
623
624        if filter_mask.all_false() {
625            return Ok(None);
626        }
627
628        // SAFETY: filtering indices/values with same mask maintains their 1:1 relationship
629        let filtered_indices = self
630            .indices
631            .filter(filter_mask.clone())?
632            .to_canonical()?
633            .into_array();
634        let filtered_values = self
635            .values
636            .filter(filter_mask)?
637            .to_canonical()?
638            .into_array();
639
640        Ok(Some(Self {
641            array_len: self.array_len,
642            offset: self.offset,
643            indices: filtered_indices,
644            values: filtered_values,
645            // TODO(0ax1): Chunk offsets are invalid after a filter is applied.
646            chunk_offsets: None,
647            offset_within_chunk: self.offset_within_chunk,
648        }))
649    }
650
651    /// Slice the patches by a range of the patched array.
652    pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
653        let slice_start_idx = self.search_index(range.start)?.to_index();
654        let slice_end_idx = self.search_index(range.end)?.to_index();
655
656        if slice_start_idx == slice_end_idx {
657            return Ok(None);
658        }
659
660        let values = self.values().slice(slice_start_idx..slice_end_idx)?;
661        let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
662
663        let chunk_offsets = self
664            .chunk_offsets
665            .as_ref()
666            .map(|chunk_offsets| -> VortexResult<ArrayRef> {
667                let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
668                let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
669                let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
670                chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
671            })
672            .transpose()?;
673
674        let offset_within_chunk = chunk_offsets
675            .as_ref()
676            .map(|chunk_offsets| -> VortexResult<usize> {
677                let base_offset = chunk_offsets
678                    .scalar_at(0)?
679                    .as_primitive()
680                    .as_::<usize>()
681                    .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
682                Ok(slice_start_idx - base_offset)
683            })
684            .transpose()?;
685
686        Ok(Some(Self {
687            array_len: range.len(),
688            offset: range.start + self.offset(),
689            indices,
690            values,
691            chunk_offsets,
692            offset_within_chunk,
693        }))
694    }
695
696    // https://docs.google.com/spreadsheets/d/1D9vBZ1QJ6mwcIvV5wIL0hjGgVchcEnAyhvitqWu2ugU
697    const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
698
699    fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
700        (self.num_patches() as f64 / take_indices.len() as f64)
701            < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
702    }
703
704    /// Take the indices from the patches
705    ///
706    /// Any nulls in take_indices are added to the resulting patches.
707    pub fn take_with_nulls(
708        &self,
709        take_indices: &ArrayRef,
710        ctx: &mut ExecutionCtx,
711    ) -> VortexResult<Option<Self>> {
712        if take_indices.is_empty() {
713            return Ok(None);
714        }
715
716        let take_indices = take_indices.to_array().execute::<PrimitiveArray>(ctx)?;
717        if self.is_map_faster_than_search(&take_indices) {
718            self.take_map(take_indices, true, ctx)
719        } else {
720            self.take_search(take_indices, true, ctx)
721        }
722    }
723
724    /// Take the indices from the patches.
725    ///
726    /// Any nulls in take_indices are ignored.
727    pub fn take(
728        &self,
729        take_indices: &ArrayRef,
730        ctx: &mut ExecutionCtx,
731    ) -> VortexResult<Option<Self>> {
732        if take_indices.is_empty() {
733            return Ok(None);
734        }
735
736        let take_indices = take_indices.to_array().execute::<PrimitiveArray>(ctx)?;
737        if self.is_map_faster_than_search(&take_indices) {
738            self.take_map(take_indices, false, ctx)
739        } else {
740            self.take_search(take_indices, false, ctx)
741        }
742    }
743
744    #[expect(
745        clippy::cognitive_complexity,
746        reason = "complexity is from nested match_each_* macros"
747    )]
748    pub fn take_search(
749        &self,
750        take_indices: PrimitiveArray,
751        include_nulls: bool,
752        ctx: &mut ExecutionCtx,
753    ) -> VortexResult<Option<Self>> {
754        let take_indices_validity = take_indices.validity();
755        let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
756        let chunk_offsets = self
757            .chunk_offsets()
758            .as_ref()
759            .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
760            .transpose()?;
761
762        let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
763            match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
764                let patch_indices_slice = patch_indices.as_slice::<PatchT>();
765                match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
766                    let take_slice = take_indices.as_slice::<TakeT>();
767
768                    if let Some(chunk_offsets) = chunk_offsets {
769                        match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
770                            let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
771                            take_indices_with_search_fn(
772                                patch_indices_slice,
773                                take_slice,
774                                take_indices.validity_mask()?,
775                                include_nulls,
776                                |take_idx| {
777                                    self.search_index_chunked_batch(
778                                        patch_indices_slice,
779                                        chunk_offsets,
780                                        take_idx,
781                                    )
782                                },
783                            )?
784                        })
785                    } else {
786                        take_indices_with_search_fn(
787                            patch_indices_slice,
788                            take_slice,
789                            take_indices.validity_mask()?,
790                            include_nulls,
791                            |take_idx| {
792                                let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
793                                    // If the offset cannot be converted to T, it's larger than all values in this array.
794                                    return Ok(SearchResult::NotFound(patch_indices_slice.len()));
795                                };
796
797                                patch_indices_slice
798                                    .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
799                            },
800                        )?
801                    }
802                })
803            });
804
805        if new_indices.is_empty() {
806            return Ok(None);
807        }
808
809        let new_indices = new_indices.into_array();
810        let new_array_len = take_indices.len();
811        let values_validity = take_indices_validity.take(&new_indices)?;
812
813        Ok(Some(Self {
814            array_len: new_array_len,
815            offset: 0,
816            indices: new_indices.clone(),
817            values: self
818                .values()
819                .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
820            chunk_offsets: None,
821            offset_within_chunk: Some(0), // Reset when creating new Patches.
822        }))
823    }
824
825    pub fn take_map(
826        &self,
827        take_indices: PrimitiveArray,
828        include_nulls: bool,
829        ctx: &mut ExecutionCtx,
830    ) -> VortexResult<Option<Self>> {
831        let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
832        let new_length = take_indices.len();
833
834        let min_index = self.min_index()?;
835        let max_index = self.max_index()?;
836
837        let Some((new_sparse_indices, value_indices)) =
838            match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
839                match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
840                    take_map::<_, TakeIndices>(
841                        indices.as_slice::<Indices>(),
842                        take_indices,
843                        self.offset(),
844                        min_index,
845                        max_index,
846                        include_nulls,
847                    )?
848                })
849            })
850        else {
851            return Ok(None);
852        };
853
854        let taken_values = self.values().take(value_indices)?;
855
856        Ok(Some(Patches {
857            array_len: new_length,
858            offset: 0,
859            indices: new_sparse_indices,
860            values: taken_values,
861            // TODO(0ax1): Chunk offsets are invalid after take is applied.
862            chunk_offsets: None,
863            offset_within_chunk: self.offset_within_chunk,
864        }))
865    }
866
867    /// Apply patches to a mutable buffer and validity mask.
868    ///
869    /// This method applies the patch values to the given buffer at the positions specified by the
870    /// patch indices. For non-null patch values, it updates the buffer and marks the position as
871    /// valid. For null patch values, it marks the position as invalid.
872    ///
873    /// # Safety
874    ///
875    /// - All patch indices after offset adjustment must be valid indices into the buffer.
876    /// - The buffer and validity mask must have the same length.
877    pub unsafe fn apply_to_buffer<P: NativePType>(
878        &self,
879        buffer: &mut [P],
880        validity: &mut MaskMut,
881        ctx: &mut ExecutionCtx,
882    ) {
883        let patch_indices = self
884            .indices
885            .clone()
886            .execute::<PrimitiveArray>(ctx)
887            .vortex_expect("patch indices must be convertible to PrimitiveArray");
888        let patch_values = self
889            .values
890            .clone()
891            .execute::<PrimitiveArray>(ctx)
892            .vortex_expect("patch values must be convertible to PrimitiveArray");
893        let patches_validity = patch_values.validity();
894
895        let patch_values_slice = patch_values.as_slice::<P>();
896        match_each_unsigned_integer_ptype!(patch_indices.ptype(), |I| {
897            let patch_indices_slice = patch_indices.as_slice::<I>();
898
899            // SAFETY:
900            // - `Patches` invariant guarantees indices are sorted and within array bounds.
901            // - `patch_indices` and `patch_values` have equal length (from `Patches` invariant).
902            // - `buffer` and `validity` have equal length (precondition).
903            // - All patch indices are valid after offset adjustment (precondition).
904            unsafe {
905                apply_patches_to_buffer_inner(
906                    buffer,
907                    validity,
908                    patch_indices_slice,
909                    self.offset,
910                    patch_values_slice,
911                    patches_validity,
912                    ctx,
913                );
914            }
915        });
916    }
917
918    pub fn map_values<F>(self, f: F) -> VortexResult<Self>
919    where
920        F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
921    {
922        let values = f(self.values)?;
923        if self.indices.len() != values.len() {
924            vortex_bail!(
925                "map_values must preserve length: expected {} received {}",
926                self.indices.len(),
927                values.len()
928            )
929        }
930
931        Ok(Self {
932            array_len: self.array_len,
933            offset: self.offset,
934            indices: self.indices,
935            values,
936            chunk_offsets: self.chunk_offsets,
937            offset_within_chunk: self.offset_within_chunk,
938        })
939    }
940}
941
942/// Helper function to apply patches to a buffer.
943///
944/// # Safety
945///
946/// - All indices in `patch_indices` after subtracting `patch_offset` must be valid indices
947///   into both `buffer` and `validity`.
948/// - `patch_indices` must be sorted in ascending order.
949/// - `patch_indices` and `patch_values` must have the same length.
950/// - `buffer` and `validity` must have the same length.
951unsafe fn apply_patches_to_buffer_inner<P, I>(
952    buffer: &mut [P],
953    validity: &mut MaskMut,
954    patch_indices: &[I],
955    patch_offset: usize,
956    patch_values: &[P],
957    patches_validity: &Validity,
958    ctx: &mut ExecutionCtx,
959) where
960    P: NativePType,
961    I: UnsignedPType,
962{
963    debug_assert!(!patch_indices.is_empty());
964    debug_assert_eq!(patch_indices.len(), patch_values.len());
965    debug_assert_eq!(buffer.len(), validity.len());
966
967    match patches_validity {
968        Validity::NonNullable | Validity::AllValid => {
969            // All patch values are valid, apply them all.
970            for (&i, &value) in patch_indices.iter().zip_eq(patch_values) {
971                let index = i.as_() - patch_offset;
972
973                // SAFETY: `index` is valid because caller guarantees all patch indices are within
974                // bounds after offset adjustment.
975                unsafe {
976                    validity.set_unchecked(index);
977                }
978                buffer[index] = value;
979            }
980        }
981        Validity::AllInvalid => {
982            // All patch values are null, just mark positions as invalid.
983            for &i in patch_indices {
984                let index = i.as_() - patch_offset;
985
986                // SAFETY: `index` is valid because caller guarantees all patch indices are within
987                // bounds after offset adjustment.
988                unsafe {
989                    validity.unset_unchecked(index);
990                }
991            }
992        }
993        Validity::Array(array) => {
994            // Some patch values may be null, check each one.
995            let bool_array = array
996                .clone()
997                .execute::<BoolArray>(ctx)
998                .vortex_expect("validity array must be convertible to BoolArray");
999            let mask = bool_array.to_bit_buffer();
1000            for (patch_idx, (&i, &value)) in patch_indices.iter().zip_eq(patch_values).enumerate() {
1001                let index = i.as_() - patch_offset;
1002
1003                // SAFETY: `index` and `patch_idx` are valid because caller guarantees all patch
1004                // indices are within bounds after offset adjustment.
1005                unsafe {
1006                    if mask.value_unchecked(patch_idx) {
1007                        buffer[index] = value;
1008                        validity.set_unchecked(index);
1009                    } else {
1010                        validity.unset_unchecked(index);
1011                    }
1012                }
1013            }
1014        }
1015    }
1016}
1017
1018fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
1019    indices: &[I],
1020    take_indices: PrimitiveArray,
1021    indices_offset: usize,
1022    min_index: usize,
1023    max_index: usize,
1024    include_nulls: bool,
1025) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
1026where
1027    usize: TryFrom<T>,
1028    VortexError: From<<I as TryFrom<usize>>::Error>,
1029{
1030    let take_indices_len = take_indices.len();
1031    let take_indices_validity = take_indices.validity();
1032    let take_indices_validity_mask = take_indices_validity.to_mask(take_indices_len);
1033    let take_indices = take_indices.as_slice::<T>();
1034    let offset_i = I::try_from(indices_offset)?;
1035
1036    let sparse_index_to_value_index: HashMap<I, usize> = indices
1037        .iter()
1038        .copied()
1039        .map(|idx| idx - offset_i)
1040        .enumerate()
1041        .map(|(value_index, sparse_index)| (sparse_index, value_index))
1042        .collect();
1043
1044    let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1045    let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
1046
1047    for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
1048        let ti = usize::try_from(take_idx)
1049            .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
1050
1051        // If we have to take nulls the take index doesn't matter, make it 0 for consistency
1052        let is_null = match take_indices_validity_mask.bit_buffer() {
1053            AllOr::All => false,
1054            AllOr::None => true,
1055            AllOr::Some(buf) => !buf.value(idx_in_take),
1056        };
1057        if is_null {
1058            if include_nulls {
1059                new_sparse_indices.push(idx_in_take as u64);
1060                value_indices.push(0);
1061            }
1062        } else if ti >= min_index && ti <= max_index {
1063            let ti_as_i = I::try_from(ti)
1064                .map_err(|_| vortex_err!("take index does not fit in index type"))?;
1065            if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
1066                new_sparse_indices.push(idx_in_take as u64);
1067                value_indices.push(value_index as u64);
1068            }
1069        }
1070    }
1071
1072    if new_sparse_indices.is_empty() {
1073        return Ok(None);
1074    }
1075
1076    let new_sparse_indices = new_sparse_indices.into_array();
1077    let values_validity = take_indices_validity.take(&new_sparse_indices)?;
1078    Ok(Some((
1079        new_sparse_indices,
1080        PrimitiveArray::new(value_indices, values_validity).into_array(),
1081    )))
1082}
1083
1084/// Filter patches with the provided mask (in flattened space).
1085///
1086/// The filter mask may contain indices that are non-patched. The return value of this function
1087/// is a new set of `Patches` with the indices relative to the provided `mask` rank, and the
1088/// patch values.
1089fn filter_patches_with_mask<T: IntegerPType>(
1090    patch_indices: &[T],
1091    offset: usize,
1092    patch_values: &ArrayRef,
1093    mask_indices: &[usize],
1094) -> VortexResult<Option<Patches>> {
1095    let true_count = mask_indices.len();
1096    let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
1097    let mut new_mask_indices = Vec::with_capacity(true_count);
1098
1099    // Attempt to move the window by `STRIDE` elements on each iteration. This assumes that
1100    // the patches are relatively sparse compared to the overall mask, and so many indices in the
1101    // mask will end up being skipped.
1102    const STRIDE: usize = 4;
1103
1104    let mut mask_idx = 0usize;
1105    let mut true_idx = 0usize;
1106
1107    while mask_idx < patch_indices.len() && true_idx < true_count {
1108        // NOTE: we are searching for overlaps between sorted, unaligned indices in `patch_indices`
1109        //  and `mask_indices`. We assume that Patches are sparse relative to the global space of
1110        //  the mask (which covers both patch and non-patch values of the parent array), and so to
1111        //  quickly jump through regions with no overlap, we attempt to move our pointers by STRIDE
1112        //  elements on each iteration. If we cannot rule out overlap due to min/max values, we
1113        //  fallback to performing a two-way iterator merge.
1114        if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1115            // Load a vector of each into our registers.
1116            let left_min = patch_indices[mask_idx]
1117                .to_usize()
1118                .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1119                - offset;
1120            let left_max = patch_indices[mask_idx + STRIDE]
1121                .to_usize()
1122                .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1123                - offset;
1124            let right_min = mask_indices[true_idx];
1125            let right_max = mask_indices[true_idx + STRIDE];
1126
1127            if left_min > right_max {
1128                // Advance right side
1129                true_idx += STRIDE;
1130                continue;
1131            } else if right_min > left_max {
1132                mask_idx += STRIDE;
1133                continue;
1134            } else {
1135                // Fallthrough to direct comparison path.
1136            }
1137        }
1138
1139        // Two-way sorted iterator merge:
1140
1141        let left = patch_indices[mask_idx]
1142            .to_usize()
1143            .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1144            - offset;
1145        let right = mask_indices[true_idx];
1146
1147        match left.cmp(&right) {
1148            Ordering::Less => {
1149                mask_idx += 1;
1150            }
1151            Ordering::Greater => {
1152                true_idx += 1;
1153            }
1154            Ordering::Equal => {
1155                // Save the mask index as well as the positional index.
1156                new_mask_indices.push(mask_idx);
1157                new_patch_indices.push(true_idx as u64);
1158
1159                mask_idx += 1;
1160                true_idx += 1;
1161            }
1162        }
1163    }
1164
1165    if new_mask_indices.is_empty() {
1166        return Ok(None);
1167    }
1168
1169    let new_patch_indices = new_patch_indices.into_array();
1170    let new_patch_values = patch_values
1171        .filter(Mask::from_indices(patch_values.len(), new_mask_indices))?
1172        .to_canonical()?
1173        .into_array();
1174
1175    Ok(Some(Patches::new(
1176        true_count,
1177        0,
1178        new_patch_indices,
1179        new_patch_values,
1180        // TODO(0ax1): Chunk offsets are invalid after a filter is applied.
1181        None,
1182    )?))
1183}
1184
1185fn take_indices_with_search_fn<
1186    I: UnsignedPType,
1187    T: IntegerPType,
1188    F: Fn(I) -> VortexResult<SearchResult>,
1189>(
1190    indices: &[I],
1191    take_indices: &[T],
1192    take_validity: Mask,
1193    include_nulls: bool,
1194    search_fn: F,
1195) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1196    let mut values_indices = BufferMut::with_capacity(take_indices.len());
1197    let mut new_indices = BufferMut::with_capacity(take_indices.len());
1198
1199    for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1200        if !take_validity.value(new_patch_idx) {
1201            if include_nulls {
1202                // For nulls, patch index doesn't matter - use 0 for consistency
1203                values_indices.push(0u64);
1204                new_indices.push(new_patch_idx as u64);
1205            }
1206            continue;
1207        } else {
1208            let search_result = match I::from(take_idx) {
1209                Some(idx) => search_fn(idx)?,
1210                None => SearchResult::NotFound(indices.len()),
1211            };
1212
1213            if let Some(patch_idx) = search_result.to_found() {
1214                values_indices.push(patch_idx as u64);
1215                new_indices.push(new_patch_idx as u64);
1216            }
1217        }
1218    }
1219
1220    Ok((values_indices, new_indices))
1221}
1222
1223#[cfg(test)]
1224mod test {
1225    use vortex_buffer::BufferMut;
1226    use vortex_buffer::buffer;
1227    use vortex_mask::Mask;
1228
1229    use crate::IntoArray;
1230    use crate::LEGACY_SESSION;
1231    use crate::ToCanonical;
1232    use crate::VortexSessionExecute;
1233    use crate::arrays::PrimitiveArray;
1234    use crate::assert_arrays_eq;
1235    use crate::patches::Patches;
1236    use crate::search_sorted::SearchResult;
1237    use crate::validity::Validity;
1238
1239    #[test]
1240    fn test_filter() {
1241        let patches = Patches::new(
1242            100,
1243            0,
1244            buffer![10u32, 11, 20].into_array(),
1245            buffer![100, 110, 200].into_array(),
1246            None,
1247        )
1248        .unwrap();
1249
1250        let filtered = patches
1251            .filter(
1252                &Mask::from_indices(100, vec![10, 20, 30]),
1253                &mut LEGACY_SESSION.create_execution_ctx(),
1254            )
1255            .unwrap()
1256            .unwrap();
1257
1258        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1259        assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1260    }
1261
1262    #[test]
1263    fn take_with_nulls() {
1264        let patches = Patches::new(
1265            20,
1266            0,
1267            buffer![2u64, 9, 15].into_array(),
1268            PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1269            None,
1270        )
1271        .unwrap();
1272
1273        let taken = patches
1274            .take(
1275                &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1276                    .into_array(),
1277                &mut LEGACY_SESSION.create_execution_ctx(),
1278            )
1279            .unwrap()
1280            .unwrap();
1281        let primitive_values = taken.values().to_primitive();
1282        let primitive_indices = taken.indices().to_primitive();
1283        assert_eq!(taken.array_len(), 2);
1284        assert_arrays_eq!(
1285            primitive_values,
1286            PrimitiveArray::from_option_iter([Some(44i32)])
1287        );
1288        assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1289        assert_eq!(
1290            primitive_values.validity_mask().unwrap(),
1291            Mask::from_iter(vec![true])
1292        );
1293    }
1294
1295    #[test]
1296    fn take_search_with_nulls_chunked() {
1297        let patches = Patches::new(
1298            20,
1299            0,
1300            buffer![2u64, 9, 15].into_array(),
1301            buffer![33_i32, 44, 55].into_array(),
1302            Some(buffer![0u64].into_array()),
1303        )
1304        .unwrap();
1305
1306        let taken = patches
1307            .take_search(
1308                PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1309                true,
1310                &mut LEGACY_SESSION.create_execution_ctx(),
1311            )
1312            .unwrap()
1313            .unwrap();
1314
1315        let primitive_values = taken.values().to_primitive();
1316        assert_eq!(taken.array_len(), 2);
1317        assert_arrays_eq!(
1318            primitive_values,
1319            PrimitiveArray::from_option_iter([Some(44i32), None])
1320        );
1321        assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1322
1323        assert_eq!(
1324            primitive_values.validity_mask().unwrap(),
1325            Mask::from_iter([true, false])
1326        );
1327    }
1328
1329    #[test]
1330    fn take_search_chunked_multiple_chunks() {
1331        let patches = Patches::new(
1332            2048,
1333            0,
1334            buffer![100u64, 500, 1200, 1800].into_array(),
1335            buffer![10_i32, 20, 30, 40].into_array(),
1336            Some(buffer![0u64, 2].into_array()),
1337        )
1338        .unwrap();
1339
1340        let taken = patches
1341            .take_search(
1342                PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1343                true,
1344                &mut LEGACY_SESSION.create_execution_ctx(),
1345            )
1346            .unwrap()
1347            .unwrap();
1348
1349        assert_eq!(taken.array_len(), 3);
1350        assert_arrays_eq!(
1351            taken.values(),
1352            PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1353        );
1354    }
1355
1356    #[test]
1357    fn take_search_chunked_indices_with_no_patches() {
1358        let patches = Patches::new(
1359            20,
1360            0,
1361            buffer![2u64, 9, 15].into_array(),
1362            buffer![33_i32, 44, 55].into_array(),
1363            Some(buffer![0u64].into_array()),
1364        )
1365        .unwrap();
1366
1367        let taken = patches
1368            .take_search(
1369                PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1370                true,
1371                &mut LEGACY_SESSION.create_execution_ctx(),
1372            )
1373            .unwrap();
1374
1375        assert!(taken.is_none());
1376    }
1377
1378    #[test]
1379    fn take_search_chunked_interleaved() {
1380        let patches = Patches::new(
1381            30,
1382            0,
1383            buffer![5u64, 10, 20, 25].into_array(),
1384            buffer![100_i32, 200, 300, 400].into_array(),
1385            Some(buffer![0u64].into_array()),
1386        )
1387        .unwrap();
1388
1389        let taken = patches
1390            .take_search(
1391                PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1392                true,
1393                &mut LEGACY_SESSION.create_execution_ctx(),
1394            )
1395            .unwrap()
1396            .unwrap();
1397
1398        assert_eq!(taken.array_len(), 4);
1399        assert_arrays_eq!(
1400            taken.values(),
1401            PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1402        );
1403    }
1404
1405    #[test]
1406    fn test_take_search_multiple_chunk_offsets() {
1407        let patches = Patches::new(
1408            1500,
1409            0,
1410            BufferMut::from_iter(0..1500u64).into_array(),
1411            BufferMut::from_iter(0..1500i32).into_array(),
1412            Some(buffer![0u64, 1024u64].into_array()),
1413        )
1414        .unwrap();
1415
1416        let taken = patches
1417            .take_search(
1418                PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1419                false,
1420                &mut LEGACY_SESSION.create_execution_ctx(),
1421            )
1422            .unwrap()
1423            .unwrap();
1424
1425        assert_eq!(taken.array_len(), 1500);
1426    }
1427
1428    #[test]
1429    fn test_slice() {
1430        let values = buffer![15_u32, 135, 13531, 42].into_array();
1431        let indices = buffer![10_u64, 11, 50, 100].into_array();
1432
1433        let patches = Patches::new(101, 0, indices, values, None).unwrap();
1434
1435        let sliced = patches.slice(15..100).unwrap().unwrap();
1436        assert_eq!(sliced.array_len(), 100 - 15);
1437        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1438    }
1439
1440    #[test]
1441    fn doubly_sliced() {
1442        let values = buffer![15_u32, 135, 13531, 42].into_array();
1443        let indices = buffer![10_u64, 11, 50, 100].into_array();
1444
1445        let patches = Patches::new(101, 0, indices, values, None).unwrap();
1446
1447        let sliced = patches.slice(15..100).unwrap().unwrap();
1448        assert_eq!(sliced.array_len(), 100 - 15);
1449        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1450
1451        let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1452        assert_arrays_eq!(
1453            doubly_sliced.values(),
1454            PrimitiveArray::from_iter([13531u32])
1455        );
1456    }
1457
1458    #[test]
1459    fn test_mask_all_true() {
1460        let patches = Patches::new(
1461            10,
1462            0,
1463            buffer![2u64, 5, 8].into_array(),
1464            buffer![100i32, 200, 300].into_array(),
1465            None,
1466        )
1467        .unwrap();
1468
1469        let mask = Mask::new_true(10);
1470        let masked = patches
1471            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1472            .unwrap();
1473        assert!(masked.is_none());
1474    }
1475
1476    #[test]
1477    fn test_mask_all_false() {
1478        let patches = Patches::new(
1479            10,
1480            0,
1481            buffer![2u64, 5, 8].into_array(),
1482            buffer![100i32, 200, 300].into_array(),
1483            None,
1484        )
1485        .unwrap();
1486
1487        let mask = Mask::new_false(10);
1488        let masked = patches
1489            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1490            .unwrap()
1491            .unwrap();
1492
1493        // No patch values should be masked
1494        assert_arrays_eq!(
1495            masked.values(),
1496            PrimitiveArray::from_iter([100i32, 200, 300])
1497        );
1498        assert!(masked.values().is_valid(0).unwrap());
1499        assert!(masked.values().is_valid(1).unwrap());
1500        assert!(masked.values().is_valid(2).unwrap());
1501
1502        // Indices should remain unchanged
1503        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1504    }
1505
1506    #[test]
1507    fn test_mask_partial() {
1508        let patches = Patches::new(
1509            10,
1510            0,
1511            buffer![2u64, 5, 8].into_array(),
1512            buffer![100i32, 200, 300].into_array(),
1513            None,
1514        )
1515        .unwrap();
1516
1517        // Mask that removes patches at indices 2 and 8 (but not 5)
1518        let mask = Mask::from_iter([
1519            false, false, true, false, false, false, false, false, true, false,
1520        ]);
1521        let masked = patches
1522            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1523            .unwrap()
1524            .unwrap();
1525
1526        // Only the patch at index 5 should remain
1527        assert_eq!(masked.values().len(), 1);
1528        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1529
1530        // Only index 5 should remain
1531        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1532    }
1533
1534    #[test]
1535    fn test_mask_with_offset() {
1536        let patches = Patches::new(
1537            10,
1538            5,                                  // offset
1539            buffer![7u64, 10, 13].into_array(), // actual indices are 2, 5, 8
1540            buffer![100i32, 200, 300].into_array(),
1541            None,
1542        )
1543        .unwrap();
1544
1545        // Mask that sets actual index 2 to null
1546        let mask = Mask::from_iter([
1547            false, false, true, false, false, false, false, false, false, false,
1548        ]);
1549
1550        let masked = patches
1551            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1552            .unwrap()
1553            .unwrap();
1554        assert_eq!(masked.array_len(), 10);
1555        assert_eq!(masked.offset(), 5);
1556        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1557        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1558    }
1559
1560    #[test]
1561    fn test_mask_nullable_values() {
1562        let patches = Patches::new(
1563            10,
1564            0,
1565            buffer![2u64, 5, 8].into_array(),
1566            PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1567            None,
1568        )
1569        .unwrap();
1570
1571        // Test masking removes patch at index 2
1572        let mask = Mask::from_iter([
1573            false, false, true, false, false, false, false, false, false, false,
1574        ]);
1575        let masked = patches
1576            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1577            .unwrap()
1578            .unwrap();
1579
1580        // Patches at indices 5 and 8 should remain
1581        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1582
1583        // Values should be the null and 300
1584        let masked_values = masked.values().to_primitive();
1585        assert_eq!(masked_values.len(), 2);
1586        assert!(!masked_values.is_valid(0).unwrap()); // the null value at index 5
1587        assert!(masked_values.is_valid(1).unwrap()); // the 300 value at index 8
1588        assert_eq!(
1589            i32::try_from(&masked_values.scalar_at(1).unwrap()).unwrap(),
1590            300i32
1591        );
1592    }
1593
1594    #[test]
1595    fn test_filter_keep_all() {
1596        let patches = Patches::new(
1597            10,
1598            0,
1599            buffer![2u64, 5, 8].into_array(),
1600            buffer![100i32, 200, 300].into_array(),
1601            None,
1602        )
1603        .unwrap();
1604
1605        // Keep all indices (mask with indices 0-9)
1606        let mask = Mask::from_indices(10, (0..10).collect());
1607        let filtered = patches
1608            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1609            .unwrap()
1610            .unwrap();
1611
1612        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1613        assert_arrays_eq!(
1614            filtered.values(),
1615            PrimitiveArray::from_iter([100i32, 200, 300])
1616        );
1617    }
1618
1619    #[test]
1620    fn test_filter_none() {
1621        let patches = Patches::new(
1622            10,
1623            0,
1624            buffer![2u64, 5, 8].into_array(),
1625            buffer![100i32, 200, 300].into_array(),
1626            None,
1627        )
1628        .unwrap();
1629
1630        // Filter out all (empty mask means keep nothing)
1631        let mask = Mask::from_indices(10, vec![]);
1632        let filtered = patches
1633            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1634            .unwrap();
1635        assert!(filtered.is_none());
1636    }
1637
1638    #[test]
1639    fn test_filter_with_indices() {
1640        let patches = Patches::new(
1641            10,
1642            0,
1643            buffer![2u64, 5, 8].into_array(),
1644            buffer![100i32, 200, 300].into_array(),
1645            None,
1646        )
1647        .unwrap();
1648
1649        // Keep indices 2, 5, 9 (so patches at 2 and 5 remain)
1650        let mask = Mask::from_indices(10, vec![2, 5, 9]);
1651        let filtered = patches
1652            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1653            .unwrap()
1654            .unwrap();
1655
1656        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); // Adjusted indices
1657        assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1658    }
1659
1660    #[test]
1661    fn test_slice_full_range() {
1662        let patches = Patches::new(
1663            10,
1664            0,
1665            buffer![2u64, 5, 8].into_array(),
1666            buffer![100i32, 200, 300].into_array(),
1667            None,
1668        )
1669        .unwrap();
1670
1671        let sliced = patches.slice(0..10).unwrap().unwrap();
1672
1673        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1674        assert_arrays_eq!(
1675            sliced.values(),
1676            PrimitiveArray::from_iter([100i32, 200, 300])
1677        );
1678    }
1679
1680    #[test]
1681    fn test_slice_partial() {
1682        let patches = Patches::new(
1683            10,
1684            0,
1685            buffer![2u64, 5, 8].into_array(),
1686            buffer![100i32, 200, 300].into_array(),
1687            None,
1688        )
1689        .unwrap();
1690
1691        // Slice from 3 to 8 (includes patch at 5)
1692        let sliced = patches.slice(3..8).unwrap().unwrap();
1693
1694        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); // Index stays the same
1695        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1696        assert_eq!(sliced.array_len(), 5); // 8 - 3 = 5
1697        assert_eq!(sliced.offset(), 3); // New offset
1698    }
1699
1700    #[test]
1701    fn test_slice_no_patches() {
1702        let patches = Patches::new(
1703            10,
1704            0,
1705            buffer![2u64, 5, 8].into_array(),
1706            buffer![100i32, 200, 300].into_array(),
1707            None,
1708        )
1709        .unwrap();
1710
1711        // Slice from 6 to 7 (no patches in this range)
1712        let sliced = patches.slice(6..7).unwrap();
1713        assert!(sliced.is_none());
1714    }
1715
1716    #[test]
1717    fn test_slice_with_offset() {
1718        let patches = Patches::new(
1719            10,
1720            5,                                  // offset
1721            buffer![7u64, 10, 13].into_array(), // actual indices are 2, 5, 8
1722            buffer![100i32, 200, 300].into_array(),
1723            None,
1724        )
1725        .unwrap();
1726
1727        // Slice from 3 to 8 (includes patch at actual index 5)
1728        let sliced = patches.slice(3..8).unwrap().unwrap();
1729
1730        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); // Index stays the same (offset + 5 = 10)
1731        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1732        assert_eq!(sliced.offset(), 8); // New offset = 5 + 3
1733    }
1734
1735    #[test]
1736    fn test_patch_values() {
1737        let patches = Patches::new(
1738            10,
1739            0,
1740            buffer![2u64, 5, 8].into_array(),
1741            buffer![100i32, 200, 300].into_array(),
1742            None,
1743        )
1744        .unwrap();
1745
1746        let values = patches.values().to_primitive();
1747        assert_eq!(
1748            i32::try_from(&values.scalar_at(0).unwrap()).unwrap(),
1749            100i32
1750        );
1751        assert_eq!(
1752            i32::try_from(&values.scalar_at(1).unwrap()).unwrap(),
1753            200i32
1754        );
1755        assert_eq!(
1756            i32::try_from(&values.scalar_at(2).unwrap()).unwrap(),
1757            300i32
1758        );
1759    }
1760
1761    #[test]
1762    fn test_indices_range() {
1763        let patches = Patches::new(
1764            10,
1765            0,
1766            buffer![2u64, 5, 8].into_array(),
1767            buffer![100i32, 200, 300].into_array(),
1768            None,
1769        )
1770        .unwrap();
1771
1772        assert_eq!(patches.min_index().unwrap(), 2);
1773        assert_eq!(patches.max_index().unwrap(), 8);
1774    }
1775
1776    #[test]
1777    fn test_search_index() {
1778        let patches = Patches::new(
1779            10,
1780            0,
1781            buffer![2u64, 5, 8].into_array(),
1782            buffer![100i32, 200, 300].into_array(),
1783            None,
1784        )
1785        .unwrap();
1786
1787        // Search for exact indices
1788        assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1789        assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1790        assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1791
1792        // Search for non-patch indices
1793        assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1794        assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1795        assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1796        assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1797    }
1798
1799    #[test]
1800    fn test_mask_boundary_patches() {
1801        // Test masking patches at array boundaries
1802        let patches = Patches::new(
1803            10,
1804            0,
1805            buffer![0u64, 9].into_array(),
1806            buffer![100i32, 200].into_array(),
1807            None,
1808        )
1809        .unwrap();
1810
1811        let mask = Mask::from_iter([
1812            true, false, false, false, false, false, false, false, false, false,
1813        ]);
1814        let masked = patches
1815            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1816            .unwrap();
1817        assert!(masked.is_some());
1818        let masked = masked.unwrap();
1819        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1820        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1821    }
1822
1823    #[test]
1824    fn test_mask_all_patches_removed() {
1825        // Test when all patches are masked out
1826        let patches = Patches::new(
1827            10,
1828            0,
1829            buffer![2u64, 5, 8].into_array(),
1830            buffer![100i32, 200, 300].into_array(),
1831            None,
1832        )
1833        .unwrap();
1834
1835        // Mask that removes all patches
1836        let mask = Mask::from_iter([
1837            false, false, true, false, false, true, false, false, true, false,
1838        ]);
1839        let masked = patches
1840            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1841            .unwrap();
1842        assert!(masked.is_none());
1843    }
1844
1845    #[test]
1846    fn test_mask_no_patches_removed() {
1847        // Test when no patches are masked
1848        let patches = Patches::new(
1849            10,
1850            0,
1851            buffer![2u64, 5, 8].into_array(),
1852            buffer![100i32, 200, 300].into_array(),
1853            None,
1854        )
1855        .unwrap();
1856
1857        // Mask that doesn't affect any patches
1858        let mask = Mask::from_iter([
1859            true, false, false, true, false, false, true, false, false, true,
1860        ]);
1861        let masked = patches
1862            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1863            .unwrap()
1864            .unwrap();
1865
1866        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1867        assert_arrays_eq!(
1868            masked.values(),
1869            PrimitiveArray::from_iter([100i32, 200, 300])
1870        );
1871    }
1872
1873    #[test]
1874    fn test_mask_single_patch() {
1875        // Test with a single patch
1876        let patches = Patches::new(
1877            5,
1878            0,
1879            buffer![2u64].into_array(),
1880            buffer![42i32].into_array(),
1881            None,
1882        )
1883        .unwrap();
1884
1885        // Mask that removes the single patch
1886        let mask = Mask::from_iter([false, false, true, false, false]);
1887        let masked = patches
1888            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1889            .unwrap();
1890        assert!(masked.is_none());
1891
1892        // Mask that keeps the single patch
1893        let mask = Mask::from_iter([true, false, false, true, false]);
1894        let masked = patches
1895            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1896            .unwrap()
1897            .unwrap();
1898        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1899    }
1900
1901    #[test]
1902    fn test_mask_contiguous_patches() {
1903        // Test with contiguous patches
1904        let patches = Patches::new(
1905            10,
1906            0,
1907            buffer![3u64, 4, 5, 6].into_array(),
1908            buffer![100i32, 200, 300, 400].into_array(),
1909            None,
1910        )
1911        .unwrap();
1912
1913        // Mask that removes middle patches
1914        let mask = Mask::from_iter([
1915            false, false, false, false, true, true, false, false, false, false,
1916        ]);
1917        let masked = patches
1918            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1919            .unwrap()
1920            .unwrap();
1921
1922        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1923        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1924    }
1925
1926    #[test]
1927    fn test_mask_with_large_offset() {
1928        // Test with a large offset that shifts all indices
1929        let patches = Patches::new(
1930            20,
1931            15,
1932            buffer![16u64, 17, 19].into_array(), // actual indices are 1, 2, 4
1933            buffer![100i32, 200, 300].into_array(),
1934            None,
1935        )
1936        .unwrap();
1937
1938        // Mask that removes the patch at actual index 2
1939        let mask = Mask::from_iter([
1940            false, false, true, false, false, false, false, false, false, false, false, false,
1941            false, false, false, false, false, false, false, false,
1942        ]);
1943        let masked = patches
1944            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1945            .unwrap()
1946            .unwrap();
1947
1948        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1949        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1950    }
1951
1952    #[test]
1953    #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1954    fn test_mask_wrong_length() {
1955        let patches = Patches::new(
1956            10,
1957            0,
1958            buffer![2u64, 5, 8].into_array(),
1959            buffer![100i32, 200, 300].into_array(),
1960            None,
1961        )
1962        .unwrap();
1963
1964        // Mask with wrong length
1965        let mask = Mask::from_iter([false, false, true, false, false]);
1966        patches
1967            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1968            .unwrap();
1969    }
1970
1971    #[test]
1972    fn test_chunk_offsets_search() {
1973        let indices = buffer![100u64, 200, 3000, 3100].into_array();
1974        let values = buffer![10i32, 20, 30, 40].into_array();
1975        let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
1976        let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
1977
1978        assert!(patches.chunk_offsets.is_some());
1979
1980        // chunk 0: patches at 100, 200
1981        assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
1982        assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
1983
1984        // chunks 1, 2: no patches
1985        assert_eq!(
1986            patches.search_index(1500).unwrap(),
1987            SearchResult::NotFound(2)
1988        );
1989        assert_eq!(
1990            patches.search_index(2000).unwrap(),
1991            SearchResult::NotFound(2)
1992        );
1993
1994        // chunk 3: patches at 3000, 3100
1995        assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
1996        assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
1997
1998        assert_eq!(
1999            patches.search_index(1024).unwrap(),
2000            SearchResult::NotFound(2)
2001        );
2002    }
2003
2004    #[test]
2005    fn test_chunk_offsets_with_slice() {
2006        let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2007        let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2008        let chunk_offsets = buffer![0u64, 2, 6].into_array();
2009        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2010
2011        let sliced = patches.slice(1000..2200).unwrap().unwrap();
2012
2013        assert!(sliced.chunk_offsets.is_some());
2014        assert_eq!(sliced.offset(), 1000);
2015
2016        assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
2017        assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2018        assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
2019
2020        assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
2021    }
2022
2023    #[test]
2024    fn test_chunk_offsets_with_slice_after_first_chunk() {
2025        let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
2026        let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
2027        let chunk_offsets = buffer![0u64, 2, 6].into_array();
2028        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2029
2030        let sliced = patches.slice(1300..2200).unwrap().unwrap();
2031
2032        assert!(sliced.chunk_offsets.is_some());
2033        assert_eq!(sliced.offset(), 1300);
2034
2035        assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2036        assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
2037        assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
2038        assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
2039        assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
2040    }
2041
2042    #[test]
2043    fn test_chunk_offsets_slice_empty_result() {
2044        let indices = buffer![100u64, 200, 3000, 3100].into_array();
2045        let values = buffer![10i32, 20, 30, 40].into_array();
2046        let chunk_offsets = buffer![0u64, 2].into_array();
2047        let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
2048
2049        let sliced = patches.slice(1000..2000).unwrap();
2050        assert!(sliced.is_none());
2051    }
2052
2053    #[test]
2054    fn test_chunk_offsets_slice_single_patch() {
2055        let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2056        let values = buffer![10i32, 20, 30, 40].into_array();
2057        let chunk_offsets = buffer![0u64, 1, 3].into_array();
2058        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2059
2060        let sliced = patches.slice(1100..1250).unwrap().unwrap();
2061
2062        assert_eq!(sliced.num_patches(), 1);
2063        assert_eq!(sliced.offset(), 1100);
2064        assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); // 1200 - 1100 = 100
2065        assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2066        assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2067    }
2068
2069    #[test]
2070    fn test_chunk_offsets_slice_across_chunks() {
2071        let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2072        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2073        let chunk_offsets = buffer![0u64, 2, 4].into_array();
2074        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2075
2076        let sliced = patches.slice(150..2150).unwrap().unwrap();
2077
2078        assert_eq!(sliced.num_patches(), 4);
2079        assert_eq!(sliced.offset(), 150);
2080
2081        assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); // 200 - 150 = 50
2082        assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); // 1100 - 150 = 950
2083        assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); // 1200 - 150 = 1050
2084        assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); // 2100 - 150 = 1950
2085    }
2086
2087    #[test]
2088    fn test_chunk_offsets_boundary_searches() {
2089        let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2090        let values = buffer![10i32, 20, 30, 40, 50].into_array();
2091        let chunk_offsets = buffer![0u64, 1, 4].into_array();
2092        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2093
2094        assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2095        assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2096        assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2097        assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2098        assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2099
2100        assert_eq!(
2101            patches.search_index(1022).unwrap(),
2102            SearchResult::NotFound(0)
2103        );
2104        assert_eq!(
2105            patches.search_index(2046).unwrap(),
2106            SearchResult::NotFound(3)
2107        );
2108    }
2109
2110    #[test]
2111    fn test_chunk_offsets_slice_edge_cases() {
2112        let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2113        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2114        let chunk_offsets = buffer![0u64, 3, 5].into_array();
2115        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2116
2117        // Slice at the very beginning
2118        let sliced = patches.slice(0..10).unwrap().unwrap();
2119        assert_eq!(sliced.num_patches(), 2);
2120        assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2121        assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2122
2123        // Slice at the very end
2124        let sliced = patches.slice(2040..3000).unwrap().unwrap();
2125        assert_eq!(sliced.num_patches(), 2); // patches at 2047 and 2048
2126        assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); // 2047 - 2040
2127        assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); // 2048 - 2040
2128    }
2129
2130    #[test]
2131    fn test_chunk_offsets_slice_nested() {
2132        let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2133        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2134        let chunk_offsets = buffer![0u64].into_array();
2135        let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2136
2137        let sliced1 = patches.slice(150..550).unwrap().unwrap();
2138        assert_eq!(sliced1.num_patches(), 4); // 200, 300, 400, 500
2139
2140        let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2141        assert_eq!(sliced2.num_patches(), 1); // 300
2142        assert_eq!(sliced2.offset(), 250);
2143
2144        assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); // 300 - 250
2145        assert_eq!(
2146            sliced2.search_index(150).unwrap(),
2147            SearchResult::NotFound(1)
2148        );
2149    }
2150
2151    #[test]
2152    fn test_index_larger_than_length() {
2153        let chunk_offsets = buffer![0u64].into_array();
2154        let indices = buffer![1023u64].into_array();
2155        let values = buffer![42i32].into_array();
2156        let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2157        assert_eq!(
2158            patches.search_index(2048).unwrap(),
2159            SearchResult::NotFound(1)
2160        );
2161    }
2162}