Skip to main content

vortex_array/
patches.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::cmp::Ordering;
5use std::fmt::Debug;
6use std::hash::Hash;
7use std::ops::Range;
8
9use num_traits::NumCast;
10use vortex_buffer::BitBuffer;
11use vortex_buffer::BufferMut;
12use vortex_error::VortexError;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_err;
17use vortex_mask::AllOr;
18use vortex_mask::Mask;
19use vortex_utils::aliases::hash_map::HashMap;
20
21use crate::ArrayRef;
22use crate::ExecutionCtx;
23use crate::IntoArray;
24use crate::LEGACY_SESSION;
25use crate::ToCanonical;
26use crate::VortexSessionExecute;
27use crate::arrays::PrimitiveArray;
28use crate::builtins::ArrayBuiltins;
29use crate::dtype::DType;
30use crate::dtype::IntegerPType;
31use crate::dtype::NativePType;
32use crate::dtype::Nullability;
33use crate::dtype::Nullability::NonNullable;
34use crate::dtype::PType;
35use crate::dtype::UnsignedPType;
36use crate::match_each_integer_ptype;
37use crate::match_each_unsigned_integer_ptype;
38use crate::scalar::PValue;
39use crate::scalar::Scalar;
40use crate::search_sorted::SearchResult;
41use crate::search_sorted::SearchSorted;
42use crate::search_sorted::SearchSortedSide;
43use crate::validity::Validity;
44
45/// One patch index offset is stored for each chunk.
46/// This allows for constant time patch index lookups.
47pub const PATCH_CHUNK_SIZE: usize = 1024;
48
49#[derive(Copy, Clone, prost::Message)]
50pub struct PatchesMetadata {
51    #[prost(uint64, tag = "1")]
52    len: u64,
53    #[prost(uint64, tag = "2")]
54    offset: u64,
55    #[prost(enumeration = "PType", tag = "3")]
56    indices_ptype: i32,
57    #[prost(uint64, optional, tag = "4")]
58    chunk_offsets_len: Option<u64>,
59    #[prost(enumeration = "PType", optional, tag = "5")]
60    chunk_offsets_ptype: Option<i32>,
61    #[prost(uint64, optional, tag = "6")]
62    offset_within_chunk: Option<u64>,
63}
64
65impl PatchesMetadata {
66    #[inline]
67    pub fn new(
68        len: usize,
69        offset: usize,
70        indices_ptype: PType,
71        chunk_offsets_len: Option<usize>,
72        chunk_offsets_ptype: Option<PType>,
73        offset_within_chunk: Option<usize>,
74    ) -> Self {
75        Self {
76            len: len as u64,
77            offset: offset as u64,
78            indices_ptype: indices_ptype as i32,
79            chunk_offsets_len: chunk_offsets_len.map(|len| len as u64),
80            chunk_offsets_ptype: chunk_offsets_ptype.map(|pt| pt as i32),
81            offset_within_chunk: offset_within_chunk.map(|len| len as u64),
82        }
83    }
84
85    #[inline]
86    pub fn len(&self) -> VortexResult<usize> {
87        usize::try_from(self.len).map_err(|_| vortex_err!("len does not fit in usize"))
88    }
89
90    #[inline]
91    pub fn is_empty(&self) -> bool {
92        self.len == 0
93    }
94
95    #[inline]
96    pub fn offset(&self) -> VortexResult<usize> {
97        usize::try_from(self.offset).map_err(|_| vortex_err!("offset does not fit in usize"))
98    }
99
100    #[inline]
101    pub fn chunk_offsets_dtype(&self) -> VortexResult<Option<DType>> {
102        self.chunk_offsets_ptype
103            .map(|t| {
104                PType::try_from(t)
105                    .map_err(|e| vortex_err!("invalid i32 value {t} for PType: {}", e))
106                    .map(|ptype| DType::Primitive(ptype, NonNullable))
107            })
108            .transpose()
109    }
110
111    #[inline]
112    pub fn indices_dtype(&self) -> VortexResult<DType> {
113        let ptype = PType::try_from(self.indices_ptype).map_err(|e| {
114            vortex_err!("invalid i32 value {} for PType: {}", self.indices_ptype, e)
115        })?;
116        vortex_ensure!(
117            ptype.is_unsigned_int(),
118            "Patch indices must be unsigned integers"
119        );
120        Ok(DType::Primitive(ptype, NonNullable))
121    }
122}
123
124/// A helper for working with patched arrays.
125#[derive(Debug, Clone)]
126pub struct Patches {
127    array_len: usize,
128    offset: usize,
129    indices: ArrayRef,
130    values: ArrayRef,
131    /// Stores the patch index offset for each chunk.
132    ///
133    /// This allows us to lookup the patches for a given chunk in constant time via
134    /// `patch_indices[chunk_offsets[i]..chunk_offsets[i+1]]`.
135    ///
136    /// This is optional for compatibility reasons.
137    chunk_offsets: Option<ArrayRef>,
138    /// Chunk offsets are only sliced off in case the slice is fully
139    /// outside of the chunk range.
140    ///
141    /// Though the range for indices and values is sliced in terms of
142    /// individual elements, not chunks. To account for that we do a
143    /// saturating sub when adjusting the indices based on the chunk offset.
144    ///
145    /// `offset_within_chunk` is necessary in order to keep track of how many
146    /// elements were sliced off within the chunk.
147    offset_within_chunk: Option<usize>,
148}
149
150impl Patches {
151    pub fn new(
152        array_len: usize,
153        offset: usize,
154        indices: ArrayRef,
155        values: ArrayRef,
156        chunk_offsets: Option<ArrayRef>,
157    ) -> VortexResult<Self> {
158        vortex_ensure!(
159            indices.len() == values.len(),
160            "Patch indices and values must have the same length"
161        );
162        vortex_ensure!(
163            indices.dtype().is_unsigned_int() && !indices.dtype().is_nullable(),
164            "Patch indices must be non-nullable unsigned integers, got {:?}",
165            indices.dtype()
166        );
167
168        vortex_ensure!(
169            indices.len() <= array_len,
170            "Patch indices must be shorter than the array length"
171        );
172        vortex_ensure!(!indices.is_empty(), "Patch indices must not be empty");
173
174        // Perform validation of components when they are host-resident.
175        // This is not possible to do eagerly when the data is on GPU memory.
176        if indices.is_host() && values.is_host() {
177            let max = usize::try_from(&indices.execute_scalar(
178                indices.len() - 1,
179                &mut LEGACY_SESSION.create_execution_ctx(),
180            )?)
181            .map_err(|_| vortex_err!("indices must be a number"))?;
182            vortex_ensure!(
183                max - offset < array_len,
184                "Patch indices {max:?}, offset {offset} are longer than the array length {array_len}"
185            );
186
187            #[cfg(debug_assertions)]
188            {
189                use crate::VortexSessionExecute;
190                let mut ctx = LEGACY_SESSION.create_execution_ctx();
191                assert!(
192                    crate::aggregate_fn::fns::is_sorted::is_sorted(&indices, &mut ctx)
193                        .unwrap_or(false),
194                    "Patch indices must be sorted"
195                );
196            }
197        }
198
199        Ok(Self {
200            array_len,
201            offset,
202            indices,
203            values,
204            chunk_offsets: chunk_offsets.clone(),
205            // Initialize with `Some(0)` only if `chunk_offsets` are set.
206            offset_within_chunk: chunk_offsets.map(|_| 0),
207        })
208    }
209
210    /// Construct new patches without validating any of the arguments
211    ///
212    /// # Safety
213    ///
214    /// Users have to assert that
215    /// * Indices and values have the same length
216    /// * Indices is an unsigned integer type
217    /// * Indices must be sorted
218    /// * Last value in indices is smaller than array_len
219    pub unsafe fn new_unchecked(
220        array_len: usize,
221        offset: usize,
222        indices: ArrayRef,
223        values: ArrayRef,
224        chunk_offsets: Option<ArrayRef>,
225        offset_within_chunk: Option<usize>,
226    ) -> Self {
227        Self {
228            array_len,
229            offset,
230            indices,
231            values,
232            chunk_offsets,
233            offset_within_chunk,
234        }
235    }
236
237    #[inline]
238    pub fn array_len(&self) -> usize {
239        self.array_len
240    }
241
242    #[inline]
243    pub fn num_patches(&self) -> usize {
244        self.indices.len()
245    }
246
247    #[inline]
248    pub fn dtype(&self) -> &DType {
249        self.values.dtype()
250    }
251
252    #[inline]
253    pub fn indices(&self) -> &ArrayRef {
254        &self.indices
255    }
256
257    #[inline]
258    pub fn into_indices(self) -> ArrayRef {
259        self.indices
260    }
261
262    #[inline]
263    pub fn indices_mut(&mut self) -> &mut ArrayRef {
264        &mut self.indices
265    }
266
267    #[inline]
268    pub fn values(&self) -> &ArrayRef {
269        &self.values
270    }
271
272    #[inline]
273    pub fn into_values(self) -> ArrayRef {
274        self.values
275    }
276
277    #[inline]
278    pub fn values_mut(&mut self) -> &mut ArrayRef {
279        &mut self.values
280    }
281
282    #[inline]
283    // Absolute offset: 0 if the array is unsliced.
284    pub fn offset(&self) -> usize {
285        self.offset
286    }
287
288    #[inline]
289    pub fn chunk_offsets(&self) -> &Option<ArrayRef> {
290        &self.chunk_offsets
291    }
292
293    #[inline]
294    pub fn chunk_offset_at(&self, idx: usize) -> VortexResult<usize> {
295        let Some(chunk_offsets) = &self.chunk_offsets else {
296            vortex_bail!("chunk_offsets must be set to retrieve offset at index")
297        };
298
299        chunk_offsets
300            .execute_scalar(idx, &mut LEGACY_SESSION.create_execution_ctx())?
301            .as_primitive()
302            .as_::<usize>()
303            .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))
304    }
305
306    /// Returns the number of patches sliced off from the current first chunk.
307    ///
308    /// When patches are sliced, the chunk offsets array is also sliced to only include
309    /// chunks that overlap with the slice range. However, the slice boundary may fall
310    /// in the middle of a chunk's patch range. This offset indicates how many patches
311    /// at the start of the first chunk should be skipped.
312    ///
313    /// Returns `None` if chunk offsets are not set.
314    #[inline]
315    pub fn offset_within_chunk(&self) -> Option<usize> {
316        self.offset_within_chunk
317    }
318
319    #[inline]
320    pub fn indices_ptype(&self) -> VortexResult<PType> {
321        PType::try_from(self.indices.dtype())
322            .map_err(|_| vortex_err!("indices dtype is not primitive"))
323    }
324
325    pub fn to_metadata(&self, len: usize, dtype: &DType) -> VortexResult<PatchesMetadata> {
326        if self.indices.len() > len {
327            vortex_bail!(
328                "Patch indices {} are longer than the array length {}",
329                self.indices.len(),
330                len
331            );
332        }
333        if self.values.dtype() != dtype {
334            vortex_bail!(
335                "Patch values dtype {} does not match array dtype {}",
336                self.values.dtype(),
337                dtype
338            );
339        }
340        let chunk_offsets_len = self.chunk_offsets.as_ref().map(|co| co.len());
341        let chunk_offsets_ptype = self.chunk_offsets.as_ref().map(|co| co.dtype().as_ptype());
342
343        Ok(PatchesMetadata::new(
344            self.indices.len(),
345            self.offset,
346            self.indices.dtype().as_ptype(),
347            chunk_offsets_len,
348            chunk_offsets_ptype,
349            self.offset_within_chunk,
350        ))
351    }
352
353    pub fn cast_values(self, values_dtype: &DType) -> VortexResult<Self> {
354        // SAFETY: casting does not affect the relationship between the indices and values
355        unsafe {
356            Ok(Self::new_unchecked(
357                self.array_len,
358                self.offset,
359                self.indices,
360                self.values.cast(values_dtype.clone())?,
361                self.chunk_offsets,
362                self.offset_within_chunk,
363            ))
364        }
365    }
366
367    /// Get the patched value at a given index if it exists.
368    pub fn get_patched(&self, index: usize) -> VortexResult<Option<Scalar>> {
369        self.search_index(index)?
370            .to_found()
371            .map(|patch_idx| {
372                self.values()
373                    .execute_scalar(patch_idx, &mut LEGACY_SESSION.create_execution_ctx())
374            })
375            .transpose()
376    }
377
378    /// Searches for `index` in the indices array.
379    ///
380    /// Chooses between chunked search when [`Self::chunk_offsets`] is
381    /// available, and binary search otherwise. The `index` parameter is
382    /// adjusted by [`Self::offset`] for both.
383    ///
384    /// # Arguments
385    /// * `index` - The index to search for
386    ///
387    /// # Returns
388    /// * [`SearchResult::Found(patch_idx)`] - If a patch exists at this index, returns the
389    ///   position in the patches array
390    /// * [`SearchResult::NotFound(insertion_point)`] - If no patch exists, returns where
391    ///   a patch at this index would be inserted to maintain sorted order
392    ///
393    /// [`SearchResult::Found(patch_idx)`]: SearchResult::Found
394    /// [`SearchResult::NotFound(insertion_point)`]: SearchResult::NotFound
395    pub fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
396        if self.chunk_offsets.is_some() {
397            return self.search_index_chunked(index);
398        }
399
400        Self::search_index_binary_search(&self.indices, index + self.offset)
401    }
402
403    /// Binary searches for `needle` in the indices array.
404    ///
405    /// # Returns
406    /// [`SearchResult::Found`] with the position if needle exists, or [`SearchResult::NotFound`]
407    /// with the insertion point if not found.
408    fn search_index_binary_search(indices: &ArrayRef, needle: usize) -> VortexResult<SearchResult> {
409        if indices.is_canonical() {
410            let primitive = indices.to_primitive();
411            match_each_integer_ptype!(primitive.ptype(), |T| {
412                let Ok(needle) = T::try_from(needle) else {
413                    // If the needle is not of type T, then it cannot possibly be in this array.
414                    //
415                    // The needle is a non-negative integer (a usize); therefore, it must be larger
416                    // than all values in this array.
417                    return Ok(SearchResult::NotFound(primitive.len()));
418                };
419                return primitive
420                    .as_slice::<T>()
421                    .search_sorted(&needle, SearchSortedSide::Left);
422            });
423        }
424        indices
425            .as_primitive_typed()
426            .search_sorted(&PValue::U64(needle as u64), SearchSortedSide::Left)
427    }
428
429    /// Constant time searches for `index` in the indices array.
430    ///
431    /// First determines which chunk the target index falls into, then performs
432    /// a binary search within that chunk's range.
433    ///
434    /// Returns a [`SearchResult`] indicating either the exact patch index if found,
435    /// or the insertion point if not found.
436    ///
437    /// Returns an error if `chunk_offsets` or `offset_within_chunk` are not set.
438    fn search_index_chunked(&self, index: usize) -> VortexResult<SearchResult> {
439        let Some(chunk_offsets) = &self.chunk_offsets else {
440            vortex_bail!("chunk_offsets is required to be set")
441        };
442
443        let Some(offset_within_chunk) = self.offset_within_chunk else {
444            vortex_bail!("offset_within_chunk is required to be set")
445        };
446
447        if index >= self.array_len() {
448            return Ok(SearchResult::NotFound(self.indices().len()));
449        }
450
451        let chunk_idx = (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE;
452
453        // Patch index offsets are absolute and need to be offset by the first chunk of the current slice.
454        let base_offset = self.chunk_offset_at(0)?;
455
456        let patches_start_idx = (self.chunk_offset_at(chunk_idx)? - base_offset)
457            // Chunk offsets are only sliced off in case the slice is fully
458            // outside of the chunk range.
459            //
460            // Though the range for indices and values is sliced in terms of
461            // individual elements, not chunks. To account for that we do a
462            // saturating sub when adjusting the indices based on the chunk offset.
463            .saturating_sub(offset_within_chunk);
464
465        let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
466            (self.chunk_offset_at(chunk_idx + 1)? - base_offset)
467                .saturating_sub(offset_within_chunk)
468                .min(self.indices.len())
469        } else {
470            self.indices.len()
471        };
472
473        let chunk_indices = self.indices.slice(patches_start_idx..patches_end_idx)?;
474        let result = Self::search_index_binary_search(&chunk_indices, index + self.offset)?;
475
476        Ok(match result {
477            SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
478            SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
479        })
480    }
481
482    /// Batch version of `search_index`.
483    ///
484    /// In contrast to `search_index`, this function requires `indices` as
485    /// well as `chunk_offsets` to be passed as slices. This is to avoid
486    /// redundant canonicalization and `scalar_at` lookups across calls.
487    fn search_index_chunked_batch<T, O>(
488        &self,
489        indices: &[T],
490        chunk_offsets: &[O],
491        index: T,
492    ) -> VortexResult<SearchResult>
493    where
494        T: UnsignedPType,
495        O: UnsignedPType,
496        usize: TryFrom<T>,
497        usize: TryFrom<O>,
498    {
499        let Some(offset_within_chunk) = self.offset_within_chunk else {
500            vortex_bail!("offset_within_chunk is required to be set")
501        };
502
503        let chunk_idx = {
504            let Ok(index) = usize::try_from(index) else {
505                // If the needle cannot be converted to usize, it's larger than all values in this array.
506                return Ok(SearchResult::NotFound(indices.len()));
507            };
508
509            if index >= self.array_len() {
510                return Ok(SearchResult::NotFound(self.indices().len()));
511            }
512
513            (index + self.offset % PATCH_CHUNK_SIZE) / PATCH_CHUNK_SIZE
514        };
515
516        // Patch index offsets are absolute and need to be offset by the first chunk of the current slice.
517        let chunk_offset = usize::try_from(chunk_offsets[chunk_idx] - chunk_offsets[0])
518            .map_err(|_| vortex_err!("chunk_offset failed to convert to usize"))?;
519
520        let patches_start_idx = chunk_offset
521            // Chunk offsets are only sliced off in case the slice is fully
522            // outside of the chunk range.
523            //
524            // Though the range for indices and values is sliced in terms of
525            // individual elements, not chunks. To account for that we do a
526            // saturating sub when adjusting the indices based on the chunk offset.
527            .saturating_sub(offset_within_chunk);
528
529        let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
530            usize::try_from(chunk_offsets[chunk_idx + 1] - chunk_offsets[0])
531                .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
532                .saturating_sub(offset_within_chunk)
533                .min(indices.len())
534        } else {
535            self.indices.len()
536        };
537
538        let Some(offset) = T::from(self.offset) else {
539            // If the offset cannot be converted to T, it's larger than all values in this array.
540            return Ok(SearchResult::NotFound(indices.len()));
541        };
542
543        let chunk_indices = &indices[patches_start_idx..patches_end_idx];
544        let result = chunk_indices.search_sorted(&(index + offset), SearchSortedSide::Left)?;
545
546        Ok(match result {
547            SearchResult::Found(idx) => SearchResult::Found(patches_start_idx + idx),
548            SearchResult::NotFound(idx) => SearchResult::NotFound(patches_start_idx + idx),
549        })
550    }
551
552    /// Returns the minimum patch index
553    pub fn min_index(&self) -> VortexResult<usize> {
554        let first = self
555            .indices
556            .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
557            .as_primitive()
558            .as_::<usize>()
559            .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
560        Ok(first - self.offset)
561    }
562
563    /// Returns the maximum patch index
564    pub fn max_index(&self) -> VortexResult<usize> {
565        let last = self
566            .indices
567            .execute_scalar(
568                self.indices.len() - 1,
569                &mut LEGACY_SESSION.create_execution_ctx(),
570            )?
571            .as_primitive()
572            .as_::<usize>()
573            .ok_or_else(|| vortex_err!("index does not fit in usize"))?;
574        Ok(last - self.offset)
575    }
576
577    /// Filter the patches by a mask, resulting in new patches for the filtered array.
578    pub fn filter(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
579        if mask.len() != self.array_len {
580            vortex_bail!(
581                "Filter mask length {} does not match array length {}",
582                mask.len(),
583                self.array_len
584            );
585        }
586
587        match mask.indices() {
588            AllOr::All => Ok(Some(self.clone())),
589            AllOr::None => Ok(None),
590            AllOr::Some(mask_indices) => {
591                let flat_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
592                match_each_unsigned_integer_ptype!(flat_indices.ptype(), |I| {
593                    filter_patches_with_mask(
594                        flat_indices.as_slice::<I>(),
595                        self.offset(),
596                        self.values(),
597                        mask_indices,
598                    )
599                })
600            }
601        }
602    }
603
604    /// Mask the patches, REMOVING the patches where the mask is true.
605    /// Unlike filter, this preserves the patch indices.
606    /// Unlike mask on a single array, this does not set masked values to null.
607    // TODO(joe): make this lazy and remove the ctx.
608    pub fn mask(&self, mask: &Mask, ctx: &mut ExecutionCtx) -> VortexResult<Option<Self>> {
609        if mask.len() != self.array_len {
610            vortex_bail!(
611                "Filter mask length {} does not match array length {}",
612                mask.len(),
613                self.array_len
614            );
615        }
616
617        let filter_mask = match mask.bit_buffer() {
618            AllOr::All => return Ok(None),
619            AllOr::None => return Ok(Some(self.clone())),
620            AllOr::Some(masked) => {
621                let patch_indices = self.indices().clone().execute::<PrimitiveArray>(ctx)?;
622                match_each_unsigned_integer_ptype!(patch_indices.ptype(), |P| {
623                    let patch_indices = patch_indices.as_slice::<P>();
624                    Mask::from_buffer(BitBuffer::collect_bool(patch_indices.len(), |i| {
625                        #[allow(clippy::cast_possible_truncation)]
626                        let idx = (patch_indices[i] as usize) - self.offset;
627                        !masked.value(idx)
628                    }))
629                })
630            }
631        };
632
633        if filter_mask.all_false() {
634            return Ok(None);
635        }
636
637        // SAFETY: filtering indices/values with same mask maintains their 1:1 relationship
638        let filtered_indices = self.indices.filter(filter_mask.clone())?;
639        let filtered_values = self.values.filter(filter_mask)?;
640
641        Ok(Some(Self {
642            array_len: self.array_len,
643            offset: self.offset,
644            indices: filtered_indices,
645            values: filtered_values,
646            // TODO(0ax1): Chunk offsets are invalid after a filter is applied.
647            chunk_offsets: None,
648            offset_within_chunk: self.offset_within_chunk,
649        }))
650    }
651
652    /// Slice the patches by a range of the patched array.
653    pub fn slice(&self, range: Range<usize>) -> VortexResult<Option<Self>> {
654        let slice_start_idx = self.search_index(range.start)?.to_index();
655        let slice_end_idx = self.search_index(range.end)?.to_index();
656
657        if slice_start_idx == slice_end_idx {
658            return Ok(None);
659        }
660
661        let values = self.values().slice(slice_start_idx..slice_end_idx)?;
662        let indices = self.indices().slice(slice_start_idx..slice_end_idx)?;
663
664        let new_chunk_offsets = self
665            .chunk_offsets
666            .as_ref()
667            .map(|chunk_offsets| -> VortexResult<ArrayRef> {
668                let chunk_relative_offset = self.offset % PATCH_CHUNK_SIZE;
669                let chunk_start_idx = (chunk_relative_offset + range.start) / PATCH_CHUNK_SIZE;
670                let chunk_end_idx = (chunk_relative_offset + range.end).div_ceil(PATCH_CHUNK_SIZE);
671                chunk_offsets.slice(chunk_start_idx..chunk_end_idx)
672            })
673            .transpose()?;
674
675        let offset_within_chunk = new_chunk_offsets
676            .as_ref()
677            .map(|new_chunk_offsets| -> VortexResult<usize> {
678                let new_chunk_base = new_chunk_offsets
679                    .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())?
680                    .as_primitive()
681                    .as_::<usize>()
682                    .ok_or_else(|| vortex_err!("chunk offset does not fit in usize"))?;
683                let parent_chunk_base = self.chunk_offset_at(0)?;
684                let parent_within = self.offset_within_chunk.unwrap_or(0);
685                Ok(parent_chunk_base + parent_within + slice_start_idx - new_chunk_base)
686            })
687            .transpose()?;
688
689        Ok(Some(Self {
690            array_len: range.len(),
691            offset: range.start + self.offset(),
692            indices,
693            values,
694            chunk_offsets: new_chunk_offsets,
695            offset_within_chunk,
696        }))
697    }
698
699    // https://docs.google.com/spreadsheets/d/1D9vBZ1QJ6mwcIvV5wIL0hjGgVchcEnAyhvitqWu2ugU
700    const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;
701
702    fn is_map_faster_than_search(&self, take_indices: &PrimitiveArray) -> bool {
703        (self.num_patches() as f64 / take_indices.len() as f64)
704            < Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
705    }
706
707    /// Take the indices from the patches
708    ///
709    /// Any nulls in take_indices are added to the resulting patches.
710    pub fn take_with_nulls(
711        &self,
712        take_indices: &ArrayRef,
713        ctx: &mut ExecutionCtx,
714    ) -> VortexResult<Option<Self>> {
715        if take_indices.is_empty() {
716            return Ok(None);
717        }
718
719        let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
720        if self.is_map_faster_than_search(&take_indices) {
721            self.take_map(take_indices, true, ctx)
722        } else {
723            self.take_search(take_indices, true, ctx)
724        }
725    }
726
727    /// Take the indices from the patches.
728    ///
729    /// Any nulls in take_indices are ignored.
730    pub fn take(
731        &self,
732        take_indices: &ArrayRef,
733        ctx: &mut ExecutionCtx,
734    ) -> VortexResult<Option<Self>> {
735        if take_indices.is_empty() {
736            return Ok(None);
737        }
738
739        let take_indices = take_indices.clone().execute::<PrimitiveArray>(ctx)?;
740        if self.is_map_faster_than_search(&take_indices) {
741            self.take_map(take_indices, false, ctx)
742        } else {
743            self.take_search(take_indices, false, ctx)
744        }
745    }
746
747    #[expect(
748        clippy::cognitive_complexity,
749        reason = "complexity is from nested match_each_* macros"
750    )]
751    pub fn take_search(
752        &self,
753        take_indices: PrimitiveArray,
754        include_nulls: bool,
755        ctx: &mut ExecutionCtx,
756    ) -> VortexResult<Option<Self>> {
757        let take_indices_validity = take_indices.validity()?;
758        let patch_indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
759        let chunk_offsets = self
760            .chunk_offsets()
761            .as_ref()
762            .map(|co| co.clone().execute::<PrimitiveArray>(ctx))
763            .transpose()?;
764
765        let (values_indices, new_indices): (BufferMut<u64>, BufferMut<u64>) =
766            match_each_unsigned_integer_ptype!(patch_indices.ptype(), |PatchT| {
767                let patch_indices_slice = patch_indices.as_slice::<PatchT>();
768                match_each_integer_ptype!(take_indices.ptype(), |TakeT| {
769                    let take_slice = take_indices.as_slice::<TakeT>();
770
771                    if let Some(chunk_offsets) = chunk_offsets {
772                        match_each_unsigned_integer_ptype!(chunk_offsets.ptype(), |OffsetT| {
773                            let chunk_offsets = chunk_offsets.as_slice::<OffsetT>();
774                            take_indices_with_search_fn(
775                                patch_indices_slice,
776                                take_slice,
777                                take_indices
778                                    .as_ref()
779                                    .validity()?
780                                    .to_mask(take_indices.as_ref().len(), ctx)?,
781                                include_nulls,
782                                |take_idx| {
783                                    self.search_index_chunked_batch(
784                                        patch_indices_slice,
785                                        chunk_offsets,
786                                        take_idx,
787                                    )
788                                },
789                            )?
790                        })
791                    } else {
792                        take_indices_with_search_fn(
793                            patch_indices_slice,
794                            take_slice,
795                            take_indices
796                                .as_ref()
797                                .validity()?
798                                .to_mask(take_indices.as_ref().len(), ctx)?,
799                            include_nulls,
800                            |take_idx| {
801                                let Some(offset) = <PatchT as NumCast>::from(self.offset) else {
802                                    // If the offset cannot be converted to T, it's larger than all values in this array.
803                                    return Ok(SearchResult::NotFound(patch_indices_slice.len()));
804                                };
805
806                                patch_indices_slice
807                                    .search_sorted(&(take_idx + offset), SearchSortedSide::Left)
808                            },
809                        )?
810                    }
811                })
812            });
813
814        if new_indices.is_empty() {
815            return Ok(None);
816        }
817
818        let new_indices = new_indices.into_array();
819        let new_array_len = take_indices.len();
820        let values_validity = take_indices_validity.take(&new_indices)?;
821
822        Ok(Some(Self {
823            array_len: new_array_len,
824            offset: 0,
825            indices: new_indices,
826            values: self
827                .values()
828                .take(PrimitiveArray::new(values_indices, values_validity).into_array())?,
829            chunk_offsets: None,
830            offset_within_chunk: Some(0), // Reset when creating new Patches.
831        }))
832    }
833
834    pub fn take_map(
835        &self,
836        take_indices: PrimitiveArray,
837        include_nulls: bool,
838        ctx: &mut ExecutionCtx,
839    ) -> VortexResult<Option<Self>> {
840        let indices = self.indices.clone().execute::<PrimitiveArray>(ctx)?;
841        let new_length = take_indices.len();
842
843        let min_index = self.min_index()?;
844        let max_index = self.max_index()?;
845
846        let Some((new_sparse_indices, value_indices)) =
847            match_each_unsigned_integer_ptype!(indices.ptype(), |Indices| {
848                match_each_integer_ptype!(take_indices.ptype(), |TakeIndices| {
849                    let take_validity = take_indices
850                        .validity()?
851                        .execute_mask(take_indices.len(), ctx)?;
852                    let take_nullability = take_indices.validity()?.nullability();
853                    let take_slice = take_indices.as_slice::<TakeIndices>();
854                    take_map::<_, TakeIndices>(
855                        indices.as_slice::<Indices>(),
856                        take_slice,
857                        take_validity,
858                        take_nullability,
859                        self.offset(),
860                        min_index,
861                        max_index,
862                        include_nulls,
863                    )?
864                })
865            })
866        else {
867            return Ok(None);
868        };
869
870        let taken_values = self.values().take(value_indices)?;
871
872        Ok(Some(Patches {
873            array_len: new_length,
874            offset: 0,
875            indices: new_sparse_indices,
876            values: taken_values,
877            // TODO(0ax1): Chunk offsets are invalid after take is applied.
878            chunk_offsets: None,
879            offset_within_chunk: self.offset_within_chunk,
880        }))
881    }
882
883    pub fn map_values<F>(self, f: F) -> VortexResult<Self>
884    where
885        F: FnOnce(ArrayRef) -> VortexResult<ArrayRef>,
886    {
887        let values = f(self.values)?;
888        if self.indices.len() != values.len() {
889            vortex_bail!(
890                "map_values must preserve length: expected {} received {}",
891                self.indices.len(),
892                values.len()
893            )
894        }
895
896        Ok(Self {
897            array_len: self.array_len,
898            offset: self.offset,
899            indices: self.indices,
900            values,
901            chunk_offsets: self.chunk_offsets,
902            offset_within_chunk: self.offset_within_chunk,
903        })
904    }
905}
906
907#[expect(clippy::too_many_arguments)] // private function, can clean up one day
908fn take_map<I: NativePType + Hash + Eq + TryFrom<usize>, T: NativePType>(
909    indices: &[I],
910    take_indices: &[T],
911    take_validity: Mask,
912    take_nullability: Nullability,
913    indices_offset: usize,
914    min_index: usize,
915    max_index: usize,
916    include_nulls: bool,
917) -> VortexResult<Option<(ArrayRef, ArrayRef)>>
918where
919    usize: TryFrom<T>,
920    VortexError: From<<I as TryFrom<usize>>::Error>,
921{
922    let offset_i = I::try_from(indices_offset)?;
923
924    let sparse_index_to_value_index: HashMap<I, usize> = indices
925        .iter()
926        .copied()
927        .map(|idx| idx - offset_i)
928        .enumerate()
929        .map(|(value_index, sparse_index)| (sparse_index, value_index))
930        .collect();
931
932    let mut new_sparse_indices = BufferMut::<u64>::with_capacity(take_indices.len());
933    let mut value_indices = BufferMut::<u64>::with_capacity(take_indices.len());
934
935    for (idx_in_take, &take_idx) in take_indices.iter().enumerate() {
936        let ti = usize::try_from(take_idx)
937            .map_err(|_| vortex_err!("Failed to convert index to usize"))?;
938
939        // If we have to take nulls the take index doesn't matter, make it 0 for consistency
940        let is_null = match take_validity.bit_buffer() {
941            AllOr::All => false,
942            AllOr::None => true,
943            AllOr::Some(buf) => !buf.value(idx_in_take),
944        };
945        if is_null {
946            if include_nulls {
947                new_sparse_indices.push(idx_in_take as u64);
948                value_indices.push(0);
949            }
950        } else if ti >= min_index && ti <= max_index {
951            let ti_as_i = I::try_from(ti)
952                .map_err(|_| vortex_err!("take index does not fit in index type"))?;
953            if let Some(&value_index) = sparse_index_to_value_index.get(&ti_as_i) {
954                new_sparse_indices.push(idx_in_take as u64);
955                value_indices.push(value_index as u64);
956            }
957        }
958    }
959
960    if new_sparse_indices.is_empty() {
961        return Ok(None);
962    }
963
964    let new_sparse_indices = new_sparse_indices.into_array();
965    let values_validity =
966        Validity::from_mask(take_validity, take_nullability).take(&new_sparse_indices)?;
967    Ok(Some((
968        new_sparse_indices,
969        PrimitiveArray::new(value_indices, values_validity).into_array(),
970    )))
971}
972
973/// Filter patches with the provided mask (in flattened space).
974///
975/// The filter mask may contain indices that are non-patched. The return value of this function
976/// is a new set of `Patches` with the indices relative to the provided `mask` rank, and the
977/// patch values.
978fn filter_patches_with_mask<T: IntegerPType>(
979    patch_indices: &[T],
980    offset: usize,
981    patch_values: &ArrayRef,
982    mask_indices: &[usize],
983) -> VortexResult<Option<Patches>> {
984    let true_count = mask_indices.len();
985    let mut new_patch_indices = BufferMut::<u64>::with_capacity(true_count);
986    let mut new_mask_indices = Vec::with_capacity(true_count);
987
988    // Attempt to move the window by `STRIDE` elements on each iteration. This assumes that
989    // the patches are relatively sparse compared to the overall mask, and so many indices in the
990    // mask will end up being skipped.
991    const STRIDE: usize = 4;
992
993    let mut mask_idx = 0usize;
994    let mut true_idx = 0usize;
995
996    while mask_idx < patch_indices.len() && true_idx < true_count {
997        // NOTE: we are searching for overlaps between sorted, unaligned indices in `patch_indices`
998        //  and `mask_indices`. We assume that Patches are sparse relative to the global space of
999        //  the mask (which covers both patch and non-patch values of the parent array), and so to
1000        //  quickly jump through regions with no overlap, we attempt to move our pointers by STRIDE
1001        //  elements on each iteration. If we cannot rule out overlap due to min/max values, we
1002        //  fallback to performing a two-way iterator merge.
1003        if (mask_idx + STRIDE) < patch_indices.len() && (true_idx + STRIDE) < mask_indices.len() {
1004            // Load a vector of each into our registers.
1005            let left_min = patch_indices[mask_idx]
1006                .to_usize()
1007                .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1008                - offset;
1009            let left_max = patch_indices[mask_idx + STRIDE]
1010                .to_usize()
1011                .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1012                - offset;
1013            let right_min = mask_indices[true_idx];
1014            let right_max = mask_indices[true_idx + STRIDE];
1015
1016            if left_min > right_max {
1017                // Advance right side
1018                true_idx += STRIDE;
1019                continue;
1020            } else if right_min > left_max {
1021                mask_idx += STRIDE;
1022                continue;
1023            } else {
1024                // Fallthrough to direct comparison path.
1025            }
1026        }
1027
1028        // Two-way sorted iterator merge:
1029
1030        let left = patch_indices[mask_idx]
1031            .to_usize()
1032            .ok_or_else(|| vortex_err!("patch index does not fit in usize"))?
1033            - offset;
1034        let right = mask_indices[true_idx];
1035
1036        match left.cmp(&right) {
1037            Ordering::Less => {
1038                mask_idx += 1;
1039            }
1040            Ordering::Greater => {
1041                true_idx += 1;
1042            }
1043            Ordering::Equal => {
1044                // Save the mask index as well as the positional index.
1045                new_mask_indices.push(mask_idx);
1046                new_patch_indices.push(true_idx as u64);
1047
1048                mask_idx += 1;
1049                true_idx += 1;
1050            }
1051        }
1052    }
1053
1054    if new_mask_indices.is_empty() {
1055        return Ok(None);
1056    }
1057
1058    let new_patch_indices = new_patch_indices.into_array();
1059    let new_patch_values =
1060        patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1061
1062    Ok(Some(Patches::new(
1063        true_count,
1064        0,
1065        new_patch_indices,
1066        new_patch_values,
1067        // TODO(0ax1): Chunk offsets are invalid after a filter is applied.
1068        None,
1069    )?))
1070}
1071
1072fn take_indices_with_search_fn<
1073    I: UnsignedPType,
1074    T: IntegerPType,
1075    F: Fn(I) -> VortexResult<SearchResult>,
1076>(
1077    indices: &[I],
1078    take_indices: &[T],
1079    take_validity: Mask,
1080    include_nulls: bool,
1081    search_fn: F,
1082) -> VortexResult<(BufferMut<u64>, BufferMut<u64>)> {
1083    let mut values_indices = BufferMut::with_capacity(take_indices.len());
1084    let mut new_indices = BufferMut::with_capacity(take_indices.len());
1085
1086    for (new_patch_idx, &take_idx) in take_indices.iter().enumerate() {
1087        if !take_validity.value(new_patch_idx) {
1088            if include_nulls {
1089                // For nulls, patch index doesn't matter - use 0 for consistency
1090                values_indices.push(0u64);
1091                new_indices.push(new_patch_idx as u64);
1092            }
1093            continue;
1094        } else {
1095            let search_result = match I::from(take_idx) {
1096                Some(idx) => search_fn(idx)?,
1097                None => SearchResult::NotFound(indices.len()),
1098            };
1099
1100            if let Some(patch_idx) = search_result.to_found() {
1101                values_indices.push(patch_idx as u64);
1102                new_indices.push(new_patch_idx as u64);
1103            }
1104        }
1105    }
1106
1107    Ok((values_indices, new_indices))
1108}
1109
1110#[cfg(test)]
1111mod test {
1112    use vortex_buffer::BufferMut;
1113    use vortex_buffer::buffer;
1114    use vortex_mask::Mask;
1115
1116    use crate::IntoArray;
1117    use crate::LEGACY_SESSION;
1118    use crate::ToCanonical;
1119    use crate::VortexSessionExecute;
1120    use crate::assert_arrays_eq;
1121    use crate::patches::Patches;
1122    use crate::patches::PrimitiveArray;
1123    use crate::search_sorted::SearchResult;
1124    use crate::validity::Validity;
1125
1126    #[test]
1127    fn test_filter() {
1128        let patches = Patches::new(
1129            100,
1130            0,
1131            buffer![10u32, 11, 20].into_array(),
1132            buffer![100, 110, 200].into_array(),
1133            None,
1134        )
1135        .unwrap();
1136
1137        let filtered = patches
1138            .filter(
1139                &Mask::from_indices(100, vec![10, 20, 30]),
1140                &mut LEGACY_SESSION.create_execution_ctx(),
1141            )
1142            .unwrap()
1143            .unwrap();
1144
1145        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1]));
1146        assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1147    }
1148
1149    #[test]
1150    fn take_with_nulls() {
1151        let patches = Patches::new(
1152            20,
1153            0,
1154            buffer![2u64, 9, 15].into_array(),
1155            PrimitiveArray::new(buffer![33_i32, 44, 55], Validity::AllValid).into_array(),
1156            None,
1157        )
1158        .unwrap();
1159
1160        let taken = patches
1161            .take(
1162                &PrimitiveArray::new(buffer![9, 0], Validity::from_iter(vec![true, false]))
1163                    .into_array(),
1164                &mut LEGACY_SESSION.create_execution_ctx(),
1165            )
1166            .unwrap()
1167            .unwrap();
1168        let primitive_values = taken.values().to_primitive();
1169        let primitive_indices = taken.indices().to_primitive();
1170        assert_eq!(taken.array_len(), 2);
1171        assert_arrays_eq!(
1172            primitive_values,
1173            PrimitiveArray::from_option_iter([Some(44i32)])
1174        );
1175        assert_arrays_eq!(primitive_indices, PrimitiveArray::from_iter([0u64]));
1176        assert_eq!(
1177            primitive_values
1178                .as_ref()
1179                .validity()
1180                .unwrap()
1181                .to_mask(
1182                    primitive_values.as_ref().len(),
1183                    &mut LEGACY_SESSION.create_execution_ctx()
1184                )
1185                .unwrap(),
1186            Mask::from_iter(vec![true])
1187        );
1188    }
1189
1190    #[test]
1191    fn take_search_with_nulls_chunked() {
1192        let patches = Patches::new(
1193            20,
1194            0,
1195            buffer![2u64, 9, 15].into_array(),
1196            buffer![33_i32, 44, 55].into_array(),
1197            Some(buffer![0u64].into_array()),
1198        )
1199        .unwrap();
1200
1201        let taken = patches
1202            .take_search(
1203                PrimitiveArray::new(buffer![9, 0], Validity::from_iter([true, false])),
1204                true,
1205                &mut LEGACY_SESSION.create_execution_ctx(),
1206            )
1207            .unwrap()
1208            .unwrap();
1209
1210        let primitive_values = taken.values().to_primitive();
1211        assert_eq!(taken.array_len(), 2);
1212        assert_arrays_eq!(
1213            primitive_values,
1214            PrimitiveArray::from_option_iter([Some(44i32), None])
1215        );
1216        assert_arrays_eq!(taken.indices(), PrimitiveArray::from_iter([0u64, 1]));
1217
1218        assert_eq!(
1219            primitive_values
1220                .as_ref()
1221                .validity()
1222                .unwrap()
1223                .to_mask(
1224                    primitive_values.as_ref().len(),
1225                    &mut LEGACY_SESSION.create_execution_ctx()
1226                )
1227                .unwrap(),
1228            Mask::from_iter([true, false])
1229        );
1230    }
1231
1232    #[test]
1233    fn take_search_chunked_multiple_chunks() {
1234        let patches = Patches::new(
1235            2048,
1236            0,
1237            buffer![100u64, 500, 1200, 1800].into_array(),
1238            buffer![10_i32, 20, 30, 40].into_array(),
1239            Some(buffer![0u64, 2].into_array()),
1240        )
1241        .unwrap();
1242
1243        let taken = patches
1244            .take_search(
1245                PrimitiveArray::new(buffer![500, 1200, 999], Validity::AllValid),
1246                true,
1247                &mut LEGACY_SESSION.create_execution_ctx(),
1248            )
1249            .unwrap()
1250            .unwrap();
1251
1252        assert_eq!(taken.array_len(), 3);
1253        assert_arrays_eq!(
1254            taken.values(),
1255            PrimitiveArray::from_option_iter([Some(20i32), Some(30)])
1256        );
1257    }
1258
1259    #[test]
1260    fn take_search_chunked_indices_with_no_patches() {
1261        let patches = Patches::new(
1262            20,
1263            0,
1264            buffer![2u64, 9, 15].into_array(),
1265            buffer![33_i32, 44, 55].into_array(),
1266            Some(buffer![0u64].into_array()),
1267        )
1268        .unwrap();
1269
1270        let taken = patches
1271            .take_search(
1272                PrimitiveArray::new(buffer![3, 4, 5], Validity::AllValid),
1273                true,
1274                &mut LEGACY_SESSION.create_execution_ctx(),
1275            )
1276            .unwrap();
1277
1278        assert!(taken.is_none());
1279    }
1280
1281    #[test]
1282    fn take_search_chunked_interleaved() {
1283        let patches = Patches::new(
1284            30,
1285            0,
1286            buffer![5u64, 10, 20, 25].into_array(),
1287            buffer![100_i32, 200, 300, 400].into_array(),
1288            Some(buffer![0u64].into_array()),
1289        )
1290        .unwrap();
1291
1292        let taken = patches
1293            .take_search(
1294                PrimitiveArray::new(buffer![10, 15, 20, 99], Validity::AllValid),
1295                true,
1296                &mut LEGACY_SESSION.create_execution_ctx(),
1297            )
1298            .unwrap()
1299            .unwrap();
1300
1301        assert_eq!(taken.array_len(), 4);
1302        assert_arrays_eq!(
1303            taken.values(),
1304            PrimitiveArray::from_option_iter([Some(200i32), Some(300)])
1305        );
1306    }
1307
1308    #[test]
1309    fn test_take_search_multiple_chunk_offsets() {
1310        let patches = Patches::new(
1311            1500,
1312            0,
1313            BufferMut::from_iter(0..1500u64).into_array(),
1314            BufferMut::from_iter(0..1500i32).into_array(),
1315            Some(buffer![0u64, 1024u64].into_array()),
1316        )
1317        .unwrap();
1318
1319        let taken = patches
1320            .take_search(
1321                PrimitiveArray::new(BufferMut::from_iter(0..1500u64), Validity::AllValid),
1322                false,
1323                &mut LEGACY_SESSION.create_execution_ctx(),
1324            )
1325            .unwrap()
1326            .unwrap();
1327
1328        assert_eq!(taken.array_len(), 1500);
1329    }
1330
1331    #[test]
1332    fn test_slice() {
1333        let values = buffer![15_u32, 135, 13531, 42].into_array();
1334        let indices = buffer![10_u64, 11, 50, 100].into_array();
1335
1336        let patches = Patches::new(101, 0, indices, values, None).unwrap();
1337
1338        let sliced = patches.slice(15..100).unwrap().unwrap();
1339        assert_eq!(sliced.array_len(), 100 - 15);
1340        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1341    }
1342
1343    #[test]
1344    fn doubly_sliced() {
1345        let values = buffer![15_u32, 135, 13531, 42].into_array();
1346        let indices = buffer![10_u64, 11, 50, 100].into_array();
1347
1348        let patches = Patches::new(101, 0, indices, values, None).unwrap();
1349
1350        let sliced = patches.slice(15..100).unwrap().unwrap();
1351        assert_eq!(sliced.array_len(), 100 - 15);
1352        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([13531u32]));
1353
1354        let doubly_sliced = sliced.slice(35..36).unwrap().unwrap();
1355        assert_arrays_eq!(
1356            doubly_sliced.values(),
1357            PrimitiveArray::from_iter([13531u32])
1358        );
1359    }
1360
1361    #[test]
1362    fn test_mask_all_true() {
1363        let patches = Patches::new(
1364            10,
1365            0,
1366            buffer![2u64, 5, 8].into_array(),
1367            buffer![100i32, 200, 300].into_array(),
1368            None,
1369        )
1370        .unwrap();
1371
1372        let mask = Mask::new_true(10);
1373        let masked = patches
1374            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1375            .unwrap();
1376        assert!(masked.is_none());
1377    }
1378
1379    #[test]
1380    fn test_mask_all_false() {
1381        let patches = Patches::new(
1382            10,
1383            0,
1384            buffer![2u64, 5, 8].into_array(),
1385            buffer![100i32, 200, 300].into_array(),
1386            None,
1387        )
1388        .unwrap();
1389
1390        let mask = Mask::new_false(10);
1391        let masked = patches
1392            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1393            .unwrap()
1394            .unwrap();
1395
1396        // No patch values should be masked
1397        assert_arrays_eq!(
1398            masked.values(),
1399            PrimitiveArray::from_iter([100i32, 200, 300])
1400        );
1401        assert!(
1402            masked
1403                .values()
1404                .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1405                .unwrap()
1406        );
1407        assert!(
1408            masked
1409                .values()
1410                .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1411                .unwrap()
1412        );
1413        assert!(
1414            masked
1415                .values()
1416                .is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())
1417                .unwrap()
1418        );
1419
1420        // Indices should remain unchanged
1421        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1422    }
1423
1424    #[test]
1425    fn test_mask_partial() {
1426        let patches = Patches::new(
1427            10,
1428            0,
1429            buffer![2u64, 5, 8].into_array(),
1430            buffer![100i32, 200, 300].into_array(),
1431            None,
1432        )
1433        .unwrap();
1434
1435        // Mask that removes patches at indices 2 and 8 (but not 5)
1436        let mask = Mask::from_iter([
1437            false, false, true, false, false, false, false, false, true, false,
1438        ]);
1439        let masked = patches
1440            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1441            .unwrap()
1442            .unwrap();
1443
1444        // Only the patch at index 5 should remain
1445        assert_eq!(masked.values().len(), 1);
1446        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1447
1448        // Only index 5 should remain
1449        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64]));
1450    }
1451
1452    #[test]
1453    fn test_mask_with_offset() {
1454        let patches = Patches::new(
1455            10,
1456            5,                                  // offset
1457            buffer![7u64, 10, 13].into_array(), // actual indices are 2, 5, 8
1458            buffer![100i32, 200, 300].into_array(),
1459            None,
1460        )
1461        .unwrap();
1462
1463        // Mask that sets actual index 2 to null
1464        let mask = Mask::from_iter([
1465            false, false, true, false, false, false, false, false, false, false,
1466        ]);
1467
1468        let masked = patches
1469            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1470            .unwrap()
1471            .unwrap();
1472        assert_eq!(masked.array_len(), 10);
1473        assert_eq!(masked.offset(), 5);
1474        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([10u64, 13]));
1475        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32, 300]));
1476    }
1477
1478    #[test]
1479    fn test_mask_nullable_values() {
1480        let patches = Patches::new(
1481            10,
1482            0,
1483            buffer![2u64, 5, 8].into_array(),
1484            PrimitiveArray::from_option_iter([Some(100i32), None, Some(300)]).into_array(),
1485            None,
1486        )
1487        .unwrap();
1488
1489        // Test masking removes patch at index 2
1490        let mask = Mask::from_iter([
1491            false, false, true, false, false, false, false, false, false, false,
1492        ]);
1493        let masked = patches
1494            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1495            .unwrap()
1496            .unwrap();
1497
1498        // Patches at indices 5 and 8 should remain
1499        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([5u64, 8]));
1500
1501        // Values should be the null and 300
1502        let masked_values = masked.values().to_primitive();
1503        assert_eq!(masked_values.len(), 2);
1504        assert!(
1505            !masked_values
1506                .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
1507                .unwrap()
1508        ); // the null value at index 5
1509        assert!(
1510            masked_values
1511                .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
1512                .unwrap()
1513        ); // the 300 value at index 8
1514        assert_eq!(
1515            i32::try_from(
1516                &masked_values
1517                    .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1518                    .unwrap()
1519            )
1520            .unwrap(),
1521            300i32
1522        );
1523    }
1524
1525    #[test]
1526    fn test_filter_keep_all() {
1527        let patches = Patches::new(
1528            10,
1529            0,
1530            buffer![2u64, 5, 8].into_array(),
1531            buffer![100i32, 200, 300].into_array(),
1532            None,
1533        )
1534        .unwrap();
1535
1536        // Keep all indices (mask with indices 0-9)
1537        let mask = Mask::from_indices(10, (0..10).collect());
1538        let filtered = patches
1539            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1540            .unwrap()
1541            .unwrap();
1542
1543        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1544        assert_arrays_eq!(
1545            filtered.values(),
1546            PrimitiveArray::from_iter([100i32, 200, 300])
1547        );
1548    }
1549
1550    #[test]
1551    fn test_filter_none() {
1552        let patches = Patches::new(
1553            10,
1554            0,
1555            buffer![2u64, 5, 8].into_array(),
1556            buffer![100i32, 200, 300].into_array(),
1557            None,
1558        )
1559        .unwrap();
1560
1561        // Filter out all (empty mask means keep nothing)
1562        let mask = Mask::from_indices(10, vec![]);
1563        let filtered = patches
1564            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1565            .unwrap();
1566        assert!(filtered.is_none());
1567    }
1568
1569    #[test]
1570    fn test_filter_with_indices() {
1571        let patches = Patches::new(
1572            10,
1573            0,
1574            buffer![2u64, 5, 8].into_array(),
1575            buffer![100i32, 200, 300].into_array(),
1576            None,
1577        )
1578        .unwrap();
1579
1580        // Keep indices 2, 5, 9 (so patches at 2 and 5 remain)
1581        let mask = Mask::from_indices(10, vec![2, 5, 9]);
1582        let filtered = patches
1583            .filter(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1584            .unwrap()
1585            .unwrap();
1586
1587        assert_arrays_eq!(filtered.indices(), PrimitiveArray::from_iter([0u64, 1])); // Adjusted indices
1588        assert_arrays_eq!(filtered.values(), PrimitiveArray::from_iter([100i32, 200]));
1589    }
1590
1591    #[test]
1592    fn test_slice_full_range() {
1593        let patches = Patches::new(
1594            10,
1595            0,
1596            buffer![2u64, 5, 8].into_array(),
1597            buffer![100i32, 200, 300].into_array(),
1598            None,
1599        )
1600        .unwrap();
1601
1602        let sliced = patches.slice(0..10).unwrap().unwrap();
1603
1604        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1605        assert_arrays_eq!(
1606            sliced.values(),
1607            PrimitiveArray::from_iter([100i32, 200, 300])
1608        );
1609    }
1610
1611    #[test]
1612    fn test_slice_partial() {
1613        let patches = Patches::new(
1614            10,
1615            0,
1616            buffer![2u64, 5, 8].into_array(),
1617            buffer![100i32, 200, 300].into_array(),
1618            None,
1619        )
1620        .unwrap();
1621
1622        // Slice from 3 to 8 (includes patch at 5)
1623        let sliced = patches.slice(3..8).unwrap().unwrap();
1624
1625        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([5u64])); // Index stays the same
1626        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1627        assert_eq!(sliced.array_len(), 5); // 8 - 3 = 5
1628        assert_eq!(sliced.offset(), 3); // New offset
1629    }
1630
1631    #[test]
1632    fn test_slice_no_patches() {
1633        let patches = Patches::new(
1634            10,
1635            0,
1636            buffer![2u64, 5, 8].into_array(),
1637            buffer![100i32, 200, 300].into_array(),
1638            None,
1639        )
1640        .unwrap();
1641
1642        // Slice from 6 to 7 (no patches in this range)
1643        let sliced = patches.slice(6..7).unwrap();
1644        assert!(sliced.is_none());
1645    }
1646
1647    #[test]
1648    fn test_slice_with_offset() {
1649        let patches = Patches::new(
1650            10,
1651            5,                                  // offset
1652            buffer![7u64, 10, 13].into_array(), // actual indices are 2, 5, 8
1653            buffer![100i32, 200, 300].into_array(),
1654            None,
1655        )
1656        .unwrap();
1657
1658        // Slice from 3 to 8 (includes patch at actual index 5)
1659        let sliced = patches.slice(3..8).unwrap().unwrap();
1660
1661        assert_arrays_eq!(sliced.indices(), PrimitiveArray::from_iter([10u64])); // Index stays the same (offset + 5 = 10)
1662        assert_arrays_eq!(sliced.values(), PrimitiveArray::from_iter([200i32]));
1663        assert_eq!(sliced.offset(), 8); // New offset = 5 + 3
1664    }
1665
1666    #[test]
1667    fn test_patch_values() {
1668        let patches = Patches::new(
1669            10,
1670            0,
1671            buffer![2u64, 5, 8].into_array(),
1672            buffer![100i32, 200, 300].into_array(),
1673            None,
1674        )
1675        .unwrap();
1676
1677        let values = patches.values().to_primitive();
1678        assert_eq!(
1679            i32::try_from(
1680                &values
1681                    .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
1682                    .unwrap()
1683            )
1684            .unwrap(),
1685            100i32
1686        );
1687        assert_eq!(
1688            i32::try_from(
1689                &values
1690                    .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
1691                    .unwrap()
1692            )
1693            .unwrap(),
1694            200i32
1695        );
1696        assert_eq!(
1697            i32::try_from(
1698                &values
1699                    .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
1700                    .unwrap()
1701            )
1702            .unwrap(),
1703            300i32
1704        );
1705    }
1706
1707    #[test]
1708    fn test_indices_range() {
1709        let patches = Patches::new(
1710            10,
1711            0,
1712            buffer![2u64, 5, 8].into_array(),
1713            buffer![100i32, 200, 300].into_array(),
1714            None,
1715        )
1716        .unwrap();
1717
1718        assert_eq!(patches.min_index().unwrap(), 2);
1719        assert_eq!(patches.max_index().unwrap(), 8);
1720    }
1721
1722    #[test]
1723    fn test_search_index() {
1724        let patches = Patches::new(
1725            10,
1726            0,
1727            buffer![2u64, 5, 8].into_array(),
1728            buffer![100i32, 200, 300].into_array(),
1729            None,
1730        )
1731        .unwrap();
1732
1733        // Search for exact indices
1734        assert_eq!(patches.search_index(2).unwrap(), SearchResult::Found(0));
1735        assert_eq!(patches.search_index(5).unwrap(), SearchResult::Found(1));
1736        assert_eq!(patches.search_index(8).unwrap(), SearchResult::Found(2));
1737
1738        // Search for non-patch indices
1739        assert_eq!(patches.search_index(0).unwrap(), SearchResult::NotFound(0));
1740        assert_eq!(patches.search_index(3).unwrap(), SearchResult::NotFound(1));
1741        assert_eq!(patches.search_index(6).unwrap(), SearchResult::NotFound(2));
1742        assert_eq!(patches.search_index(9).unwrap(), SearchResult::NotFound(3));
1743    }
1744
1745    #[test]
1746    fn test_mask_boundary_patches() {
1747        // Test masking patches at array boundaries
1748        let patches = Patches::new(
1749            10,
1750            0,
1751            buffer![0u64, 9].into_array(),
1752            buffer![100i32, 200].into_array(),
1753            None,
1754        )
1755        .unwrap();
1756
1757        let mask = Mask::from_iter([
1758            true, false, false, false, false, false, false, false, false, false,
1759        ]);
1760        let masked = patches
1761            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1762            .unwrap();
1763        assert!(masked.is_some());
1764        let masked = masked.unwrap();
1765        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([9u64]));
1766        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([200i32]));
1767    }
1768
1769    #[test]
1770    fn test_mask_all_patches_removed() {
1771        // Test when all patches are masked out
1772        let patches = Patches::new(
1773            10,
1774            0,
1775            buffer![2u64, 5, 8].into_array(),
1776            buffer![100i32, 200, 300].into_array(),
1777            None,
1778        )
1779        .unwrap();
1780
1781        // Mask that removes all patches
1782        let mask = Mask::from_iter([
1783            false, false, true, false, false, true, false, false, true, false,
1784        ]);
1785        let masked = patches
1786            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1787            .unwrap();
1788        assert!(masked.is_none());
1789    }
1790
1791    #[test]
1792    fn test_mask_no_patches_removed() {
1793        // Test when no patches are masked
1794        let patches = Patches::new(
1795            10,
1796            0,
1797            buffer![2u64, 5, 8].into_array(),
1798            buffer![100i32, 200, 300].into_array(),
1799            None,
1800        )
1801        .unwrap();
1802
1803        // Mask that doesn't affect any patches
1804        let mask = Mask::from_iter([
1805            true, false, false, true, false, false, true, false, false, true,
1806        ]);
1807        let masked = patches
1808            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1809            .unwrap()
1810            .unwrap();
1811
1812        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64, 5, 8]));
1813        assert_arrays_eq!(
1814            masked.values(),
1815            PrimitiveArray::from_iter([100i32, 200, 300])
1816        );
1817    }
1818
1819    #[test]
1820    fn test_mask_single_patch() {
1821        // Test with a single patch
1822        let patches = Patches::new(
1823            5,
1824            0,
1825            buffer![2u64].into_array(),
1826            buffer![42i32].into_array(),
1827            None,
1828        )
1829        .unwrap();
1830
1831        // Mask that removes the single patch
1832        let mask = Mask::from_iter([false, false, true, false, false]);
1833        let masked = patches
1834            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1835            .unwrap();
1836        assert!(masked.is_none());
1837
1838        // Mask that keeps the single patch
1839        let mask = Mask::from_iter([true, false, false, true, false]);
1840        let masked = patches
1841            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1842            .unwrap()
1843            .unwrap();
1844        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([2u64]));
1845    }
1846
1847    #[test]
1848    fn test_mask_contiguous_patches() {
1849        // Test with contiguous patches
1850        let patches = Patches::new(
1851            10,
1852            0,
1853            buffer![3u64, 4, 5, 6].into_array(),
1854            buffer![100i32, 200, 300, 400].into_array(),
1855            None,
1856        )
1857        .unwrap();
1858
1859        // Mask that removes middle patches
1860        let mask = Mask::from_iter([
1861            false, false, false, false, true, true, false, false, false, false,
1862        ]);
1863        let masked = patches
1864            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1865            .unwrap()
1866            .unwrap();
1867
1868        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([3u64, 6]));
1869        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 400]));
1870    }
1871
1872    #[test]
1873    fn test_mask_with_large_offset() {
1874        // Test with a large offset that shifts all indices
1875        let patches = Patches::new(
1876            20,
1877            15,
1878            buffer![16u64, 17, 19].into_array(), // actual indices are 1, 2, 4
1879            buffer![100i32, 200, 300].into_array(),
1880            None,
1881        )
1882        .unwrap();
1883
1884        // Mask that removes the patch at actual index 2
1885        let mask = Mask::from_iter([
1886            false, false, true, false, false, false, false, false, false, false, false, false,
1887            false, false, false, false, false, false, false, false,
1888        ]);
1889        let masked = patches
1890            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1891            .unwrap()
1892            .unwrap();
1893
1894        assert_arrays_eq!(masked.indices(), PrimitiveArray::from_iter([16u64, 19]));
1895        assert_arrays_eq!(masked.values(), PrimitiveArray::from_iter([100i32, 300]));
1896    }
1897
1898    #[test]
1899    #[should_panic(expected = "Filter mask length 5 does not match array length 10")]
1900    fn test_mask_wrong_length() {
1901        let patches = Patches::new(
1902            10,
1903            0,
1904            buffer![2u64, 5, 8].into_array(),
1905            buffer![100i32, 200, 300].into_array(),
1906            None,
1907        )
1908        .unwrap();
1909
1910        // Mask with wrong length
1911        let mask = Mask::from_iter([false, false, true, false, false]);
1912        patches
1913            .mask(&mask, &mut LEGACY_SESSION.create_execution_ctx())
1914            .unwrap();
1915    }
1916
1917    #[test]
1918    fn test_chunk_offsets_search() {
1919        let indices = buffer![100u64, 200, 3000, 3100].into_array();
1920        let values = buffer![10i32, 20, 30, 40].into_array();
1921        let chunk_offsets = buffer![0u64, 2, 2, 3].into_array();
1922        let patches = Patches::new(4096, 0, indices, values, Some(chunk_offsets)).unwrap();
1923
1924        assert!(patches.chunk_offsets.is_some());
1925
1926        // chunk 0: patches at 100, 200
1927        assert_eq!(patches.search_index(100).unwrap(), SearchResult::Found(0));
1928        assert_eq!(patches.search_index(200).unwrap(), SearchResult::Found(1));
1929
1930        // chunks 1, 2: no patches
1931        assert_eq!(
1932            patches.search_index(1500).unwrap(),
1933            SearchResult::NotFound(2)
1934        );
1935        assert_eq!(
1936            patches.search_index(2000).unwrap(),
1937            SearchResult::NotFound(2)
1938        );
1939
1940        // chunk 3: patches at 3000, 3100
1941        assert_eq!(patches.search_index(3000).unwrap(), SearchResult::Found(2));
1942        assert_eq!(patches.search_index(3100).unwrap(), SearchResult::Found(3));
1943
1944        assert_eq!(
1945            patches.search_index(1024).unwrap(),
1946            SearchResult::NotFound(2)
1947        );
1948    }
1949
1950    #[test]
1951    fn test_chunk_offsets_with_slice() {
1952        let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
1953        let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
1954        let chunk_offsets = buffer![0u64, 2, 6].into_array();
1955        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
1956
1957        let sliced = patches.slice(1000..2200).unwrap().unwrap();
1958
1959        assert!(sliced.chunk_offsets.is_some());
1960        assert_eq!(sliced.offset(), 1000);
1961
1962        assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(0));
1963        assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
1964        assert_eq!(sliced.search_index(1100).unwrap(), SearchResult::Found(4));
1965
1966        assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(1));
1967    }
1968
1969    #[test]
1970    fn test_chunk_offsets_with_slice_after_first_chunk() {
1971        let indices = buffer![100u64, 500, 1200, 1300, 1500, 1800, 2100, 2500].into_array();
1972        let values = buffer![10i32, 20, 30, 35, 40, 45, 50, 60].into_array();
1973        let chunk_offsets = buffer![0u64, 2, 6].into_array();
1974        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
1975
1976        let sliced = patches.slice(1300..2200).unwrap().unwrap();
1977
1978        assert!(sliced.chunk_offsets.is_some());
1979        assert_eq!(sliced.offset(), 1300);
1980
1981        assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
1982        assert_eq!(sliced.search_index(200).unwrap(), SearchResult::Found(1));
1983        assert_eq!(sliced.search_index(500).unwrap(), SearchResult::Found(2));
1984        assert_eq!(sliced.search_index(250).unwrap(), SearchResult::NotFound(2));
1985        assert_eq!(sliced.search_index(900).unwrap(), SearchResult::NotFound(4));
1986    }
1987
1988    #[test]
1989    fn test_chunk_offsets_slice_empty_result() {
1990        let indices = buffer![100u64, 200, 3000, 3100].into_array();
1991        let values = buffer![10i32, 20, 30, 40].into_array();
1992        let chunk_offsets = buffer![0u64, 2].into_array();
1993        let patches = Patches::new(4000, 0, indices, values, Some(chunk_offsets)).unwrap();
1994
1995        let sliced = patches.slice(1000..2000).unwrap();
1996        assert!(sliced.is_none());
1997    }
1998
1999    #[test]
2000    fn test_chunk_offsets_slice_single_patch() {
2001        let indices = buffer![100u64, 1200, 1300, 2500].into_array();
2002        let values = buffer![10i32, 20, 30, 40].into_array();
2003        let chunk_offsets = buffer![0u64, 1, 3].into_array();
2004        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2005
2006        let sliced = patches.slice(1100..1250).unwrap().unwrap();
2007
2008        assert_eq!(sliced.num_patches(), 1);
2009        assert_eq!(sliced.offset(), 1100);
2010        assert_eq!(sliced.search_index(100).unwrap(), SearchResult::Found(0)); // 1200 - 1100 = 100
2011        assert_eq!(sliced.search_index(50).unwrap(), SearchResult::NotFound(0));
2012        assert_eq!(sliced.search_index(150).unwrap(), SearchResult::NotFound(1));
2013    }
2014
2015    #[test]
2016    fn test_chunk_offsets_slice_across_chunks() {
2017        let indices = buffer![100u64, 200, 1100, 1200, 2100, 2200].into_array();
2018        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2019        let chunk_offsets = buffer![0u64, 2, 4].into_array();
2020        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2021
2022        let sliced = patches.slice(150..2150).unwrap().unwrap();
2023
2024        assert_eq!(sliced.num_patches(), 4);
2025        assert_eq!(sliced.offset(), 150);
2026
2027        assert_eq!(sliced.search_index(50).unwrap(), SearchResult::Found(0)); // 200 - 150 = 50
2028        assert_eq!(sliced.search_index(950).unwrap(), SearchResult::Found(1)); // 1100 - 150 = 950
2029        assert_eq!(sliced.search_index(1050).unwrap(), SearchResult::Found(2)); // 1200 - 150 = 1050
2030        assert_eq!(sliced.search_index(1950).unwrap(), SearchResult::Found(3)); // 2100 - 150 = 1950
2031    }
2032
2033    #[test]
2034    fn test_chunk_offsets_boundary_searches() {
2035        let indices = buffer![1023u64, 1024, 1025, 2047, 2048].into_array();
2036        let values = buffer![10i32, 20, 30, 40, 50].into_array();
2037        let chunk_offsets = buffer![0u64, 1, 4].into_array();
2038        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2039
2040        assert_eq!(patches.search_index(1023).unwrap(), SearchResult::Found(0));
2041        assert_eq!(patches.search_index(1024).unwrap(), SearchResult::Found(1));
2042        assert_eq!(patches.search_index(1025).unwrap(), SearchResult::Found(2));
2043        assert_eq!(patches.search_index(2047).unwrap(), SearchResult::Found(3));
2044        assert_eq!(patches.search_index(2048).unwrap(), SearchResult::Found(4));
2045
2046        assert_eq!(
2047            patches.search_index(1022).unwrap(),
2048            SearchResult::NotFound(0)
2049        );
2050        assert_eq!(
2051            patches.search_index(2046).unwrap(),
2052            SearchResult::NotFound(3)
2053        );
2054    }
2055
2056    #[test]
2057    fn test_chunk_offsets_slice_edge_cases() {
2058        let indices = buffer![0u64, 1, 1023, 1024, 2047, 2048].into_array();
2059        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2060        let chunk_offsets = buffer![0u64, 3, 5].into_array();
2061        let patches = Patches::new(3000, 0, indices, values, Some(chunk_offsets)).unwrap();
2062
2063        // Slice at the very beginning
2064        let sliced = patches.slice(0..10).unwrap().unwrap();
2065        assert_eq!(sliced.num_patches(), 2);
2066        assert_eq!(sliced.search_index(0).unwrap(), SearchResult::Found(0));
2067        assert_eq!(sliced.search_index(1).unwrap(), SearchResult::Found(1));
2068
2069        // Slice at the very end
2070        let sliced = patches.slice(2040..3000).unwrap().unwrap();
2071        assert_eq!(sliced.num_patches(), 2); // patches at 2047 and 2048
2072        assert_eq!(sliced.search_index(7).unwrap(), SearchResult::Found(0)); // 2047 - 2040
2073        assert_eq!(sliced.search_index(8).unwrap(), SearchResult::Found(1)); // 2048 - 2040
2074    }
2075
2076    #[test]
2077    fn test_chunk_offsets_slice_nested() {
2078        let indices = buffer![100u64, 200, 300, 400, 500, 600].into_array();
2079        let values = buffer![10i32, 20, 30, 40, 50, 60].into_array();
2080        let chunk_offsets = buffer![0u64].into_array();
2081        let patches = Patches::new(1000, 0, indices, values, Some(chunk_offsets)).unwrap();
2082
2083        let sliced1 = patches.slice(150..550).unwrap().unwrap();
2084        assert_eq!(sliced1.num_patches(), 4); // 200, 300, 400, 500
2085
2086        let sliced2 = sliced1.slice(100..250).unwrap().unwrap();
2087        assert_eq!(sliced2.num_patches(), 1); // 300
2088        assert_eq!(sliced2.offset(), 250);
2089
2090        assert_eq!(sliced2.search_index(50).unwrap(), SearchResult::Found(0)); // 300 - 250
2091        assert_eq!(
2092            sliced2.search_index(150).unwrap(),
2093            SearchResult::NotFound(1)
2094        );
2095    }
2096
2097    #[test]
2098    fn test_nested_slice_with_dropped_first_chunk() {
2099        // PATCH_CHUNK_SIZE = 1024, so the two patches land in different chunks.
2100        let indices = buffer![0u64, 1024].into_array();
2101        let values = buffer![1i32, 2].into_array();
2102        let chunk_offsets = buffer![0u64, 1].into_array();
2103        let patches = Patches::new(2048, 0, indices, values, Some(chunk_offsets)).unwrap();
2104
2105        // Drop chunk 0, then re-slice the result.
2106        let dropped_first = patches.slice(1024..2048).unwrap().unwrap();
2107        let resliced = dropped_first.slice(0..1024).unwrap().unwrap();
2108        assert_eq!(resliced.num_patches(), 1);
2109    }
2110
2111    #[test]
2112    fn test_index_larger_than_length() {
2113        let chunk_offsets = buffer![0u64].into_array();
2114        let indices = buffer![1023u64].into_array();
2115        let values = buffer![42i32].into_array();
2116        let patches = Patches::new(1024, 0, indices, values, Some(chunk_offsets)).unwrap();
2117        assert_eq!(
2118            patches.search_index(2048).unwrap(),
2119            SearchResult::NotFound(1)
2120        );
2121    }
2122}