minarrow/structs/views/
array_view.rs

1//! # **ArrayView Module** - *Windowed View over an Array*
2//!
3//! `ArrayV` is a **logical, read-only, zero-copy view** into a contiguous window
4//! `[offset .. offset + len)` of any [`Array`] variant.
5//!
6//! ## Purpose
7//! - Provides indexable, bounds-checked access to a subrange of an array without copying buffers.
8//! - Caches null counts per view for efficient repeated queries.
9//! - Acts as a unifying abstraction for windowed operations across all array types.
10//!
11//! ## Behaviour
12//! - All indices are **relative** to the view's start.
13//! - Internally retains an `Arc` reference to the parent array's buffers.
14//! - Windowing and slicing are O(1) operations (pointer + metadata updates only).
15//! - Cached null counts are stored in a `Cell` for fast repeated access.
16//!
17//! ## Threading
18//! - Not thread-safe due to `Cell`.
19//! - For parallelism, create per-thread clones with [`slice`](ArrayV::slice).
20//!
21//! ## Interop
22//! - Convert back to a full array via [`to_array`](ArrayV::to_array).
23//! - Promote to `(Array, offset, len)` tuple with [`as_tuple`](ArrayV::as_tuple).
24//! - Access raw data pointer and element size via [`data_ptr_and_byte_len`](ArrayV::data_ptr_and_byte_len).
25//!
26//! ## Invariants
27//! - `offset + len <= array.len()`
28//! - `len` reflects the **logical** number of elements in the view.
29
30use std::cell::Cell;
31use std::fmt::{self, Debug, Display, Formatter};
32
33use crate::traits::print::MAX_PREVIEW;
34use crate::{Array, BitmaskV, FieldArray, MaskedArray, TextArray};
35
36/// # ArrayView
37/// 
38/// Logical, windowed view over an `Array`.
39///
40/// ## Purpose
41/// This is used to return an indexable view over a subset of the array.
42/// Additionally, it can be used to cache null counts for those regions,
43/// which can be used to speed up calculations.
44///
45/// ## Behaviour
46/// - Indices are always relative to the window.
47/// - Holds a reference to the original `Array` and window bounds.
48/// - Windowing uses an arc clone
49/// - All access (get/index, etc.) is offset-correct and bounds-checked.
50/// - Null count is computed once (on demand or at creation) and cached for subsequent use.
51/// 
52/// ## Notes
53/// - Use [`slice`](Self::slice) to derive smaller views without data copy.
54/// - Use [`to_array`](Self::to_array) to materialise as an owned array.
55#[derive(Clone, PartialEq)]
56pub struct ArrayV {
57    pub array: Array, // contains Arc<inner>
58    pub offset: usize,
59    len: usize,
60    null_count: Cell<Option<usize>>
61}
62
63impl ArrayV {
64    /// Construct a windowed view of `array[offset..offset+len)`, with optional precomputed null count.
65    #[inline]
66    pub fn new(array: Array, offset: usize, len: usize) -> Self {
67        assert!(
68            offset + len <= array.len(),
69            "ArrayView: window out of bounds (offset + len = {}, array.len = {})",
70            offset + len,
71            array.len()
72        );
73        Self {
74            array,
75            offset,
76            len,
77            null_count: Cell::new(None)
78        }
79    }
80
81    /// Construct a windowed view, supplying a precomputed null count.
82    #[inline]
83    pub fn with_null_count(array: Array, offset: usize, len: usize, null_count: usize) -> Self {
84        assert!(
85            offset + len <= array.len(),
86            "ArrayView: window out of bounds (offset + len = {}, array.len = {})",
87            offset + len,
88            array.len()
89        );
90        Self {
91            array,
92            offset,
93            len,
94            null_count: Cell::new(Some(null_count))
95        }
96    }
97
98    /// Return the logical length of the view.
99    #[inline]
100    pub fn len(&self) -> usize {
101        self.len
102    }
103
104    /// Returns true if the view is empty.
105    #[inline]
106    pub fn is_empty(&self) -> bool {
107        self.len == 0
108    }
109
110    /// Returns the value at logical index `i` within the window, or `None` if out of bounds or null.
111    #[inline]
112    pub fn get<T: MaskedArray + 'static>(&self, i: usize) -> Option<T::CopyType> {
113        if i >= self.len {
114            return None;
115        }
116        self.array.inner::<T>().get(self.offset + i)
117    }
118
119    /// Returns the value at logical index `i` within the window (unchecked).
120    #[inline]
121    pub fn get_unchecked<T: MaskedArray + 'static>(&self, i: usize) -> Option<T::CopyType> {
122        unsafe { self.array.inner::<T>().get_unchecked(self.offset + i) }
123    }
124
125    /// Returns the string value at logical index `i` within the window, or `None` if out of bounds or null.
126    #[inline]
127    pub fn get_str(&self, i: usize) -> Option<&str> {
128        if i >= self.len {
129            return None;
130        }
131        match &self.array {
132            Array::TextArray(TextArray::String32(arr)) => arr.get_str(self.offset + i),
133            #[cfg(feature = "large_string")]
134            Array::TextArray(TextArray::String64(arr)) => arr.get_str(self.offset + i),
135            #[cfg(feature = "extended_categorical")]
136            Array::TextArray(TextArray::Categorical8(arr)) => arr.get_str(self.offset + i),
137            #[cfg(feature = "extended_categorical")]
138            Array::TextArray(TextArray::Categorical16(arr)) => arr.get_str(self.offset + i),
139            Array::TextArray(TextArray::Categorical32(arr)) => arr.get_str(self.offset + i),
140            #[cfg(feature = "extended_categorical")]
141            Array::TextArray(TextArray::Categorical64(arr)) => arr.get_str(self.offset + i),
142            _ => None
143        }
144    }
145
146    /// Returns the string value at logical index `i` within the window.
147    ///
148    /// # Safety
149    /// Skips bounds checks, but will still return `None` if null.
150    #[inline]
151    pub unsafe fn get_str_unchecked(&self, i: usize) -> Option<&str> {
152        match &self.array {
153            Array::TextArray(TextArray::String32(arr)) => {
154                if arr.is_null(self.offset + i) {
155                    None
156                } else {
157                    Some(unsafe { arr.get_str_unchecked(self.offset + i) })
158                }
159            }
160            #[cfg(feature = "large_string")]
161            Array::TextArray(TextArray::String64(arr)) => {
162                if arr.is_null(self.offset + i) {
163                    None
164                } else {
165                    Some(unsafe { arr.get_str_unchecked(self.offset + i) })
166                }
167            }
168            #[cfg(feature = "extended_categorical")]
169            Array::TextArray(TextArray::Categorical8(arr)) => {
170                if arr.is_null(self.offset + i) {
171                    None
172                } else {
173                    Some(unsafe { arr.get_str_unchecked(self.offset + i) })
174                }
175            }
176            #[cfg(feature = "extended_categorical")]
177            Array::TextArray(TextArray::Categorical16(arr)) => {
178                if arr.is_null(self.offset + i) {
179                    None
180                } else {
181                    Some(unsafe { arr.get_str_unchecked(self.offset + i) })
182                }
183            }
184            Array::TextArray(TextArray::Categorical32(arr)) => {
185                if arr.is_null(self.offset + i) {
186                    None
187                } else {
188                    Some(unsafe { arr.get_str_unchecked(self.offset + i) })
189                }
190            }
191            #[cfg(feature = "extended_categorical")]
192            Array::TextArray(TextArray::Categorical64(arr)) => {
193                if arr.is_null(self.offset + i) {
194                    None
195                } else {
196                    Some(unsafe { arr.get_str_unchecked(self.offset + i) })
197                }
198            }
199            _ => None
200        }
201    }
202
203    /// Returns a new window view into a sub-range of this view.
204    #[inline]
205    pub fn slice(&self, offset: usize, len: usize) -> Self {
206        assert!(offset + len <= self.len, "ArrayView::slice: out of bounds");
207        Self {
208            array: self.array.clone(), // arc clone
209            offset: self.offset + offset,
210            len,
211            null_count: Cell::new(None)
212        }
213    }
214
215    /// Materialise a deep copy as an owned `Array` for the window.
216    #[inline]
217    pub fn to_array(&self) -> Array {
218        self.array.slice_clone(self.offset, self.len)
219    }
220
221    /// Returns a pointer and metadata for raw access
222    ///
223    /// This is not logical length - it is total raw bytes in the buffer,
224    /// so for non-fixed width types such as bit-packed booleans
225    /// or strings, please factor this in accordingly.
226    #[inline]
227    pub fn data_ptr_and_byte_len(&self) -> (*const u8, usize, usize) {
228        let (ptr, _total_len, elem_size) = self.array.data_ptr_and_byte_len();
229        let windowed_ptr = unsafe { ptr.add(self.offset * elem_size) };
230        (windowed_ptr, self.len, elem_size)
231    }
232
233    /// Returns the exclusive end index of the window (relative to parent array).
234    #[inline]
235    pub fn end(&self) -> usize {
236        self.offset + self.len
237    }
238
239    /// Returns the underlying window as a tuple: (&Array, offset, len).
240    #[inline]
241    pub fn as_tuple(&self) -> (Array, usize, usize) {
242        (self.array.clone(), self.offset, self.len) // arc clone
243    }
244
245    /// Returns the null count in the window, caching the result after first calculation.
246    #[inline]
247    pub fn null_count(&self) -> usize {
248        if let Some(count) = self.null_count.get() {
249            return count;
250        }
251        let count = match self.array.null_mask() {
252            Some(mask) => mask.view(self.offset, self.len).count_zeros(),
253            None => 0
254        };
255        self.null_count.set(Some(count));
256        count
257    }
258
259    /// Returns a windowed view over the underlying null mask, if any.
260    #[inline]
261    pub fn null_mask_view(&self) -> Option<BitmaskV> {
262        self.array.null_mask().map(|mask| mask.view(self.offset, self.len))
263    }
264
265    /// Set the cached null count (advanced use only; not thread-safe if mutated after use).
266    #[inline]
267    pub fn set_null_count(&self, count: usize) {
268        self.null_count.set(Some(count));
269    }
270}
271
272/// Array -> ArrayView
273///
274/// Uses Offset 0 and length self.len()
275impl From<Array> for ArrayV {
276    fn from(array: Array) -> Self {
277        let len = array.len();
278        ArrayV {
279            array,
280            offset: 0,
281            len,
282            null_count: Cell::new(None)
283        }
284    }
285}
286
287/// FieldArray -> ArrayView
288///
289/// Takes self.array then offset 0, length self.len())
290impl From<FieldArray> for ArrayV {
291    fn from(field_array: FieldArray) -> Self {
292        let len = field_array.len();
293        ArrayV {
294            array: field_array.array,
295            offset: 0,
296            len,
297            null_count: Cell::new(None)
298        }
299    }
300}
301
302// We do not implement `Index` as `ArrayView` cannot safely return
303// a reference to an element.
304
305impl Debug for ArrayV {
306    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
307        f.debug_struct("ArrayView")
308            .field("offset", &self.offset)
309            .field("len", &self.len)
310            .field("array", &self.array)
311            .field("cached_null_count", &self.null_count.get())
312            .finish()
313    }
314}
315
316impl Display for ArrayV {
317    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
318        let nulls = self.null_count();
319        let head_len = self.len.min(MAX_PREVIEW);
320
321        writeln!(
322            f,
323            "ArrayView [{} values] (offset: {}, nulls: {})",
324            self.len, self.offset, nulls
325        )?;
326
327        // Take a view into the head_len elements
328        let display_view = Self {
329            array: self.array.clone(), // arc clone
330            offset: self.offset,
331            len: head_len,
332            null_count: self.null_count.clone(),
333        };
334
335        // Delegate to the inner array's Display
336        for line in format!("{display_view}").lines() {
337            writeln!(f, "  {line}")?;
338        }
339
340        if self.len > MAX_PREVIEW {
341            writeln!(f, "  ... ({} more rows)", self.len - MAX_PREVIEW)?;
342        }
343
344        Ok(())
345    }
346}
347
348
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use crate::{Array, Bitmask, IntegerArray, NumericArray, vec64};

    /// Shorthand for the concrete inner type used by every test below.
    type Int32 = IntegerArray<i32>;

    /// Builds an `Int32` array by pushing `values` in order, attaching `null_mask`
    /// afterwards when one is given.
    fn int32_array(values: impl IntoIterator<Item = i32>, null_mask: Option<Bitmask>) -> Array {
        let mut ints = Int32::default();
        for value in values {
            ints.push(value);
        }
        if let Some(mask) = null_mask {
            ints.null_mask = Some(mask);
        }
        Array::NumericArray(NumericArray::Int32(Arc::new(ints)))
    }

    #[test]
    fn test_array_view_basic_indexing_and_slice() {
        let view = ArrayV::new(int32_array([11, 22, 33, 44], None), 1, 2);

        // The window covers parent indices 1 and 2.
        assert_eq!(view.len(), 2);
        assert_eq!(view.offset, 1);
        assert_eq!(view.get::<Int32>(0), Some(22));
        assert_eq!(view.get::<Int32>(1), Some(33));
        // One past the window is rejected even though the parent has an element there.
        assert_eq!(view.get::<Int32>(2), None);

        // A sub-slice is addressed relative to the view, not the parent.
        let sub = view.slice(1, 1);
        assert_eq!(sub.len(), 1);
        assert_eq!(sub.get::<Int32>(0), Some(33));
        assert_eq!(sub.get::<Int32>(1), None);
    }

    #[test]
    fn test_array_view_null_count_and_cache() {
        // Mark index 2 as the only null (a cleared bit means null).
        let mut mask = Bitmask::new_set_all(4, true);
        mask.set(2, false);

        let view = ArrayV::new(int32_array([1, 2, 3, 4], Some(mask)), 0, 4);
        assert_eq!(view.null_count(), 1, "Null count should detect one null");
        // Second call is served from the cache and must agree.
        assert_eq!(view.null_count(), 1);

        // Sub-window [0, 2) lies entirely before the null.
        let before_null = view.slice(0, 2);
        assert_eq!(before_null.null_count(), 0);
        // Sub-window [2, 4) contains the single null at index 2.
        let with_null = view.slice(2, 2);
        assert_eq!(with_null.null_count(), 1);
    }

    #[test]
    fn test_array_view_with_supplied_null_count() {
        let view = ArrayV::with_null_count(int32_array([5, 6], None), 0, 2, 99);
        // The supplied value is reported verbatim, without consulting any mask.
        assert_eq!(view.null_count(), 99);
        // Overwriting the cache changes what is reported.
        view.set_null_count(101);
        assert_eq!(view.null_count(), 101);
    }

    #[test]
    fn test_array_view_to_array_and_as_tuple() {
        let array = int32_array(10..20, None);
        let view = ArrayV::new(array.clone(), 4, 3);

        // The materialised copy holds exactly the window: [14, 15, 16].
        match view.to_array() {
            Array::NumericArray(NumericArray::Int32(copied)) => {
                assert_eq!(copied.data, vec64![14, 15, 16]);
            }
            _ => panic!("Unexpected variant")
        }

        // as_tuple hands back the parent array together with the window bounds.
        let (parent, offset, len) = view.as_tuple();
        assert_eq!(&parent, &array);
        assert_eq!(offset, 4);
        assert_eq!(len, 3);
    }

    #[test]
    fn test_array_view_null_mask_view() {
        // Index 0 is null; indices 1 and 2 are valid.
        let mut mask = Bitmask::new_set_all(3, true);
        mask.set(0, false);

        let view = ArrayV::new(int32_array([2, 4, 6], Some(mask)), 1, 2);
        let mask_view = view.null_mask_view().expect("Should have mask");
        assert_eq!(mask_view.len(), 2);
        // The mask view maps to bits 1 and 2 of the parent mask, both set.
        assert!(mask_view.get(0));
        assert!(mask_view.get(1));
    }
}
465}