minarrow/structs/
field_array.rs

//! # **FieldArray Module** - *De-facto column array type with tagged Arrow metadata*
2//!
3//! Couples a `Field` (array-level schema metadata) with an immutable `Array` of values.
4//!
5//! Used as the primary column representation in `Minarrow` tables, ensuring
6//! schema and data remain consistent.  
7//!
8//! Supports creation from raw components or by inferring schema from arrays,
9//! and is the unit transferred over Arrow FFI or to external libraries
10//! such as Apache Arrow or Polars.
11
12use std::collections::BTreeMap;
13use std::fmt::{Display, Formatter};
14use std::sync::Arc;
15
16#[cfg(feature = "cast_arrow")]
17use arrow::array::ArrayRef;
18#[cfg(feature = "cast_polars")]
19use polars::series::Series;
20
21#[cfg(feature = "views")]
22use crate::aliases::FieldAVT;
23use crate::ffi::arrow_dtype::ArrowType;
24use crate::{Array, Field};
25
26
/// # FieldArray
///
/// Named and typed data column with associated array values.
///
/// ## Role
/// - Combines a `Field` with an immutable `Array` instance.
/// - `FieldArray` integrates naturally into a `Table`, where immutability enforces row-length guarantees.
///   It can also serve as a self-documenting array and is required when sending `Minarrow` data
///   over FFI to `Apache Arrow`. In such cases, it's worth ensuring the correct logical `Datetime` Arrow type
///   is built when constructing the `Field`, as this determines the `Arrow` type on the receiving side.
///
/// ## Example
/// ```rust
/// use minarrow::{Array, Field, FieldArray, MaskedArray};
/// use minarrow::structs::field_array::{field_array};
/// use minarrow::ffi::arrow_dtype::ArrowType;
/// use minarrow::structs::variants::integer::IntegerArray;
///
/// // Build a typed array
/// let mut ints = IntegerArray::<i32>::default();
/// ints.push(1);
/// ints.push(2);
/// let arr = Array::from_int32(ints);
///
/// // Fast constructor - infers type/nullability
/// let fa = field_array("id", arr.clone());
///
/// assert_eq!(fa.field.name, "id");
/// assert_eq!(fa.arrow_type(), ArrowType::Int32);
/// assert_eq!(fa.len(), 2);
///
/// // Take an owned slice [offset..offset+len)
/// let sub = fa.slice_clone(0, 1);
/// assert_eq!(sub.len(), 1);
///
/// // Standard constructor
///
/// // Describe it with a Field and wrap as FieldArray
/// let field = Field::new("id", ArrowType::Int32, false, None);
/// let fa = FieldArray::new(field, arr);
///
/// assert_eq!(fa.field.name, "id");
/// assert_eq!(fa.arrow_type(), ArrowType::Int32);
/// assert_eq!(fa.len(), 2);
///
/// // Take an owned slice [offset..offset+len)
/// let sub = fa.slice_clone(0, 1);
/// assert_eq!(sub.len(), 1);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct FieldArray {
    /// Array-level schema metadata (name, dtype, nullability, key/value metadata),
    /// held behind an `Arc` so clones of the `FieldArray` share one `Field`.
    pub field: Arc<Field>,

    /// The array's inner payload is wrapped in Arc for immutability
    /// so it can safely share across threads.
    /// When part of a Table *(or higher-dimensional structure)*,
    /// immutability also upholds shape constraints.
    pub array: Array,

    /// Null count for the immutable array to support skipping null-mask
    /// operations when it's `0`, and/or related strategies.
    ///
    /// This is a cached copy of `array.null_count()` taken at construction time.
    /// Because `array` is a public field, mutating it directly leaves this value
    /// stale until `refresh_null_count()` is called.
    pub null_count: usize
}
91
92impl FieldArray {
93    /// Constructs a new `FieldArray` from an existing `Field` and `Array`.
94    pub fn new(field: Field, array: Array) -> Self {
95        let null_count = array.null_count();
96        FieldArray { field: field.into(), array, null_count }
97    }
98
99    /// Constructs a new `FieldArray` from an existing `Arc<Field>` and `Array`.
100    pub fn new_arc(field: Arc<Field>, array: Array) -> Self {
101        let null_count = array.null_count();
102        FieldArray { field: field, array, null_count }
103    }
104
105    /// Constructs a new `FieldArray` from a name and any supported typed array,
106    /// automatically wrapping as `Array` and inferring type and nullability.
107    pub fn from_inner<N, A>(name: N, arr: A) -> Self
108    where
109        N: Into<String>,
110        A: Into<Array>
111    {
112        let array: Array = arr.into();
113        let dtype = array.arrow_type();
114        let nullable = array.is_nullable();
115        let field = Field::new(name, dtype, nullable, None);
116        FieldArray::new(field, array)
117    }
118
119    /// Constructs a new `FieldArray` from raw field components and an `Array`.
120    pub fn from_parts<T: Into<String>>(
121        field_name: T,
122        dtype: ArrowType,
123        nullable: Option<bool>,
124        metadata: Option<BTreeMap<String, String>>,
125        array: Array
126    ) -> Self {
127        let null_count = array.null_count();
128        let field = Field {
129            name: field_name.into(),
130            dtype,
131            nullable: nullable.unwrap_or_else(|| array.is_nullable()),
132            metadata: metadata.unwrap_or_default()
133        };
134        FieldArray {
135            field: field.into(),
136            array: array.into(),
137            null_count
138        }
139    }
140
141    pub fn len(&self) -> usize {
142        self.array.len()
143    }
144
145    pub fn is_empty(&self) -> bool {
146        self.array.len() == 0
147    }
148
149    pub fn arrow_type(&self) -> ArrowType {
150        self.field.dtype.clone()
151    }
152
153    /// Returns a zero-copy view (`FieldArraySlice`) into the window `[offset, offset+len)`.
154    ///
155    /// The returned object holds references into the original `FieldArray`.
156    ///
157    /// The `(&Array, Offset, WindowLength), &Field)` `FieldArraySlice` pattern here
158    /// is a once-off we avoid recommending.
159    #[cfg(feature = "views")]
160    #[inline]
161    pub fn to_window(&self, offset: usize, len: usize) -> FieldAVT {
162        ((&self.array, offset, len), &self.field)
163    }
164
165    /// Returns a new owned FieldArray with array sliced `[offset, offset+len)`.
166    pub fn slice_clone(&self, offset: usize, len: usize) -> Self {
167        let array: Array = self.array.slice_clone(offset, len).into();
168        let null_count = array.null_count();
169        FieldArray {
170            field: self.field.clone(),
171            array: array.into(),
172            null_count
173        }
174    }
175
176    /// Updates the cached null_count from the underlying array.
177    /// Should be called after any mutation of the array that could change null count.
178    #[inline]
179    pub fn refresh_null_count(&mut self) {
180        self.null_count = self.array.null_count();
181    }
182
183    /// Returns the cached null count.
184    /// This is kept in sync with the underlying array via refresh_null_count().
185    #[inline]
186    pub fn null_count(&self) -> usize {
187        self.null_count
188    }
189
190    /// Concatenates another FieldArray's data into this one using copy-on-write semantics.
191    /// If this FieldArray's array has Arc reference count > 1, the data is cloned first.
192    /// Both FieldArrays must have compatible types. Updates the cached null_count.
193    pub fn concat_field_array(&mut self, other: &FieldArray) {
194        self.array.concat_array(&other.array);
195        self.refresh_null_count();
196    }
197
198    /// Provides mutable access to the underlying array with automatic null_count refresh.
199    /// Uses copy-on-write semantics - clones array data if Arc reference count > 1.
200    /// Use this for operations that may change the null count.
201    pub fn with_array_mut<F, R>(&mut self, f: F) -> R 
202    where
203        F: FnOnce(&mut Array) -> R,
204    {
205        let result = f(&mut self.array);
206        self.refresh_null_count();
207        result
208    }
209
210    /// Export this field+array over FFI and import into arrow-rs.
211    #[cfg(feature = "cast_arrow")]
212    #[inline]
213    pub fn to_apache_arrow(&self) -> ArrayRef {
214        self.array.to_apache_arrow_with_field(&self.field)
215    }
216
217    // ** The below polars function is tested tests/polars.rs **
218
219    /// Casts the FieldArray to a polars Series
220    #[cfg(feature = "cast_polars")]
221    pub fn to_polars(&self) -> Series {
222        let name = self.field.name.as_str();
223        self.array.to_polars_with_field(name, &self.field)
224    }
225}
226
227/// Creates a new basic field array based on a name and an existing array
228pub fn field_array<T: Into<String>>(name: T, array: Array) -> FieldArray {
229    let dtype = array.arrow_type();
230    let nullable = array.is_nullable();
231    let field = Field::new(name, dtype, nullable, None);
232    FieldArray::new(field, array)
233}
234
235impl Display for FieldArray {
236    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
237        writeln!(
238            f,
239            "\nFieldArray \"{}\" [{} values] (dtype: {:?})",
240            self.field.name,
241            self.array.len(),
242            self.field.dtype
243        )?;
244        self.array.fmt(f)
245    }
246}
247
#[cfg(test)]
mod tests {
    use super::*;
    use crate::structs::variants::integer::IntegerArray;
    use crate::traits::masked_array::MaskedArray;

    /// `FieldArray::new` keeps the supplied field and array unchanged.
    #[test]
    fn test_field_array_basic_construction() {
        let mut arr = IntegerArray::<i32>::default();
        arr.push(1);
        arr.push(2);
        let array = Array::from_int32(arr);

        let field = Field::new("my_col", ArrowType::Int32, false, None);
        let field_array = FieldArray::new(field.clone(), array.clone());

        assert_eq!(field_array.len(), 2);
        assert_eq!(field_array.field, Arc::new(field));
        assert_eq!(field_array.array, array);
    }

    /// With `nullable: None`, `from_parts` takes nullability from the array.
    #[test]
    fn test_field_array_from_parts_infers_nullability() {
        let mut arr = IntegerArray::<i64>::default();
        arr.push(10);
        arr.push_null(); // makes it nullable
        let array = Array::from_int64(arr);

        let field_array =
            FieldArray::from_parts("nullable_col", ArrowType::Int64, None, None, array.clone());

        assert_eq!(field_array.field.name, "nullable_col");
        assert_eq!(field_array.field.dtype, ArrowType::Int64);
        assert!(field_array.field.nullable);
        assert_eq!(field_array.len(), 2);
        assert_eq!(field_array.array, array);
    }

    /// `to_window` returns `((array, offset, len), field)` referencing the full array.
    #[cfg(feature = "views")]
    #[test]
    fn test_field_array_slice() {
        let mut arr = IntegerArray::<i32>::default();
        arr.push(10);
        arr.push(20);
        arr.push(30);

        let fa = field_array("x", Array::from_int32(arr));
        let view = fa.to_window(1, 2);
        assert_eq!(view.1.name, "x");
        assert_eq!(view.0.1, 1); // offset
        assert_eq!(view.0.2, 2); // window length
        assert_eq!(view.0.0.len(), 3); // the view still references the whole array
    }

    /// `concat_field_array` refreshes the cached null count.
    #[test]
    fn test_null_count_cache_sync_concat() {
        // Create first FieldArray with nulls
        let mut arr1 = IntegerArray::<i32>::default();
        arr1.push(1);
        arr1.push_null();
        arr1.push(3);
        let mut fa1 = field_array("test", Array::from_int32(arr1));
        assert_eq!(fa1.null_count(), 1);

        // Create second FieldArray with nulls
        let mut arr2 = IntegerArray::<i32>::default();
        arr2.push_null();
        arr2.push(5);
        let fa2 = field_array("test", Array::from_int32(arr2));
        assert_eq!(fa2.null_count(), 1);

        // Concatenate and verify null_count cache is updated
        fa1.concat_field_array(&fa2);
        assert_eq!(fa1.len(), 5);
        assert_eq!(fa1.null_count(), 2); // Should be 2 nulls total
    }

    /// `with_array_mut` refreshes the cached null count after the closure runs.
    #[test]
    fn test_null_count_cache_sync_with_array_mut() {
        let mut arr = IntegerArray::<i32>::default();
        arr.push(1);
        arr.push(2);
        let mut fa = field_array("test", Array::from_int32(arr));
        assert_eq!(fa.null_count(), 0);

        // Mutate through with_array_mut to add nulls
        fa.with_array_mut(|array| {
            array.concat_array(&Array::from_int32({
                let mut new_arr = IntegerArray::<i32>::default();
                new_arr.push_null();
                new_arr.push_null();
                new_arr
            }));
        });

        assert_eq!(fa.len(), 4);
        assert_eq!(fa.null_count(), 2); // Cache should be refreshed automatically
    }

    /// Direct mutation of the public `array` field leaves the cache stale until
    /// `refresh_null_count` is called.
    #[test]
    fn test_refresh_null_count() {
        let mut arr = IntegerArray::<i32>::default();
        arr.push(1);
        arr.push(2);
        let mut fa = field_array("test", Array::from_int32(arr));
        assert_eq!(fa.null_count(), 0);

        // Manually mutate underlying array (simulating external mutation)
        if let Array::NumericArray(crate::NumericArray::Int32(int_arr)) = &mut fa.array {
            Arc::make_mut(int_arr).push_null();
        } else {
            // Fail loudly rather than silently skipping the mutation under test.
            panic!("expected an Int32 numeric array");
        }

        // Cache is now stale
        assert_eq!(fa.null_count, 0); // Cached value still 0
        assert_eq!(fa.array.null_count(), 1); // Actual value is 1

        // Refresh the cache
        fa.refresh_null_count();
        assert_eq!(fa.null_count(), 1); // Cache now updated
    }
}