Skip to main content

ferray_ma/
masked_array.rs

// ferray-ma: MaskedArray<T, D> type (REQ-1, REQ-2, REQ-3, REQ-5)
2//
3// ## REQ status
4//
5// All SHIPPED — this is the audited, green core type. Consumers are the
6// `ferray-python` `PyMaskedArray` shims in `ferray-python/src/ma.rs` (built on
7// the `match_ma!` dispatch) plus the in-crate re-export `MaskedArray` from
8// `ferray-ma/src/lib.rs`.
9//
10// | REQ | Status | Evidence |
11// |-----|--------|----------|
12// | REQ-1 (data+mask pairing) | SHIPPED | `struct MaskedArray<T, D>` holds the `data: Array<T, D>` + `mask: Array<bool, D>` pair (this file), plus `fill_value`/`hard_mask` state. Consumer: every `PyMaskedArray` variant in `ferray-python/src/ma.rs` wraps a `MaskedArray`. |
13// | REQ-2 (`new(data, mask)`) | SHIPPED | `MaskedArray::new` (shape-checked) + `MaskedArray::from_data` (this file). Consumer: the `PyMaskedArray` constructor path in `ferray-python/src/ma.rs`. |
// | REQ-3 (`data()`/`mask()`) | SHIPPED | `MaskedArray::data` / `mask` / `mask_opt` / `data_mut` accessors (this file); the data is also readable via the `AsRef<Array<T, D>>` interop in `interop.rs`. Consumer: the `.data`/`.mask` getters of `PyMaskedArray` in `ferray-python/src/ma.rs`. |
15// | REQ-5 (per-dtype default fill) | SHIPPED | `default_fill_value` (this file) mirrors numpy's `default_filler` (`numpy/ma/core.py:163`), wired into `MaskedArray::new`/`from_data`. Non-test production consumers: `filled_default` (`filled.rs`), masked-slot filling in `masked_unary`/`masked_binary` (`ufunc_support.rs`), `ma_apply_unary` (`interop.rs`). |
16//
17// Note: REQ-5's float/int/bool/complex default-fill matrix is anchored here
18// (`default_fill_value`); the broader `filled`/`compressed` contract (REQ-6)
19// lives in `filled.rs`.
20
21use std::any::Any;
22use std::sync::{Arc, OnceLock};
23
24use ferray_core::Array;
25use ferray_core::dimension::Dimension;
26use ferray_core::dtype::{DType, Element};
27use ferray_core::error::{FerrayError, FerrayResult};
28
29/// Default fill value for an element type, mirroring numpy's
30/// `default_filler` dict (`numpy/ma/core.py:163-174`):
31///
32/// ```text
33/// default_filler = {'b': True,            # bool
34///                   'f': 1.e20,           # float
35///                   'i': 999999,          # signed integer
36///                   'u': 999999,          # unsigned integer
37///                   ...}
38/// ```
39///
40/// numpy keys `default_filler` by dtype *kind* (`'b'`/`'f'`/`'i'`/`'u'`),
41/// so every float width shares `1e20`, every integer width shares `999999`,
42/// and bool is `True`. We reproduce that by dispatching on [`Element::dtype`]
43/// (the runtime [`DType`] tag) and constructing the concrete numpy default,
44/// then moving it into `T` through a `'static` [`Any`] downcast (sound because
45/// `Element: 'static` and the concrete value's type matches the dtype tag).
46///
47/// Complex dtypes use numpy's `default_filler['c']` of `1e20 + 0j` (#868).
48/// Dtypes with no entry in numpy's `default_filler` (structured, string,
49/// datetime, and the 128-/256-bit integers ferray adds beyond numpy's C scalar
50/// set) fall back to `T::zero()` — they have no masked-array fill-value
51/// contract honored here.
52///
53/// Reference: numpy 2.4.5 live oracle —
54/// `ma.array([1.,2.,3.]).fill_value == np.float64(1e+20)`.
55pub(crate) fn default_fill_value<T: Element>() -> T {
56    // Build the concrete numpy default keyed by dtype kind, then downcast
57    // the boxed `dyn Any` back into `T`. The `unwrap_or_else(T::zero)`
58    // closes both the "downcast type mismatch can't happen" branch and the
59    // "dtype has no numpy default" branch into the additive-identity
60    // fallback, so this never panics and needs no `unwrap`.
61    fn coerce<T: Element>(boxed: Box<dyn Any>) -> Option<T> {
62        boxed.downcast::<T>().ok().map(|b| *b)
63    }
64    let boxed: Option<Box<dyn Any>> = match T::dtype() {
65        // float kind 'f' -> 1e20
66        DType::F32 => Some(Box::new(1e20_f32)),
67        DType::F64 => Some(Box::new(1e20_f64)),
68        // signed integer kind 'i' -> 999999 (numpy casts 999999 into the
69        // target width with numpy cast rules; for i8/i16 that wraps the
70        // same way numpy's `np.array(999999).astype(int8)` does).
71        DType::I8 => Some(Box::new(999_999_i32 as i8)),
72        DType::I16 => Some(Box::new(999_999_i32 as i16)),
73        DType::I32 => Some(Box::new(999_999_i32)),
74        DType::I64 => Some(Box::new(999_999_i64)),
75        // unsigned integer kind 'u' -> 999999
76        DType::U8 => Some(Box::new(999_999_u32 as u8)),
77        DType::U16 => Some(Box::new(999_999_u32 as u16)),
78        DType::U32 => Some(Box::new(999_999_u32)),
79        DType::U64 => Some(Box::new(999_999_u64)),
80        // bool kind 'b' -> True
81        DType::Bool => Some(Box::new(true)),
82        // complex kind 'c' -> 1e20 + 0j (numpy `default_filler['c']`,
83        // `numpy/ma/core.py:163`; verified live numpy 2.4.5:
84        // `np.ma.array([1+2j]).fill_value == (1e+20+0j)`). Both complex widths
85        // share the same `1e20+0j` magnitude (numpy keys `default_filler` by
86        // dtype KIND, so `'c'` covers complex64 and complex128 alike), each
87        // built in its own element width before the `Any` downcast (#868).
88        DType::Complex32 => Some(Box::new(num_complex::Complex::new(1e20_f32, 0.0_f32))),
89        DType::Complex64 => Some(Box::new(num_complex::Complex::new(1e20_f64, 0.0_f64))),
90        // Dtypes numpy's numeric default_filler subset doesn't cover
91        // (128-/256-bit ints, structured, string, datetime/timedelta):
92        // no masked fill-value contract here -> additive identity.
93        _ => None,
94    };
95    boxed
96        .and_then(coerce::<T>)
97        .unwrap_or_else(<T as Element>::zero)
98}
99
/// A masked array that pairs data with a boolean mask.
///
/// Each element position has a corresponding mask bit:
/// - `true` means the element is **masked** (invalid / missing)
/// - `false` means the element is valid
///
/// All operations (arithmetic, reductions, ufuncs) respect the mask by
/// skipping masked elements.
///
/// The `fill_value` field is the replacement value for masked positions when
/// the masked array participates in operations or when [`MaskedArray::filled`]
/// is called without an explicit override. It defaults to numpy's per-dtype
/// `default_filler` (`numpy/ma/core.py:163`): `1e20` for float, `999999` for
/// integer, `true` for bool, `1e20 + 0i` for complex — see
/// [`default_fill_value`].
///
/// # Nomask sentinel (#506)
///
/// When a [`MaskedArray`] is constructed via [`MaskedArray::from_data`]
/// the mask is logically "all-false" but is NOT allocated as a full
/// `Array<bool, D>` up front — the lazy `OnceLock` inside stores
/// nothing until the first call to [`MaskedArray::mask`]. For arrays
/// that never touch their mask (e.g. masked ops that short-circuit
/// via [`MaskedArray::has_real_mask`]), this saves a full bool-sized
/// allocation proportional to the data size.
///
/// The `.mask()` accessor still returns `&Array<bool, D>` so all
/// existing code continues to work unchanged; the cost is one
/// lazy allocation on first access. Hot-path code that wants to
/// avoid the materialization should check `has_real_mask()` first
/// and skip any mask work when it returns `false`.
pub struct MaskedArray<T: Element, D: Dimension> {
    /// The underlying data array.
    data: Array<T, D>,
    /// Boolean mask (`true` = masked/invalid). Lazily materialized
    /// when explicitly queried via [`MaskedArray::mask`] — a
    /// `from_data`-constructed array with no masked elements pays
    /// zero allocation cost until that first query.
    ///
    /// Wrapped in `Arc` for structural sharing across clones (#512):
    /// cloning a `MaskedArray` bumps the Arc refcount instead of
    /// deep-copying the mask. A mutation never writes through a shared
    /// `Arc`: `set_mask_flat` goes through the private
    /// `ensure_materialized_mut` helper, which copies the mask into a
    /// fresh `Arc` when the current one is shared, and `set_mask`
    /// always installs a fresh `Arc`.
    mask: Arc<OnceLock<Array<bool, D>>>,
    /// `true` once a mask has been explicitly installed — by
    /// [`MaskedArray::new`], by [`MaskedArray::set_mask`], or by a
    /// [`MaskedArray::set_mask_flat`] call that sets a bit to `true`.
    /// `false` while the array is in the nomask-sentinel state; a lazy
    /// all-false materialization through [`MaskedArray::mask`] does NOT
    /// flip it.
    ///
    /// Hot-path consumers should branch on this flag and skip the
    /// mask-iteration entirely when it is `false` — see
    /// [`MaskedArray::has_real_mask`].
    real_mask: bool,
    /// Whether the mask is hardened (cannot be cleared by assignment).
    /// While set, `set_mask_flat` ignores requests to clear a bit and
    /// `set_mask` ORs the new mask into an existing real mask instead of
    /// replacing it.
    pub(crate) hard_mask: bool,
    /// Replacement value for masked positions during operations and filling.
    /// Defaults to numpy's per-dtype `default_filler` (`1e20` float / `999999`
    /// int / `true` bool / `1e20 + 0i` complex); see [`default_fill_value`].
    pub(crate) fill_value: T,
}
158
159impl<T: Element, D: Dimension> std::fmt::Debug for MaskedArray<T, D> {
160    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
161        f.debug_struct("MaskedArray")
162            .field("data", &self.data)
163            .field("real_mask", &self.real_mask)
164            .field("hard_mask", &self.hard_mask)
165            .field("fill_value", &self.fill_value)
166            .finish_non_exhaustive()
167    }
168}
169
170impl<T: Element + Clone, D: Dimension> Clone for MaskedArray<T, D> {
171    fn clone(&self) -> Self {
172        // Structural sharing (#512): just bump the Arc refcount instead
173        // of cloning the underlying mask array. Copy-on-write kicks in
174        // via `make_mask_unique` whenever either the parent or the
175        // clone tries to mutate its mask.
176        //
177        // The data array is still deep-cloned because ferray-core's
178        // Array doesn't have Arc-based structural sharing; sharing the
179        // mask alone still saves the larger-of-the-two allocations in
180        // the common "unmasked data transformations" path.
181        Self {
182            data: self.data.clone(),
183            mask: Arc::clone(&self.mask),
184            real_mask: self.real_mask,
185            hard_mask: self.hard_mask,
186            fill_value: self.fill_value.clone(),
187        }
188    }
189}
190
191impl<T: Element, D: Dimension> MaskedArray<T, D> {
192    /// Create a new masked array from data and mask arrays.
193    ///
194    /// The `fill_value` defaults to numpy's per-dtype `default_filler`
195    /// (`numpy/ma/core.py:163`): `1e20` for float, `999999` for integer,
196    /// `true` for bool (see [`default_fill_value`]). Use
197    /// [`MaskedArray::with_fill_value`] to set a custom replacement value.
198    ///
199    /// # Errors
200    /// Returns `FerrayError::ShapeMismatch` if data and mask shapes differ.
201    pub fn new(data: Array<T, D>, mask: Array<bool, D>) -> FerrayResult<Self> {
202        if data.shape() != mask.shape() {
203            return Err(FerrayError::shape_mismatch(format!(
204                "MaskedArray::new: data shape {:?} does not match mask shape {:?}",
205                data.shape(),
206                mask.shape()
207            )));
208        }
209        let lock = OnceLock::new();
210        let _ = lock.set(mask);
211        let fill_value = default_fill_value::<T>();
212        Ok(Self {
213            data,
214            mask: Arc::new(lock),
215            real_mask: true,
216            hard_mask: false,
217            fill_value,
218        })
219    }
220
221    /// Create a masked array with no masked elements (all-false mask).
222    ///
223    /// Does NOT allocate the mask up front — the array is in the
224    /// nomask-sentinel state (#506) until [`MaskedArray::mask`] is
225    /// explicitly called. For code that only uses `data()` or
226    /// short-circuits via [`MaskedArray::has_real_mask`], this saves
227    /// a full-sized bool allocation.
228    ///
229    /// # Errors
230    /// Always returns `Ok` — the `FerrayResult` is preserved for API
231    /// parity with the previous eager implementation.
232    pub fn from_data(data: Array<T, D>) -> FerrayResult<Self> {
233        let fill_value = default_fill_value::<T>();
234        Ok(Self {
235            data,
236            mask: Arc::new(OnceLock::new()),
237            real_mask: false,
238            hard_mask: false,
239            fill_value,
240        })
241    }
242
243    /// Return `true` if this masked array holds a real (explicitly
244    /// provided or materialized) mask. Returns `false` when the array
245    /// is in the nomask-sentinel state and the mask is logically
246    /// all-false.
247    ///
248    /// Hot-path iteration code should branch on this flag to skip
249    /// mask scanning entirely when it returns `false` (#506).
250    #[inline]
251    pub const fn has_real_mask(&self) -> bool {
252        self.real_mask
253    }
254
255    /// Return the fill value used to replace masked positions.
256    ///
257    /// See [`MaskedArray::with_fill_value`] for setting it.
258    #[inline]
259    pub const fn fill_value(&self) -> T
260    where
261        T: Copy,
262    {
263        self.fill_value
264    }
265
266    /// Set the fill value, returning the modified array.
267    ///
268    /// The fill value is used by [`MaskedArray::filled`] (when called
269    /// without an explicit override) and by arithmetic operations as the
270    /// replacement for masked positions in the result data.
271    #[must_use]
272    pub fn with_fill_value(mut self, fill_value: T) -> Self {
273        self.fill_value = fill_value;
274        self
275    }
276
277    /// Replace the fill value in place.
278    pub fn set_fill_value(&mut self, fill_value: T) {
279        self.fill_value = fill_value;
280    }
281
282    /// Return a reference to the underlying data array.
283    #[inline]
284    pub const fn data(&self) -> &Array<T, D> {
285        &self.data
286    }
287
288    /// Return a reference to the mask array.
289    ///
290    /// If the array is in the nomask-sentinel state (constructed via
291    /// [`MaskedArray::from_data`] or otherwise) this lazily allocates
292    /// a full all-false `Array<bool, D>` and caches it for subsequent
293    /// calls. Use [`MaskedArray::has_real_mask`] to check whether the
294    /// mask is known to be trivial first, and skip calling `.mask()`
295    /// entirely on the hot path when you can.
296    pub fn mask(&self) -> &Array<bool, D> {
297        self.mask.get_or_init(|| {
298            Array::<bool, D>::from_elem(self.data.dim().clone(), false)
299                .expect("from_elem with matching dim cannot fail")
300        })
301    }
302
303    /// Return a reference to the mask array if one has been
304    /// materialized, or `None` when the array is still in the
305    /// nomask-sentinel state.
306    ///
307    /// Unlike [`MaskedArray::mask`], this does NOT trigger lazy
308    /// allocation — it's the fast-path query for hot code that
309    /// wants to branch on whether any mask bits are set (#506).
310    #[inline]
311    pub fn mask_opt(&self) -> Option<&Array<bool, D>> {
312        if self.real_mask {
313            // A real mask was set via `new` or `set_mask`; the
314            // OnceLock is guaranteed to be initialized.
315            self.mask.get()
316        } else {
317            None
318        }
319    }
320
321    /// Return a mutable element slice into the underlying data array.
322    ///
323    /// The masked-array invariant (data shape == mask shape) requires
324    /// the data length to remain fixed. Returning `&mut [T]` here
325    /// (instead of `&mut Array<T, D>`) lets callers update individual
326    /// element values while blocking any reshape/resize that would
327    /// break the invariant (#273). Returns `None` for non-contiguous
328    /// data layouts where a flat slice can't be exposed.
329    #[inline]
330    pub fn data_mut(&mut self) -> Option<&mut [T]> {
331        self.data.as_slice_mut()
332    }
333
334    /// Return the shape of the masked array.
335    #[inline]
336    pub fn shape(&self) -> &[usize] {
337        self.data.shape()
338    }
339
340    /// Return the number of dimensions.
341    #[inline]
342    pub fn ndim(&self) -> usize {
343        self.data.ndim()
344    }
345
346    /// Return the total number of elements (including masked).
347    #[inline]
348    pub fn size(&self) -> usize {
349        self.data.size()
350    }
351
352    /// Return the dimension descriptor.
353    #[inline]
354    pub const fn dim(&self) -> &D {
355        self.data.dim()
356    }
357
358    /// Return whether the mask is hardened.
359    #[inline]
360    pub const fn is_hard_mask(&self) -> bool {
361        self.hard_mask
362    }
363
364    /// Internal helper: force the lazy nomask sentinel to materialize a
365    /// concrete `Array<bool, D>` AND ensure the mask's `Arc` is
366    /// uniquely owned (copy-on-write), then return a mutable reference
367    /// to the inner mask.
368    ///
369    /// After this call `real_mask` is `true`, `self.mask` is guaranteed
370    /// to contain an initialized `Array<bool, D>`, and the underlying
371    /// `Arc` has refcount exactly 1 so it's safe to mutate without
372    /// aliasing any other `MaskedArray` that may have cloned from us.
373    fn ensure_materialized_mut(&mut self) -> &mut Array<bool, D> {
374        // Step 1: materialize if we're still in the nomask sentinel
375        // state. We install a fresh Arc<OnceLock> containing an
376        // all-false mask.
377        if !self.real_mask || self.mask.get().is_none() {
378            let fresh = Array::<bool, D>::from_elem(self.data.dim().clone(), false)
379                .expect("from_elem with matching dim cannot fail");
380            let lock = OnceLock::new();
381            let _ = lock.set(fresh);
382            self.mask = Arc::new(lock);
383            self.real_mask = true;
384        }
385
386        // Step 2: copy-on-write — if this Arc is shared with any
387        // clones, deep-copy the inner mask into a fresh Arc so our
388        // mutation doesn't affect the clones. Arc::get_mut returns
389        // None when refcount > 1.
390        if Arc::get_mut(&mut self.mask).is_none() {
391            let cloned_mask = self
392                .mask
393                .get()
394                .expect("real_mask implies OnceLock set")
395                .clone();
396            let new_lock = OnceLock::new();
397            let _ = new_lock.set(cloned_mask);
398            self.mask = Arc::new(new_lock);
399        }
400
401        // Step 3: now we're the unique owner — get_mut on the OnceLock
402        // for the inner Array<bool, D>.
403        Arc::get_mut(&mut self.mask)
404            .expect("just made the Arc unique above")
405            .get_mut()
406            .expect("OnceLock was initialized above")
407    }
408
409    /// Set a mask value at a flat index.
410    ///
411    /// If the mask is hardened, only `true` (masking) is allowed; attempts to
412    /// clear a mask bit are silently ignored.
413    ///
414    /// Setting a mask bit materializes the lazy nomask sentinel into a
415    /// real mask array (#506) — if you set even one bit, the full
416    /// `Array<bool, D>` is allocated.
417    ///
418    /// # Errors
419    /// Returns `FerrayError::IndexOutOfBounds` if `flat_idx >= size`.
420    pub fn set_mask_flat(&mut self, flat_idx: usize, value: bool) -> FerrayResult<()> {
421        let size = self.size();
422        if flat_idx >= size {
423            return Err(FerrayError::index_out_of_bounds(flat_idx as isize, 0, size));
424        }
425        if self.hard_mask && !value {
426            // Hard mask: cannot clear mask bits
427            return Ok(());
428        }
429        // Setting a nomask-sentinel to false is a no-op (mask is
430        // already logically all-false); skip the allocation entirely.
431        if !self.real_mask && !value {
432            return Ok(());
433        }
434        let mask = self.ensure_materialized_mut();
435        // Fast path: contiguous mask — direct O(1) slice indexing.
436        if let Some(slice) = mask.as_slice_mut() {
437            slice[flat_idx] = value;
438        } else {
439            // Non-contiguous: fall back to iterator (rare case).
440            if let Some(m) = mask.iter_mut().nth(flat_idx) {
441                *m = value;
442            }
443        }
444        Ok(())
445    }
446
447    /// Replace the mask with a new one.
448    ///
449    /// If the mask is hardened, only bits that are `true` in both the old and
450    /// new masks (or newly set to `true`) are allowed; cleared bits are ignored.
451    ///
452    /// Passing a new mask always materializes the array out of the
453    /// nomask-sentinel state — the stored mask becomes the provided
454    /// one (possibly unioned with the existing mask if hardened).
455    ///
456    /// # Errors
457    /// Returns `FerrayError::ShapeMismatch` if shapes differ.
458    pub fn set_mask(&mut self, new_mask: Array<bool, D>) -> FerrayResult<()> {
459        if self.data.shape() != new_mask.shape() {
460            return Err(FerrayError::shape_mismatch(format!(
461                "set_mask: mask shape {:?} does not match array shape {:?}",
462                new_mask.shape(),
463                self.data.shape()
464            )));
465        }
466        if self.hard_mask && self.real_mask {
467            // Hard-mask union: merge the new mask with the existing
468            // one, keeping any `true` bits and never clearing.
469            let existing = self.mask.get().expect("real_mask implies OnceLock set");
470            let merged: Vec<bool> = existing
471                .iter()
472                .zip(new_mask.iter())
473                .map(|(old, new)| *old || *new)
474                .collect();
475            let merged_arr = Array::from_vec(self.data.dim().clone(), merged)?;
476            let lock = OnceLock::new();
477            let _ = lock.set(merged_arr);
478            // Install a fresh Arc; any clones keep their own snapshot.
479            self.mask = Arc::new(lock);
480        } else {
481            // Either not hardened or currently in the nomask sentinel
482            // state — unconditionally install the new mask in a fresh
483            // Arc (copy-on-write: clones remain unaffected).
484            let lock = OnceLock::new();
485            let _ = lock.set(new_mask);
486            self.mask = Arc::new(lock);
487        }
488        self.real_mask = true;
489        Ok(())
490    }
491
492    /// Return `true` when this masked array's underlying mask is
493    /// structurally shared with at least one other `MaskedArray`.
494    ///
495    /// After a `clone()` the original and the clone share the same
496    /// mask via `Arc` until one of them mutates it (copy-on-write, #512).
497    /// Hot-path code can use this to reason about memory sharing —
498    /// `shares_mask() == false` means the mask is uniquely owned and
499    /// can be mutated without affecting any other `MaskedArray`.
500    #[inline]
501    pub fn shares_mask(&self) -> bool {
502        Arc::strong_count(&self.mask) > 1
503    }
504}
505
#[cfg(test)]
mod tests {
    use super::*;
    use ferray_core::Array;
    use ferray_core::dimension::Ix1;

    /// Build a 1-D `f64` array from a vector.
    fn arr_f64(data: Vec<f64>) -> Array<f64, Ix1> {
        let n = data.len();
        Array::<f64, Ix1>::from_vec(Ix1::new([n]), data).unwrap()
    }

    /// Build a 1-D `bool` array from a vector.
    fn arr_bool(data: Vec<bool>) -> Array<bool, Ix1> {
        let n = data.len();
        Array::<bool, Ix1>::from_vec(Ix1::new([n]), data).unwrap()
    }

    /// Collect the mask bits of `ma` into a `Vec`. Goes through `mask()`,
    /// so it lazily materializes a sentinel mask just like a direct call.
    fn mask_bits(ma: &MaskedArray<f64, Ix1>) -> Vec<bool> {
        ma.mask().iter().copied().collect()
    }

    // ---- nomask sentinel (#506) ----

    #[test]
    fn from_data_starts_in_nomask_sentinel_state() {
        let ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        assert!(!ma.has_real_mask());
        assert!(ma.mask_opt().is_none());
    }

    #[test]
    fn new_with_explicit_mask_is_real_mask() {
        let ma = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, true, false]),
        )
        .unwrap();
        assert!(ma.has_real_mask());
        assert!(ma.mask_opt().is_some());
    }

    #[test]
    fn mask_accessor_lazily_materializes_nomask_sentinel() {
        let ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        // Before calling .mask(), the array reports no mask.
        assert!(ma.mask_opt().is_none());
        // After calling .mask(), we get a full all-false Array<bool, D>.
        let m = ma.mask();
        assert_eq!(m.shape(), &[3]);
        assert_eq!(
            m.iter().copied().collect::<Vec<_>>(),
            vec![false, false, false]
        );
        // Subsequent calls return the same cached array (no re-alloc).
        let m2 = ma.mask();
        assert_eq!(std::ptr::from_ref(m), std::ptr::from_ref(m2));
        // BUT `has_real_mask` still reports `false` — the lazy
        // materialization doesn't promote the sentinel to a "real" mask
        // because the contents are still logically all-false. Hot-path
        // code can keep skipping.
        assert!(!ma.has_real_mask());
    }

    #[test]
    fn set_mask_flat_false_on_nomask_stays_zero_allocation() {
        // Setting a position to false on a nomask-sentinel array is a
        // no-op and should NOT materialize the mask.
        let mut ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        ma.set_mask_flat(1, false).unwrap();
        assert!(!ma.has_real_mask());
        assert!(ma.mask_opt().is_none());
    }

    #[test]
    fn set_mask_flat_true_on_nomask_materializes_and_promotes() {
        // Setting a position to true forces materialization and
        // promotes `real_mask` to true.
        let mut ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        ma.set_mask_flat(1, true).unwrap();
        assert!(ma.has_real_mask());
        assert_eq!(mask_bits(&ma), vec![false, true, false]);
    }

    #[test]
    fn set_mask_promotes_and_keeps_provided_values() {
        let mut ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        assert!(!ma.has_real_mask());
        ma.set_mask(arr_bool(vec![true, false, true])).unwrap();
        assert!(ma.has_real_mask());
        assert_eq!(mask_bits(&ma), vec![true, false, true]);
    }

    #[test]
    fn set_mask_shape_mismatch_errors() {
        let mut ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        assert!(ma.set_mask(arr_bool(vec![false; 4])).is_err());
    }

    #[test]
    fn clone_preserves_nomask_sentinel_state() {
        let ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        // A real `clone()` — moving `ma` would not exercise `Clone` at all.
        let cloned = ma.clone();
        assert!(!cloned.has_real_mask());
        assert!(cloned.mask_opt().is_none());
        // The source array is left in the sentinel state as well.
        assert!(!ma.has_real_mask());
        assert!(ma.mask_opt().is_none());
    }

    #[test]
    fn clone_after_materialization_copies_the_mask() {
        let ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        // Force materialization.
        let _ = ma.mask();
        let cloned = ma.clone();
        // The clone has the same mask contents (all-false)...
        assert_eq!(mask_bits(&cloned), vec![false, false, false]);
        // ...and so does the array it was cloned from.
        assert_eq!(mask_bits(&ma), vec![false, false, false]);
        // Lazy materialization never promoted either side.
        assert!(!cloned.has_real_mask());
        assert!(!ma.has_real_mask());
    }

    #[test]
    fn clone_preserves_real_mask_state() {
        let ma = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, true, false]),
        )
        .unwrap();
        let cloned = ma.clone();
        assert!(cloned.has_real_mask());
        assert_eq!(mask_bits(&cloned), vec![false, true, false]);
        // The source keeps its real mask too.
        assert!(ma.has_real_mask());
        assert_eq!(mask_bits(&ma), vec![false, true, false]);
    }

    // ---- shared mask with copy-on-write (#512) ----

    #[test]
    fn clone_shares_mask_via_arc() {
        let ma = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, true, false]),
        )
        .unwrap();
        let cloned = ma.clone();
        // Both copies should report structural sharing.
        assert!(ma.shares_mask());
        assert!(cloned.shares_mask());
    }

    #[test]
    fn unique_masked_array_does_not_share() {
        let ma = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, true, false]),
        )
        .unwrap();
        assert!(!ma.shares_mask());
    }

    #[test]
    fn copy_on_write_isolates_parent_from_child_mutation() {
        // Clone, then mutate the mask of the clone. The parent's mask
        // must be unchanged even though they started sharing an Arc.
        let parent = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, false, false]),
        )
        .unwrap();
        let mut child = parent.clone();
        assert!(parent.shares_mask());
        assert!(child.shares_mask());

        // Mutate the child — triggers copy-on-write.
        child.set_mask_flat(1, true).unwrap();

        // Parent's mask is still the original all-false.
        assert_eq!(mask_bits(&parent), vec![false, false, false]);
        // Child's mask reflects the mutation.
        assert_eq!(mask_bits(&child), vec![false, true, false]);

        // Parent no longer shares (the child's CoW broke the Arc's
        // dual ownership by installing its own).
        assert!(!parent.shares_mask());
        assert!(!child.shares_mask());
    }

    #[test]
    fn copy_on_write_via_set_mask() {
        // set_mask replaces the Arc entirely, which also implicitly
        // isolates the two.
        let parent = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, false, false]),
        )
        .unwrap();
        let mut child = parent.clone();
        assert!(parent.shares_mask());

        child.set_mask(arr_bool(vec![true, true, true])).unwrap();
        // Parent still has the original mask.
        assert_eq!(mask_bits(&parent), vec![false, false, false]);
        // Child has the new mask.
        assert_eq!(mask_bits(&child), vec![true, true, true]);
        assert!(!parent.shares_mask());
    }

    #[test]
    fn nomask_sentinel_clones_share_empty_arc() {
        // A from_data-constructed array in the nomask-sentinel state
        // still uses an Arc; clones share it.
        let parent = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        let cloned = parent.clone();
        assert!(parent.shares_mask());
        assert!(cloned.shares_mask());
        // Neither has a real mask yet.
        assert!(!parent.has_real_mask());
        assert!(!cloned.has_real_mask());
    }

    #[test]
    fn hard_mask_union_on_real_mask() {
        let mut ma = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![true, false, false]),
        )
        .unwrap();
        ma.harden_mask().unwrap();
        // Try to clear position 0 and set position 2. With a hard
        // mask, the union keeps position 0's true bit.
        ma.set_mask(arr_bool(vec![false, false, true])).unwrap();
        assert_eq!(mask_bits(&ma), vec![true, false, true]);
    }
}