ferray_ma/masked_array.rs
// ferray-ma: MaskedArray<T, D> type (REQ-1, REQ-2, REQ-3, REQ-5)
2//
3// ## REQ status
4//
5// All SHIPPED — this is the audited, green core type. Consumers are the
6// `ferray-python` `PyMaskedArray` shims in `ferray-python/src/ma.rs` (built on
7// the `match_ma!` dispatch) plus the in-crate re-export `MaskedArray` from
8// `ferray-ma/src/lib.rs`.
9//
10// | REQ | Status | Evidence |
11// |-----|--------|----------|
12// | REQ-1 (data+mask pairing) | SHIPPED | `struct MaskedArray<T, D>` holds the `data: Array<T, D>` + `mask: Array<bool, D>` pair (this file), plus `fill_value`/`hard_mask` state. Consumer: every `PyMaskedArray` variant in `ferray-python/src/ma.rs` wraps a `MaskedArray`. |
13// | REQ-2 (`new(data, mask)`) | SHIPPED | `MaskedArray::new` (shape-checked) + `MaskedArray::from_data` (this file). Consumer: the `PyMaskedArray` constructor path in `ferray-python/src/ma.rs`. |
14// | REQ-3 (`data()`/`mask()`) | SHIPPED | `MaskedArray::mask` / `mask_opt` / `data_mut` accessors (this file); data read via the `AsRef<Array<T, D>>` interop in `interop.rs`. Consumer: the `.data`/`.mask` getters of `PyMaskedArray` in `ferray-python/src/ma.rs`. |
15// | REQ-5 (per-dtype default fill) | SHIPPED | `default_fill_value` (this file) mirrors numpy's `default_filler` (`numpy/ma/core.py:163`), wired into `MaskedArray::new`/`from_data`. Non-test production consumers: `filled_default` (`filled.rs`), masked-slot filling in `masked_unary`/`masked_binary` (`ufunc_support.rs`), `ma_apply_unary` (`interop.rs`). |
16//
17// Note: REQ-5's float/int/bool/complex default-fill matrix is anchored here
18// (`default_fill_value`); the broader `filled`/`compressed` contract (REQ-6)
19// lives in `filled.rs`.
20
21use std::any::Any;
22use std::sync::{Arc, OnceLock};
23
24use ferray_core::Array;
25use ferray_core::dimension::Dimension;
26use ferray_core::dtype::{DType, Element};
27use ferray_core::error::{FerrayError, FerrayResult};
28
29/// Default fill value for an element type, mirroring numpy's
30/// `default_filler` dict (`numpy/ma/core.py:163-174`):
31///
32/// ```text
33/// default_filler = {'b': True, # bool
34/// 'f': 1.e20, # float
35/// 'i': 999999, # signed integer
36/// 'u': 999999, # unsigned integer
37/// ...}
38/// ```
39///
40/// numpy keys `default_filler` by dtype *kind* (`'b'`/`'f'`/`'i'`/`'u'`),
41/// so every float width shares `1e20`, every integer width shares `999999`,
42/// and bool is `True`. We reproduce that by dispatching on [`Element::dtype`]
43/// (the runtime [`DType`] tag) and constructing the concrete numpy default,
44/// then moving it into `T` through a `'static` [`Any`] downcast (sound because
45/// `Element: 'static` and the concrete value's type matches the dtype tag).
46///
47/// Complex dtypes use numpy's `default_filler['c']` of `1e20 + 0j` (#868).
48/// Dtypes with no entry in numpy's `default_filler` (structured, string,
49/// datetime, and the 128-/256-bit integers ferray adds beyond numpy's C scalar
50/// set) fall back to `T::zero()` — they have no masked-array fill-value
51/// contract honored here.
52///
53/// Reference: numpy 2.4.5 live oracle —
54/// `ma.array([1.,2.,3.]).fill_value == np.float64(1e+20)`.
55pub(crate) fn default_fill_value<T: Element>() -> T {
56 // Build the concrete numpy default keyed by dtype kind, then downcast
57 // the boxed `dyn Any` back into `T`. The `unwrap_or_else(T::zero)`
58 // closes both the "downcast type mismatch can't happen" branch and the
59 // "dtype has no numpy default" branch into the additive-identity
60 // fallback, so this never panics and needs no `unwrap`.
61 fn coerce<T: Element>(boxed: Box<dyn Any>) -> Option<T> {
62 boxed.downcast::<T>().ok().map(|b| *b)
63 }
64 let boxed: Option<Box<dyn Any>> = match T::dtype() {
65 // float kind 'f' -> 1e20
66 DType::F32 => Some(Box::new(1e20_f32)),
67 DType::F64 => Some(Box::new(1e20_f64)),
68 // signed integer kind 'i' -> 999999 (numpy casts 999999 into the
69 // target width with numpy cast rules; for i8/i16 that wraps the
70 // same way numpy's `np.array(999999).astype(int8)` does).
71 DType::I8 => Some(Box::new(999_999_i32 as i8)),
72 DType::I16 => Some(Box::new(999_999_i32 as i16)),
73 DType::I32 => Some(Box::new(999_999_i32)),
74 DType::I64 => Some(Box::new(999_999_i64)),
75 // unsigned integer kind 'u' -> 999999
76 DType::U8 => Some(Box::new(999_999_u32 as u8)),
77 DType::U16 => Some(Box::new(999_999_u32 as u16)),
78 DType::U32 => Some(Box::new(999_999_u32)),
79 DType::U64 => Some(Box::new(999_999_u64)),
80 // bool kind 'b' -> True
81 DType::Bool => Some(Box::new(true)),
82 // complex kind 'c' -> 1e20 + 0j (numpy `default_filler['c']`,
83 // `numpy/ma/core.py:163`; verified live numpy 2.4.5:
84 // `np.ma.array([1+2j]).fill_value == (1e+20+0j)`). Both complex widths
85 // share the same `1e20+0j` magnitude (numpy keys `default_filler` by
86 // dtype KIND, so `'c'` covers complex64 and complex128 alike), each
87 // built in its own element width before the `Any` downcast (#868).
88 DType::Complex32 => Some(Box::new(num_complex::Complex::new(1e20_f32, 0.0_f32))),
89 DType::Complex64 => Some(Box::new(num_complex::Complex::new(1e20_f64, 0.0_f64))),
90 // Dtypes numpy's numeric default_filler subset doesn't cover
91 // (128-/256-bit ints, structured, string, datetime/timedelta):
92 // no masked fill-value contract here -> additive identity.
93 _ => None,
94 };
95 boxed
96 .and_then(coerce::<T>)
97 .unwrap_or_else(<T as Element>::zero)
98}
99
100/// A masked array that pairs data with a boolean mask.
101///
102/// Each element position has a corresponding mask bit:
103/// - `true` means the element is **masked** (invalid / missing)
104/// - `false` means the element is valid
105///
106/// All operations (arithmetic, reductions, ufuncs) respect the mask by
107/// skipping masked elements.
108///
109/// The `fill_value` field is the replacement value for masked positions when
110/// the masked array participates in operations or when [`MaskedArray::filled`]
111/// is called without an explicit override. It defaults to numpy's per-dtype
112/// `default_filler` (`numpy/ma/core.py:163`): `1e20` for float, `999999` for
113/// integer, `true` for bool — see [`default_fill_value`].
114///
115/// # Nomask sentinel (#506)
116///
117/// When a [`MaskedArray`] is constructed via [`MaskedArray::from_data`]
118/// the mask is logically "all-false" but is NOT allocated as a full
119/// `Array<bool, D>` up front — the lazy `OnceLock` inside stores
120/// nothing until the first call to [`MaskedArray::mask`]. For arrays
121/// that never touch their mask (e.g. masked ops that short-circuit
122/// via [`MaskedArray::has_real_mask`]), this saves a full bool-sized
123/// allocation proportional to the data size.
124///
125/// The `.mask()` accessor still returns `&Array<bool, D>` so all
126/// existing code continues to work unchanged; the cost is one
127/// lazy allocation on first access. Hot-path code that wants to
128/// avoid the materialization should check `has_real_mask()` first
129/// and skip any mask work when it returns `false`.
130pub struct MaskedArray<T: Element, D: Dimension> {
131 /// The underlying data array.
132 data: Array<T, D>,
133 /// Boolean mask (`true` = masked/invalid). Lazily materialized
134 /// when explicitly queried via [`MaskedArray::mask`] — a
135 /// `from_data`-constructed array with no masked elements pays
136 /// zero allocation cost until that first query.
137 ///
138 /// Wrapped in `Arc` for structural sharing across clones (#512):
139 /// cloning a `MaskedArray` bumps the Arc refcount instead of
140 /// deep-copying the mask, and any mutation that needs a unique
141 /// mask does copy-on-write via [`Self::make_mask_unique`].
142 mask: Arc<OnceLock<Array<bool, D>>>,
143 /// `true` when a non-trivial mask has been explicitly provided
144 /// (via [`MaskedArray::new`] or [`MaskedArray::set_mask`]),
145 /// `false` when the array is in the nomask-sentinel state.
146 ///
147 /// Hot-path consumers should branch on this flag and skip the
148 /// mask-iteration entirely when it is `false` — see
149 /// [`MaskedArray::has_real_mask`].
150 real_mask: bool,
151 /// Whether the mask is hardened (cannot be cleared by assignment).
152 pub(crate) hard_mask: bool,
153 /// Replacement value for masked positions during operations and filling.
154 /// Defaults to numpy's per-dtype `default_filler` (`1e20` float / `999999`
155 /// int / `true` bool); see [`default_fill_value`].
156 pub(crate) fill_value: T,
157}
158
159impl<T: Element, D: Dimension> std::fmt::Debug for MaskedArray<T, D> {
160 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
161 f.debug_struct("MaskedArray")
162 .field("data", &self.data)
163 .field("real_mask", &self.real_mask)
164 .field("hard_mask", &self.hard_mask)
165 .field("fill_value", &self.fill_value)
166 .finish_non_exhaustive()
167 }
168}
169
170impl<T: Element + Clone, D: Dimension> Clone for MaskedArray<T, D> {
171 fn clone(&self) -> Self {
172 // Structural sharing (#512): just bump the Arc refcount instead
173 // of cloning the underlying mask array. Copy-on-write kicks in
174 // via `make_mask_unique` whenever either the parent or the
175 // clone tries to mutate its mask.
176 //
177 // The data array is still deep-cloned because ferray-core's
178 // Array doesn't have Arc-based structural sharing; sharing the
179 // mask alone still saves the larger-of-the-two allocations in
180 // the common "unmasked data transformations" path.
181 Self {
182 data: self.data.clone(),
183 mask: Arc::clone(&self.mask),
184 real_mask: self.real_mask,
185 hard_mask: self.hard_mask,
186 fill_value: self.fill_value.clone(),
187 }
188 }
189}
190
191impl<T: Element, D: Dimension> MaskedArray<T, D> {
192 /// Create a new masked array from data and mask arrays.
193 ///
194 /// The `fill_value` defaults to numpy's per-dtype `default_filler`
195 /// (`numpy/ma/core.py:163`): `1e20` for float, `999999` for integer,
196 /// `true` for bool (see [`default_fill_value`]). Use
197 /// [`MaskedArray::with_fill_value`] to set a custom replacement value.
198 ///
199 /// # Errors
200 /// Returns `FerrayError::ShapeMismatch` if data and mask shapes differ.
201 pub fn new(data: Array<T, D>, mask: Array<bool, D>) -> FerrayResult<Self> {
202 if data.shape() != mask.shape() {
203 return Err(FerrayError::shape_mismatch(format!(
204 "MaskedArray::new: data shape {:?} does not match mask shape {:?}",
205 data.shape(),
206 mask.shape()
207 )));
208 }
209 let lock = OnceLock::new();
210 let _ = lock.set(mask);
211 let fill_value = default_fill_value::<T>();
212 Ok(Self {
213 data,
214 mask: Arc::new(lock),
215 real_mask: true,
216 hard_mask: false,
217 fill_value,
218 })
219 }
220
221 /// Create a masked array with no masked elements (all-false mask).
222 ///
223 /// Does NOT allocate the mask up front — the array is in the
224 /// nomask-sentinel state (#506) until [`MaskedArray::mask`] is
225 /// explicitly called. For code that only uses `data()` or
226 /// short-circuits via [`MaskedArray::has_real_mask`], this saves
227 /// a full-sized bool allocation.
228 ///
229 /// # Errors
230 /// Always returns `Ok` — the `FerrayResult` is preserved for API
231 /// parity with the previous eager implementation.
232 pub fn from_data(data: Array<T, D>) -> FerrayResult<Self> {
233 let fill_value = default_fill_value::<T>();
234 Ok(Self {
235 data,
236 mask: Arc::new(OnceLock::new()),
237 real_mask: false,
238 hard_mask: false,
239 fill_value,
240 })
241 }
242
243 /// Return `true` if this masked array holds a real (explicitly
244 /// provided or materialized) mask. Returns `false` when the array
245 /// is in the nomask-sentinel state and the mask is logically
246 /// all-false.
247 ///
248 /// Hot-path iteration code should branch on this flag to skip
249 /// mask scanning entirely when it returns `false` (#506).
250 #[inline]
251 pub const fn has_real_mask(&self) -> bool {
252 self.real_mask
253 }
254
255 /// Return the fill value used to replace masked positions.
256 ///
257 /// See [`MaskedArray::with_fill_value`] for setting it.
258 #[inline]
259 pub const fn fill_value(&self) -> T
260 where
261 T: Copy,
262 {
263 self.fill_value
264 }
265
266 /// Set the fill value, returning the modified array.
267 ///
268 /// The fill value is used by [`MaskedArray::filled`] (when called
269 /// without an explicit override) and by arithmetic operations as the
270 /// replacement for masked positions in the result data.
271 #[must_use]
272 pub fn with_fill_value(mut self, fill_value: T) -> Self {
273 self.fill_value = fill_value;
274 self
275 }
276
277 /// Replace the fill value in place.
278 pub fn set_fill_value(&mut self, fill_value: T) {
279 self.fill_value = fill_value;
280 }
281
282 /// Return a reference to the underlying data array.
283 #[inline]
284 pub const fn data(&self) -> &Array<T, D> {
285 &self.data
286 }
287
288 /// Return a reference to the mask array.
289 ///
290 /// If the array is in the nomask-sentinel state (constructed via
291 /// [`MaskedArray::from_data`] or otherwise) this lazily allocates
292 /// a full all-false `Array<bool, D>` and caches it for subsequent
293 /// calls. Use [`MaskedArray::has_real_mask`] to check whether the
294 /// mask is known to be trivial first, and skip calling `.mask()`
295 /// entirely on the hot path when you can.
296 pub fn mask(&self) -> &Array<bool, D> {
297 self.mask.get_or_init(|| {
298 Array::<bool, D>::from_elem(self.data.dim().clone(), false)
299 .expect("from_elem with matching dim cannot fail")
300 })
301 }
302
303 /// Return a reference to the mask array if one has been
304 /// materialized, or `None` when the array is still in the
305 /// nomask-sentinel state.
306 ///
307 /// Unlike [`MaskedArray::mask`], this does NOT trigger lazy
308 /// allocation — it's the fast-path query for hot code that
309 /// wants to branch on whether any mask bits are set (#506).
310 #[inline]
311 pub fn mask_opt(&self) -> Option<&Array<bool, D>> {
312 if self.real_mask {
313 // A real mask was set via `new` or `set_mask`; the
314 // OnceLock is guaranteed to be initialized.
315 self.mask.get()
316 } else {
317 None
318 }
319 }
320
321 /// Return a mutable element slice into the underlying data array.
322 ///
323 /// The masked-array invariant (data shape == mask shape) requires
324 /// the data length to remain fixed. Returning `&mut [T]` here
325 /// (instead of `&mut Array<T, D>`) lets callers update individual
326 /// element values while blocking any reshape/resize that would
327 /// break the invariant (#273). Returns `None` for non-contiguous
328 /// data layouts where a flat slice can't be exposed.
329 #[inline]
330 pub fn data_mut(&mut self) -> Option<&mut [T]> {
331 self.data.as_slice_mut()
332 }
333
334 /// Return the shape of the masked array.
335 #[inline]
336 pub fn shape(&self) -> &[usize] {
337 self.data.shape()
338 }
339
340 /// Return the number of dimensions.
341 #[inline]
342 pub fn ndim(&self) -> usize {
343 self.data.ndim()
344 }
345
346 /// Return the total number of elements (including masked).
347 #[inline]
348 pub fn size(&self) -> usize {
349 self.data.size()
350 }
351
352 /// Return the dimension descriptor.
353 #[inline]
354 pub const fn dim(&self) -> &D {
355 self.data.dim()
356 }
357
358 /// Return whether the mask is hardened.
359 #[inline]
360 pub const fn is_hard_mask(&self) -> bool {
361 self.hard_mask
362 }
363
364 /// Internal helper: force the lazy nomask sentinel to materialize a
365 /// concrete `Array<bool, D>` AND ensure the mask's `Arc` is
366 /// uniquely owned (copy-on-write), then return a mutable reference
367 /// to the inner mask.
368 ///
369 /// After this call `real_mask` is `true`, `self.mask` is guaranteed
370 /// to contain an initialized `Array<bool, D>`, and the underlying
371 /// `Arc` has refcount exactly 1 so it's safe to mutate without
372 /// aliasing any other `MaskedArray` that may have cloned from us.
373 fn ensure_materialized_mut(&mut self) -> &mut Array<bool, D> {
374 // Step 1: materialize if we're still in the nomask sentinel
375 // state. We install a fresh Arc<OnceLock> containing an
376 // all-false mask.
377 if !self.real_mask || self.mask.get().is_none() {
378 let fresh = Array::<bool, D>::from_elem(self.data.dim().clone(), false)
379 .expect("from_elem with matching dim cannot fail");
380 let lock = OnceLock::new();
381 let _ = lock.set(fresh);
382 self.mask = Arc::new(lock);
383 self.real_mask = true;
384 }
385
386 // Step 2: copy-on-write — if this Arc is shared with any
387 // clones, deep-copy the inner mask into a fresh Arc so our
388 // mutation doesn't affect the clones. Arc::get_mut returns
389 // None when refcount > 1.
390 if Arc::get_mut(&mut self.mask).is_none() {
391 let cloned_mask = self
392 .mask
393 .get()
394 .expect("real_mask implies OnceLock set")
395 .clone();
396 let new_lock = OnceLock::new();
397 let _ = new_lock.set(cloned_mask);
398 self.mask = Arc::new(new_lock);
399 }
400
401 // Step 3: now we're the unique owner — get_mut on the OnceLock
402 // for the inner Array<bool, D>.
403 Arc::get_mut(&mut self.mask)
404 .expect("just made the Arc unique above")
405 .get_mut()
406 .expect("OnceLock was initialized above")
407 }
408
409 /// Set a mask value at a flat index.
410 ///
411 /// If the mask is hardened, only `true` (masking) is allowed; attempts to
412 /// clear a mask bit are silently ignored.
413 ///
414 /// Setting a mask bit materializes the lazy nomask sentinel into a
415 /// real mask array (#506) — if you set even one bit, the full
416 /// `Array<bool, D>` is allocated.
417 ///
418 /// # Errors
419 /// Returns `FerrayError::IndexOutOfBounds` if `flat_idx >= size`.
420 pub fn set_mask_flat(&mut self, flat_idx: usize, value: bool) -> FerrayResult<()> {
421 let size = self.size();
422 if flat_idx >= size {
423 return Err(FerrayError::index_out_of_bounds(flat_idx as isize, 0, size));
424 }
425 if self.hard_mask && !value {
426 // Hard mask: cannot clear mask bits
427 return Ok(());
428 }
429 // Setting a nomask-sentinel to false is a no-op (mask is
430 // already logically all-false); skip the allocation entirely.
431 if !self.real_mask && !value {
432 return Ok(());
433 }
434 let mask = self.ensure_materialized_mut();
435 // Fast path: contiguous mask — direct O(1) slice indexing.
436 if let Some(slice) = mask.as_slice_mut() {
437 slice[flat_idx] = value;
438 } else {
439 // Non-contiguous: fall back to iterator (rare case).
440 if let Some(m) = mask.iter_mut().nth(flat_idx) {
441 *m = value;
442 }
443 }
444 Ok(())
445 }
446
447 /// Replace the mask with a new one.
448 ///
449 /// If the mask is hardened, only bits that are `true` in both the old and
450 /// new masks (or newly set to `true`) are allowed; cleared bits are ignored.
451 ///
452 /// Passing a new mask always materializes the array out of the
453 /// nomask-sentinel state — the stored mask becomes the provided
454 /// one (possibly unioned with the existing mask if hardened).
455 ///
456 /// # Errors
457 /// Returns `FerrayError::ShapeMismatch` if shapes differ.
458 pub fn set_mask(&mut self, new_mask: Array<bool, D>) -> FerrayResult<()> {
459 if self.data.shape() != new_mask.shape() {
460 return Err(FerrayError::shape_mismatch(format!(
461 "set_mask: mask shape {:?} does not match array shape {:?}",
462 new_mask.shape(),
463 self.data.shape()
464 )));
465 }
466 if self.hard_mask && self.real_mask {
467 // Hard-mask union: merge the new mask with the existing
468 // one, keeping any `true` bits and never clearing.
469 let existing = self.mask.get().expect("real_mask implies OnceLock set");
470 let merged: Vec<bool> = existing
471 .iter()
472 .zip(new_mask.iter())
473 .map(|(old, new)| *old || *new)
474 .collect();
475 let merged_arr = Array::from_vec(self.data.dim().clone(), merged)?;
476 let lock = OnceLock::new();
477 let _ = lock.set(merged_arr);
478 // Install a fresh Arc; any clones keep their own snapshot.
479 self.mask = Arc::new(lock);
480 } else {
481 // Either not hardened or currently in the nomask sentinel
482 // state — unconditionally install the new mask in a fresh
483 // Arc (copy-on-write: clones remain unaffected).
484 let lock = OnceLock::new();
485 let _ = lock.set(new_mask);
486 self.mask = Arc::new(lock);
487 }
488 self.real_mask = true;
489 Ok(())
490 }
491
492 /// Return `true` when this masked array's underlying mask is
493 /// structurally shared with at least one other `MaskedArray`.
494 ///
495 /// After a `clone()` the original and the clone share the same
496 /// mask via `Arc` until one of them mutates it (copy-on-write, #512).
497 /// Hot-path code can use this to reason about memory sharing —
498 /// `shares_mask() == false` means the mask is uniquely owned and
499 /// can be mutated without affecting any other `MaskedArray`.
500 #[inline]
501 pub fn shares_mask(&self) -> bool {
502 Arc::strong_count(&self.mask) > 1
503 }
504}
505
#[cfg(test)]
mod tests {
    use super::*;
    use ferray_core::Array;
    use ferray_core::dimension::Ix1;

    /// Builds a 1-D `f64` array from `data`.
    fn arr_f64(data: Vec<f64>) -> Array<f64, Ix1> {
        let n = data.len();
        Array::<f64, Ix1>::from_vec(Ix1::new([n]), data).unwrap()
    }

    /// Builds a 1-D `bool` array from `data`.
    fn arr_bool(data: Vec<bool>) -> Array<bool, Ix1> {
        let n = data.len();
        Array::<bool, Ix1>::from_vec(Ix1::new([n]), data).unwrap()
    }

    /// Collects the mask bits of `ma` in iteration order. Note that this
    /// goes through `mask()` and therefore materializes a nomask sentinel.
    fn mask_bits(ma: &MaskedArray<f64, Ix1>) -> Vec<bool> {
        ma.mask().iter().copied().collect()
    }

    // ---- nomask sentinel (#506) ----

    #[test]
    fn from_data_starts_in_nomask_sentinel_state() {
        let ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        assert!(!ma.has_real_mask());
        assert!(ma.mask_opt().is_none());
    }

    #[test]
    fn new_with_explicit_mask_is_real_mask() {
        let ma = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, true, false]),
        )
        .unwrap();
        assert!(ma.has_real_mask());
        assert!(ma.mask_opt().is_some());
    }

    #[test]
    fn mask_accessor_lazily_materializes_nomask_sentinel() {
        let ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        // Before calling .mask(), the OnceLock is empty.
        assert!(ma.mask_opt().is_none());
        // After calling .mask(), we get a full all-false Array<bool, D>.
        let m = ma.mask();
        assert_eq!(m.shape(), &[3]);
        assert_eq!(
            m.iter().copied().collect::<Vec<_>>(),
            vec![false, false, false]
        );
        // Subsequent calls return the same cached array (no re-alloc).
        let m2 = ma.mask();
        assert_eq!(std::ptr::from_ref(m), std::ptr::from_ref(m2));
        // BUT `has_real_mask` still reports `false` — the lazy
        // materialization doesn't promote the sentinel to a "real" mask
        // because the contents are still logically all-false. Hot-path
        // code can keep skipping.
        assert!(!ma.has_real_mask());
    }

    #[test]
    fn set_mask_flat_false_on_nomask_stays_zero_allocation() {
        // Setting a position to false on a nomask-sentinel array is a
        // no-op and should NOT materialize the mask.
        let mut ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        ma.set_mask_flat(1, false).unwrap();
        assert!(!ma.has_real_mask());
        assert!(ma.mask_opt().is_none());
    }

    #[test]
    fn set_mask_flat_true_on_nomask_materializes_and_promotes() {
        // Setting a position to true forces materialization and
        // promotes `real_mask` to true.
        let mut ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        ma.set_mask_flat(1, true).unwrap();
        assert!(ma.has_real_mask());
        assert_eq!(mask_bits(&ma), vec![false, true, false]);
    }

    #[test]
    fn set_mask_flat_true_after_lazy_materialization_promotes() {
        // A sentinel whose all-false mask was already materialized by
        // `.mask()` must still end up with exactly the requested bit set.
        let mut ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        let _ = ma.mask();
        assert!(!ma.has_real_mask());
        ma.set_mask_flat(2, true).unwrap();
        assert!(ma.has_real_mask());
        assert_eq!(mask_bits(&ma), vec![false, false, true]);
    }

    #[test]
    fn set_mask_flat_out_of_bounds_errors() {
        // An out-of-range index is rejected before any mask is allocated.
        let mut ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        assert!(ma.set_mask_flat(3, true).is_err());
        assert!(ma.set_mask_flat(usize::MAX, true).is_err());
        assert!(!ma.has_real_mask());
        assert!(ma.mask_opt().is_none());
    }

    #[test]
    fn set_mask_promotes_and_keeps_provided_values() {
        let mut ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        assert!(!ma.has_real_mask());
        ma.set_mask(arr_bool(vec![true, false, true])).unwrap();
        assert!(ma.has_real_mask());
        assert_eq!(mask_bits(&ma), vec![true, false, true]);
    }

    #[test]
    fn set_mask_shape_mismatch_errors() {
        let mut ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        assert!(ma.set_mask(arr_bool(vec![false; 4])).is_err());
    }

    #[test]
    fn clone_preserves_nomask_sentinel_state() {
        let ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        let cloned = ma.clone();
        assert!(!cloned.has_real_mask());
        assert!(cloned.mask_opt().is_none());
        // Cloning leaves the source in the sentinel state as well.
        assert!(!ma.has_real_mask());
        assert!(ma.mask_opt().is_none());
    }

    #[test]
    fn clone_after_materialization_copies_the_mask() {
        let ma = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        // Force materialization.
        let _ = ma.mask();
        let cloned = ma.clone();
        // The clone has the same mask contents (all-false) and is still
        // not considered to hold a real mask.
        assert_eq!(mask_bits(&cloned), vec![false, false, false]);
        assert!(!cloned.has_real_mask());
        // The source still reads back the same contents.
        assert_eq!(mask_bits(&ma), vec![false, false, false]);
    }

    #[test]
    fn clone_preserves_real_mask_state() {
        let ma = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, true, false]),
        )
        .unwrap();
        let cloned = ma.clone();
        assert!(cloned.has_real_mask());
        assert_eq!(mask_bits(&cloned), vec![false, true, false]);
        // The source is unchanged by cloning.
        assert!(ma.has_real_mask());
        assert_eq!(mask_bits(&ma), vec![false, true, false]);
    }

    // ---- shared mask with copy-on-write (#512) ----

    #[test]
    fn clone_shares_mask_via_arc() {
        let ma = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, true, false]),
        )
        .unwrap();
        let cloned = ma.clone();
        // Both copies should report structural sharing.
        assert!(ma.shares_mask());
        assert!(cloned.shares_mask());
    }

    #[test]
    fn unique_masked_array_does_not_share() {
        let ma = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, true, false]),
        )
        .unwrap();
        assert!(!ma.shares_mask());
    }

    #[test]
    fn copy_on_write_isolates_parent_from_child_mutation() {
        // Clone, then mutate the mask of the clone. The parent's mask
        // must be unchanged even though they started sharing an Arc.
        let parent = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, false, false]),
        )
        .unwrap();
        let mut child = parent.clone();
        assert!(parent.shares_mask());
        assert!(child.shares_mask());

        // Mutate the child — triggers copy-on-write.
        child.set_mask_flat(1, true).unwrap();

        // Parent's mask is still the original all-false.
        assert_eq!(mask_bits(&parent), vec![false, false, false]);
        // Child's mask reflects the mutation.
        assert_eq!(mask_bits(&child), vec![false, true, false]);

        // Parent no longer shares (the child's CoW broke the Arc's
        // dual ownership by installing its own).
        assert!(!parent.shares_mask());
        assert!(!child.shares_mask());
    }

    #[test]
    fn copy_on_write_via_set_mask() {
        // set_mask replaces the Arc entirely, which also implicitly
        // isolates the two.
        let parent = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![false, false, false]),
        )
        .unwrap();
        let mut child = parent.clone();
        assert!(parent.shares_mask());

        child.set_mask(arr_bool(vec![true, true, true])).unwrap();
        // Parent still has the original mask.
        assert_eq!(mask_bits(&parent), vec![false, false, false]);
        // Child has the new mask.
        assert_eq!(mask_bits(&child), vec![true, true, true]);
        assert!(!parent.shares_mask());
    }

    #[test]
    fn nomask_sentinel_clones_share_empty_arc() {
        // A from_data-constructed array in the nomask-sentinel state
        // still uses an Arc; clones share it.
        let parent = MaskedArray::from_data(arr_f64(vec![1.0, 2.0, 3.0])).unwrap();
        let cloned = parent.clone();
        assert!(parent.shares_mask());
        assert!(cloned.shares_mask());
        // Neither has a real mask yet.
        assert!(!parent.has_real_mask());
        assert!(!cloned.has_real_mask());
    }

    #[test]
    fn hard_mask_union_on_real_mask() {
        let mut ma = MaskedArray::new(
            arr_f64(vec![1.0, 2.0, 3.0]),
            arr_bool(vec![true, false, false]),
        )
        .unwrap();
        ma.harden_mask().unwrap();
        // Try to clear position 0 and set position 2. With a hard
        // mask, the union keeps position 0's true bit.
        ma.set_mask(arr_bool(vec![false, false, true])).unwrap();
        assert_eq!(mask_bits(&ma), vec![true, false, true]);
    }
}
752}