Skip to main content

scirs2_core/memory_efficient/
mmap_array.rs

1//! Memory-Mapped Array Wrapper for SciRS2
2//!
3//! Provides a generic `MmapArray<T>` that wraps a memory-mapped file-backed array
4//! with support for read-only, read-write, and copy-on-write (COW) semantics.
5//!
6//! # Features
7//!
8//! - Zero-copy views into memory-mapped regions
9//! - COW semantics for safe mutation without affecting the underlying file
10//! - Typed access with proper alignment checking
11//! - Integration with ndarray for scientific computing
12//!
13//! # Example
14//!
15//! ```rust,no_run
16//! # #[cfg(feature = "memory_efficient")]
17//! # {
18//! use scirs2_core::memory_efficient::mmap_array::{MmapArray, MmapMode};
19//! use std::path::Path;
20//!
21//! // Create from existing data
22//! let data = vec![1.0f64, 2.0, 3.0, 4.0];
23//! let path = Path::new("/tmp/test_array.dat");
24//! let arr = MmapArray::<f64>::from_slice(&data, path, MmapMode::ReadWrite)
25//!     .expect("Failed to create mmap array");
26//!
27//! // Access as a slice (zero-copy)
28//! let slice = arr.as_slice().expect("Failed to get slice");
29//! assert_eq!(slice.len(), 4);
30//! # }
31//! ```
32
33use crate::error::{CoreError, CoreResult, ErrorContext, ErrorLocation};
34use memmap2::{Mmap, MmapMut, MmapOptions};
35use std::fs::{File, OpenOptions};
36use std::io::Write;
37use std::marker::PhantomData;
38use std::path::{Path, PathBuf};
39use std::sync::atomic::{AtomicBool, Ordering};
40
/// Access mode selected when a memory-mapped array is opened.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MmapMode {
    /// The mapping may only be read; requests for mutable access are rejected.
    ReadOnly,
    /// The mapping may be read and written; writes reach the backing file.
    ReadWrite,
    /// Reads are served from the file while writes land in a private
    /// in-memory copy that is never written back to the file.
    CopyOnWrite,
}

impl MmapMode {
    /// Short lowercase label for this mode, suitable for log and error messages.
    pub const fn description(&self) -> &'static str {
        match *self {
            Self::ReadOnly => "read-only",
            Self::ReadWrite => "read-write",
            Self::CopyOnWrite => "copy-on-write",
        }
    }
}
63
/// A file header stored at the beginning of MmapArray files.
/// This allows reopening files with proper type and size information.
///
/// With `#[repr(C)]` the fields are laid out in declaration order:
/// 8 + 4 + 4 + 8 + 4 + 36 = 64 bytes, and each field starts on a multiple of
/// its own alignment, so the struct contains no padding bytes.
///
/// The header is written by dumping the struct's in-memory bytes (see
/// `to_bytes`), so the integer fields are stored in the byte order of the
/// machine that created the file.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct MmapArrayHeader {
    /// Magic bytes for file identification: "SCI2MMAP"
    magic: [u8; 8],
    /// Version of the file format (compared against `Self::VERSION` on open)
    version: u32,
    /// Size of each element in bytes (`size_of::<T>()` at creation time)
    element_size: u32,
    /// Total number of elements stored after the header
    num_elements: u64,
    /// Alignment requirement for elements (`align_of::<T>()` at creation time)
    alignment: u32,
    /// Reserved for future use; written as zeros
    _reserved: [u8; 36],
}
82
83impl MmapArrayHeader {
84    const MAGIC: [u8; 8] = *b"SCI2MMAP";
85    const VERSION: u32 = 1;
86    const HEADER_SIZE: usize = std::mem::size_of::<Self>();
87
88    fn new<T>(num_elements: usize) -> Self {
89        Self {
90            magic: Self::MAGIC,
91            version: Self::VERSION,
92            element_size: std::mem::size_of::<T>() as u32,
93            num_elements: num_elements as u64,
94            alignment: std::mem::align_of::<T>() as u32,
95            _reserved: [0u8; 36],
96        }
97    }
98
99    fn validate<T>(&self) -> CoreResult<()> {
100        if self.magic != Self::MAGIC {
101            return Err(CoreError::ValidationError(
102                ErrorContext::new("Invalid magic bytes in MmapArray file".to_string())
103                    .with_location(ErrorLocation::new(file!(), line!())),
104            ));
105        }
106        if self.version != Self::VERSION {
107            return Err(CoreError::ValidationError(
108                ErrorContext::new(format!(
109                    "Unsupported MmapArray file version: {} (expected {})",
110                    self.version,
111                    Self::VERSION
112                ))
113                .with_location(ErrorLocation::new(file!(), line!())),
114            ));
115        }
116        if self.element_size != std::mem::size_of::<T>() as u32 {
117            return Err(CoreError::ValidationError(
118                ErrorContext::new(format!(
119                    "Element size mismatch: file has {} bytes, expected {} bytes for type {}",
120                    self.element_size,
121                    std::mem::size_of::<T>(),
122                    std::any::type_name::<T>()
123                ))
124                .with_location(ErrorLocation::new(file!(), line!())),
125            ));
126        }
127        if self.alignment != std::mem::align_of::<T>() as u32 {
128            return Err(CoreError::ValidationError(
129                ErrorContext::new(format!(
130                    "Alignment mismatch: file requires {} bytes, type {} requires {} bytes",
131                    self.alignment,
132                    std::any::type_name::<T>(),
133                    std::mem::align_of::<T>()
134                ))
135                .with_location(ErrorLocation::new(file!(), line!())),
136            ));
137        }
138        Ok(())
139    }
140
141    #[allow(clippy::wrong_self_convention)]
142    fn to_bytes(&self) -> &[u8] {
143        // SAFETY: MmapArrayHeader is repr(C) and contains only plain data types
144        unsafe {
145            std::slice::from_raw_parts(
146                self as *const Self as *const u8,
147                std::mem::size_of::<Self>(),
148            )
149        }
150    }
151
152    fn from_bytes(bytes: &[u8]) -> CoreResult<&Self> {
153        if bytes.len() < std::mem::size_of::<Self>() {
154            return Err(CoreError::ValidationError(
155                ErrorContext::new(format!(
156                    "File too small for header: {} bytes (need at least {})",
157                    bytes.len(),
158                    std::mem::size_of::<Self>()
159                ))
160                .with_location(ErrorLocation::new(file!(), line!())),
161            ));
162        }
163        let ptr = bytes.as_ptr() as *const Self;
164        if (ptr as usize) % std::mem::align_of::<Self>() != 0 {
165            // Copy to aligned buffer if unaligned
166            return Err(CoreError::MemoryError(
167                ErrorContext::new("Header data is not properly aligned".to_string())
168                    .with_location(ErrorLocation::new(file!(), line!())),
169            ));
170        }
171        // SAFETY: We checked size and alignment above
172        Ok(unsafe { &*ptr })
173    }
174}
175
/// Internal storage for the memory map
///
/// Which variant is used is decided by the `MmapMode` passed to `MmapArray::open`.
enum MmapStorage {
    /// Read-only memory map (created with `MmapOptions::map`)
    ReadOnly(Mmap),
    /// Mutable memory map (read-write via `map_mut`, or COW via `map_copy`)
    Mutable(MmapMut),
}
183
/// A generic memory-mapped array backed by a file.
///
/// `MmapArray<T>` provides zero-copy access to array data stored in a file,
/// using the operating system's virtual memory system to page data in and out
/// as needed.
///
/// The backing file starts with a fixed-size header (magic bytes, format
/// version, element size, element count, alignment) followed directly by the
/// element data.
///
/// # Type Requirements
///
/// `T` must be `Copy + Send + Sync + 'static` to ensure safe memory-mapped access.
/// The type must also have a fixed, well-defined memory layout (no padding concerns
/// for single elements). Zero-sized types are rejected by the constructors.
///
/// # Thread Safety
///
/// `MmapArray` is `Send + Sync` and can be shared across threads. However,
/// concurrent mutation (with `ReadWrite` mode) requires external synchronization.
pub struct MmapArray<T: Copy + Send + Sync + 'static> {
    /// The underlying memory map (covers the whole file, header included)
    storage: MmapStorage,
    /// Path to the backing file
    file_path: PathBuf,
    /// Access mode
    mode: MmapMode,
    /// Number of elements of type T
    num_elements: usize,
    /// Data offset (after header) in bytes
    data_offset: usize,
    /// Whether the COW copy has been materialized
    cow_materialized: AtomicBool,
    /// COW buffer (populated lazily on first write in COW mode)
    cow_buffer: std::sync::Mutex<Option<Vec<T>>>,
    /// Phantom type marker
    _phantom: PhantomData<T>,
}
218
// SAFETY: MmapArray is safe to send/share because:
// 1. T: Send + Sync
// 2. Mmap/MmapMut are Send + Sync
// 3. Internal mutation is protected by Mutex
// NOTE(review): under these bounds every field type looks `Send + Sync` on its
// own, in which case the auto traits would already apply and these manual impls
// would be redundant — confirm before removing them.
unsafe impl<T: Copy + Send + Sync + 'static> Send for MmapArray<T> {}
unsafe impl<T: Copy + Send + Sync + 'static> Sync for MmapArray<T> {}
225
226impl<T: Copy + Send + Sync + 'static> MmapArray<T> {
227    /// Create a new `MmapArray` from a slice of data, writing it to the specified path.
228    ///
229    /// This creates a new file (or truncates an existing one) and writes
230    /// the header and data to it.
231    pub fn from_slice(data: &[T], path: &Path, mode: MmapMode) -> CoreResult<Self> {
232        let num_elements = data.len();
233        let element_size = std::mem::size_of::<T>();
234
235        if element_size == 0 {
236            return Err(CoreError::InvalidArgument(
237                ErrorContext::new("Zero-sized types are not supported for MmapArray".to_string())
238                    .with_location(ErrorLocation::new(file!(), line!())),
239            ));
240        }
241
242        // Write header and data to file
243        let header = MmapArrayHeader::new::<T>(num_elements);
244        let data_bytes = unsafe {
245            std::slice::from_raw_parts(data.as_ptr() as *const u8, std::mem::size_of_val(data))
246        };
247
248        let mut file = OpenOptions::new()
249            .read(true)
250            .write(true)
251            .create(true)
252            .truncate(true)
253            .open(path)
254            .map_err(|e| {
255                CoreError::IoError(
256                    ErrorContext::new(format!("Failed to create file {}: {e}", path.display()))
257                        .with_location(ErrorLocation::new(file!(), line!())),
258                )
259            })?;
260
261        file.write_all(header.to_bytes()).map_err(|e| {
262            CoreError::IoError(
263                ErrorContext::new(format!("Failed to write header: {e}"))
264                    .with_location(ErrorLocation::new(file!(), line!())),
265            )
266        })?;
267        file.write_all(data_bytes).map_err(|e| {
268            CoreError::IoError(
269                ErrorContext::new(format!("Failed to write data: {e}"))
270                    .with_location(ErrorLocation::new(file!(), line!())),
271            )
272        })?;
273        file.flush().map_err(|e| {
274            CoreError::IoError(
275                ErrorContext::new(format!("Failed to flush file: {e}"))
276                    .with_location(ErrorLocation::new(file!(), line!())),
277            )
278        })?;
279
280        // Now memory-map the file
281        Self::open(path, mode)
282    }
283
284    /// Open an existing `MmapArray` file.
285    ///
286    /// The file must have been created by `MmapArray::from_slice` or
287    /// `MmapArray::from_ndarray`. The type parameter `T` must match the
288    /// type used when the file was created.
289    pub fn open(path: &Path, mode: MmapMode) -> CoreResult<Self> {
290        let element_size = std::mem::size_of::<T>();
291        if element_size == 0 {
292            return Err(CoreError::InvalidArgument(
293                ErrorContext::new("Zero-sized types are not supported for MmapArray".to_string())
294                    .with_location(ErrorLocation::new(file!(), line!())),
295            ));
296        }
297
298        let file = match mode {
299            MmapMode::ReadOnly | MmapMode::CopyOnWrite => File::open(path).map_err(|e| {
300                CoreError::IoError(
301                    ErrorContext::new(format!(
302                        "Failed to open file {} for reading: {e}",
303                        path.display()
304                    ))
305                    .with_location(ErrorLocation::new(file!(), line!())),
306                )
307            })?,
308            MmapMode::ReadWrite => OpenOptions::new()
309                .read(true)
310                .write(true)
311                .open(path)
312                .map_err(|e| {
313                    CoreError::IoError(
314                        ErrorContext::new(format!(
315                            "Failed to open file {} for read-write: {e}",
316                            path.display()
317                        ))
318                        .with_location(ErrorLocation::new(file!(), line!())),
319                    )
320                })?,
321        };
322
323        let file_len = file
324            .metadata()
325            .map_err(|e| {
326                CoreError::IoError(
327                    ErrorContext::new(format!("Failed to get file metadata: {e}"))
328                        .with_location(ErrorLocation::new(file!(), line!())),
329                )
330            })?
331            .len() as usize;
332
333        if file_len < MmapArrayHeader::HEADER_SIZE {
334            return Err(CoreError::ValidationError(
335                ErrorContext::new(format!(
336                    "File is too small ({} bytes) to contain a valid MmapArray header ({} bytes required)",
337                    file_len,
338                    MmapArrayHeader::HEADER_SIZE
339                ))
340                .with_location(ErrorLocation::new(file!(), line!())),
341            ));
342        }
343
344        // Memory-map the entire file
345        let storage = match mode {
346            MmapMode::ReadOnly => {
347                let mmap = unsafe {
348                    MmapOptions::new().map(&file).map_err(|e| {
349                        CoreError::IoError(
350                            ErrorContext::new(format!("Failed to create read-only mmap: {e}"))
351                                .with_location(ErrorLocation::new(file!(), line!())),
352                        )
353                    })?
354                };
355                MmapStorage::ReadOnly(mmap)
356            }
357            MmapMode::ReadWrite => {
358                let mmap = unsafe {
359                    MmapOptions::new().map_mut(&file).map_err(|e| {
360                        CoreError::IoError(
361                            ErrorContext::new(format!("Failed to create read-write mmap: {e}"))
362                                .with_location(ErrorLocation::new(file!(), line!())),
363                        )
364                    })?
365                };
366                MmapStorage::Mutable(mmap)
367            }
368            MmapMode::CopyOnWrite => {
369                let mmap = unsafe {
370                    MmapOptions::new().map_copy(&file).map_err(|e| {
371                        CoreError::IoError(
372                            ErrorContext::new(format!("Failed to create COW mmap: {e}"))
373                                .with_location(ErrorLocation::new(file!(), line!())),
374                        )
375                    })?
376                };
377                MmapStorage::Mutable(mmap)
378            }
379        };
380
381        // Read and validate header
382        let raw_bytes = match &storage {
383            MmapStorage::ReadOnly(mmap) => &mmap[..],
384            MmapStorage::Mutable(mmap) => &mmap[..],
385        };
386        let header = MmapArrayHeader::from_bytes(raw_bytes)?;
387        header.validate::<T>()?;
388
389        let num_elements = header.num_elements as usize;
390        let data_offset = MmapArrayHeader::HEADER_SIZE;
391        let expected_data_size = num_elements * element_size;
392
393        if file_len < data_offset + expected_data_size {
394            return Err(CoreError::ValidationError(
395                ErrorContext::new(format!(
396                    "File too small: {} bytes, need {} bytes (header: {}, data: {} elements * {} bytes)",
397                    file_len,
398                    data_offset + expected_data_size,
399                    data_offset,
400                    num_elements,
401                    element_size
402                ))
403                .with_location(ErrorLocation::new(file!(), line!())),
404            ));
405        }
406
407        // Validate data alignment
408        let data_ptr = match &storage {
409            MmapStorage::ReadOnly(mmap) => mmap[data_offset..].as_ptr(),
410            MmapStorage::Mutable(mmap) => mmap[data_offset..].as_ptr(),
411        };
412        if (data_ptr as usize) % std::mem::align_of::<T>() != 0 {
413            return Err(CoreError::MemoryError(
414                ErrorContext::new(format!(
415                    "Memory-mapped data is not properly aligned for type {} (alignment: {}, address: 0x{:x})",
416                    std::any::type_name::<T>(),
417                    std::mem::align_of::<T>(),
418                    data_ptr as usize
419                ))
420                .with_location(ErrorLocation::new(file!(), line!())),
421            ));
422        }
423
424        Ok(Self {
425            storage,
426            file_path: path.to_path_buf(),
427            mode,
428            num_elements,
429            data_offset,
430            cow_materialized: AtomicBool::new(false),
431            cow_buffer: std::sync::Mutex::new(None),
432            _phantom: PhantomData,
433        })
434    }
435
436    /// Create a new `MmapArray` from an ndarray Array1.
437    pub fn from_ndarray(
438        array: &::ndarray::Array1<T>,
439        path: &Path,
440        mode: MmapMode,
441    ) -> CoreResult<Self> {
442        let slice = array.as_slice().ok_or_else(|| {
443            CoreError::InvalidArgument(
444                ErrorContext::new("Array must be contiguous for memory mapping".to_string())
445                    .with_location(ErrorLocation::new(file!(), line!())),
446            )
447        })?;
448        Self::from_slice(slice, path, mode)
449    }
450
    /// Get the number of elements in the array
    ///
    /// This is the element count recorded when the array was opened; it does
    /// not include the file header.
    pub fn len(&self) -> usize {
        self.num_elements
    }
455
456    /// Check if the array is empty
457    pub fn is_empty(&self) -> bool {
458        self.num_elements == 0
459    }
460
461    /// Get the size in bytes of the data portion (excluding header)
462    pub fn data_size_bytes(&self) -> usize {
463        self.num_elements * std::mem::size_of::<T>()
464    }
465
466    /// Get the total file size in bytes (header + data)
467    pub fn file_size_bytes(&self) -> usize {
468        self.data_offset + self.data_size_bytes()
469    }
470
    /// Get the access mode
    ///
    /// This is the `MmapMode` the array was opened with.
    pub fn mode(&self) -> MmapMode {
        self.mode
    }
475
476    /// Get the file path
477    pub fn path(&self) -> &Path {
478        &self.file_path
479    }
480
    /// Get a zero-copy immutable view of the data as a slice.
    ///
    /// For COW mode, if the buffer has been materialized (i.e., a write
    /// occurred), this returns the COW buffer instead of the mmap data.
    /// In every other case the slice points directly into the mapped region
    /// that follows the header.
    ///
    /// # Errors
    ///
    /// Returns `CoreError::MemoryError` if the COW buffer mutex is poisoned, or
    /// any error produced by `raw_data_slice`.
    pub fn as_slice(&self) -> CoreResult<&[T]> {
        // The atomic flag is checked first, so the mutex is only taken once a COW
        // buffer has actually been created.
        if self.mode == MmapMode::CopyOnWrite && self.cow_materialized.load(Ordering::Acquire) {
            let guard = self.cow_buffer.lock().map_err(|e| {
                CoreError::MemoryError(
                    ErrorContext::new(format!("Failed to lock COW buffer: {e}"))
                        .with_location(ErrorLocation::new(file!(), line!())),
                )
            })?;
            if let Some(ref buf) = *guard {
                // SAFETY: We return a reference to the Vec's contents.
                // The Vec is behind a Mutex and won't be dropped while this reference exists
                // because the caller holds a reference to self.
                // This is safe as long as the caller doesn't mutate through as_slice_mut simultaneously.
                //
                // The returned slice outlives `guard`: the pointer/length pair is
                // re-borrowed with the lifetime of `&self`, not of the lock. In the
                // code visible here the buffer is only written through
                // `as_slice_mut`, which takes `&mut self` and therefore cannot
                // overlap with this shared borrow.
                let ptr = buf.as_ptr();
                let len = buf.len();
                return Ok(unsafe { std::slice::from_raw_parts(ptr, len) });
            }
        }
        // No COW buffer in use: read straight from the mapping.
        self.raw_data_slice()
    }
505
506    /// Get a zero-copy mutable view of the data as a mutable slice.
507    ///
508    /// # Errors
509    ///
510    /// Returns an error if the array is in ReadOnly mode.
511    ///
512    /// For COW mode, the first call to this method materializes the COW buffer
513    /// by copying the mapped data. Subsequent calls return the COW buffer directly.
514    pub fn as_slice_mut(&mut self) -> CoreResult<&mut [T]> {
515        match self.mode {
516            MmapMode::ReadOnly => Err(CoreError::InvalidArgument(
517                ErrorContext::new("Cannot get mutable slice for read-only MmapArray".to_string())
518                    .with_location(ErrorLocation::new(file!(), line!())),
519            )),
520            MmapMode::ReadWrite => {
521                match &self.storage {
522                    MmapStorage::Mutable(mmap) => {
523                        let ptr = mmap[self.data_offset..].as_ptr() as *mut T;
524                        // SAFETY: We checked mode is ReadWrite, the data is aligned,
525                        // and the mmap is mutable.
526                        Ok(unsafe { std::slice::from_raw_parts_mut(ptr, self.num_elements) })
527                    }
528                    MmapStorage::ReadOnly(_) => Err(CoreError::MemoryError(
529                        ErrorContext::new(
530                            "Internal error: ReadWrite mode but storage is ReadOnly".to_string(),
531                        )
532                        .with_location(ErrorLocation::new(file!(), line!())),
533                    )),
534                }
535            }
536            MmapMode::CopyOnWrite => {
537                self.materialize_cow()?;
538                let mut guard = self.cow_buffer.lock().map_err(|e| {
539                    CoreError::MemoryError(
540                        ErrorContext::new(format!("Failed to lock COW buffer: {e}"))
541                            .with_location(ErrorLocation::new(file!(), line!())),
542                    )
543                })?;
544                if let Some(ref mut buf) = *guard {
545                    let ptr = buf.as_mut_ptr();
546                    let len = buf.len();
547                    // SAFETY: We own the buffer and are returning a mutable reference.
548                    Ok(unsafe { std::slice::from_raw_parts_mut(ptr, len) })
549                } else {
550                    Err(CoreError::MemoryError(
551                        ErrorContext::new(
552                            "COW buffer not materialized despite materialize_cow call".to_string(),
553                        )
554                        .with_location(ErrorLocation::new(file!(), line!())),
555                    ))
556                }
557            }
558        }
559    }
560
561    /// Get a single element by index.
562    pub fn get(&self, index: usize) -> CoreResult<T> {
563        if index >= self.num_elements {
564            return Err(CoreError::InvalidArgument(
565                ErrorContext::new(format!(
566                    "Index {} out of bounds for MmapArray of length {}",
567                    index, self.num_elements
568                ))
569                .with_location(ErrorLocation::new(file!(), line!())),
570            ));
571        }
572        let slice = self.as_slice()?;
573        Ok(slice[index])
574    }
575
576    /// Set a single element by index.
577    ///
578    /// # Errors
579    ///
580    /// Returns an error if the array is in ReadOnly mode or the index is out of bounds.
581    pub fn set(&mut self, index: usize, value: T) -> CoreResult<()> {
582        if index >= self.num_elements {
583            return Err(CoreError::InvalidArgument(
584                ErrorContext::new(format!(
585                    "Index {} out of bounds for MmapArray of length {}",
586                    index, self.num_elements
587                ))
588                .with_location(ErrorLocation::new(file!(), line!())),
589            ));
590        }
591        let slice = self.as_slice_mut()?;
592        slice[index] = value;
593        Ok(())
594    }
595
596    /// Convert to an ndarray Array1 by copying the data.
597    pub fn to_ndarray(&self) -> CoreResult<::ndarray::Array1<T>> {
598        let slice = self.as_slice()?;
599        Ok(::ndarray::Array1::from_vec(slice.to_vec()))
600    }
601
602    /// Convert to an ndarray ArrayView1 (zero-copy).
603    ///
604    /// The returned view borrows from this MmapArray.
605    pub fn as_ndarray_view(&self) -> CoreResult<::ndarray::ArrayView1<T>> {
606        let slice = self.as_slice()?;
607        Ok(::ndarray::ArrayView1::from(slice))
608    }
609
610    /// Flush changes to disk (only meaningful for ReadWrite mode).
611    pub fn flush(&self) -> CoreResult<()> {
612        match (&self.storage, self.mode) {
613            (MmapStorage::Mutable(mmap), MmapMode::ReadWrite) => mmap.flush().map_err(|e| {
614                CoreError::IoError(
615                    ErrorContext::new(format!("Failed to flush mmap: {e}"))
616                        .with_location(ErrorLocation::new(file!(), line!())),
617                )
618            }),
619            _ => Ok(()), // No-op for read-only and COW modes
620        }
621    }
622
623    /// Flush changes asynchronously (non-blocking).
624    pub fn flush_async(&self) -> CoreResult<()> {
625        match (&self.storage, self.mode) {
626            (MmapStorage::Mutable(mmap), MmapMode::ReadWrite) => mmap.flush_async().map_err(|e| {
627                CoreError::IoError(
628                    ErrorContext::new(format!("Failed to async flush mmap: {e}"))
629                        .with_location(ErrorLocation::new(file!(), line!())),
630                )
631            }),
632            _ => Ok(()),
633        }
634    }
635
636    /// Create a read-only view of a subrange of the array (zero-copy).
637    pub fn view(&self, start: usize, len: usize) -> CoreResult<&[T]> {
638        if start + len > self.num_elements {
639            return Err(CoreError::InvalidArgument(
640                ErrorContext::new(format!(
641                    "View range [{}..{}] out of bounds for MmapArray of length {}",
642                    start,
643                    start + len,
644                    self.num_elements
645                ))
646                .with_location(ErrorLocation::new(file!(), line!())),
647            ));
648        }
649        let full_slice = self.as_slice()?;
650        Ok(&full_slice[start..start + len])
651    }
652
653    /// Iterate over the elements of the array.
654    pub fn iter(&self) -> CoreResult<std::slice::Iter<'_, T>> {
655        let slice = self.as_slice()?;
656        Ok(slice.iter())
657    }
658
659    /// Get raw pointer to the data (for advanced use).
660    ///
661    /// # Safety
662    ///
663    /// The returned pointer is only valid while `self` is alive and the
664    /// underlying memory map is not remapped.
665    pub fn as_ptr(&self) -> CoreResult<*const T> {
666        let slice = self.as_slice()?;
667        Ok(slice.as_ptr())
668    }
669
670    /// Apply a function to each element and return a new Vec.
671    pub fn map<U, F>(&self, f: F) -> CoreResult<Vec<U>>
672    where
673        F: Fn(T) -> U,
674    {
675        let slice = self.as_slice()?;
676        Ok(slice.iter().map(|&x| f(x)).collect())
677    }
678
679    /// Apply a function to chunks of elements.
680    /// Useful for processing large arrays without loading everything at once.
681    pub fn chunked_iter(&self, chunk_size: usize) -> CoreResult<MmapChunkIter<'_, T>> {
682        if chunk_size == 0 {
683            return Err(CoreError::InvalidArgument(
684                ErrorContext::new("Chunk size must be greater than 0".to_string())
685                    .with_location(ErrorLocation::new(file!(), line!())),
686            ));
687        }
688        let slice = self.as_slice()?;
689        Ok(MmapChunkIter {
690            data: slice,
691            chunk_size,
692            position: 0,
693        })
694    }
695
696    // --- Private helpers ---
697
698    /// Get the raw data slice from the memory map
699    fn raw_data_slice(&self) -> CoreResult<&[T]> {
700        let raw_bytes = match &self.storage {
701            MmapStorage::ReadOnly(mmap) => &mmap[self.data_offset..],
702            MmapStorage::Mutable(mmap) => &mmap[self.data_offset..],
703        };
704        let ptr = raw_bytes.as_ptr() as *const T;
705
706        // Validate alignment
707        if (ptr as usize) % std::mem::align_of::<T>() != 0 {
708            return Err(CoreError::MemoryError(
709                ErrorContext::new(format!(
710                    "Data pointer is not properly aligned for type {}",
711                    std::any::type_name::<T>()
712                ))
713                .with_location(ErrorLocation::new(file!(), line!())),
714            ));
715        }
716
717        // SAFETY: pointer is aligned, data is within the mmap bounds, and
718        // we validated the number of elements during open/creation
719        Ok(unsafe { std::slice::from_raw_parts(ptr, self.num_elements) })
720    }
721
    /// Materialize the COW buffer by copying data from the mmap
    ///
    /// Does nothing when the `cow_materialized` flag is already set. Otherwise
    /// the mapped elements are copied into a `Vec`, stored in `cow_buffer`
    /// unless a buffer is already present, and the flag is set.
    ///
    /// # Errors
    ///
    /// Returns any error from `raw_data_slice`, or `CoreError::MemoryError` if
    /// the COW buffer mutex is poisoned.
    fn materialize_cow(&self) -> CoreResult<()> {
        // Fast path: skip the copy and the lock once a buffer exists.
        if self.cow_materialized.load(Ordering::Acquire) {
            return Ok(()); // Already materialized
        }

        // The copy is made before the lock is taken, so the mutex is only held
        // for the store below.
        let source_slice = self.raw_data_slice()?;
        let buffer = source_slice.to_vec();

        let mut guard = self.cow_buffer.lock().map_err(|e| {
            CoreError::MemoryError(
                ErrorContext::new(format!("Failed to lock COW buffer: {e}"))
                    .with_location(ErrorLocation::new(file!(), line!())),
            )
        })?;
        // An already-stored buffer is kept; the fresh copy is dropped in that case.
        if guard.is_none() {
            *guard = Some(buffer);
        }
        // The flag is published only after the buffer is in place; it pairs with
        // the `Acquire` loads in this method and in `as_slice`.
        self.cow_materialized.store(true, Ordering::Release);
        Ok(())
    }
743}
744
745impl<T: Copy + Send + Sync + 'static> std::fmt::Debug for MmapArray<T> {
746    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
747        f.debug_struct("MmapArray")
748            .field("path", &self.file_path)
749            .field("mode", &self.mode)
750            .field("num_elements", &self.num_elements)
751            .field("element_size", &std::mem::size_of::<T>())
752            .field("data_size_bytes", &self.data_size_bytes())
753            .finish()
754    }
755}
756
/// Iterator that yields consecutive sub-slices of at most `chunk_size` elements.
pub struct MmapChunkIter<'a, T> {
    /// The full slice being walked.
    data: &'a [T],
    /// Maximum number of elements per yielded chunk.
    chunk_size: usize,
    /// Index of the first element that has not been yielded yet.
    position: usize,
}

impl<'a, T> Iterator for MmapChunkIter<'a, T> {
    type Item = &'a [T];

    /// Yields the next chunk, or `None` once every element has been handed out.
    /// Only the final chunk may be shorter than `chunk_size`.
    fn next(&mut self) -> Option<Self::Item> {
        let data: &'a [T] = self.data;
        let start = self.position;
        if start < data.len() {
            let stop = data.len().min(start + self.chunk_size);
            self.position = stop;
            Some(&data[start..stop])
        } else {
            None
        }
    }

    /// Exact number of chunks still to come (remaining elements divided by the
    /// chunk size, rounded up).
    fn size_hint(&self) -> (usize, Option<usize>) {
        let unread = self.data.len().saturating_sub(self.position);
        let chunks_left = unread.div_ceil(self.chunk_size);
        (chunks_left, Some(chunks_left))
    }
}

impl<'a, T> ExactSizeIterator for MmapChunkIter<'a, T> {}
785
786/// Builder for creating MmapArray instances with configuration options.
787pub struct MmapArrayBuilder<T: Copy + Send + Sync + 'static> {
788    mode: MmapMode,
789    _phantom: PhantomData<T>,
790}
791
792impl<T: Copy + Send + Sync + 'static> MmapArrayBuilder<T> {
793    /// Create a new builder with the specified mode.
794    pub fn new(mode: MmapMode) -> Self {
795        Self {
796            mode,
797            _phantom: PhantomData,
798        }
799    }
800
801    /// Build from a slice of data, writing to the specified path.
802    pub fn from_slice(self, data: &[T], path: &Path) -> CoreResult<MmapArray<T>> {
803        MmapArray::from_slice(data, path, self.mode)
804    }
805
806    /// Build by opening an existing file.
807    pub fn open(self, path: &Path) -> CoreResult<MmapArray<T>> {
808        MmapArray::open(path, self.mode)
809    }
810}
811
#[cfg(test)]
mod tests {
    use super::*;

    /// File in the system temp directory that is deleted when the guard is
    /// dropped.
    ///
    /// Declare the guard *before* any `MmapArray` backed by the file: locals
    /// are dropped in reverse declaration order, so the mapping is released
    /// before the file is removed (removing a file that is still mapped can
    /// fail on platforms such as Windows). Because cleanup lives in `Drop`,
    /// it also runs when a failed assertion unwinds the test.
    struct ScratchFile {
        path: std::path::PathBuf,
    }

    impl ScratchFile {
        /// Build the guard for `file_name` inside `std::env::temp_dir()`.
        /// The file itself is not created here.
        fn new(file_name: &str) -> Self {
            Self {
                path: std::env::temp_dir().join(file_name),
            }
        }
    }

    impl Drop for ScratchFile {
        fn drop(&mut self) {
            // Best-effort cleanup: the file may never have been created.
            let _ = std::fs::remove_file(&self.path);
        }
    }

    /// Round-trips 1000 `f64` values through a read-only array.
    #[test]
    fn test_create_and_read_f64() {
        let file = ScratchFile::new("scirs2_mmap_test_f64.dat");

        let data: Vec<f64> = (0..1000).map(|i| i as f64 * 0.5).collect();
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        assert_eq!(arr.len(), 1000);
        assert!(!arr.is_empty());

        let slice = arr.as_slice().expect("Failed to get slice");
        for (i, &val) in slice.iter().enumerate() {
            let expected = i as f64 * 0.5;
            assert!(
                (val - expected).abs() < 1e-10,
                "Mismatch at index {i}: {val} vs {expected}"
            );
        }
    }

    /// Reads a single `i32` element back by index.
    #[test]
    fn test_create_and_read_i32() {
        let file = ScratchFile::new("scirs2_mmap_test_i32.dat");

        let data: Vec<i32> = (0..500).collect();
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        assert_eq!(arr.len(), 500);
        let val = arr.get(42).expect("Failed to get element");
        assert_eq!(val, 42);
    }

    /// A write in read-write mode is visible after flushing and re-opening.
    #[test]
    fn test_read_write_mode() {
        let file = ScratchFile::new("scirs2_mmap_test_rw.dat");

        let data: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let mut arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadWrite)
            .expect("Failed to create");

        // Mutate through the mmap
        arr.set(2, 99.0).expect("Failed to set element");
        arr.flush().expect("Failed to flush");

        // Verify by re-opening
        let arr2 =
            MmapArray::<f64>::open(&file.path, MmapMode::ReadOnly).expect("Failed to reopen");
        let val = arr2.get(2).expect("Failed to get element");
        assert!((val - 99.0).abs() < 1e-10);
    }

    /// A copy-on-write mutation is visible through the array but leaves the
    /// file contents untouched.
    #[test]
    fn test_cow_mode() {
        let file = ScratchFile::new("scirs2_mmap_test_cow.dat");

        let data: Vec<f64> = vec![10.0, 20.0, 30.0, 40.0];
        let arr_rw = MmapArray::from_slice(&data, &file.path, MmapMode::ReadWrite)
            .expect("Failed to create");
        // Release the writable mapping before re-opening the file.
        drop(arr_rw);

        // Open in COW mode
        let mut arr_cow =
            MmapArray::<f64>::open(&file.path, MmapMode::CopyOnWrite).expect("Failed to open COW");

        // Read should work
        let val = arr_cow.get(0).expect("Failed to get");
        assert!((val - 10.0).abs() < 1e-10);

        // Write should work (goes to COW buffer, not file)
        arr_cow.set(0, 999.0).expect("Failed to set COW");
        let val_cow = arr_cow.get(0).expect("Failed to get after COW write");
        assert!((val_cow - 999.0).abs() < 1e-10);

        // Original file should be unchanged
        let arr_verify =
            MmapArray::<f64>::open(&file.path, MmapMode::ReadOnly).expect("Failed to verify");
        let val_orig = arr_verify.get(0).expect("Failed to get original");
        assert!(
            (val_orig - 10.0).abs() < 1e-10,
            "COW should not have modified the original file"
        );
    }

    /// `set` on a read-only array reports an error.
    #[test]
    fn test_readonly_write_fails() {
        let file = ScratchFile::new("scirs2_mmap_test_ro_fail.dat");

        let data: Vec<f64> = vec![1.0, 2.0, 3.0];
        let mut arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        let result = arr.set(0, 42.0);
        assert!(result.is_err(), "Write to read-only should fail");
    }

    /// `get` past the end reports an error.
    #[test]
    fn test_out_of_bounds() {
        let file = ScratchFile::new("scirs2_mmap_test_oob.dat");

        let data: Vec<f64> = vec![1.0, 2.0, 3.0];
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        let result = arr.get(100);
        assert!(result.is_err(), "Out of bounds get should fail");
    }

    /// `view(offset, len)` returns the requested window and rejects ranges
    /// that run past the end.
    #[test]
    fn test_view() {
        let file = ScratchFile::new("scirs2_mmap_test_view.dat");

        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        let view = arr.view(10, 20).expect("Failed to get view");
        assert_eq!(view.len(), 20);
        assert!((view[0] - 10.0).abs() < 1e-10);
        assert!((view[19] - 29.0).abs() < 1e-10);

        // Invalid range should fail
        let result = arr.view(90, 20);
        assert!(result.is_err(), "Out of bounds view should fail");
    }

    /// Chunked iteration yields full chunks followed by a shorter remainder.
    #[test]
    fn test_chunked_iter() {
        let file = ScratchFile::new("scirs2_mmap_test_chunks.dat");

        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        let chunks: Vec<&[f64]> = arr
            .chunked_iter(30)
            .expect("Failed to get chunks")
            .collect();

        assert_eq!(chunks.len(), 4); // 100 / 30 = 3 full + 1 partial
        assert_eq!(chunks[0].len(), 30);
        assert_eq!(chunks[1].len(), 30);
        assert_eq!(chunks[2].len(), 30);
        assert_eq!(chunks[3].len(), 10);
    }

    /// Conversion to an ndarray keeps length and values.
    #[test]
    fn test_to_ndarray() {
        let file = ScratchFile::new("scirs2_mmap_test_ndarray.dat");

        let data: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        let nd = arr.to_ndarray().expect("Failed to convert to ndarray");
        assert_eq!(nd.len(), 5);
        assert!((nd[0] - 1.0).abs() < 1e-10);
        assert!((nd[4] - 5.0).abs() < 1e-10);
    }

    /// The ndarray view exposes the same elements as the array.
    #[test]
    fn test_ndarray_view() {
        let file = ScratchFile::new("scirs2_mmap_test_ndview.dat");

        let data: Vec<f64> = vec![10.0, 20.0, 30.0];
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        let view = arr.as_ndarray_view().expect("Failed to get ndarray view");
        assert_eq!(view.len(), 3);
        assert!((view[1] - 20.0).abs() < 1e-10);
    }

    /// An array can be created from an `ndarray::Array1`.
    #[test]
    fn test_from_ndarray() {
        let file = ScratchFile::new("scirs2_mmap_test_fromnd.dat");

        let nd = ::ndarray::Array1::from_vec(vec![1.0f64, 2.0, 3.0, 4.0]);
        let arr = MmapArray::from_ndarray(&nd, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        assert_eq!(arr.len(), 4);
        let val = arr.get(3).expect("Failed to get");
        assert!((val - 4.0).abs() < 1e-10);
    }

    /// `map` applies the closure to every element.
    #[test]
    fn test_map_function() {
        let file = ScratchFile::new("scirs2_mmap_test_map.dat");

        let data: Vec<f64> = vec![1.0, 4.0, 9.0, 16.0];
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        let sqrt_vals = arr.map(|x| x.sqrt()).expect("Failed to map");
        assert!((sqrt_vals[0] - 1.0).abs() < 1e-10);
        assert!((sqrt_vals[1] - 2.0).abs() < 1e-10);
        assert!((sqrt_vals[2] - 3.0).abs() < 1e-10);
        assert!((sqrt_vals[3] - 4.0).abs() < 1e-10);
    }

    /// The builder forwards its mode to the created array.
    #[test]
    fn test_builder() {
        let file = ScratchFile::new("scirs2_mmap_test_builder.dat");

        let data: Vec<f64> = vec![100.0, 200.0];
        let builder = MmapArrayBuilder::<f64>::new(MmapMode::ReadOnly);
        let arr = builder
            .from_slice(&data, &file.path)
            .expect("Failed to build");

        assert_eq!(arr.len(), 2);
        assert_eq!(arr.mode(), MmapMode::ReadOnly);
    }

    /// An empty input produces an empty array and an empty slice.
    #[test]
    fn test_empty_array() {
        let file = ScratchFile::new("scirs2_mmap_test_empty.dat");

        let data: Vec<f64> = vec![];
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        assert_eq!(arr.len(), 0);
        assert!(arr.is_empty());

        let slice = arr.as_slice().expect("Failed to get slice");
        assert!(slice.is_empty());
    }

    /// `flush_async` succeeds after a write in read-write mode.
    #[test]
    fn test_flush_async() {
        let file = ScratchFile::new("scirs2_mmap_test_flush_async.dat");

        let data: Vec<f64> = vec![1.0, 2.0, 3.0];
        let mut arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadWrite)
            .expect("Failed to create");

        arr.set(1, 42.0).expect("Failed to set");
        arr.flush_async().expect("Failed to async flush");
    }

    /// The `Debug` output names the type and its `num_elements` field.
    #[test]
    fn test_debug_format() {
        let file = ScratchFile::new("scirs2_mmap_test_debug.dat");

        let data: Vec<f64> = vec![1.0];
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        let debug_str = format!("{arr:?}");
        assert!(debug_str.contains("MmapArray"));
        assert!(debug_str.contains("num_elements"));
    }

    /// Data written and flushed by one array is read back by a fresh one.
    #[test]
    fn test_reopen_file() {
        let file = ScratchFile::new("scirs2_mmap_test_reopen.dat");

        // Create and write
        let data: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let mut arr1 = MmapArray::from_slice(&data, &file.path, MmapMode::ReadWrite)
            .expect("Failed to create");
        arr1.set(3, 42.0).expect("Failed to set");
        arr1.flush().expect("Failed to flush");
        drop(arr1);

        // Reopen and verify
        let arr2 =
            MmapArray::<f64>::open(&file.path, MmapMode::ReadOnly).expect("Failed to reopen");
        assert_eq!(arr2.len(), 5);
        let val = arr2.get(3).expect("Failed to get");
        assert!((val - 42.0).abs() < 1e-10);
    }

    /// Single-byte elements round-trip correctly.
    #[test]
    fn test_u8_array() {
        let file = ScratchFile::new("scirs2_mmap_test_u8.dat");

        let data: Vec<u8> = (0..=255).collect();
        let arr = MmapArray::from_slice(&data, &file.path, MmapMode::ReadOnly)
            .expect("Failed to create");

        assert_eq!(arr.len(), 256);
        let val = arr.get(128).expect("Failed to get");
        assert_eq!(val, 128u8);
    }
}