Skip to main content

scirs2_core/memory_efficient/
memmap.rs

1//! Memory-mapped array implementation for efficient handling of large datasets.
2//!
3//! This module provides a `MemoryMappedArray` type that uses memory mapping to efficiently
4//! access large datasets stored on disk. Memory mapping allows the operating system to
5//! page in data as needed, reducing memory usage for very large arrays.
6//!
7//! Based on `NumPy`'s memmap implementation, this provides similar functionality in Rust.
8
9use super::validation;
10use crate::error::{CoreError, CoreResult, ErrorContext, ErrorLocation};
11use ::ndarray::{Array, ArrayBase, Data, Dimension, IxDyn};
12use ::serde::{Deserialize, Serialize};
13use memmap2::{Mmap, MmapMut, MmapOptions};
14use oxicode::{config, serde as oxicode_serde};
15use std::fs::{File, OpenOptions};
16use std::io::{Read, Write};
17use std::marker::PhantomData;
18use std::mem;
19use std::path::{Path, PathBuf};
20use std::slice;
21use tempfile::NamedTempFile;
22
/// Selects how the backing file of a memory-mapped array is opened and mapped.
///
/// Each variant has a short string form (see [`AccessMode::as_str`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessMode {
    /// Read-only access
    ReadOnly,
    /// Read-write access
    ReadWrite,
    /// Write access (creates a new file or overwrites existing one)
    Write,
    /// Copy-on-write access (changes not saved to disk)
    CopyOnWrite,
}

impl AccessMode {
    /// Returns the short mode string for this variant:
    /// `"r"`, `"r+"`, `"w+"` or `"c"`.
    pub const fn as_str(&self) -> &'static str {
        match *self {
            Self::ReadOnly => "r",
            Self::ReadWrite => "r+",
            Self::Write => "w+",
            Self::CopyOnWrite => "c",
        }
    }
}
47
48/// Implement FromStr for AccessMode to allow parsing from string
49impl std::str::FromStr for AccessMode {
50    type Err = CoreError;
51
52    fn from_str(s: &str) -> Result<Self, Self::Err> {
53        match s {
54            "r" => Ok(AccessMode::ReadOnly),
55            "r+" => Ok(AccessMode::ReadWrite),
56            "w+" => Ok(AccessMode::Write),
57            "c" => Ok(AccessMode::CopyOnWrite),
58            _ => Err(CoreError::ValidationError(
59                ErrorContext::new(format!("Invalid access mode: {s}"))
60                    .with_location(ErrorLocation::new(file!(), line!())),
61            )),
62        }
63    }
64}
65
66/// Memory-mapped array that efficiently maps file data directly into memory
67#[derive(Debug)]
68pub struct MemoryMappedArray<A>
69where
70    A: Clone + Copy + 'static + Send + Sync + Send + Sync,
71{
72    /// The shape of the array
73    pub shape: Vec<usize>,
74    /// The path to the mapped file
75    pub file_path: PathBuf,
76    /// The access mode
77    pub mode: AccessMode,
78    /// The offset in the file where the data starts (in bytes)
79    pub offset: usize,
80    /// The total number of elements
81    pub size: usize,
82    /// The memory-mapped data (read-only)
83    pub(crate) mmap_view: Option<Mmap>,
84    /// The memory-mapped data (mutable)
85    pub(crate) mmap_view_mut: Option<MmapMut>,
86    /// Whether the file is temporary and should be deleted on drop
87    pub(crate) is_temp: bool,
88    /// Phantom data for type parameters
89    pub(crate) phantom: PhantomData<A>,
90}
91
/// Header information stored at the beginning of the file.
///
/// In `Write` mode the constructor serializes this struct with `oxicode`,
/// writes its encoded length as an 8-byte little-endian integer, then the
/// encoded bytes, then zero padding so the data region starts on an
/// `align_of::<A>()` boundary.
#[derive(Serialize, Deserialize, Debug, Clone)]
struct MemoryMappedHeader {
    /// Element type size in bytes (`size_of::<A>()` at the time of writing)
    element_size: usize,
    /// Shape of the array
    shape: Vec<usize>,
    /// Total number of elements
    total_elements: usize,
}
102
103impl<A> Clone for MemoryMappedArray<A>
104where
105    A: Clone + Copy + 'static + Send + Sync,
106{
107    fn clone(&self) -> Self {
108        // Create a new memory mapping with the same parameters
109        // This is safe because we're creating a new mapping to the same file
110        Self::new::<crate::ndarray::OwnedRepr<A>, IxDyn>(
111            None,
112            &self.file_path,
113            self.mode,
114            self.offset,
115        )
116        .expect("Failed to clone memory mapped array")
117    }
118}
119
120impl<A> MemoryMappedArray<A>
121where
122    A: Clone + Copy + 'static + Send + Sync + Send + Sync,
123{
124    /// Create a new reference to the same memory-mapped file
125    pub fn clone_ref(&self) -> CoreResult<Self> {
126        // Create a new MemoryMappedArray with the same parameters
127        // This will properly initialize the mmap views
128        let element_size = mem::size_of::<A>();
129        let data_size = self.size * element_size;
130
131        // Open file based on access mode
132        match self.mode {
133            AccessMode::ReadOnly => {
134                let file = File::open(&self.file_path)
135                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
136
137                let mmap = unsafe {
138                    MmapOptions::new()
139                        .offset(self.offset as u64)
140                        .len(data_size)
141                        .map(&file)
142                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
143                };
144
145                Ok(Self {
146                    shape: self.shape.clone(),
147                    file_path: self.file_path.clone(),
148                    mode: self.mode,
149                    offset: self.offset,
150                    size: self.size,
151                    mmap_view: Some(mmap),
152                    mmap_view_mut: None,
153                    is_temp: false,
154                    phantom: PhantomData,
155                })
156            }
157            AccessMode::ReadWrite | AccessMode::CopyOnWrite => {
158                let file = OpenOptions::new()
159                    .read(true)
160                    .write(true)
161                    .open(&self.file_path)
162                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
163
164                let mmap = unsafe {
165                    MmapOptions::new()
166                        .offset(self.offset as u64)
167                        .len(data_size)
168                        .map_mut(&file)
169                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
170                };
171
172                Ok(Self {
173                    shape: self.shape.clone(),
174                    file_path: self.file_path.clone(),
175                    mode: self.mode,
176                    offset: self.offset,
177                    size: self.size,
178                    mmap_view: None,
179                    mmap_view_mut: Some(mmap),
180                    is_temp: false,
181                    phantom: PhantomData,
182                })
183            }
184            AccessMode::Write => {
185                // For Write mode, we typically shouldn't clone
186                Err(CoreError::InvalidArgument(
187                    ErrorContext::new("Cannot clone a write-only memory-mapped array".to_string())
188                        .with_location(ErrorLocation::new(file!(), line!())),
189                ))
190            }
191        }
192    }
193    /// Validate safety preconditions and create a slice from raw parts
194    ///
195    /// # Safety
196    /// This method performs comprehensive validation before creating the slice
197    fn validate_and_create_slice<'a>(&self, ptr: *const A) -> Result<&'a [A], CoreError> {
198        // Validate safety preconditions for from_raw_parts
199        if ptr.is_null() {
200            return Err(CoreError::MemoryError(
201                ErrorContext::new("Memory map pointer is null".to_string())
202                    .with_location(ErrorLocation::new(file!(), line!())),
203            ));
204        }
205
206        // Check alignment
207        if (ptr as usize) % std::mem::align_of::<A>() != 0 {
208            return Err(CoreError::MemoryError(
209                ErrorContext::new(format!(
210                    "Memory map pointer is not properly aligned for type {} (alignment: {}, address: 0x{:x})",
211                    std::any::type_name::<A>(),
212                    std::mem::align_of::<A>(),
213                    ptr as usize
214                ))
215                .with_location(ErrorLocation::new(file!(), line!())),
216            ));
217        }
218
219        // Check size bounds to prevent overflow
220        let element_size = std::mem::size_of::<A>();
221        if element_size > 0 && self.size > isize::MAX as usize / element_size {
222            return Err(CoreError::MemoryError(
223                ErrorContext::new(format!(
224                    "Array size {} exceeds maximum safe size for slice creation",
225                    self.size
226                ))
227                .with_location(ErrorLocation::new(file!(), line!())),
228            ));
229        }
230
231        // Check that we don't exceed the memory map bounds
232        let total_bytes = self.size.checked_mul(element_size).ok_or_else(|| {
233            CoreError::MemoryError(
234                ErrorContext::new("Array size calculation overflows".to_string())
235                    .with_location(ErrorLocation::new(file!(), line!())),
236            )
237        })?;
238
239        let mmap_len = if let Some(ref mmap) = self.mmap_view {
240            mmap.len()
241        } else if let Some(ref mmap_mut) = self.mmap_view_mut {
242            mmap_mut.len()
243        } else {
244            return Err(CoreError::MemoryError(
245                ErrorContext::new("No memory map available".to_string())
246                    .with_location(ErrorLocation::new(file!(), line!())),
247            ));
248        };
249
250        if total_bytes > mmap_len {
251            return Err(CoreError::MemoryError(
252                ErrorContext::new(format!(
253                    "Requested array size {total_bytes} bytes exceeds memory map size {mmap_len} bytes"
254                ))
255                .with_location(ErrorLocation::new(file!(), line!())),
256            ));
257        }
258
259        // Now it's safe to create the slice
260        // SAFETY: We have validated:
261        // 1. ptr is not null
262        // 2. ptr is properly aligned for type A
263        // 3. self.size * element_size <= isize::MAX
264        // 4. the memory region is valid (within the memory map bounds)
265        Ok(unsafe { slice::from_raw_parts(ptr, self.size) })
266    }
267
268    /// Get the underlying slice of data
269    pub fn as_slice(&self) -> &[A] {
270        match (&self.mmap_view, &self.mmap_view_mut) {
271            (Some(view), _) => {
272                let ptr = view.as_ptr() as *const A;
273                // SAFETY: The memory map is valid for the lifetime of self
274                unsafe { slice::from_raw_parts(ptr, self.size) }
275            }
276            (_, Some(view)) => {
277                let ptr = view.as_ptr() as *const A;
278                // SAFETY: The memory map is valid for the lifetime of self
279                unsafe { slice::from_raw_parts(ptr, self.size) }
280            }
281            _ => &[],
282        }
283    }
284
285    /// Open an existing memory-mapped array file
286    pub fn path(filepath: &Path, shape: &[usize]) -> Result<Self, CoreError> {
287        // Calculate total elements
288        let size = shape.iter().product();
289
290        // Open the file for reading
291        let file = File::open(filepath)
292            .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
293
294        // Get file size
295        let file_metadata = file
296            .metadata()
297            .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
298        let file_size = file_metadata.len() as usize;
299
300        // Calculate expected data size
301        let element_size = mem::size_of::<A>();
302        let data_size = size * element_size;
303
304        // Check if file has enough data
305        if data_size > file_size {
306            return Err(CoreError::ValidationError(
307                ErrorContext::new(format!(
308                    "File too small for specified shape: need {data_size} bytes, but file is only {file_size} bytes"
309                ))
310                .with_location(ErrorLocation::new(file!(), line!())),
311            ));
312        }
313
314        // Create memory mapping
315        let mmap = unsafe {
316            MmapOptions::new()
317                .len(data_size)
318                .map(&file)
319                .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
320        };
321
322        Ok(Self {
323            shape: shape.to_vec(),
324            file_path: filepath.to_path_buf(),
325            mode: AccessMode::ReadOnly,
326            offset: 0,
327            size,
328            mmap_view: Some(mmap),
329            mmap_view_mut: None,
330            is_temp: false,
331            phantom: PhantomData,
332        })
333    }
334
335    /// Create a new memory-mapped array from an existing array
336    ///
337    /// # Arguments
338    ///
339    /// * `data` - The source array to map to a file
340    /// * `file_path` - The path to the file to create or open
341    /// * `mode` - The access mode
342    /// * `offset` - The offset in the file where the data should start (in bytes)
343    ///
344    /// # Returns
345    ///
346    /// A new `MemoryMappedArray` instance
347    pub fn new<S, D>(
348        data: Option<&ArrayBase<S, D>>,
349        file_path: &Path,
350        mode: AccessMode,
351        offset: usize,
352    ) -> Result<Self, CoreError>
353    where
354        S: Data<Elem = A>,
355        D: Dimension,
356    {
357        let (shape, size) = if let Some(array) = data {
358            validation::check_not_empty(array)?;
359            (array.shape().to_vec(), array.len())
360        } else {
361            // If no data is provided, try to read the file header
362            let (header_, _) = read_header::<A>(file_path)?;
363            (header_.shape, header_.total_elements)
364        };
365
366        // Calculate required file size
367        let element_size = mem::size_of::<A>();
368        let data_size = size * element_size;
369
370        // Create and prepare the file depending on the mode
371        match mode {
372            AccessMode::ReadOnly => {
373                // Open existing file for reading only
374                let file = File::open(file_path)
375                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
376
377                // Get file size to ensure proper mapping
378                let file_metadata = file
379                    .metadata()
380                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
381                let file_size = file_metadata.len() as usize;
382
383                // Ensure the file is large enough
384                if offset + data_size > file_size {
385                    return Err(CoreError::ValidationError(
386                        ErrorContext::new(format!(
387                            "File too small: need {needed} bytes, but file is only {file_size} bytes",
388                            needed = offset + data_size
389                        ))
390                        .with_location(ErrorLocation::new(file!(), line!())),
391                    ));
392                }
393
394                // Create a read-only memory mapping
395                let mmap = unsafe {
396                    MmapOptions::new()
397                        .offset(offset as u64)
398                        .len(data_size)
399                        .map(&file)
400                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
401                };
402
403                Ok(Self {
404                    shape,
405                    file_path: file_path.to_path_buf(),
406                    mode,
407                    offset,
408                    size,
409                    mmap_view: Some(mmap),
410                    mmap_view_mut: None,
411                    is_temp: false,
412                    phantom: PhantomData,
413                })
414            }
415            AccessMode::ReadWrite => {
416                // Open existing file for reading and writing
417                let file = OpenOptions::new()
418                    .read(true)
419                    .write(true)
420                    .open(file_path)
421                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
422
423                // Get file metadata to check size
424                let metadata = file
425                    .metadata()
426                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
427                let file_size = metadata.len() as usize;
428
429                // Ensure file has sufficient size before mapping
430                if offset + data_size > file_size {
431                    file.set_len((offset + data_size) as u64)
432                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
433                }
434
435                // Create a mutable memory mapping
436                let mut mmap = unsafe {
437                    MmapOptions::new()
438                        .offset(offset as u64)
439                        .len(data_size)
440                        .map_mut(&file)
441                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
442                };
443
444                // If data is provided, write it to the mapping
445                if let Some(array) = data {
446                    // Convert array data to bytes
447                    let bytes = unsafe {
448                        slice::from_raw_parts(
449                            array.as_ptr() as *const u8,
450                            array.len() * mem::size_of::<A>(),
451                        )
452                    };
453
454                    // Copy the data to the memory mapping
455                    mmap[..].copy_from_slice(bytes);
456                }
457
458                Ok(Self {
459                    shape,
460                    file_path: file_path.to_path_buf(),
461                    mode,
462                    offset,
463                    size,
464                    mmap_view: None,
465                    mmap_view_mut: Some(mmap),
466                    is_temp: false,
467                    phantom: PhantomData,
468                })
469            }
470            AccessMode::Write => {
471                // Create or truncate file for writing
472                let mut file = OpenOptions::new()
473                    .read(true)
474                    .write(true)
475                    .create(true)
476                    .truncate(true)
477                    .open(file_path)
478                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
479
480                // Create header
481                let header = MemoryMappedHeader {
482                    element_size,
483                    shape: shape.clone(),
484                    total_elements: size,
485                };
486
487                // Serialize header to bytes
488                let cfg = config::standard();
489                let header_bytes = oxicode_serde::encode_to_vec(&header, cfg).map_err(|e| {
490                    CoreError::ValidationError(
491                        ErrorContext::new(format!("Failed to serialize header: {e}"))
492                            .with_location(ErrorLocation::new(file!(), line!())),
493                    )
494                })?;
495
496                // Write header length first (8 bytes)
497                let header_len = header_bytes.len() as u64;
498                file.write_all(&header_len.to_le_bytes())
499                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
500
501                // Write header to file
502                file.write_all(&header_bytes)
503                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
504                // Add padding so the start of the data region is aligned for type A.
505                // This avoids unaligned pointer issues when creating typed slices over the mmap.
506                let align = std::mem::align_of::<A>();
507                let mut padding_size = 0usize;
508                let header_size_unaligned = 8 + header_bytes.len();
509                if align > 1 {
510                    let rem = header_size_unaligned % align;
511                    if rem != 0 {
512                        padding_size = align - rem;
513                        // Write zero padding bytes
514                        let padding = vec![0u8; padding_size];
515                        file.write_all(&padding).map_err(|e| {
516                            CoreError::IoError(ErrorContext::new(format!(
517                                "Failed to write header padding: {e}"
518                            )))
519                        })?;
520                    }
521                }
522                let header_size = header_size_unaligned + padding_size; // 8 bytes for header length + header bytes + padding
523
524                // Calculate total file size (header + padding + data)
525                let total_size = header_size + data_size;
526
527                // Set file length to accommodate header and data
528                file.set_len(total_size as u64)
529                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
530
531                // Create a mutable memory mapping
532                // When we have a header, the actual data starts after the header
533                let data_offset = header_size + offset;
534                let mut mmap = unsafe {
535                    MmapOptions::new()
536                        .offset(data_offset as u64)
537                        .len(data_size)
538                        .map_mut(&file)
539                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
540                };
541
542                // If data is provided, write it to the mapping
543                if let Some(array) = data {
544                    // Convert array data to bytes
545                    let bytes = unsafe {
546                        slice::from_raw_parts(
547                            array.as_ptr() as *const u8,
548                            array.len() * mem::size_of::<A>(),
549                        )
550                    };
551
552                    // Copy the data to the memory mapping
553                    mmap[..].copy_from_slice(bytes);
554                }
555
556                Ok(Self {
557                    shape,
558                    file_path: file_path.to_path_buf(),
559                    mode,
560                    offset: data_offset, // Store the actual data offset, not the requested offset
561                    size,
562                    mmap_view: None,
563                    mmap_view_mut: Some(mmap),
564                    is_temp: false,
565                    phantom: PhantomData,
566                })
567            }
568            AccessMode::CopyOnWrite => {
569                // Open existing file for reading
570                let file = File::open(file_path)
571                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
572
573                // Create a copy-on-write memory mapping
574                let mmap = unsafe {
575                    MmapOptions::new()
576                        .offset(offset as u64)
577                        .len(data_size)
578                        .map_copy(&file)
579                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
580                };
581
582                Ok(Self {
583                    shape,
584                    file_path: file_path.to_path_buf(),
585                    mode,
586                    offset,
587                    size,
588                    mmap_view: None,
589                    mmap_view_mut: Some(mmap),
590                    is_temp: false,
591                    phantom: PhantomData,
592                })
593            }
594        }
595    }
596
597    /// Create a new memory-mapped array with a temporary file
598    ///
599    /// # Arguments
600    ///
601    /// * `data` - The source array to map to a temporary file
602    /// * `mode` - The access mode
603    /// * `offset` - The offset in the file where the data should start (in bytes)
604    ///
605    /// # Returns
606    ///
607    /// A new `MemoryMappedArray` instance backed by a temporary file
608    pub fn new_temp<S, D>(
609        data: &ArrayBase<S, D>,
610        mode: AccessMode,
611        offset: usize,
612    ) -> Result<Self, CoreError>
613    where
614        S: Data<Elem = A>,
615        D: Dimension,
616    {
617        let temp_file = NamedTempFile::new()
618            .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
619        let file_path = temp_file.path().to_path_buf();
620
621        // Manually persist the temp file so it stays around after we return
622        let _file = temp_file
623            .persist(&file_path)
624            .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
625
626        // For ReadOnly mode, we need to pre-write the data to the temp file
627        // before creating a read-only mapping, since the file starts empty.
628        // Use ReadWrite mode to write data, then re-open as ReadOnly.
629        let effective_mode = match mode {
630            AccessMode::ReadOnly => AccessMode::ReadWrite,
631            other => other,
632        };
633
634        let mut result = Self::new(Some(data), &file_path, effective_mode, offset)?;
635        result.is_temp = true;
636
637        Ok(result)
638    }
639
640    /// Get a view of the array data as an ndarray Array with the given dimension
641    ///
642    /// # Returns
643    ///
644    /// An ndarray Array view of the memory-mapped data
645    pub fn as_array<D>(&self) -> Result<Array<A, D>, CoreError>
646    where
647        D: Dimension,
648    {
649        // Get a slice to the memory-mapped data
650        let data_slice = match (&self.mmap_view, &self.mmap_view_mut) {
651            (Some(view), _) => {
652                // Read-only view
653                let ptr = view.as_ptr() as *const A;
654                // Validation: ptr should be within bounds
655                // Check that the view has enough bytes for the requested size
656                let required_bytes = self.size * std::mem::size_of::<A>();
657                if view.len() < required_bytes {
658                    return Err(CoreError::ValidationError(
659                        ErrorContext::new(format!(
660                            "Memory map view too small: {} bytes available, {} bytes required",
661                            view.len(),
662                            required_bytes
663                        ))
664                        .with_location(ErrorLocation::new(file!(), line!())),
665                    ));
666                }
667                // Check pointer alignment
668                if ptr as usize % std::mem::align_of::<A>() != 0 {
669                    return Err(CoreError::ValidationError(
670                        ErrorContext::new("Memory map pointer is not properly aligned")
671                            .with_location(ErrorLocation::new(file!(), line!())),
672                    ));
673                }
674                unsafe { std::slice::from_raw_parts(ptr, self.size) }
675            }
676            (_, Some(view)) => {
677                // Mutable view
678                let ptr = view.as_ptr() as *const A;
679                // Validation: ptr should be within bounds
680                // Check that the view has enough bytes for the requested size
681                let required_bytes = self.size * std::mem::size_of::<A>();
682                if view.len() < required_bytes {
683                    return Err(CoreError::ValidationError(
684                        ErrorContext::new(format!(
685                            "Memory map view too small: {} bytes available, {} bytes required",
686                            view.len(),
687                            required_bytes
688                        ))
689                        .with_location(ErrorLocation::new(file!(), line!())),
690                    ));
691                }
692                // Check pointer alignment
693                if ptr as usize % std::mem::align_of::<A>() != 0 {
694                    return Err(CoreError::ValidationError(
695                        ErrorContext::new("Memory map pointer is not properly aligned")
696                            .with_location(ErrorLocation::new(file!(), line!())),
697                    ));
698                }
699                unsafe { std::slice::from_raw_parts(ptr, self.size) }
700            }
701            _ => {
702                return Err(CoreError::ValidationError(
703                    ErrorContext::new("Memory map is not initialized".to_string())
704                        .with_location(ErrorLocation::new(file!(), line!())),
705                ));
706            }
707        };
708
709        // No need to create a separate dimension object - use the from_shape_vec method on Array directly
710        // This approach works because we're not trying to use the dimension directly
711        let shape_vec = self.shape.clone();
712
713        // Create an array from the memory-mapped data
714        let array = Array::from_shape_vec(shape_vec, data_slice.to_vec()).map_err(|e| {
715            CoreError::ShapeError(
716                ErrorContext::new(format!("error: {e}"))
717                    .with_location(ErrorLocation::new(file!(), line!())),
718            )
719        })?;
720
721        // Convert to the requested dimension type
722        let array = array.into_dimensionality::<D>().map_err(|e| {
723            CoreError::ShapeError(
724                ErrorContext::new(format!(
725                    "Failed to convert array to requested dimension type: {e}"
726                ))
727                .with_location(ErrorLocation::new(file!(), line!())),
728            )
729        })?;
730
731        Ok(array)
732    }
733
734    /// Get a mutable view of the array data as an ndarray ArrayViewMut with the given dimension
735    ///
736    /// # Returns
737    ///
738    /// A mutable ndarray ArrayViewMut of the memory-mapped data
739    ///
740    /// # Errors
741    ///
742    /// Returns an error if the array is in read-only mode
743    pub fn as_array_mut<D>(&mut self) -> Result<crate::ndarray::ArrayViewMut<A, D>, CoreError>
744    where
745        D: Dimension,
746    {
747        if self.mode == AccessMode::ReadOnly {
748            return Err(CoreError::ValidationError(
749                ErrorContext::new(
750                    "Cannot get mutable view of read-only memory-mapped array".to_string(),
751                )
752                .with_location(ErrorLocation::new(file!(), line!())),
753            ));
754        }
755
756        // Get a mutable slice to the memory-mapped data
757        let data_slice = if let Some(view) = &mut self.mmap_view_mut {
758            let ptr = view.as_mut_ptr() as *mut A;
759
760            // Validate safety preconditions for from_raw_parts_mut
761            if ptr.is_null() {
762                return Err(CoreError::MemoryError(
763                    ErrorContext::new("Memory map pointer is null".to_string())
764                        .with_location(ErrorLocation::new(file!(), line!())),
765                ));
766            }
767
768            // Check alignment
769            if (ptr as usize) % std::mem::align_of::<A>() != 0 {
770                return Err(CoreError::MemoryError(
771                    ErrorContext::new(format!(
772                        "Memory map pointer is not properly aligned for type {} (alignment: {}, address: 0x{:x})",
773                        std::any::type_name::<A>(),
774                        std::mem::align_of::<A>(),
775                        ptr as usize
776                    ))
777                    .with_location(ErrorLocation::new(file!(), line!())),
778                ));
779            }
780
781            // Check size bounds to prevent overflow
782            let element_size = std::mem::size_of::<A>();
783            if element_size > 0 && self.size > isize::MAX as usize / element_size {
784                return Err(CoreError::MemoryError(
785                    ErrorContext::new(format!(
786                        "Array size {} exceeds maximum safe size for slice creation",
787                        self.size
788                    ))
789                    .with_location(ErrorLocation::new(file!(), line!())),
790                ));
791            }
792
793            // Check that we don't exceed the memory map bounds
794            let total_bytes = self.size.checked_mul(element_size).ok_or_else(|| {
795                CoreError::MemoryError(
796                    ErrorContext::new("Array size calculation overflows".to_string())
797                        .with_location(ErrorLocation::new(file!(), line!())),
798                )
799            })?;
800
801            if total_bytes > view.len() {
802                return Err(CoreError::MemoryError(
803                    ErrorContext::new(format!(
804                        "Requested array size {} bytes exceeds memory map size {} bytes",
805                        total_bytes,
806                        view.len()
807                    ))
808                    .with_location(ErrorLocation::new(file!(), line!())),
809                ));
810            }
811
812            // Now it's safe to create the slice
813            // SAFETY: We have validated:
814            // 1. ptr is not null
815            // 2. ptr is properly aligned for type A
816            // 3. self.size * element_size <= isize::MAX
817            // 4. the memory region is valid (within the memory map bounds)
818            unsafe { slice::from_raw_parts_mut(ptr, self.size) }
819        } else {
820            return Err(CoreError::ValidationError(
821                ErrorContext::new("Mutable memory map is not initialized".to_string())
822                    .with_location(ErrorLocation::new(file!(), line!())),
823            ));
824        };
825
826        // Create a mutable array view from the memory-mapped data
827        let array_view = crate::ndarray::ArrayViewMut::from_shape(self.shape.clone(), data_slice)
828            .map_err(|e| {
829            CoreError::ShapeError(
830                ErrorContext::new(format!("error: {e}"))
831                    .with_location(ErrorLocation::new(file!(), line!())),
832            )
833        })?;
834
835        // Convert to the requested dimension type
836        let array_view = array_view.into_dimensionality::<D>().map_err(|e| {
837            CoreError::ShapeError(
838                ErrorContext::new(format!(
839                    "Failed to convert array to requested dimension type: {e}"
840                ))
841                .with_location(ErrorLocation::new(file!(), line!())),
842            )
843        })?;
844
845        Ok(array_view)
846    }
847
848    /// Flush changes to disk if the array is writable
849    ///
850    /// # Returns
851    ///
852    /// `Ok(())` if the flush succeeded, or an error
853    pub fn flush(&mut self) -> Result<(), CoreError> {
854        if let Some(view) = &mut self.mmap_view_mut {
855            view.flush()
856                .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
857        }
858
859        Ok(())
860    }
861
862    /// Reload the memory mapping from disk
863    ///
864    /// This function is useful when changes have been made to the underlying file
865    /// by other processes or by direct file I/O operations.
866    ///
867    /// # Returns
868    ///
869    /// `Ok(())` if the reload succeeded, or an error
870    pub fn reload(&mut self) -> Result<(), CoreError> {
871        // First, flush any pending changes
872        let _ = self.flush();
873
874        // Reopen the file with the original mode
875        let file_path = self.file_path.clone();
876        let mode = self.mode;
877        let offset = self.offset;
878
879        // Clear existing memory maps
880        self.mmap_view = None;
881        self.mmap_view_mut = None;
882
883        // Create the appropriate memory mapping based on the mode
884        match mode {
885            AccessMode::ReadOnly => {
886                // Open existing file for reading only
887                let file = File::open(&file_path)
888                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
889
890                // Create a read-only memory mapping
891                let mmap = unsafe {
892                    MmapOptions::new()
893                        .offset(offset as u64)
894                        .map(&file)
895                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
896                };
897
898                self.mmap_view = Some(mmap);
899            }
900            AccessMode::ReadWrite | AccessMode::Write => {
901                // Open existing file for reading and writing
902                let file = OpenOptions::new()
903                    .read(true)
904                    .write(true)
905                    .open(&file_path)
906                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
907
908                // Create a mutable memory mapping
909                let mmap = unsafe {
910                    MmapOptions::new()
911                        .offset(offset as u64)
912                        .map_mut(&file)
913                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
914                };
915
916                self.mmap_view_mut = Some(mmap);
917            }
918            AccessMode::CopyOnWrite => {
919                // Open existing file for reading only (copy-on-write doesn't modify the file)
920                let file = File::open(&file_path)
921                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
922
923                // Create a copy-on-write memory mapping
924                let mmap = unsafe {
925                    MmapOptions::new()
926                        .offset(offset as u64)
927                        .map_copy(&file)
928                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
929                };
930
931                self.mmap_view_mut = Some(mmap);
932            }
933        }
934
935        Ok(())
936    }
937
938    /// Check if the array is temporary
939    ///
940    /// # Returns
941    ///
942    /// `true` if the array is backed by a temporary file, `false` otherwise
943    pub fn is_temp(&self) -> bool {
944        self.is_temp
945    }
946
947    /// Get a view of the memory-mapped data as bytes
948    ///
949    /// # Returns
950    ///
951    /// A byte slice view of the memory-mapped data
952    pub fn as_bytes(&self) -> Result<&[u8], CoreError> {
953        match (&self.mmap_view, &self.mmap_view_mut) {
954            (Some(view), _) => {
955                // Read-only view
956                Ok(view)
957            }
958            (_, Some(view)) => {
959                // Mutable view
960                Ok(view)
961            }
962            _ => Err(CoreError::ValidationError(
963                ErrorContext::new("Memory map is not initialized".to_string())
964                    .with_location(ErrorLocation::new(file!(), line!())),
965            )),
966        }
967    }
968
969    /// Get a mutable view of the memory-mapped data as bytes
970    ///
971    /// # Returns
972    ///
973    /// A mutable byte slice view of the memory-mapped data
974    pub fn as_bytes_mut(&mut self) -> Result<&mut [u8], CoreError> {
975        if self.mode == AccessMode::ReadOnly {
976            return Err(CoreError::ValidationError(
977                ErrorContext::new(
978                    "Cannot get mutable view of read-only memory-mapped array".to_string(),
979                )
980                .with_location(ErrorLocation::new(file!(), line!())),
981            ));
982        }
983
984        match &mut self.mmap_view_mut {
985            Some(view) => {
986                // Mutable view
987                Ok(view)
988            }
989            _ => Err(CoreError::ValidationError(
990                ErrorContext::new("Mutable memory map is not initialized".to_string())
991                    .with_location(ErrorLocation::new(file!(), line!())),
992            )),
993        }
994    }
995}
996
997impl<A> Drop for MemoryMappedArray<A>
998where
999    A: Clone + Copy + 'static + Send + Sync + Send + Sync,
1000{
1001    fn drop(&mut self) {
1002        // Flush any pending changes
1003        if let Some(view) = &mut self.mmap_view_mut {
1004            let _ = view.flush();
1005        }
1006
1007        // If temporary, remove the file when done
1008        if self.is_temp {
1009            let _ = std::fs::remove_file(&self.file_path);
1010        }
1011    }
1012}
1013
1014/// Helper function to read the header from a file
1015#[allow(dead_code)]
1016fn read_header<A: Clone + Copy + 'static + Send + Sync>(
1017    file_path: &Path,
1018) -> Result<(MemoryMappedHeader, usize), CoreError> {
1019    // Open the file
1020    let mut file =
1021        File::open(file_path).map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
1022
1023    // Try to read the file as a proper memory-mapped file with header
1024    // First, check if the file is large enough to contain a header
1025    let file_metadata = file
1026        .metadata()
1027        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
1028    let file_size = file_metadata.len() as usize;
1029
1030    if file_size < 8 {
1031        // File is too small to have a proper header, treat as raw data
1032        let element_size = std::mem::size_of::<A>();
1033        let total_elements = file_size / element_size;
1034
1035        let header = MemoryMappedHeader {
1036            element_size,
1037            shape: vec![total_elements],
1038            total_elements,
1039        };
1040
1041        return Ok((header, 0)); // No header offset for raw files
1042    }
1043
1044    // Try to read as a proper memory-mapped file with header
1045    // Read header length (first 8 bytes)
1046    let mut header_len_bytes = [0u8; 8];
1047    if file.read_exact(&mut header_len_bytes).is_err() {
1048        // Failed to read header length, treat as raw data
1049        let element_size = std::mem::size_of::<A>();
1050        let total_elements = file_size / element_size;
1051
1052        let header = MemoryMappedHeader {
1053            element_size,
1054            shape: vec![total_elements],
1055            total_elements,
1056        };
1057
1058        return Ok((header, 0)); // No header offset for raw files
1059    }
1060
1061    let header_len = u64::from_ne_bytes(header_len_bytes) as usize;
1062
1063    // Sanity check: header length should be reasonable
1064    if header_len > file_size || header_len > 1024 * 1024 {
1065        // Header length is unreasonable, treat as raw data
1066        let element_size = std::mem::size_of::<A>();
1067        let total_elements = file_size / element_size;
1068
1069        let header = MemoryMappedHeader {
1070            element_size,
1071            shape: vec![total_elements],
1072            total_elements,
1073        };
1074
1075        return Ok((header, 0)); // No header offset for raw files
1076    }
1077
1078    // Read header data
1079    let mut header_bytes = vec![0u8; header_len];
1080    if file.read_exact(&mut header_bytes).is_err() {
1081        // Failed to read header, treat as raw data
1082        let element_size = std::mem::size_of::<A>();
1083        let total_elements = file_size / element_size;
1084
1085        let header = MemoryMappedHeader {
1086            element_size,
1087            shape: vec![total_elements],
1088            total_elements,
1089        };
1090
1091        return Ok((header, 0)); // No header offset for raw files
1092    }
1093
1094    // Try to deserialize header
1095    let cfg = config::standard();
1096    match oxicode_serde::decode_owned_from_slice::<MemoryMappedHeader, _>(&header_bytes, cfg) {
1097        Ok((header, _len)) => {
1098            let element_size_expected = std::mem::size_of::<A>();
1099            if header.element_size == element_size_expected {
1100                // Compute aligned header size (including potential padding) for newer files.
1101                let base_header_size = 8 + header_len; // length prefix + header bytes
1102                let align = std::mem::align_of::<A>();
1103                let padding = if align > 1 {
1104                    (align - (base_header_size % align)) % align
1105                } else {
1106                    0
1107                };
1108                let mut aligned_header_size = base_header_size;
1109                if padding > 0 && base_header_size + padding <= file_size {
1110                    // Peek padding bytes (should be zeros). We are currently positioned right after the header.
1111                    let mut padding_buf = vec![0u8; padding];
1112                    // Attempt to read padding; if it fails, revert to unaligned header size.
1113                    match file.read_exact(&mut padding_buf) {
1114                        Ok(_) => {
1115                            // Optional: validate all zeros; if not zeros, treat as legacy file without padding.
1116                            if padding_buf.iter().all(|b| *b == 0) {
1117                                aligned_header_size += padding;
1118                            } else {
1119                                // Non-zero padding -> legacy; reset file cursor backwards so data reads correctly.
1120                                // Seek back to start of data (cannot easily without Seek here; treat as legacy by ignoring consumption)
1121                                // We consumed bytes; that's acceptable because caller will start data mapping after aligned_header_size
1122                                // but since we won't add padding, adjust size back.
1123                                aligned_header_size = base_header_size; // do not include padding
1124                            }
1125                        }
1126                        Err(_) => {
1127                            aligned_header_size = base_header_size; // legacy file
1128                        }
1129                    }
1130                }
1131                Ok((header, aligned_header_size))
1132            } else {
1133                // Element size mismatch -> treat as raw data (legacy raw file)
1134                let element_size = element_size_expected;
1135                let total_elements = file_size / element_size;
1136                let fallback_header = MemoryMappedHeader {
1137                    element_size,
1138                    shape: vec![total_elements],
1139                    total_elements,
1140                };
1141                Ok((fallback_header, 0))
1142            }
1143        }
1144        Err(_) => {
1145            // Failed to deserialize header, treat as raw data
1146            let element_size = std::mem::size_of::<A>();
1147            let total_elements = file_size / element_size;
1148            let header = MemoryMappedHeader {
1149                element_size,
1150                shape: vec![total_elements],
1151                total_elements,
1152            };
1153            Ok((header, 0))
1154        }
1155    }
1156}
1157
1158/// Create a memory-mapped array from an existing file
1159///
1160/// # Arguments
1161///
1162/// * `file_path` - Path to the file to memory-map
1163/// * `mode` - Access mode (read-only, read-write, etc.)
1164/// * `offset` - Offset in bytes from the start of the file
1165///
1166/// # Returns
1167///
1168/// A new memory-mapped array
1169#[allow(dead_code)]
1170pub fn open_mmap<A, D>(
1171    file_path: &Path,
1172    mode: AccessMode,
1173    offset: usize,
1174) -> Result<MemoryMappedArray<A>, CoreError>
1175where
1176    A: Clone + Copy + Send + Sync + 'static,
1177    D: Dimension,
1178{
1179    // Read the header to get shape and element info
1180    let (header, header_size) = read_header::<A>(file_path)?;
1181
1182    // Verify element size
1183    let element_size = std::mem::size_of::<A>();
1184    if header.element_size != element_size {
1185        return Err(CoreError::ValidationError(
1186            ErrorContext::new(format!(
1187                "Element size mismatch: file has {} bytes, but type requires {} bytes",
1188                header.element_size, element_size
1189            ))
1190            .with_location(ErrorLocation::new(file!(), line!())),
1191        ));
1192    }
1193
1194    // Calculate the effective offset (header size + user offset)
1195    let effective_offset = header_size + offset;
1196
1197    // Create the array with the header info and effective offset
1198    MemoryMappedArray::<A>::new::<crate::ndarray::OwnedRepr<A>, D>(
1199        None,
1200        file_path,
1201        mode,
1202        effective_offset,
1203    )
1204}
1205
1206/// Create a new memory-mapped array file
1207///
1208/// # Arguments
1209///
1210/// * `data` - The array data to write to the file
1211/// * `file_path` - Path where the memory-mapped file should be created
1212/// * `mode` - Access mode (should be Write for new files)
1213/// * `offset` - Offset in bytes from the start of the file
1214///
1215/// # Returns
1216///
1217/// A new memory-mapped array
1218#[allow(dead_code)]
1219pub fn create_mmap<A, S, D>(
1220    data: &ArrayBase<S, D>,
1221    file_path: &Path,
1222    mode: AccessMode,
1223    offset: usize,
1224) -> Result<MemoryMappedArray<A>, CoreError>
1225where
1226    A: Clone + Copy + 'static + Send + Sync + Send + Sync,
1227    S: Data<Elem = A>,
1228    D: Dimension,
1229{
1230    MemoryMappedArray::new(Some(data), file_path, mode, offset)
1231}
1232
1233/// Create a new temporary memory-mapped array
1234///
1235/// # Arguments
1236///
1237/// * `data` - The array data to write to the temporary file
1238/// * `mode` - Access mode
1239/// * `offset` - Offset in bytes from the start of the file
1240///
1241/// # Returns
1242///
1243/// A new memory-mapped array backed by a temporary file
1244#[allow(dead_code)]
1245pub fn create_temp_mmap<A, S, D>(
1246    data: &ArrayBase<S, D>,
1247    mode: AccessMode,
1248    offset: usize,
1249) -> Result<MemoryMappedArray<A>, CoreError>
1250where
1251    A: Clone + Copy + 'static + Send + Sync + Send + Sync,
1252    S: Data<Elem = A>,
1253    D: Dimension,
1254{
1255    MemoryMappedArray::new_temp(data, mode, offset)
1256}