scirs2_core/memory_efficient/
memmap.rs

1//! Memory-mapped array implementation for efficient handling of large datasets.
2//!
3//! This module provides a `MemoryMappedArray` type that uses memory mapping to efficiently
4//! access large datasets stored on disk. Memory mapping allows the operating system to
5//! page in data as needed, reducing memory usage for very large arrays.
6//!
7//! Based on `NumPy`'s memmap implementation, this provides similar functionality in Rust.
8
9use super::validation;
10use crate::error::{CoreError, CoreResult, ErrorContext, ErrorLocation};
11use ::ndarray::{Array, ArrayBase, Data, Dimension, IxDyn};
12use ::serde::{Deserialize, Serialize};
13use bincode::{config, serde};
14use memmap2::{Mmap, MmapMut, MmapOptions};
15use std::fs::{File, OpenOptions};
16use std::io::{Read, Write};
17use std::marker::PhantomData;
18use std::mem;
19use std::path::{Path, PathBuf};
20use std::slice;
21use tempfile::NamedTempFile;
22
/// How the backing file of a memory-mapped array is opened and mapped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessMode {
    /// The mapping can only be read.
    ReadOnly,
    /// The mapping can be read and written.
    ReadWrite,
    /// Write access: a new file is created, or an existing one is overwritten.
    Write,
    /// Copy-on-write: modifications are not saved to disk.
    CopyOnWrite,
}

impl AccessMode {
    /// Short textual code for this mode (`"r"`, `"r+"`, `"w+"` or `"c"`).
    pub const fn as_str(&self) -> &'static str {
        match *self {
            Self::ReadOnly => "r",
            Self::ReadWrite => "r+",
            Self::Write => "w+",
            Self::CopyOnWrite => "c",
        }
    }
}
47
48/// Implement FromStr for AccessMode to allow parsing from string
49impl std::str::FromStr for AccessMode {
50    type Err = CoreError;
51
52    fn from_str(s: &str) -> Result<Self, Self::Err> {
53        match s {
54            "r" => Ok(AccessMode::ReadOnly),
55            "r+" => Ok(AccessMode::ReadWrite),
56            "w+" => Ok(AccessMode::Write),
57            "c" => Ok(AccessMode::CopyOnWrite),
58            _ => Err(CoreError::ValidationError(
59                ErrorContext::new(format!("Invalid access mode: {s}"))
60                    .with_location(ErrorLocation::new(file!(), line!())),
61            )),
62        }
63    }
64}
65
66/// Memory-mapped array that efficiently maps file data directly into memory
67#[derive(Debug)]
68pub struct MemoryMappedArray<A>
69where
70    A: Clone + Copy + 'static + Send + Sync + Send + Sync,
71{
72    /// The shape of the array
73    pub shape: Vec<usize>,
74    /// The path to the mapped file
75    pub file_path: PathBuf,
76    /// The access mode
77    pub mode: AccessMode,
78    /// The offset in the file where the data starts (in bytes)
79    pub offset: usize,
80    /// The total number of elements
81    pub size: usize,
82    /// The memory-mapped data (read-only)
83    pub(crate) mmap_view: Option<Mmap>,
84    /// The memory-mapped data (mutable)
85    pub(crate) mmap_view_mut: Option<MmapMut>,
86    /// Whether the file is temporary and should be deleted on drop
87    pub(crate) is_temp: bool,
88    /// Phantom data for type parameters
89    pub(crate) phantom: PhantomData<A>,
90}
91
92/// Header information stored at the beginning of the file
93#[derive(Serialize, Deserialize, Debug, Clone)]
94struct MemoryMappedHeader {
95    /// Element type size in bytes
96    element_size: usize,
97    /// Shape of the array
98    shape: Vec<usize>,
99    /// Total number of elements
100    total_elements: usize,
101}
102
103impl<A> Clone for MemoryMappedArray<A>
104where
105    A: Clone + Copy + 'static + Send + Sync,
106{
107    fn clone(&self) -> Self {
108        // Create a new memory mapping with the same parameters
109        // This is safe because we're creating a new mapping to the same file
110        Self::new::<crate::ndarray::OwnedRepr<A>, IxDyn>(
111            None,
112            &self.file_path,
113            self.mode,
114            self.offset,
115        )
116        .expect("Failed to clone memory mapped array")
117    }
118}
119
120impl<A> MemoryMappedArray<A>
121where
122    A: Clone + Copy + 'static + Send + Sync + Send + Sync,
123{
124    /// Create a new reference to the same memory-mapped file
125    pub fn clone_ref(&self) -> CoreResult<Self> {
126        // Create a new MemoryMappedArray with the same parameters
127        // This will properly initialize the mmap views
128        let element_size = mem::size_of::<A>();
129        let data_size = self.size * element_size;
130
131        // Open file based on access mode
132        match self.mode {
133            AccessMode::ReadOnly => {
134                let file = File::open(&self.file_path)
135                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
136
137                let mmap = unsafe {
138                    MmapOptions::new()
139                        .offset(self.offset as u64)
140                        .len(data_size)
141                        .map(&file)
142                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
143                };
144
145                Ok(Self {
146                    shape: self.shape.clone(),
147                    file_path: self.file_path.clone(),
148                    mode: self.mode,
149                    offset: self.offset,
150                    size: self.size,
151                    mmap_view: Some(mmap),
152                    mmap_view_mut: None,
153                    is_temp: false,
154                    phantom: PhantomData,
155                })
156            }
157            AccessMode::ReadWrite | AccessMode::CopyOnWrite => {
158                let file = OpenOptions::new()
159                    .read(true)
160                    .write(true)
161                    .open(&self.file_path)
162                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
163
164                let mmap = unsafe {
165                    MmapOptions::new()
166                        .offset(self.offset as u64)
167                        .len(data_size)
168                        .map_mut(&file)
169                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
170                };
171
172                Ok(Self {
173                    shape: self.shape.clone(),
174                    file_path: self.file_path.clone(),
175                    mode: self.mode,
176                    offset: self.offset,
177                    size: self.size,
178                    mmap_view: None,
179                    mmap_view_mut: Some(mmap),
180                    is_temp: false,
181                    phantom: PhantomData,
182                })
183            }
184            AccessMode::Write => {
185                // For Write mode, we typically shouldn't clone
186                Err(CoreError::InvalidArgument(
187                    ErrorContext::new("Cannot clone a write-only memory-mapped array".to_string())
188                        .with_location(ErrorLocation::new(file!(), line!())),
189                ))
190            }
191        }
192    }
193    /// Validate safety preconditions and create a slice from raw parts
194    ///
195    /// # Safety
196    /// This method performs comprehensive validation before creating the slice
197    fn validate_and_create_slice<'a>(&self, ptr: *const A) -> Result<&'a [A], CoreError> {
198        // Validate safety preconditions for from_raw_parts
199        if ptr.is_null() {
200            return Err(CoreError::MemoryError(
201                ErrorContext::new("Memory map pointer is null".to_string())
202                    .with_location(ErrorLocation::new(file!(), line!())),
203            ));
204        }
205
206        // Check alignment
207        if (ptr as usize) % std::mem::align_of::<A>() != 0 {
208            return Err(CoreError::MemoryError(
209                ErrorContext::new(format!(
210                    "Memory map pointer is not properly aligned for type {} (alignment: {}, address: 0x{:x})",
211                    std::any::type_name::<A>(),
212                    std::mem::align_of::<A>(),
213                    ptr as usize
214                ))
215                .with_location(ErrorLocation::new(file!(), line!())),
216            ));
217        }
218
219        // Check size bounds to prevent overflow
220        let element_size = std::mem::size_of::<A>();
221        if element_size > 0 && self.size > isize::MAX as usize / element_size {
222            return Err(CoreError::MemoryError(
223                ErrorContext::new(format!(
224                    "Array size {} exceeds maximum safe size for slice creation",
225                    self.size
226                ))
227                .with_location(ErrorLocation::new(file!(), line!())),
228            ));
229        }
230
231        // Check that we don't exceed the memory map bounds
232        let total_bytes = self.size.checked_mul(element_size).ok_or_else(|| {
233            CoreError::MemoryError(
234                ErrorContext::new("Array size calculation overflows".to_string())
235                    .with_location(ErrorLocation::new(file!(), line!())),
236            )
237        })?;
238
239        let mmap_len = if let Some(ref mmap) = self.mmap_view {
240            mmap.len()
241        } else if let Some(ref mmap_mut) = self.mmap_view_mut {
242            mmap_mut.len()
243        } else {
244            return Err(CoreError::MemoryError(
245                ErrorContext::new("No memory map available".to_string())
246                    .with_location(ErrorLocation::new(file!(), line!())),
247            ));
248        };
249
250        if total_bytes > mmap_len {
251            return Err(CoreError::MemoryError(
252                ErrorContext::new(format!(
253                    "Requested array size {total_bytes} bytes exceeds memory map size {mmap_len} bytes"
254                ))
255                .with_location(ErrorLocation::new(file!(), line!())),
256            ));
257        }
258
259        // Now it's safe to create the slice
260        // SAFETY: We have validated:
261        // 1. ptr is not null
262        // 2. ptr is properly aligned for type A
263        // 3. self.size * element_size <= isize::MAX
264        // 4. the memory region is valid (within the memory map bounds)
265        Ok(unsafe { slice::from_raw_parts(ptr, self.size) })
266    }
267
268    /// Get the underlying slice of data
269    pub fn as_slice(&self) -> &[A] {
270        match (&self.mmap_view, &self.mmap_view_mut) {
271            (Some(view), _) => {
272                let ptr = view.as_ptr() as *const A;
273                // SAFETY: The memory map is valid for the lifetime of self
274                unsafe { slice::from_raw_parts(ptr, self.size) }
275            }
276            (_, Some(view)) => {
277                let ptr = view.as_ptr() as *const A;
278                // SAFETY: The memory map is valid for the lifetime of self
279                unsafe { slice::from_raw_parts(ptr, self.size) }
280            }
281            _ => &[],
282        }
283    }
284
285    /// Open an existing memory-mapped array file
286    pub fn path(filepath: &Path, shape: &[usize]) -> Result<Self, CoreError> {
287        // Calculate total elements
288        let size = shape.iter().product();
289
290        // Open the file for reading
291        let file = File::open(filepath)
292            .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
293
294        // Get file size
295        let file_metadata = file
296            .metadata()
297            .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
298        let file_size = file_metadata.len() as usize;
299
300        // Calculate expected data size
301        let element_size = mem::size_of::<A>();
302        let data_size = size * element_size;
303
304        // Check if file has enough data
305        if data_size > file_size {
306            return Err(CoreError::ValidationError(
307                ErrorContext::new(format!(
308                    "File too small for specified shape: need {data_size} bytes, but file is only {file_size} bytes"
309                ))
310                .with_location(ErrorLocation::new(file!(), line!())),
311            ));
312        }
313
314        // Create memory mapping
315        let mmap = unsafe {
316            MmapOptions::new()
317                .len(data_size)
318                .map(&file)
319                .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
320        };
321
322        Ok(Self {
323            shape: shape.to_vec(),
324            file_path: filepath.to_path_buf(),
325            mode: AccessMode::ReadOnly,
326            offset: 0,
327            size,
328            mmap_view: Some(mmap),
329            mmap_view_mut: None,
330            is_temp: false,
331            phantom: PhantomData,
332        })
333    }
334
335    /// Create a new memory-mapped array from an existing array
336    ///
337    /// # Arguments
338    ///
339    /// * `data` - The source array to map to a file
340    /// * `file_path` - The path to the file to create or open
341    /// * `mode` - The access mode
342    /// * `offset` - The offset in the file where the data should start (in bytes)
343    ///
344    /// # Returns
345    ///
346    /// A new `MemoryMappedArray` instance
347    pub fn new<S, D>(
348        data: Option<&ArrayBase<S, D>>,
349        file_path: &Path,
350        mode: AccessMode,
351        offset: usize,
352    ) -> Result<Self, CoreError>
353    where
354        S: Data<Elem = A>,
355        D: Dimension,
356    {
357        let (shape, size) = if let Some(array) = data {
358            validation::check_not_empty(array)?;
359            (array.shape().to_vec(), array.len())
360        } else {
361            // If no data is provided, try to read the file header
362            let (header_, _) = read_header::<A>(file_path)?;
363            (header_.shape, header_.total_elements)
364        };
365
366        // Calculate required file size
367        let element_size = mem::size_of::<A>();
368        let data_size = size * element_size;
369
370        // Create and prepare the file depending on the mode
371        match mode {
372            AccessMode::ReadOnly => {
373                // Open existing file for reading only
374                let file = File::open(file_path)
375                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
376
377                // Get file size to ensure proper mapping
378                let file_metadata = file
379                    .metadata()
380                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
381                let file_size = file_metadata.len() as usize;
382
383                // Ensure the file is large enough
384                if offset + data_size > file_size {
385                    return Err(CoreError::ValidationError(
386                        ErrorContext::new(format!(
387                            "File too small: need {needed} bytes, but file is only {file_size} bytes",
388                            needed = offset + data_size
389                        ))
390                        .with_location(ErrorLocation::new(file!(), line!())),
391                    ));
392                }
393
394                // Create a read-only memory mapping
395                let mmap = unsafe {
396                    MmapOptions::new()
397                        .offset(offset as u64)
398                        .len(data_size)
399                        .map(&file)
400                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
401                };
402
403                Ok(Self {
404                    shape,
405                    file_path: file_path.to_path_buf(),
406                    mode,
407                    offset,
408                    size,
409                    mmap_view: Some(mmap),
410                    mmap_view_mut: None,
411                    is_temp: false,
412                    phantom: PhantomData,
413                })
414            }
415            AccessMode::ReadWrite => {
416                // Open existing file for reading and writing
417                let file = OpenOptions::new()
418                    .read(true)
419                    .write(true)
420                    .open(file_path)
421                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
422
423                // Get file metadata to check size
424                let metadata = file
425                    .metadata()
426                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
427                let file_size = metadata.len() as usize;
428
429                // Ensure file has sufficient size before mapping
430                if offset + data_size > file_size {
431                    file.set_len((offset + data_size) as u64)
432                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
433                }
434
435                // Create a mutable memory mapping
436                let mut mmap = unsafe {
437                    MmapOptions::new()
438                        .offset(offset as u64)
439                        .len(data_size)
440                        .map_mut(&file)
441                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
442                };
443
444                // If data is provided, write it to the mapping
445                if let Some(array) = data {
446                    // Convert array data to bytes
447                    let bytes = unsafe {
448                        slice::from_raw_parts(
449                            array.as_ptr() as *const u8,
450                            array.len() * mem::size_of::<A>(),
451                        )
452                    };
453
454                    // Copy the data to the memory mapping
455                    mmap[..].copy_from_slice(bytes);
456                }
457
458                Ok(Self {
459                    shape,
460                    file_path: file_path.to_path_buf(),
461                    mode,
462                    offset,
463                    size,
464                    mmap_view: None,
465                    mmap_view_mut: Some(mmap),
466                    is_temp: false,
467                    phantom: PhantomData,
468                })
469            }
470            AccessMode::Write => {
471                // Create or truncate file for writing
472                let mut file = OpenOptions::new()
473                    .read(true)
474                    .write(true)
475                    .create(true)
476                    .truncate(true)
477                    .open(file_path)
478                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
479
480                // Create header
481                let header = MemoryMappedHeader {
482                    element_size,
483                    shape: shape.clone(),
484                    total_elements: size,
485                };
486
487                // Serialize header to bytes
488                let cfg = config::standard();
489                let header_bytes = serde::encode_to_vec(&header, cfg).map_err(|e| {
490                    CoreError::ValidationError(
491                        ErrorContext::new(format!("Failed to serialize header: {e}"))
492                            .with_location(ErrorLocation::new(file!(), line!())),
493                    )
494                })?;
495
496                // Write header length first (8 bytes)
497                let header_len = header_bytes.len() as u64;
498                file.write_all(&header_len.to_le_bytes())
499                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
500
501                // Write header to file
502                file.write_all(&header_bytes)
503                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
504                // Add padding so the start of the data region is aligned for type A.
505                // This avoids unaligned pointer issues when creating typed slices over the mmap.
506                let align = std::mem::align_of::<A>();
507                let mut padding_size = 0usize;
508                let header_size_unaligned = 8 + header_bytes.len();
509                if align > 1 {
510                    let rem = header_size_unaligned % align;
511                    if rem != 0 {
512                        padding_size = align - rem;
513                        // Write zero padding bytes
514                        let padding = vec![0u8; padding_size];
515                        file.write_all(&padding).map_err(|e| {
516                            CoreError::IoError(ErrorContext::new(format!(
517                                "Failed to write header padding: {e}"
518                            )))
519                        })?;
520                    }
521                }
522                let header_size = header_size_unaligned + padding_size; // 8 bytes for header length + header bytes + padding
523
524                // Calculate total file size (header + padding + data)
525                let total_size = header_size + data_size;
526
527                // Set file length to accommodate header and data
528                file.set_len(total_size as u64)
529                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
530
531                // Create a mutable memory mapping
532                // When we have a header, the actual data starts after the header
533                let data_offset = header_size + offset;
534                let mut mmap = unsafe {
535                    MmapOptions::new()
536                        .offset(data_offset as u64)
537                        .len(data_size)
538                        .map_mut(&file)
539                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
540                };
541
542                // If data is provided, write it to the mapping
543                if let Some(array) = data {
544                    // Convert array data to bytes
545                    let bytes = unsafe {
546                        slice::from_raw_parts(
547                            array.as_ptr() as *const u8,
548                            array.len() * mem::size_of::<A>(),
549                        )
550                    };
551
552                    // Copy the data to the memory mapping
553                    mmap[..].copy_from_slice(bytes);
554                }
555
556                Ok(Self {
557                    shape,
558                    file_path: file_path.to_path_buf(),
559                    mode,
560                    offset: data_offset, // Store the actual data offset, not the requested offset
561                    size,
562                    mmap_view: None,
563                    mmap_view_mut: Some(mmap),
564                    is_temp: false,
565                    phantom: PhantomData,
566                })
567            }
568            AccessMode::CopyOnWrite => {
569                // Open existing file for reading
570                let file = File::open(file_path)
571                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
572
573                // Create a copy-on-write memory mapping
574                let mmap = unsafe {
575                    MmapOptions::new()
576                        .offset(offset as u64)
577                        .len(data_size)
578                        .map_copy(&file)
579                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
580                };
581
582                Ok(Self {
583                    shape,
584                    file_path: file_path.to_path_buf(),
585                    mode,
586                    offset,
587                    size,
588                    mmap_view: None,
589                    mmap_view_mut: Some(mmap),
590                    is_temp: false,
591                    phantom: PhantomData,
592                })
593            }
594        }
595    }
596
597    /// Create a new memory-mapped array with a temporary file
598    ///
599    /// # Arguments
600    ///
601    /// * `data` - The source array to map to a temporary file
602    /// * `mode` - The access mode
603    /// * `offset` - The offset in the file where the data should start (in bytes)
604    ///
605    /// # Returns
606    ///
607    /// A new `MemoryMappedArray` instance backed by a temporary file
608    pub fn new_temp<S, D>(
609        data: &ArrayBase<S, D>,
610        mode: AccessMode,
611        offset: usize,
612    ) -> Result<Self, CoreError>
613    where
614        S: Data<Elem = A>,
615        D: Dimension,
616    {
617        let temp_file = NamedTempFile::new()
618            .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
619        let file_path = temp_file.path().to_path_buf();
620
621        // Manually persist the temp file so it stays around after we return
622        let _file = temp_file
623            .persist(&file_path)
624            .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
625
626        let mut result = Self::new(Some(data), &file_path, mode, offset)?;
627        result.is_temp = true;
628
629        Ok(result)
630    }
631
632    /// Get a view of the array data as an ndarray Array with the given dimension
633    ///
634    /// # Returns
635    ///
636    /// An ndarray Array view of the memory-mapped data
637    pub fn as_array<D>(&self) -> Result<Array<A, D>, CoreError>
638    where
639        D: Dimension,
640    {
641        // Get a slice to the memory-mapped data
642        let data_slice = match (&self.mmap_view, &self.mmap_view_mut) {
643            (Some(view), _) => {
644                // Read-only view
645                let ptr = view.as_ptr() as *const A;
646                // Validation: ptr should be within bounds
647                // Check that the view has enough bytes for the requested size
648                let required_bytes = self.size * std::mem::size_of::<A>();
649                if view.len() < required_bytes {
650                    return Err(CoreError::ValidationError(
651                        ErrorContext::new(format!(
652                            "Memory map view too small: {} bytes available, {} bytes required",
653                            view.len(),
654                            required_bytes
655                        ))
656                        .with_location(ErrorLocation::new(file!(), line!())),
657                    ));
658                }
659                // Check pointer alignment
660                if ptr as usize % std::mem::align_of::<A>() != 0 {
661                    return Err(CoreError::ValidationError(
662                        ErrorContext::new("Memory map pointer is not properly aligned")
663                            .with_location(ErrorLocation::new(file!(), line!())),
664                    ));
665                }
666                unsafe { std::slice::from_raw_parts(ptr, self.size) }
667            }
668            (_, Some(view)) => {
669                // Mutable view
670                let ptr = view.as_ptr() as *const A;
671                // Validation: ptr should be within bounds
672                // Check that the view has enough bytes for the requested size
673                let required_bytes = self.size * std::mem::size_of::<A>();
674                if view.len() < required_bytes {
675                    return Err(CoreError::ValidationError(
676                        ErrorContext::new(format!(
677                            "Memory map view too small: {} bytes available, {} bytes required",
678                            view.len(),
679                            required_bytes
680                        ))
681                        .with_location(ErrorLocation::new(file!(), line!())),
682                    ));
683                }
684                // Check pointer alignment
685                if ptr as usize % std::mem::align_of::<A>() != 0 {
686                    return Err(CoreError::ValidationError(
687                        ErrorContext::new("Memory map pointer is not properly aligned")
688                            .with_location(ErrorLocation::new(file!(), line!())),
689                    ));
690                }
691                unsafe { std::slice::from_raw_parts(ptr, self.size) }
692            }
693            _ => {
694                return Err(CoreError::ValidationError(
695                    ErrorContext::new("Memory map is not initialized".to_string())
696                        .with_location(ErrorLocation::new(file!(), line!())),
697                ));
698            }
699        };
700
701        // No need to create a separate dimension object - use the from_shape_vec method on Array directly
702        // This approach works because we're not trying to use the dimension directly
703        let shape_vec = self.shape.clone();
704
705        // Create an array from the memory-mapped data
706        let array = Array::from_shape_vec(shape_vec, data_slice.to_vec()).map_err(|e| {
707            CoreError::ShapeError(
708                ErrorContext::new(format!("error: {e}"))
709                    .with_location(ErrorLocation::new(file!(), line!())),
710            )
711        })?;
712
713        // Convert to the requested dimension type
714        let array = array.into_dimensionality::<D>().map_err(|e| {
715            CoreError::ShapeError(
716                ErrorContext::new(format!(
717                    "Failed to convert array to requested dimension type: {e}"
718                ))
719                .with_location(ErrorLocation::new(file!(), line!())),
720            )
721        })?;
722
723        Ok(array)
724    }
725
726    /// Get a mutable view of the array data as an ndarray ArrayViewMut with the given dimension
727    ///
728    /// # Returns
729    ///
730    /// A mutable ndarray ArrayViewMut of the memory-mapped data
731    ///
732    /// # Errors
733    ///
734    /// Returns an error if the array is in read-only mode
735    pub fn as_array_mut<D>(&mut self) -> Result<crate::ndarray::ArrayViewMut<A, D>, CoreError>
736    where
737        D: Dimension,
738    {
739        if self.mode == AccessMode::ReadOnly {
740            return Err(CoreError::ValidationError(
741                ErrorContext::new(
742                    "Cannot get mutable view of read-only memory-mapped array".to_string(),
743                )
744                .with_location(ErrorLocation::new(file!(), line!())),
745            ));
746        }
747
748        // Get a mutable slice to the memory-mapped data
749        let data_slice = if let Some(view) = &mut self.mmap_view_mut {
750            let ptr = view.as_mut_ptr() as *mut A;
751
752            // Validate safety preconditions for from_raw_parts_mut
753            if ptr.is_null() {
754                return Err(CoreError::MemoryError(
755                    ErrorContext::new("Memory map pointer is null".to_string())
756                        .with_location(ErrorLocation::new(file!(), line!())),
757                ));
758            }
759
760            // Check alignment
761            if (ptr as usize) % std::mem::align_of::<A>() != 0 {
762                return Err(CoreError::MemoryError(
763                    ErrorContext::new(format!(
764                        "Memory map pointer is not properly aligned for type {} (alignment: {}, address: 0x{:x})",
765                        std::any::type_name::<A>(),
766                        std::mem::align_of::<A>(),
767                        ptr as usize
768                    ))
769                    .with_location(ErrorLocation::new(file!(), line!())),
770                ));
771            }
772
773            // Check size bounds to prevent overflow
774            let element_size = std::mem::size_of::<A>();
775            if element_size > 0 && self.size > isize::MAX as usize / element_size {
776                return Err(CoreError::MemoryError(
777                    ErrorContext::new(format!(
778                        "Array size {} exceeds maximum safe size for slice creation",
779                        self.size
780                    ))
781                    .with_location(ErrorLocation::new(file!(), line!())),
782                ));
783            }
784
785            // Check that we don't exceed the memory map bounds
786            let total_bytes = self.size.checked_mul(element_size).ok_or_else(|| {
787                CoreError::MemoryError(
788                    ErrorContext::new("Array size calculation overflows".to_string())
789                        .with_location(ErrorLocation::new(file!(), line!())),
790                )
791            })?;
792
793            if total_bytes > view.len() {
794                return Err(CoreError::MemoryError(
795                    ErrorContext::new(format!(
796                        "Requested array size {} bytes exceeds memory map size {} bytes",
797                        total_bytes,
798                        view.len()
799                    ))
800                    .with_location(ErrorLocation::new(file!(), line!())),
801                ));
802            }
803
804            // Now it's safe to create the slice
805            // SAFETY: We have validated:
806            // 1. ptr is not null
807            // 2. ptr is properly aligned for type A
808            // 3. self.size * element_size <= isize::MAX
809            // 4. the memory region is valid (within the memory map bounds)
810            unsafe { slice::from_raw_parts_mut(ptr, self.size) }
811        } else {
812            return Err(CoreError::ValidationError(
813                ErrorContext::new("Mutable memory map is not initialized".to_string())
814                    .with_location(ErrorLocation::new(file!(), line!())),
815            ));
816        };
817
818        // Create a mutable array view from the memory-mapped data
819        let array_view = crate::ndarray::ArrayViewMut::from_shape(self.shape.clone(), data_slice)
820            .map_err(|e| {
821            CoreError::ShapeError(
822                ErrorContext::new(format!("error: {e}"))
823                    .with_location(ErrorLocation::new(file!(), line!())),
824            )
825        })?;
826
827        // Convert to the requested dimension type
828        let array_view = array_view.into_dimensionality::<D>().map_err(|e| {
829            CoreError::ShapeError(
830                ErrorContext::new(format!(
831                    "Failed to convert array to requested dimension type: {e}"
832                ))
833                .with_location(ErrorLocation::new(file!(), line!())),
834            )
835        })?;
836
837        Ok(array_view)
838    }
839
840    /// Flush changes to disk if the array is writable
841    ///
842    /// # Returns
843    ///
844    /// `Ok(())` if the flush succeeded, or an error
845    pub fn flush(&mut self) -> Result<(), CoreError> {
846        if let Some(view) = &mut self.mmap_view_mut {
847            view.flush()
848                .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
849        }
850
851        Ok(())
852    }
853
854    /// Reload the memory mapping from disk
855    ///
856    /// This function is useful when changes have been made to the underlying file
857    /// by other processes or by direct file I/O operations.
858    ///
859    /// # Returns
860    ///
861    /// `Ok(())` if the reload succeeded, or an error
862    pub fn reload(&mut self) -> Result<(), CoreError> {
863        // First, flush any pending changes
864        let _ = self.flush();
865
866        // Reopen the file with the original mode
867        let file_path = self.file_path.clone();
868        let mode = self.mode;
869        let offset = self.offset;
870
871        // Clear existing memory maps
872        self.mmap_view = None;
873        self.mmap_view_mut = None;
874
875        // Create the appropriate memory mapping based on the mode
876        match mode {
877            AccessMode::ReadOnly => {
878                // Open existing file for reading only
879                let file = File::open(&file_path)
880                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
881
882                // Create a read-only memory mapping
883                let mmap = unsafe {
884                    MmapOptions::new()
885                        .offset(offset as u64)
886                        .map(&file)
887                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
888                };
889
890                self.mmap_view = Some(mmap);
891            }
892            AccessMode::ReadWrite | AccessMode::Write => {
893                // Open existing file for reading and writing
894                let file = OpenOptions::new()
895                    .read(true)
896                    .write(true)
897                    .open(&file_path)
898                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
899
900                // Create a mutable memory mapping
901                let mmap = unsafe {
902                    MmapOptions::new()
903                        .offset(offset as u64)
904                        .map_mut(&file)
905                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
906                };
907
908                self.mmap_view_mut = Some(mmap);
909            }
910            AccessMode::CopyOnWrite => {
911                // Open existing file for reading only (copy-on-write doesn't modify the file)
912                let file = File::open(&file_path)
913                    .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
914
915                // Create a copy-on-write memory mapping
916                let mmap = unsafe {
917                    MmapOptions::new()
918                        .offset(offset as u64)
919                        .map_copy(&file)
920                        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?
921                };
922
923                self.mmap_view_mut = Some(mmap);
924            }
925        }
926
927        Ok(())
928    }
929
930    /// Check if the array is temporary
931    ///
932    /// # Returns
933    ///
934    /// `true` if the array is backed by a temporary file, `false` otherwise
935    pub fn is_temp(&self) -> bool {
936        self.is_temp
937    }
938
939    /// Get a view of the memory-mapped data as bytes
940    ///
941    /// # Returns
942    ///
943    /// A byte slice view of the memory-mapped data
944    pub fn as_bytes(&self) -> Result<&[u8], CoreError> {
945        match (&self.mmap_view, &self.mmap_view_mut) {
946            (Some(view), _) => {
947                // Read-only view
948                Ok(view)
949            }
950            (_, Some(view)) => {
951                // Mutable view
952                Ok(view)
953            }
954            _ => Err(CoreError::ValidationError(
955                ErrorContext::new("Memory map is not initialized".to_string())
956                    .with_location(ErrorLocation::new(file!(), line!())),
957            )),
958        }
959    }
960
961    /// Get a mutable view of the memory-mapped data as bytes
962    ///
963    /// # Returns
964    ///
965    /// A mutable byte slice view of the memory-mapped data
966    pub fn as_bytes_mut(&mut self) -> Result<&mut [u8], CoreError> {
967        if self.mode == AccessMode::ReadOnly {
968            return Err(CoreError::ValidationError(
969                ErrorContext::new(
970                    "Cannot get mutable view of read-only memory-mapped array".to_string(),
971                )
972                .with_location(ErrorLocation::new(file!(), line!())),
973            ));
974        }
975
976        match &mut self.mmap_view_mut {
977            Some(view) => {
978                // Mutable view
979                Ok(view)
980            }
981            _ => Err(CoreError::ValidationError(
982                ErrorContext::new("Mutable memory map is not initialized".to_string())
983                    .with_location(ErrorLocation::new(file!(), line!())),
984            )),
985        }
986    }
987}
988
989impl<A> Drop for MemoryMappedArray<A>
990where
991    A: Clone + Copy + 'static + Send + Sync + Send + Sync,
992{
993    fn drop(&mut self) {
994        // Flush any pending changes
995        if let Some(view) = &mut self.mmap_view_mut {
996            let _ = view.flush();
997        }
998
999        // If temporary, remove the file when done
1000        if self.is_temp {
1001            let _ = std::fs::remove_file(&self.file_path);
1002        }
1003    }
1004}
1005
1006/// Helper function to read the header from a file
1007#[allow(dead_code)]
1008fn read_header<A: Clone + Copy + 'static + Send + Sync>(
1009    file_path: &Path,
1010) -> Result<(MemoryMappedHeader, usize), CoreError> {
1011    // Open the file
1012    let mut file =
1013        File::open(file_path).map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
1014
1015    // Try to read the file as a proper memory-mapped file with header
1016    // First, check if the file is large enough to contain a header
1017    let file_metadata = file
1018        .metadata()
1019        .map_err(|e| CoreError::IoError(ErrorContext::new(e.to_string())))?;
1020    let file_size = file_metadata.len() as usize;
1021
1022    if file_size < 8 {
1023        // File is too small to have a proper header, treat as raw data
1024        let element_size = std::mem::size_of::<A>();
1025        let total_elements = file_size / element_size;
1026
1027        let header = MemoryMappedHeader {
1028            element_size,
1029            shape: vec![total_elements],
1030            total_elements,
1031        };
1032
1033        return Ok((header, 0)); // No header offset for raw files
1034    }
1035
1036    // Try to read as a proper memory-mapped file with header
1037    // Read header length (first 8 bytes)
1038    let mut header_len_bytes = [0u8; 8];
1039    if file.read_exact(&mut header_len_bytes).is_err() {
1040        // Failed to read header length, treat as raw data
1041        let element_size = std::mem::size_of::<A>();
1042        let total_elements = file_size / element_size;
1043
1044        let header = MemoryMappedHeader {
1045            element_size,
1046            shape: vec![total_elements],
1047            total_elements,
1048        };
1049
1050        return Ok((header, 0)); // No header offset for raw files
1051    }
1052
1053    let header_len = u64::from_ne_bytes(header_len_bytes) as usize;
1054
1055    // Sanity check: header length should be reasonable
1056    if header_len > file_size || header_len > 1024 * 1024 {
1057        // Header length is unreasonable, treat as raw data
1058        let element_size = std::mem::size_of::<A>();
1059        let total_elements = file_size / element_size;
1060
1061        let header = MemoryMappedHeader {
1062            element_size,
1063            shape: vec![total_elements],
1064            total_elements,
1065        };
1066
1067        return Ok((header, 0)); // No header offset for raw files
1068    }
1069
1070    // Read header data
1071    let mut header_bytes = vec![0u8; header_len];
1072    if file.read_exact(&mut header_bytes).is_err() {
1073        // Failed to read header, treat as raw data
1074        let element_size = std::mem::size_of::<A>();
1075        let total_elements = file_size / element_size;
1076
1077        let header = MemoryMappedHeader {
1078            element_size,
1079            shape: vec![total_elements],
1080            total_elements,
1081        };
1082
1083        return Ok((header, 0)); // No header offset for raw files
1084    }
1085
1086    // Try to deserialize header
1087    let cfg = config::standard();
1088    match serde::decode_from_slice::<MemoryMappedHeader, _>(&header_bytes, cfg) {
1089        Ok((header, _len)) => {
1090            let element_size_expected = std::mem::size_of::<A>();
1091            if header.element_size == element_size_expected {
1092                // Compute aligned header size (including potential padding) for newer files.
1093                let base_header_size = 8 + header_len; // length prefix + header bytes
1094                let align = std::mem::align_of::<A>();
1095                let padding = if align > 1 {
1096                    (align - (base_header_size % align)) % align
1097                } else {
1098                    0
1099                };
1100                let mut aligned_header_size = base_header_size;
1101                if padding > 0 && base_header_size + padding <= file_size {
1102                    // Peek padding bytes (should be zeros). We are currently positioned right after the header.
1103                    let mut padding_buf = vec![0u8; padding];
1104                    // Attempt to read padding; if it fails, revert to unaligned header size.
1105                    match file.read_exact(&mut padding_buf) {
1106                        Ok(_) => {
1107                            // Optional: validate all zeros; if not zeros, treat as legacy file without padding.
1108                            if padding_buf.iter().all(|b| *b == 0) {
1109                                aligned_header_size += padding;
1110                            } else {
1111                                // Non-zero padding -> legacy; reset file cursor backwards so data reads correctly.
1112                                // Seek back to start of data (cannot easily without Seek here; treat as legacy by ignoring consumption)
1113                                // We consumed bytes; that's acceptable because caller will start data mapping after aligned_header_size
1114                                // but since we won't add padding, adjust size back.
1115                                aligned_header_size = base_header_size; // do not include padding
1116                            }
1117                        }
1118                        Err(_) => {
1119                            aligned_header_size = base_header_size; // legacy file
1120                        }
1121                    }
1122                }
1123                Ok((header, aligned_header_size))
1124            } else {
1125                // Element size mismatch -> treat as raw data (legacy raw file)
1126                let element_size = element_size_expected;
1127                let total_elements = file_size / element_size;
1128                let fallback_header = MemoryMappedHeader {
1129                    element_size,
1130                    shape: vec![total_elements],
1131                    total_elements,
1132                };
1133                Ok((fallback_header, 0))
1134            }
1135        }
1136        Err(_) => {
1137            // Failed to deserialize header, treat as raw data
1138            let element_size = std::mem::size_of::<A>();
1139            let total_elements = file_size / element_size;
1140            let header = MemoryMappedHeader {
1141                element_size,
1142                shape: vec![total_elements],
1143                total_elements,
1144            };
1145            Ok((header, 0))
1146        }
1147    }
1148}
1149
1150/// Create a memory-mapped array from an existing file
1151///
1152/// # Arguments
1153///
1154/// * `file_path` - Path to the file to memory-map
1155/// * `mode` - Access mode (read-only, read-write, etc.)
1156/// * `offset` - Offset in bytes from the start of the file
1157///
1158/// # Returns
1159///
1160/// A new memory-mapped array
1161#[allow(dead_code)]
1162pub fn open_mmap<A, D>(
1163    file_path: &Path,
1164    mode: AccessMode,
1165    offset: usize,
1166) -> Result<MemoryMappedArray<A>, CoreError>
1167where
1168    A: Clone + Copy + Send + Sync + 'static,
1169    D: Dimension,
1170{
1171    // Read the header to get shape and element info
1172    let (header, header_size) = read_header::<A>(file_path)?;
1173
1174    // Verify element size
1175    let element_size = std::mem::size_of::<A>();
1176    if header.element_size != element_size {
1177        return Err(CoreError::ValidationError(
1178            ErrorContext::new(format!(
1179                "Element size mismatch: file has {} bytes, but type requires {} bytes",
1180                header.element_size, element_size
1181            ))
1182            .with_location(ErrorLocation::new(file!(), line!())),
1183        ));
1184    }
1185
1186    // Calculate the effective offset (header size + user offset)
1187    let effective_offset = header_size + offset;
1188
1189    // Create the array with the header info and effective offset
1190    MemoryMappedArray::<A>::new::<crate::ndarray::OwnedRepr<A>, D>(
1191        None,
1192        file_path,
1193        mode,
1194        effective_offset,
1195    )
1196}
1197
1198/// Create a new memory-mapped array file
1199///
1200/// # Arguments
1201///
1202/// * `data` - The array data to write to the file
1203/// * `file_path` - Path where the memory-mapped file should be created
1204/// * `mode` - Access mode (should be Write for new files)
1205/// * `offset` - Offset in bytes from the start of the file
1206///
1207/// # Returns
1208///
1209/// A new memory-mapped array
1210#[allow(dead_code)]
1211pub fn create_mmap<A, S, D>(
1212    data: &ArrayBase<S, D>,
1213    file_path: &Path,
1214    mode: AccessMode,
1215    offset: usize,
1216) -> Result<MemoryMappedArray<A>, CoreError>
1217where
1218    A: Clone + Copy + 'static + Send + Sync + Send + Sync,
1219    S: Data<Elem = A>,
1220    D: Dimension,
1221{
1222    MemoryMappedArray::new(Some(data), file_path, mode, offset)
1223}
1224
1225/// Create a new temporary memory-mapped array
1226///
1227/// # Arguments
1228///
1229/// * `data` - The array data to write to the temporary file
1230/// * `mode` - Access mode
1231/// * `offset` - Offset in bytes from the start of the file
1232///
1233/// # Returns
1234///
1235/// A new memory-mapped array backed by a temporary file
1236#[allow(dead_code)]
1237pub fn create_temp_mmap<A, S, D>(
1238    data: &ArrayBase<S, D>,
1239    mode: AccessMode,
1240    offset: usize,
1241) -> Result<MemoryMappedArray<A>, CoreError>
1242where
1243    A: Clone + Copy + 'static + Send + Sync + Send + Sync,
1244    S: Data<Elem = A>,
1245    D: Dimension,
1246{
1247    MemoryMappedArray::new_temp(data, mode, offset)
1248}