scirs2_core/memory_efficient/zero_serialization.rs
1//! Zero-copy serialization and deserialization for memory-mapped arrays.
2//!
3//! This module provides traits and implementations for serializing and deserializing
4//! data in memory-mapped arrays with zero-copy operations. It allows efficient
5//! loading and saving of data without unnecessary memory allocations.
6//!
7//! # Overview
8//!
9//! Zero-copy serialization avoids creating unnecessary copies of data by directly
10//! mapping file content to memory and interpreting it in place. This approach is
11//! especially beneficial for:
12//!
13//! - Very large datasets that don't fit comfortably in memory
14//! - Applications requiring frequent access to subsets of large arrays
15//! - Performance-critical code where minimizing memory copies is important
16//! - Systems with limited memory resources
17//!
18//! # Key Features
19//!
20//! - Fast serialization and deserialization with minimal memory overhead
21//! - Support for metadata reading/updating without loading the entire array
22//! - Flexible access modes (ReadOnly, ReadWrite, CopyOnWrite)
23//! - Efficient error handling and validation
24//! - Seamless integration with ndarray
25//!
26//! # Usage Examples
27//!
28//! ## Saving an Array with Zero-Copy Serialization
29//!
30//! ```no_run
31//! use scirs2_core::ndarray::Array2;
32//! use scirs2_core::memory_efficient::{MemoryMappedArray, ZeroCopySerialization};
33//! use serde_json::json;
34//! use std::path::Path;
35//!
36//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
37//! // Create a 2D array
38//! let data = Array2::<f64>::from_shape_fn((100, 100), |(i, j)| (i * 100 + j) as f64);
39//!
40//! // Define metadata
41//! let metadata = json!({
42//! "description": "Sample 2D array",
43//! "created": "2023-05-20",
44//! "dimensions": {
45//! "rows": 100,
46//! "cols": 100
47//! }
48//! });
49//!
50//! // Save with zero-copy serialization
51//! let filepath = Path::new("array_data.bin");
52//! MemoryMappedArray::<f64>::save_array(&data, &filepath, Some(metadata))?;
53//! # Ok(())
54//! # }
55//! ```
56//!
57//! ## Loading an Array with Zero-Copy Deserialization
58//!
59//! ```no_run
60//! use scirs2_core::memory_efficient::{AccessMode, MemoryMappedArray, ZeroCopySerialization};
61//! use std::path::Path;
62//!
63//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
64//! // Load with zero-copy deserialization
65//! let filepath = Path::new("array_data.bin");
66//! let array = MemoryMappedArray::<f64>::open_zero_copy(&filepath, AccessMode::ReadOnly)?;
67//!
68//! // Access as a standard ndarray
69//! let ndarray = array.readonlyarray::<scirs2_core::ndarray::Ix2>()?;
70//! println!("Value at [10, 20]: {}", ndarray[[10, 20]]);
71//! # Ok(())
72//! # }
73//! ```
74//!
75//! ## Working with Metadata
76//!
77//! ```no_run
78//! use scirs2_core::memory_efficient::{MemoryMappedArray, ZeroCopySerialization};
79//! use serde_json::json;
80//! use std::path::Path;
81//!
82//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
83//! let filepath = Path::new("array_data.bin");
84//!
85//! // Read metadata without loading the array
86//! let metadata = MemoryMappedArray::<f64>::read_metadata(&filepath)?;
87//! println!("Description: {}", metadata["description"]);
88//!
89//! // Update metadata without rewriting the array
90//! let updated_metadata = json!({
91//! "description": "Updated sample 2D array",
92//! "created": "2023-05-20",
93//! "updated": "2023-05-21",
94//! "dimensions": {
95//! "rows": 100,
96//! "cols": 100
97//! }
98//! });
99//! MemoryMappedArray::<f64>::update_metadata(&filepath, updated_metadata)?;
100//! # Ok(())
101//! # }
102//! ```
103//!
104//! ## Modifying Data In-Place
105//!
106//! ```no_run
107//! use scirs2_core::memory_efficient::{AccessMode, MemoryMappedArray, ZeroCopySerialization};
108//! use std::path::Path;
109//!
110//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
111//! let filepath = Path::new("array_data.bin");
112//!
113//! // Load with read-write access
114//! let mut array = MemoryMappedArray::<f64>::open_zero_copy(&filepath, AccessMode::ReadWrite)?;
115//!
116//! // Modify the array
117//! {
118//! let mut ndarray = array.as_array_mut::<scirs2_core::ndarray::Ix2>()?;
119//!
120//! // Set all diagonal elements to 1000
121//! for i in 0..100 {
122//! if i < ndarray.shape()[0] && i < ndarray.shape()[1] {
123//! ndarray[[i, i]] = 1000.0;
124//! }
125//! }
126//! }
127//!
128//! // Flush changes to disk
129//! array.flush()?;
130//! # Ok(())
131//! # }
132//! ```
133//!
134//! # File Format
135//!
136//! Files saved with zero-copy serialization have the following structure:
137//!
//! 1. Header Length (8 bytes): u64 in native byte order giving the size of the serialized header
//! 2. Header (variable size): Bincode-serialized ZeroCopyHeader struct containing:
//!    - Type name (string)
//!    - Type identifier (string), compared with `ZeroCopySerializable::type_identifier()` on load
//!    - Element size in bytes (usize)
//!    - Array shape (`Vec<usize>`)
//!    - Total number of elements (usize)
//!    - Optional metadata (JSON text stored as `Option<String>`)
//! 3. Alignment padding: zero bytes so that the array data starts at a multiple of the element type's alignment
//! 4. Array Data: Raw binary data of the array's elements
146//!
147//! This format allows efficient operations like:
148//! - Reading metadata without loading the full array
149//! - Updating metadata without rewriting array data
150//! - Direct memory mapping of array data for zero-copy access
151//!
152//! # Performance Considerations
153//!
154//! - Zero-copy deserialization is almost instantaneous regardless of array size
155//! - First access to memory-mapped data may cause page faults, impacting initial performance
156//! - Subsequent accesses benefit from OS caching mechanisms
157//! - Memory usage is determined by accessed data, not the entire array size
158//! - Prefer sequential access patterns when possible for optimal performance
159//!
160//! # Custom Type Support
161//!
162//! This module supports creating custom zero-copy serializable types. To create
163//! a custom type that works with zero-copy serialization:
164//!
165//! 1. Use `#[repr(C)]` or `#[repr(transparent)]` to ensure stable memory layout
166//! 2. Implement `Clone + Copy`
167//! 3. Only include fields that are themselves zero-copy serializable (primitives)
168//! 4. Implement the `ZeroCopySerializable` trait with proper safety checks
169//! 5. Optionally override `type_identifier()` for more precise type validation
170//!
171//! Example of a custom complex number type:
172//!
173//! ```
174//! use std::mem;
175//! use std::slice;
176//! use scirs2_core::memory_efficient::ZeroCopySerializable;
177//! use scirs2_core::error::{CoreResult, CoreError, ErrorContext, ErrorLocation};
178//!
179//! #[repr(C)]
180//! #[derive(Debug, Clone, Copy, PartialEq)]
181//! struct Complex64 {
182//! real: f64,
183//! imag: f64,
184//! }
185//!
186//! impl Complex64 {
187//! fn new(real: f64, imag: f64) -> Self {
188//! Self { real, imag }
189//! }
190//! }
191//!
192//! impl ZeroCopySerializable for Complex64 {
193//! unsafe fn from_bytes(bytes: &[u8]) -> CoreResult<Self> {
194//! if !Self::validate_bytes(bytes) {
195//! return Err(CoreError::ValidationError(
196//! ErrorContext::new(format!(
197//! "Invalid byte length for Complex64: expected {} got {}",
198//! mem::size_of::<Self>(),
199//! bytes.len()
200//! ))
201//! .with_location(ErrorLocation::new(file!(), line!())),
202//! ));
203//! }
204//!
//!         // A `&[u8]` carries no alignment guarantee, so read the value without assuming one.
//!         Ok(std::ptr::read_unaligned(bytes.as_ptr() as *const Self))
207//! }
208//!
209//! unsafe fn as_bytes(&self) -> &[u8] {
210//! let ptr = self as *const Self as *const u8;
211//! slice::from_raw_parts(ptr, mem::size_of::<Self>())
212//! }
213//!
214//! // Optional: Override the type identifier
215//! fn type_identifier() -> &'static str {
216//! "Complex64"
217//! }
218//! }
219//! ```
220//!
221//! Once implemented, your custom type can be used with all the memory-mapped array
222//! functionality, including saving and loading arrays of your type:
223//!
224//! ```no_run
225//! use scirs2_core::ndarray::Array2;
226//! use scirs2_core::memory_efficient::{AccessMode, MemoryMappedArray, ZeroCopySerialization};
227//! use serde_json::json;
228//! use std::path::Path;
229//!
230//! # #[repr(C)]
231//! # #[derive(Debug, Clone, Copy, PartialEq)]
232//! # struct Complex64 { real: f64, imag: f64 }
233//! # impl scirs2_core::memory_efficient::ZeroCopySerializable for Complex64 {
234//! # unsafe fn from_bytes(bytes: &[u8]) -> scirs2_core::error::CoreResult<Self> { unimplemented!() }
235//! # unsafe fn as_bytes(&self) -> &[u8] { unimplemented!() }
236//! # }
237//!
238//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
239//! // Create a 2D array of Complex64 numbers
240//! let data = Array2::<Complex64>::from_shape_fn((10, 10), |(i, j)| {
241//! Complex64 { real: i as f64, imag: j as f64 }
242//! });
243//!
244//! // Save with metadata
245//! let filepath = Path::new("complex_array.bin");
246//! let metadata = json!({
247//! "description": "Complex number array",
248//! "type": "Complex64"
249//! });
250//!
251//! // Save the array with zero-copy serialization
252//! MemoryMappedArray::<Complex64>::save_array(&data, &filepath, Some(metadata))?;
253//!
254//! // Load with zero-copy deserialization
255//! let array = MemoryMappedArray::<Complex64>::open_zero_copy(&filepath, AccessMode::ReadOnly)?;
256//!
257//! // Access as an ndarray
258//! let loaded_data = array.readonlyarray::<scirs2_core::ndarray::Ix2>()?;
259//! println!("First element: real={}, imag={}", loaded_data[[0, 0]].real, loaded_data[[0, 0]].imag);
260//! # Ok(())
261//! # }
262//! ```
263//!
264//! ## Safety Considerations
265//!
266//! When implementing `ZeroCopySerializable` for custom types, be aware of:
267//!
268//! - **Platform dependencies**: Memory layout can vary across platforms, so files may not be portable
269//! - **Endianness**: Byte order can differ between processors (e.g., little-endian vs. big-endian)
270//! - **Padding**: Ensure your type doesn't contain undefined padding bytes
271//! - **Pointers**: Avoid references or pointers in custom types as they cannot be serialized safely
272//!
273//! For maximum compatibility, consider:
274//!
275//! - Using explicit byte conversions for platform-independent serialization
//! - Converting endianness explicitly (e.g., storing values with `to_le_bytes()` and reading them back with `from_le_bytes()`)
277//! - Adding a version field to your serialized format for future compatibility
278
279use crate::ndarray::compat::ArrayStatCompat;
280
281use std::fs::{File, OpenOptions};
282use std::io::{Read, Seek, SeekFrom, Write};
283use std::marker::PhantomData;
284use std::mem;
285use std::path::{Path, PathBuf};
286use std::slice;
287
288use ::ndarray::{Array, Dimension};
289use memmap2::MmapOptions;
290#[cfg(feature = "serialization")]
291use serde::{Deserialize, Serialize};
292
293use super::memmap::{AccessMode, MemoryMappedArray};
294use crate::error::{CoreError, CoreResult, ErrorContext, ErrorLocation};
295// use statrs::statistics::Statistics; // Not needed
296
297/// Trait for zero-copy serializable types.
298///
299/// This trait enables types to be directly mapped between memory and disk without
300/// intermediate copies. It's primarily designed for numeric types and other types
301/// with stable memory representations.
302///
303/// Implementations of this trait provide:
304/// - Direct memory layout access through raw byte slices
305/// - Safe conversion between bytes and typed values
306/// - Validation of serialized data for type safety
307/// - Size information for memory allocation and validation
308///
309/// This trait is optimized for performance-critical code where avoiding
310/// memory copies is essential, especially with large datasets.
311///
312/// # Safety Considerations
313///
314/// Zero-copy serialization relies on the binary representation of types,
315/// which depends on:
316///
317/// - Memory layout (which can differ across platforms)
318/// - Endianness (byte order)
319/// - Alignment requirements
320///
321/// For custom types, ensure:
322/// - The type has a well-defined memory layout (e.g., #[repr(C)] or #[repr(transparent)])
323/// - The type doesn't contain references, pointers, or other indirection
324/// - All fields are themselves zero-copy serializable
325/// - The type doesn't have any padding bytes with undefined values
326pub trait ZeroCopySerializable: Sized + Clone + Copy + 'static + Send + Sync {
327 /// Convert a byte slice to an instance of this type.
328 ///
329 /// # Safety
330 ///
331 /// This function is unsafe because it reads raw bytes and interprets them as a value
332 /// of type `Self`. The caller must ensure that the byte slice is valid for the type.
333 unsafe fn from_bytes(bytes: &[u8]) -> CoreResult<Self>;
334
335 /// Convert this value to a byte slice.
336 ///
337 /// # Safety
338 ///
339 /// This function is unsafe because it returns a raw byte slice. The caller must ensure
340 /// that the returned slice is used safely.
341 unsafe fn as_bytes(&self) -> &[u8];
342
343 /// Check if the byte slice is valid for this type.
344 fn validate_bytes(bytes: &[u8]) -> bool {
345 bytes.len() == mem::size_of::<Self>()
346 }
347
348 /// Get the size of this type in bytes.
349 fn byte_size() -> usize {
350 mem::size_of::<Self>()
351 }
352
353 /// Get a type identifier for validation during deserialization.
354 ///
355 /// This method provides a way to identify the type during deserialization.
356 /// By default, it returns the type name, but custom implementations may override
357 /// this for more specific type checking.
358 fn type_identifier() -> &'static str {
359 std::any::type_name::<Self>()
360 }
361}
362
/// Implements `ZeroCopySerializable` for a primitive numeric type that offers
/// `from_ne_bytes`.
///
/// Arguments, in order:
/// - `$type`: the primitive type to implement the trait for
/// - `$bytesize`: length of the byte array handed to `from_ne_bytes`
/// - `$name`: the type name shown in the length-mismatch error message
macro_rules! impl_zerocopy_serializable {
    ($type:ty, $bytesize:expr, $name:expr) => {
        impl ZeroCopySerializable for $type {
            unsafe fn from_bytes(bytes: &[u8]) -> CoreResult<Self> {
                if Self::validate_bytes(bytes) {
                    // Copy into a fixed-size array so the value is decoded
                    // without relying on the alignment of `bytes`.
                    let mut raw = [0u8; $bytesize];
                    raw.copy_from_slice(bytes);
                    return Ok(<$type>::from_ne_bytes(raw));
                }

                let message = format!(
                    "Invalid byte length for {}: expected {} got {}",
                    $name,
                    mem::size_of::<Self>(),
                    bytes.len()
                );
                Err(CoreError::ValidationError(
                    ErrorContext::new(message)
                        .with_location(ErrorLocation::new(file!(), line!())),
                ))
            }

            unsafe fn as_bytes(&self) -> &[u8] {
                // View the value in place; the slice covers exactly one `Self`.
                let start = (self as *const Self).cast::<u8>();
                slice::from_raw_parts(start, mem::size_of::<Self>())
            }
        }
    };
}
393
394// Floating-point implementations
395
396// f32 support when requested
397#[cfg(feature = "float32")]
398impl_zerocopy_serializable!(f32, 4, "f32");
399
400// f64 is always implemented when requested
401#[cfg(feature = "float64")]
402impl_zerocopy_serializable!(f64, 8, "f64");
403
404// Default implementations when no specific float features are enabled
405// This ensures that f32 and f64 work out of the box for basic usage
406#[cfg(all(not(feature = "float32"), not(feature = "float64")))]
407mod default_float_impls {
408 use super::*;
409
410 impl_zerocopy_serializable!(f32, 4, "f32");
411 impl_zerocopy_serializable!(f64, 8, "f64");
412}
413
414// Integer implementations with non-overlapping feature flags
415
416// When all_ints is enabled, implement all integer types
417#[cfg(feature = "all_ints")]
418impl_zerocopy_serializable!(i8, 1, "i8");
419
420#[cfg(feature = "all_ints")]
421impl_zerocopy_serializable!(i16, 2, "i16");
422
423#[cfg(feature = "all_ints")]
424impl_zerocopy_serializable!(i32, 4, "i32");
425
426#[cfg(feature = "all_ints")]
427impl_zerocopy_serializable!(i64, 8, "i64");
428
429#[cfg(feature = "all_ints")]
430impl_zerocopy_serializable!(u8, 1, "u8");
431
432#[cfg(feature = "all_ints")]
433impl_zerocopy_serializable!(u16, 2, "u16");
434
435#[cfg(feature = "all_ints")]
436impl_zerocopy_serializable!(u32, 4, "u32");
437
438#[cfg(feature = "all_ints")]
439impl_zerocopy_serializable!(u64, 8, "u64");
440
441// When all_ints is NOT enabled, implement specific types based on feature flags
442#[cfg(all(not(feature = "all_ints"), feature = "int32"))]
443impl_zerocopy_serializable!(i32, 4, "i32");
444
445#[cfg(all(not(feature = "all_ints"), feature = "uint32"))]
446impl_zerocopy_serializable!(u32, 4, "u32");
447
448#[cfg(all(not(feature = "all_ints"), feature = "int64"))]
449impl_zerocopy_serializable!(i64, 8, "i64");
450
451#[cfg(all(not(feature = "all_ints"), feature = "uint64"))]
452impl_zerocopy_serializable!(u64, 8, "u64");
453
454// Default implementations when no specific integer features are enabled
455// This ensures that i32 and u32 work out of the box for basic usage
456#[cfg(all(
457 not(feature = "all_ints"),
458 not(feature = "int32"),
459 not(feature = "uint32"),
460 not(feature = "int64"),
461 not(feature = "uint64")
462))]
463mod default_int_impls {
464 use super::*;
465
466 impl_zerocopy_serializable!(i32, 4, "i32");
467 impl_zerocopy_serializable!(u32, 4, "u32");
468}
469
470// This approach ensures that at minimum, we'll have f32, f64, i32, and u32 types
471// available even if no specific feature flags are enabled.
472
473/// Metadata for zero-copy serialized arrays
474#[derive(Serialize, Deserialize, Debug, Clone)]
475struct ZeroCopyHeader {
476 /// Type name of the elements (for validation)
477 pub type_name: String,
478 /// Type identifier provided by the type for validation
479 pub type_identifier: String,
480 /// Size of each element in bytes
481 pub element_size: usize,
482 /// Shape of the array
483 pub shape: Vec<usize>,
484 /// Total number of elements
485 pub total_elements: usize,
486 /// Optional extra metadata as JSON string
487 pub metadata_json: Option<String>,
488}
489
490/// An extension trait for MemoryMappedArray to support zero-copy serialization and deserialization.
491///
492/// This trait provides methods to save and load memory-mapped arrays with zero-copy
493/// operations, enabling efficient serialization of large datasets. The zero-copy approach
494/// allows data to be memory-mapped directly from files with minimal overhead.
495///
496/// Key features:
497/// - Efficient saving of arrays to disk with optional metadata
498/// - Near-instantaneous loading of arrays from disk via memory mapping
499/// - Support for different access modes (ReadOnly, ReadWrite, CopyOnWrite)
500/// - Direct access to raw byte representation for advanced use cases
501/// - Access to the array with specified dimensionality
502pub trait ZeroCopySerialization<A: ZeroCopySerializable> {
503 /// Save the array to a file with zero-copy serialization.
504 ///
505 /// This method serializes the memory-mapped array to a file, including:
506 /// - A header with array information (type, shape, size)
507 /// - Optional metadata as JSON (can be used for array description, creation date, etc.)
508 /// - The raw binary data of the array
509 ///
510 /// # Arguments
511 ///
512 /// * `path` - Path where the array will be saved
513 /// * `metadata` - Optional metadata to include with the array (as JSON)
514 ///
515 /// # Returns
516 ///
517 /// `CoreResult<()>` indicating success or an error with context
518 ///
519 /// # Example
520 ///
521 /// ```no_run
522 /// # use scirs2_core::memory_efficient::{MemoryMappedArray, ZeroCopySerialization};
523 /// # use serde_json::json;
524 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
525 /// # let mmap: MemoryMappedArray<f64> = unimplemented!();
526 /// let metadata = json!({"description": "Example array", "created": "2023-05-20"});
527 /// mmap.save_zero_copy("array.bin", Some(metadata))?;
528 /// # Ok(())
529 /// # }
530 /// ```
531 fn save_zero_copy(
532 &self,
533 path: impl AsRef<Path>,
534 metadata: Option<serde_json::Value>,
535 ) -> CoreResult<()>;
536
537 /// Load an array from a file with zero-copy deserialization.
538 ///
539 /// This method memory-maps a file containing a previously serialized array,
540 /// allowing near-instantaneous "loading" regardless of array size.
541 ///
542 /// # Arguments
543 ///
544 /// * `path` - Path to the file containing the serialized array
545 /// * `mode` - Access mode (ReadOnly, ReadWrite, or CopyOnWrite)
546 ///
547 /// # Returns
548 ///
549 /// A memory-mapped array or an error with context
550 ///
551 /// # Example
552 ///
553 /// ```no_run
554 /// # use scirs2_core::memory_efficient::{AccessMode, MemoryMappedArray, ZeroCopySerialization};
555 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
556 /// let array = MemoryMappedArray::<f64>::load_zero_copy("array.bin", AccessMode::ReadOnly)?;
557 /// # Ok(())
558 /// # }
559 /// ```
560 fn load_zero_copy(path: impl AsRef<Path>, mode: AccessMode)
561 -> CoreResult<MemoryMappedArray<A>>;
562
563 /// Get the raw byte representation of the array.
564 ///
565 /// This provides low-level access to the memory-mapped data as a byte slice.
566 /// Primarily used for implementing serialization operations.
567 ///
568 /// # Returns
569 ///
570 /// A byte slice representing the raw array data or an error
571 fn as_bytes_slice(&self) -> CoreResult<&[u8]>;
572
573 /// Get the mutable raw byte representation of the array.
574 ///
575 /// This provides low-level mutable access to the memory-mapped data.
576 /// Primarily used for implementing serialization operations.
577 ///
578 /// # Returns
579 ///
580 /// A mutable byte slice or an error if the array is not mutable
581 fn as_bytes_slice_mut(&mut self) -> CoreResult<&mut [u8]>;
582}
583
584impl<A: ZeroCopySerializable> ZeroCopySerialization<A> for MemoryMappedArray<A> {
585 fn save_zero_copy(
586 &self,
587 path: impl AsRef<Path>,
588 metadata: Option<serde_json::Value>,
589 ) -> CoreResult<()> {
590 let path = path.as_ref();
591
592 // Create header
593 let metadata_json = metadata
594 .map(|m| serde_json::to_string(&m))
595 .transpose()
596 .map_err(|e| {
597 CoreError::ValidationError(
598 ErrorContext::new(format!("{e}"))
599 .with_location(ErrorLocation::new(file!(), line!())),
600 )
601 })?;
602
603 let header = ZeroCopyHeader {
604 type_name: std::any::type_name::<A>().to_string(),
605 type_identifier: A::type_identifier().to_string(),
606 element_size: mem::size_of::<A>(),
607 shape: self.shape.clone(),
608 total_elements: self.size,
609 metadata_json,
610 };
611
612 // Serialize header
613 let cfg = bincode::config::standard();
614 let header_bytes = bincode::serde::encode_to_vec(&header, cfg).map_err(|e| {
615 CoreError::ValidationError(
616 ErrorContext::new(format!("{e}"))
617 .with_location(ErrorLocation::new(file!(), line!())),
618 )
619 })?;
620
621 // Write header and array data
622 let mut file = OpenOptions::new()
623 .create(true)
624 .write(true)
625 .truncate(true)
626 .open(path)?;
627
628 // Write header length (for easier reading later)
629 let header_len = header_bytes.len() as u64;
630 file.write_all(&header_len.to_ne_bytes())?;
631
632 // Write header
633 file.write_all(&header_bytes)?;
634
635 // Calculate current position and add padding for data alignment
636 let current_pos = 8 + header_len as usize; // 8 bytes for length + header
637 let alignment = std::mem::align_of::<A>();
638 let padding_needed = if current_pos % alignment == 0 {
639 0
640 } else {
641 alignment - (current_pos % alignment)
642 };
643
644 // Write padding bytes
645 if padding_needed > 0 {
646 let padding = vec![0u8; padding_needed];
647 file.write_all(&padding)?;
648 }
649
650 // Get array bytes
651 let array_bytes = self.as_bytes_slice()?;
652
653 // Write array data
654 file.write_all(array_bytes)?;
655
656 Ok(())
657 }
658
659 fn load_zero_copy(
660 path: impl AsRef<Path>,
661 mode: AccessMode,
662 ) -> CoreResult<MemoryMappedArray<A>> {
663 let path = path.as_ref();
664
665 // Open file
666 let mut file = File::open(path)?;
667
668 // Read header length
669 let mut header_len_bytes = [0u8; 8]; // u64
670 file.read_exact(&mut header_len_bytes)?;
671 let header_len = u64::from_ne_bytes(header_len_bytes) as usize;
672
673 // Read header
674 let mut header_bytes = vec![0u8; header_len];
675 file.read_exact(&mut header_bytes)?;
676
677 // Deserialize header
678 let cfg = bincode::config::standard();
679 let (header, _len): (ZeroCopyHeader, usize) =
680 bincode::serde::decode_from_slice(&header_bytes, cfg).map_err(|e| {
681 CoreError::ValidationError(
682 ErrorContext::new(format!("{e}"))
683 .with_location(ErrorLocation::new(file!(), line!())),
684 )
685 })?;
686
687 // Validate type
688 if header.element_size != mem::size_of::<A>() {
689 return Err(CoreError::ValidationError(
690 ErrorContext::new(format!(
691 "Element size mismatch: expected {} got {}",
692 mem::size_of::<A>(),
693 header.element_size
694 ))
695 .with_location(ErrorLocation::new(file!(), line!())),
696 ));
697 }
698
699 // Validate type identifier
700 if header.type_identifier != A::type_identifier() {
701 return Err(CoreError::ValidationError(
702 ErrorContext::new(format!(
703 "Type identifier mismatch: expected '{}' got '{}'",
704 A::type_identifier(),
705 header.type_identifier
706 ))
707 .with_location(ErrorLocation::new(file!(), line!())),
708 ));
709 }
710
711 // Calculate data offset (8 bytes for header length + header bytes + alignment padding)
712 let base_offset = 8 + header_len;
713 let alignment = std::mem::align_of::<A>();
714 let padding_needed = if base_offset % alignment == 0 {
715 0
716 } else {
717 alignment - (base_offset % alignment)
718 };
719 let data_offset = base_offset + padding_needed;
720
721 // Memory map file at the offset of the actual data
722 match mode {
723 AccessMode::ReadOnly => {
724 // Create read-only memory map
725 let file = File::open(path)?;
726 let mmap = unsafe { MmapOptions::new().offset(data_offset as u64).map(&file)? };
727
728 // Create MemoryMappedArray
729 Ok(MemoryMappedArray {
730 shape: header.shape,
731 file_path: path.to_path_buf(),
732 mode,
733 offset: data_offset,
734 size: header.total_elements,
735 mmap_view: Some(mmap),
736 mmap_view_mut: None,
737 is_temp: false,
738 phantom: PhantomData,
739 })
740 }
741 AccessMode::ReadWrite => {
742 // Create read-write memory map
743 let file = OpenOptions::new().read(true).write(true).open(path)?;
744
745 let mmap = unsafe {
746 MmapOptions::new()
747 .offset(data_offset as u64)
748 .map_mut(&file)?
749 };
750
751 // Create MemoryMappedArray
752 Ok(MemoryMappedArray {
753 shape: header.shape,
754 file_path: path.to_path_buf(),
755 mode,
756 offset: data_offset,
757 size: header.total_elements,
758 mmap_view: None,
759 mmap_view_mut: Some(mmap),
760 is_temp: false,
761 phantom: PhantomData,
762 })
763 }
764 AccessMode::CopyOnWrite => {
765 // Create copy-on-write memory map
766 let file = File::open(path)?;
767 let mmap = unsafe {
768 MmapOptions::new()
769 .offset(data_offset as u64)
770 .map_copy(&file)?
771 };
772
773 // Create MemoryMappedArray
774 Ok(MemoryMappedArray {
775 shape: header.shape,
776 file_path: path.to_path_buf(),
777 mode,
778 offset: data_offset,
779 size: header.total_elements,
780 mmap_view: None,
781 mmap_view_mut: Some(mmap),
782 is_temp: false,
783 phantom: PhantomData,
784 })
785 }
786 AccessMode::Write => {
787 return Err(CoreError::ValidationError(
788 ErrorContext::new("Cannot use Write mode with load_zero_copy".to_string())
789 .with_location(ErrorLocation::new(file!(), line!())),
790 ));
791 }
792 }
793 }
794
795 fn as_bytes_slice(&self) -> CoreResult<&[u8]> {
796 self.as_bytes()
797 }
798
799 fn as_bytes_slice_mut(&mut self) -> CoreResult<&mut [u8]> {
800 self.as_bytes_mut()
801 }
802}
803
804// Extension methods for MemoryMappedArray
805impl<A: ZeroCopySerializable> MemoryMappedArray<A> {
806 /// Create a new memory-mapped array from an existing array and save with zero-copy serialization.
807 ///
808 /// This method provides a convenient way to convert a standard ndarray to a memory-mapped
809 /// array with zero-copy serialization in a single operation. It's particularly useful for
810 /// initializing memory-mapped arrays with data.
811 ///
812 /// # Arguments
813 ///
814 /// * `data` - The source ndarray to be converted and saved
815 /// * `filepath` - Path where the memory-mapped array will be saved
816 /// * `metadata` - Optional metadata to include with the array
817 ///
818 /// # Returns
819 ///
820 /// A new memory-mapped array with read-write access to the saved data
821 ///
822 /// # Example
823 ///
824 /// ```no_run
825 /// # use ::ndarray::Array2;
826 /// # use scirs2_core::memory_efficient::MemoryMappedArray;
827 /// # use serde_json::json;
828 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
829 /// // Create an ndarray
830 /// let data = Array2::<f64>::from_shape_fn((100, 100), |(i, j)| (i * 100 + j) as f64);
831 ///
832 /// // Create metadata
833 /// let metadata = json!({"description": "Temperature data", "units": "Celsius"});
834 ///
835 /// // Convert to memory-mapped array and save
836 /// let mmap = MemoryMappedArray::<f64>::save_array(&data, "temperature.bin", Some(metadata))?;
837 /// # Ok(())
838 /// # }
839 /// ```
840 pub fn save_array<S, D>(
841 data: &crate::ndarray::ArrayBase<S, D>,
842 file_path: impl AsRef<Path>,
843 metadata: Option<serde_json::Value>,
844 ) -> CoreResult<Self>
845 where
846 S: crate::ndarray::Data<Elem = A>,
847 D: Dimension,
848 {
849 // First create a temporary in-memory memory-mapped array
850 let mmap = super::memmap::create_temp_mmap(data, AccessMode::ReadWrite, 0)?;
851
852 // Save to the specified file with zero-copy serialization
853 mmap.save_zero_copy(&file_path, metadata)?;
854
855 // Open the file we just created with read-write access
856 Self::load_zero_copy(&file_path, AccessMode::ReadWrite)
857 }
858
859 /// Open a zero-copy serialized memory-mapped array from a file.
860 ///
861 /// This is a convenient wrapper around the `load_zero_copy` method with a more intuitive name.
862 /// It memory-maps a file containing a previously serialized array, providing efficient access
863 /// to the data.
864 ///
865 /// # Arguments
866 ///
867 /// * `filepath` - Path to the file containing the serialized array
868 /// * `mode` - Access mode (ReadOnly, ReadWrite, or CopyOnWrite)
869 ///
870 /// # Returns
871 ///
872 /// A memory-mapped array or an error with context
873 ///
874 /// # Example
875 ///
876 /// ```no_run
877 /// # use scirs2_core::memory_efficient::{AccessMode, MemoryMappedArray};
878 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
879 /// // Open a memory-mapped array with read-only access
880 /// let array = MemoryMappedArray::<f64>::open_zero_copy("data/temperature.bin", AccessMode::ReadOnly)?;
881 ///
882 /// // Access the array
883 /// let ndarray = array.readonlyarray::<scirs2_core::ndarray::Ix2>()?;
884 /// println!("First value: {}", ndarray[[0, 0]]);
885 /// # Ok(())
886 /// # }
887 /// ```
888 pub fn open_zero_copy(filepath: impl AsRef<Path>, mode: AccessMode) -> CoreResult<Self> {
889 Self::load_zero_copy(filepath, mode)
890 }
891
892 /// Read the metadata from a zero-copy serialized file without loading the entire array.
893 ///
894 /// This method efficiently extracts just the metadata from a file without memory-mapping
895 /// the entire array data. This is useful for checking array properties or file information
896 /// before deciding whether to load the full array.
897 ///
898 /// # Arguments
899 ///
900 /// * `filepath` - Path to the file containing the serialized array
901 ///
902 /// # Returns
903 ///
904 /// The metadata as a JSON value or an empty JSON object if no metadata was stored
905 ///
906 /// # Example
907 ///
908 /// ```no_run
909 /// # use scirs2_core::memory_efficient::MemoryMappedArray;
910 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
911 /// // Read metadata without loading the array
912 /// let metadata = MemoryMappedArray::<f64>::read_metadata("data/large_dataset.bin")?;
913 ///
914 /// // Check properties
915 /// if let Some(created) = metadata.get("created") {
916 /// println!("Dataset created on: {}", created);
917 /// }
918 ///
919 /// if let Some(dimensions) = metadata.get("dimensions") {
920 /// println!("Dataset dimensions: {}", dimensions);
921 /// }
922 /// # Ok(())
923 /// # }
924 /// ```
925 pub fn read_metadata(filepath: impl AsRef<Path>) -> CoreResult<serde_json::Value> {
926 let path = filepath.as_ref();
927
928 // Open file
929 let mut file = File::open(path)?;
930
931 // Read header length
932 let mut header_len_bytes = [0u8; 8]; // u64
933 file.read_exact(&mut header_len_bytes)?;
934 let header_len = u64::from_ne_bytes(header_len_bytes) as usize;
935
936 // Read header
937 let mut header_bytes = vec![0u8; header_len];
938 file.read_exact(&mut header_bytes)?;
939
940 // Deserialize header
941 let cfg = bincode::config::standard();
942 let (header, _len): (ZeroCopyHeader, usize) =
943 bincode::serde::decode_from_slice(&header_bytes, cfg).map_err(|e| {
944 CoreError::ValidationError(
945 ErrorContext::new(format!("{e}"))
946 .with_location(ErrorLocation::new(file!(), line!())),
947 )
948 })?;
949
950 // Parse metadata JSON or return empty object if none
951 match header.metadata_json {
952 Some(json_str) => serde_json::from_str(&json_str).map_err(|e| {
953 CoreError::ValidationError(
954 ErrorContext::new(format!("{e}"))
955 .with_location(ErrorLocation::new(file!(), line!())),
956 )
957 }),
958 None => Ok(serde_json::json!({})),
959 }
960 }
961
962 /// Get a read-only view of the array as an ndarray Array.
963 ///
964 /// This method provides a convenient way to access the memory-mapped array as a
965 /// standard ndarray Array with the specified dimensionality.
966 ///
967 /// # Type Parameters
968 ///
969 /// * `D` - The dimensionality for the returned array (e.g., Ix1, Ix2, IxDyn)
970 ///
971 /// # Returns
972 ///
973 /// A read-only ndarray Array view of the memory-mapped data
974 ///
975 /// # Example
976 ///
977 /// ```no_run
978 /// # use scirs2_core::ndarray::Ix2;
979 /// # use scirs2_core::ndarray::ArrayStatCompat;
980 /// # use scirs2_core::memory_efficient::{AccessMode, MemoryMappedArray};
981 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
982 /// let array = MemoryMappedArray::<f64>::open_zero_copy("matrix.bin", AccessMode::ReadOnly)?;
983 ///
984 /// // Access as a 2D ndarray
985 /// let ndarray = array.readonlyarray::<Ix2>()?;
986 ///
987 /// // Now you can use all the ndarray methods
988 /// let sum = ndarray.sum();
989 /// let mean = ndarray.mean_or(0.0);
990 /// println!("Matrix sum: {}, mean: {}", sum, mean);
991 /// # Ok(())
992 /// # }
993 /// ```
994 pub fn readonlyarray<D>(&self) -> CoreResult<Array<A, D>>
995 where
996 D: Dimension,
997 {
998 self.as_array::<D>()
999 }
1000
1001 /// Update metadata in a zero-copy serialized file without rewriting the entire array.
1002 ///
1003 /// This method efficiently updates just the metadata portion of a serialized array file
1004 /// without touching the actual array data. When possible, it performs the update in place
1005 /// to avoid creating a new file.
1006 ///
1007 /// # Arguments
1008 ///
1009 /// * `filepath` - Path to the file containing the serialized array
1010 /// * `metadata` - The new metadata to store (as JSON)
1011 ///
1012 /// # Returns
1013 ///
1014 /// `CoreResult<()>` indicating success or an error with context
1015 ///
1016 /// # Behavior
1017 ///
1018 /// - If the new metadata is the same size or smaller than the original, the update is done in-place
1019 /// - If the new metadata is larger, the entire file is rewritten to maintain proper alignment
1020 ///
1021 /// # Example
1022 ///
1023 /// ```no_run
1024 /// # use scirs2_core::memory_efficient::MemoryMappedArray;
1025 /// # use serde_json::json;
1026 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
1027 /// // Add processing information to the metadata
1028 /// let updated_metadata = json!({
1029 /// "description": "Temperature dataset",
1030 /// "processed": true,
1031 /// "processing_date": "2023-05-21",
1032 /// "normalization_applied": true,
1033 /// "outliers_removed": 12
1034 /// });
1035 ///
1036 /// // Update the metadata without affecting the array data
1037 /// MemoryMappedArray::<f64>::update_metadata("data/temperature.bin", updated_metadata)?;
1038 /// # Ok(())
1039 /// # }
1040 /// ```
1041 pub fn update_metadata(
1042 file_path: impl AsRef<Path>,
1043 metadata: serde_json::Value,
1044 ) -> CoreResult<()> {
1045 let path = file_path.as_ref();
1046
1047 // Open file
1048 let mut file = OpenOptions::new().read(true).write(true).open(path)?;
1049
1050 // Read header length
1051 let mut header_len_bytes = [0u8; 8]; // u64
1052 file.read_exact(&mut header_len_bytes)?;
1053 let header_len = u64::from_ne_bytes(header_len_bytes) as usize;
1054
1055 // Read header
1056 let mut header_bytes = vec![0u8; header_len];
1057 file.read_exact(&mut header_bytes)?;
1058
1059 // Deserialize header
1060 let cfg = bincode::config::standard();
1061 let (mut header, _len): (ZeroCopyHeader, usize) =
1062 bincode::serde::decode_from_slice(&header_bytes, cfg).map_err(|e| {
1063 CoreError::ValidationError(
1064 ErrorContext::new(format!("{e}"))
1065 .with_location(ErrorLocation::new(file!(), line!())),
1066 )
1067 })?;
1068
1069 // Update metadata
1070 header.metadata_json = Some(serde_json::to_string(&metadata).map_err(|e| {
1071 CoreError::ValidationError(
1072 ErrorContext::new(format!("{e}"))
1073 .with_location(ErrorLocation::new(file!(), line!())),
1074 )
1075 })?);
1076
1077 // Serialize updated header
1078 let cfg = bincode::config::standard();
1079 let new_header_bytes = bincode::serde::encode_to_vec(&header, cfg).map_err(|e| {
1080 CoreError::ValidationError(
1081 ErrorContext::new(format!("{e}"))
1082 .with_location(ErrorLocation::new(file!(), line!())),
1083 )
1084 })?;
1085
1086 // If new header is same size or smaller, we can update in place
1087 if new_header_bytes.len() <= header_len {
1088 // Seek back to header start (after header length)
1089 file.seek(SeekFrom::Start(8))?;
1090
1091 // Write new header
1092 file.write_all(&new_header_bytes)?;
1093
1094 // If new header is smaller, pad with zeros to maintain original size
1095 if new_header_bytes.len() < header_len {
1096 let padding = vec![0u8; header_len - new_header_bytes.len()];
1097 file.write_all(&padding)?;
1098 }
1099
1100 Ok(())
1101 } else {
1102 // If new header is larger, we need to rewrite the entire file
1103 // First, load the array
1104 let array = MemoryMappedArray::<A>::load_zero_copy(path, AccessMode::ReadOnly)?;
1105
1106 // Then save it to a temporary file
1107 let temppath = PathBuf::from(format!("{}.temp", path.display()));
1108 array.save_zero_copy(&temppath, Some(metadata.clone()))?;
1109
1110 // Replace the original file with the temporary file
1111 std::fs::rename(&temppath, path)?;
1112
1113 Ok(())
1114 }
1115 }
1116}
1117
1118#[cfg(test)]
1119mod tests {
1120 use super::*;
1121 use ndarray::{Array, Array1, Array2, Array3, IxDyn};
1122 use tempfile::tempdir;
1123
// Small user-defined complex number, used by the tests below as an example of a custom
// element type that implements ZeroCopySerializable.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
struct Complex64 {
    real: f64,
    imag: f64,
}

impl Complex64 {
    // Builds a value from its real and imaginary parts.
    fn new(real: f64, imag: f64) -> Self {
        Complex64 { real, imag }
    }

    // Square root of the sum of the squared components.
    #[allow(dead_code)]
    fn magnitude(&self) -> f64 {
        let Self { real, imag } = *self;
        (real * real + imag * imag).sqrt()
    }
}
1142
1143 // Implementation of ZeroCopySerializable for our custom Complex64 type
1144 impl ZeroCopySerializable for Complex64 {
1145 unsafe fn from_bytes(bytes: &[u8]) -> CoreResult<Self> {
1146 if !Self::validate_bytes(bytes) {
1147 return Err(CoreError::ValidationError(
1148 ErrorContext::new(format!(
1149 "Invalid byte length for Complex64: expected {} got {}",
1150 mem::size_of::<Self>(),
1151 bytes.len()
1152 ))
1153 .with_location(ErrorLocation::new(file!(), line!())),
1154 ));
1155 }
1156
1157 // Create a pointer to the bytes and cast it to our type
1158 let ptr = bytes.as_ptr() as *const Self;
1159 Ok(*ptr)
1160 }
1161
1162 unsafe fn as_bytes(&self) -> &[u8] {
1163 let ptr = self as *const Self as *const u8;
1164 slice::from_raw_parts(ptr, mem::size_of::<Self>())
1165 }
1166
1167 // Override the type identifier for more specific validation
1168 fn type_identifier() -> &'static str {
1169 "Complex64"
1170 }
1171 }
1172
// Round-trips a single Complex64 through its raw byte representation.
#[test]
fn test_custom_complex_type() {
    let original = Complex64::new(3.5, 2.7);

    unsafe {
        let raw = original.as_bytes();
        // Two f64 fields of eight bytes each.
        assert_eq!(raw.len(), 16);

        let Complex64 { real, imag } = Complex64::from_bytes(raw).expect("Operation failed");
        assert_eq!(original.real, real);
        assert_eq!(original.imag, imag);
    }
}
1189
// Test saving and loading an array of our custom type
#[test]
fn test_save_and_load_complex_array() {
    // Create a temporary directory
    let dir = tempdir().expect("Operation failed");
    let filepath = dir.path().join("complex_array.bin");

    // Create a 2D array of complex numbers; every element encodes its own (row, column)
    // position, so a misplaced element cannot go unnoticed
    let data =
        Array2::<Complex64>::from_shape_fn((5, 5), |(i, j)| Complex64::new(i as f64, j as f64));

    // Save with metadata
    let metadata = serde_json::json!({
        "description": "Complex number array",
        "type": "Complex64",
        "shape": [5, 5]
    });

    let array =
        MemoryMappedArray::<Complex64>::save_array(&data, &filepath, Some(metadata.clone()))
            .expect("Operation failed");

    // Verify save worked
    assert_eq!(array.shape.as_slice(), data.shape());
    assert_eq!(array.size, data.len());

    // Load from file
    let loaded =
        MemoryMappedArray::<Complex64>::open_zero_copy(&filepath, AccessMode::ReadOnly)
            .expect("Operation failed");

    // Verify load worked
    assert_eq!(loaded.shape.as_slice(), data.shape());
    assert_eq!(loaded.size, data.len());

    // Convert to ndarray and check values
    let loaded_array = loaded
        .readonlyarray::<crate::ndarray::Ix2>()
        .expect("Operation failed");

    // Compare every element, not just the first row
    for i in 0..5 {
        for j in 0..5 {
            let original = data[[i, j]];
            let restored = loaded_array[[i, j]];
            assert_eq!(original.real, restored.real);
            assert_eq!(original.imag, restored.imag);
        }
    }

    // Read metadata
    let loaded_metadata =
        MemoryMappedArray::<Complex64>::read_metadata(&filepath).expect("Operation failed");
    assert_eq!(loaded_metadata, metadata);
}
1244
// Native-endian byte round trip of an f32 (only built with the "float32" feature).
#[test]
#[cfg(feature = "float32")]
fn test_zero_copy_serializable_f32() {
    let original: f32 = 3.5;

    let encoded = original.to_ne_bytes();
    assert_eq!(encoded.len(), 4);

    let decoded = f32::from_ne_bytes(encoded);
    assert_eq!(original, decoded);
}
1256
// Round-trips an i32 through the ZeroCopySerializable byte view.
#[test]
fn test_zero_copy_serializable_i32() {
    let original: i32 = -42;

    let decoded = unsafe {
        let raw = original.as_bytes();
        assert_eq!(raw.len(), 4);

        i32::from_bytes(raw).expect("Operation failed")
    };

    assert_eq!(original, decoded);
}
1269
// Saves a 1D f64 array with metadata, reloads it read-only and checks shape, values
// and metadata.
#[test]
fn test_save_and_load_array_1d() {
    // Scratch directory that is removed when `dir` goes out of scope
    let dir = tempdir().expect("Operation failed");
    let filepath = dir.path().join("test_array.bin");

    // 100 evenly spaced samples
    let data = Array1::<f64>::linspace(0.0, 9.9, 100);

    // Metadata stored alongside the array
    let metadata = serde_json::json!({
        "description": "Test 1D array",
        "created": "2023-05-20",
    });
    let saved = MemoryMappedArray::<f64>::save_array(&data, &filepath, Some(metadata.clone()))
        .expect("Operation failed");

    // The handle returned by the save describes the same array
    assert_eq!(saved.shape.as_slice(), data.shape());
    assert_eq!(saved.size, data.len());

    // Reopen the file read-only
    let reopened = MemoryMappedArray::<f64>::open_zero_copy(&filepath, AccessMode::ReadOnly)
        .expect("Operation failed");

    // The reopened handle describes the same array as well
    assert_eq!(reopened.shape.as_slice(), data.shape());
    assert_eq!(reopened.size, data.len());

    // Element-wise comparison against the source data
    let loaded_array = reopened
        .readonlyarray::<crate::ndarray::Ix1>()
        .expect("Operation failed");
    assert_eq!(loaded_array.shape(), data.shape());

    // Shapes were just checked to match, so the zip covers every element
    for (&loaded_val, &expected) in loaded_array.iter().zip(data.iter()) {
        assert_eq!(loaded_val, expected);
    }

    // Metadata survives the round trip
    let loaded_metadata =
        MemoryMappedArray::<f64>::read_metadata(&filepath).expect("Operation failed");
    assert_eq!(loaded_metadata, metadata);
}
1314
// Saves a 2D f32 array without metadata, reloads it and compares every element
// (only built with the "float32" feature).
#[test]
#[cfg(feature = "float32")]
fn test_save_and_load_array_2d() {
    // Create a temporary directory
    let dir = tempdir().expect("Operation failed");
    let filepath = dir.path().join("test_array_2d.bin");

    // Create a 2D array whose values encode their row-major position
    let data = Array2::<f32>::from_shape_fn((10, 20), |(i, j)| (i * 20 + j) as f32);

    // Save without metadata
    let array =
        MemoryMappedArray::<f32>::save_array(&data, &filepath, None).expect("Operation failed");

    // Verify save worked
    assert_eq!(array.shape.as_slice(), data.shape());
    assert_eq!(array.size, data.len());

    // Load from file
    let loaded = MemoryMappedArray::<f32>::open_zero_copy(&filepath, AccessMode::ReadOnly)
        .expect("Operation failed");

    // Verify load worked
    assert_eq!(loaded.shape.as_slice(), data.shape());
    assert_eq!(loaded.size, data.len());

    // Convert to ndarray and check values
    let loaded_array = loaded
        .readonlyarray::<crate::ndarray::Ix2>()
        .expect("Operation failed");
    assert_eq!(loaded_array.shape(), data.shape());

    // Compare every row, not just the first one
    for i in 0..10 {
        for j in 0..20 {
            assert_eq!(loaded_array[[i, j]], data[[i, j]]);
        }
    }
}
1353
// Saves a 3D i32 array with metadata, reloads it and compares every element.
#[test]
fn test_save_and_load_array_3d() {
    // Create a temporary directory
    let dir = tempdir().expect("Operation failed");
    let filepath = dir.path().join("test_array_3d.bin");

    // Create a 3D array whose values encode their row-major position
    let data = Array3::<i32>::from_shape_fn((5, 5, 5), |(i, j, k)| (i * 25 + j * 5 + k) as i32);

    // Save with metadata
    let metadata = serde_json::json!({
        "description": "Test 3D array",
        "dimensions": {
            "x": 5,
            "y": 5,
            "z": 5
        }
    });
    let array = MemoryMappedArray::<i32>::save_array(&data, &filepath, Some(metadata))
        .expect("Operation failed");

    // Verify save worked
    assert_eq!(array.shape.as_slice(), data.shape());
    assert_eq!(array.size, data.len());

    // Load from file
    let loaded = MemoryMappedArray::<i32>::open_zero_copy(&filepath, AccessMode::ReadOnly)
        .expect("Operation failed");

    // Verify load worked
    assert_eq!(loaded.shape.as_slice(), data.shape());
    assert_eq!(loaded.size, data.len());

    // Convert to ndarray and check values
    let loaded_array = loaded
        .readonlyarray::<crate::ndarray::Ix3>()
        .expect("Operation failed");
    assert_eq!(loaded_array.shape(), data.shape());

    // Compare every plane, not just the first one
    for i in 0..5 {
        for j in 0..5 {
            for k in 0..5 {
                assert_eq!(loaded_array[[i, j, k]], data[[i, j, k]]);
            }
        }
    }
}
1401
// Saves a dynamic-dimension (4D) f64 array, reloads it, spot-checks a few indices and
// then compares the complete flat contents.
#[test]
fn test_save_and_load_array_dynamic() {
    // Create a temporary directory
    let dir = tempdir().expect("Operation failed");
    let filepath = dir.path().join("test_array_dyn.bin");

    // Create a dynamic-dimension array (4D)
    let shape = IxDyn(&[3, 4, 2, 5]);
    let data = Array::from_shape_fn(shape, |idx| {
        // Convert multidimensional index to a single value for testing:
        // the index components become the decimal digits of the value
        let mut val = 0;
        let mut factor = 1;
        for &dim in idx.slice().iter().rev() {
            val += dim * factor;
            factor *= 10;
        }
        val as f64
    });

    // Save with detailed metadata
    let metadata = serde_json::json!({
        "description": "Test dynamic 4D array",
        "dimensions": {
            "dim1": 3,
            "dim2": 4,
            "dim3": 2,
            "dim4": 5
        },
        "created": "2023-05-20",
        "format_version": "1.0"
    });
    let array = MemoryMappedArray::<f64>::save_array(&data, &filepath, Some(metadata))
        .expect("Operation failed");

    // Verify save worked
    assert_eq!(array.shape.as_slice(), data.shape());
    assert_eq!(array.size, data.len());

    // Load from file
    let loaded = MemoryMappedArray::<f64>::open_zero_copy(&filepath, AccessMode::ReadOnly)
        .expect("Operation failed");

    // Verify load worked
    assert_eq!(loaded.shape.as_slice(), data.shape());
    assert_eq!(loaded.size, data.len());

    // Convert to ndarray and check values
    let loaded_array = loaded.readonlyarray::<IxDyn>().expect("Operation failed");
    assert_eq!(loaded_array.shape(), data.shape());

    // Test a few specific indices
    let test_indices = vec![
        IxDyn(&[0, 0, 0, 0]),
        IxDyn(&[1, 2, 1, 3]),
        IxDyn(&[2, 3, 1, 4]),
        IxDyn(&[2, 0, 0, 2]),
    ];

    for idx in test_indices {
        assert_eq!(loaded_array[&idx], data[&idx]);
    }

    // Also test reading data directly as slice
    let loaded_slice = loaded.as_slice();
    let data_standard = data.as_standard_layout();
    let data_slice = data_standard.as_slice().expect("Operation failed");

    // Lengths are checked first, so the zip below visits every element of both slices
    assert_eq!(loaded_slice.len(), data_slice.len());
    for (loaded_val, expected) in loaded_slice.iter().zip(data_slice.iter()) {
        assert_eq!(loaded_val, expected);
    }
}
1474
// Round-trips arrays of several element types and ranks (u32 1D, i64 2D, f32 3D),
// checking every element and the stored metadata (only built with the "float32"
// feature).
#[test]
#[cfg(feature = "float32")]
fn test_save_and_load_array_mixed_types() {
    // Create a temporary directory
    let dir = tempdir().expect("Operation failed");

    // Test u32 1D array
    {
        let filename = "u32_1d.bin";
        let filepath = dir.path().join(filename);
        // Each element holds its own index so that misplaced data is detectable
        let data = Array1::<u32>::from_shape_fn(100, |i| i as u32);
        let metadata = serde_json::json!({
            "array_type": "u32",
            "dimensions": data.ndim(),
            "shape": data.shape().to_vec()
        });

        let array =
            MemoryMappedArray::<u32>::save_array(&data, &filepath, Some(metadata.clone()))
                .expect("Operation failed");
        assert_eq!(array.shape.as_slice(), data.shape());

        // Load and verify
        let loaded = MemoryMappedArray::<u32>::open_zero_copy(&filepath, AccessMode::ReadOnly)
            .expect("Operation failed");
        let loaded_array = loaded
            .readonlyarray::<crate::ndarray::Ix1>()
            .expect("Operation failed");

        for i in 0..data.len() {
            assert_eq!(loaded_array[i], data[i]);
        }

        // Verify metadata was saved correctly
        let loaded_metadata =
            MemoryMappedArray::<u32>::read_metadata(&filepath).expect("Operation failed");
        assert_eq!(loaded_metadata, metadata);
    }

    // Test i64 2D array
    {
        let filename = "i64_2d.bin";
        let filepath = dir.path().join(filename);
        let data = Array2::<i64>::from_shape_fn((5, 10), |(i, j)| (i * 10 + j) as i64);
        let metadata = serde_json::json!({
            "array_type": "i64",
            "dimensions": data.ndim(),
            "shape": data.shape().to_vec()
        });

        let array =
            MemoryMappedArray::<i64>::save_array(&data, &filepath, Some(metadata.clone()))
                .expect("Operation failed");
        assert_eq!(array.shape.as_slice(), data.shape());

        // Load and verify
        let loaded = MemoryMappedArray::<i64>::open_zero_copy(&filepath, AccessMode::ReadOnly)
            .expect("Operation failed");
        let loaded_array = loaded
            .readonlyarray::<crate::ndarray::Ix2>()
            .expect("Operation failed");

        // Compare every row, not just the first one
        for i in 0..data.shape()[0] {
            for j in 0..data.shape()[1] {
                assert_eq!(loaded_array[[i, j]], data[[i, j]]);
            }
        }

        // Verify metadata was saved correctly
        let loaded_metadata =
            MemoryMappedArray::<i64>::read_metadata(&filepath).expect("Operation failed");
        assert_eq!(loaded_metadata, metadata);
    }

    // Test f32 3D array
    {
        let filename = "f32_3d.bin";
        let filepath = dir.path().join(filename);
        let data =
            Array3::<f32>::from_shape_fn((3, 4, 5), |(i, j, k)| (i * 20 + j * 5 + k) as f32);
        let metadata = serde_json::json!({
            "array_type": "f32",
            "dimensions": data.ndim(),
            "shape": data.shape().to_vec()
        });

        let array =
            MemoryMappedArray::<f32>::save_array(&data, &filepath, Some(metadata.clone()))
                .expect("Operation failed");
        assert_eq!(array.shape.as_slice(), data.shape());

        // Load and verify
        let loaded = MemoryMappedArray::<f32>::open_zero_copy(&filepath, AccessMode::ReadOnly)
            .expect("Operation failed");
        let loaded_array = loaded
            .readonlyarray::<crate::ndarray::Ix3>()
            .expect("Operation failed");

        // Compare every plane, not just the first one
        for i in 0..data.shape()[0] {
            for j in 0..data.shape()[1] {
                for k in 0..data.shape()[2] {
                    assert_eq!(loaded_array[[i, j, k]], data[[i, j, k]]);
                }
            }
        }

        // Verify metadata was saved correctly
        let loaded_metadata =
            MemoryMappedArray::<f32>::read_metadata(&filepath).expect("Operation failed");
        assert_eq!(loaded_metadata, metadata);
    }
}
1587
// Replaces the metadata of a saved array and checks that both the new metadata and the
// untouched array data can be read back.
#[test]
fn test_update_metadata() {
    // Scratch directory that is removed when `dir` goes out of scope
    let dir = tempdir().expect("Operation failed");
    let filepath = dir.path().join("test_metadata_update.bin");

    // 100 evenly spaced samples
    let data = Array1::<f64>::linspace(0.0, 9.9, 100);

    // Write the file with the first version of the metadata
    let initial_metadata = serde_json::json!({
        "description": "Initial metadata",
        "_version": "1.0"
    });
    MemoryMappedArray::<f64>::save_array(&data, &filepath, Some(initial_metadata))
        .expect("Operation failed");

    // Swap in a second version that carries an extra key
    let updated_metadata = serde_json::json!({
        "description": "Updated metadata",
        "_version": "2.0",
        "updated": true
    });
    MemoryMappedArray::<f64>::update_metadata(&filepath, updated_metadata.clone())
        .expect("Operation failed");

    // The file now reports the second version
    let stored_metadata =
        MemoryMappedArray::<f64>::read_metadata(&filepath).expect("Operation failed");
    assert_eq!(stored_metadata, updated_metadata);

    // The array contents are unchanged by the metadata update
    let reopened = MemoryMappedArray::<f64>::open_zero_copy(&filepath, AccessMode::ReadOnly)
        .expect("Operation failed");
    let loaded_array = reopened
        .readonlyarray::<crate::ndarray::Ix1>()
        .expect("Operation failed");

    for (position, &loaded_val) in loaded_array.iter().enumerate() {
        assert_eq!(loaded_val, data[position]);
    }
}
1630
// Modifies one element through a read-write mapping, flushes, and confirms on reload
// that exactly that element changed (only built with the "float32" feature).
#[test]
#[cfg(feature = "float32")]
fn test_modify_array() {
    // Scratch directory that is removed when `dir` goes out of scope
    let dir = tempdir().expect("Operation failed");
    let filepath = dir.path().join("test_modify.bin");

    // 5x5 array whose values encode their row-major position
    let data = Array2::<f32>::from_shape_fn((5, 5), |(i, j)| (i * 5 + j) as f32);

    // Write it out without metadata
    MemoryMappedArray::<f32>::save_array(&data, &filepath, None).expect("Operation failed");

    // Reopen with write access
    let mut writable = MemoryMappedArray::<f32>::open_zero_copy(&filepath, AccessMode::ReadWrite)
        .expect("Operation failed");

    // Overwrite the centre element; the mutable view is scoped so it is released
    // before the flush
    {
        let mut view = writable
            .as_array_mut::<crate::ndarray::Ix2>()
            .expect("Operation failed");
        view[[2, 2]] = 999.0;
    }

    // Push the change to the file
    writable.flush().expect("Operation failed");

    // Reopen read-only and inspect what was stored
    let reopened = MemoryMappedArray::<f32>::open_zero_copy(&filepath, AccessMode::ReadOnly)
        .expect("Operation failed");
    let loaded_array = reopened
        .readonlyarray::<crate::ndarray::Ix2>()
        .expect("Operation failed");

    // Only the centre element differs from the original data
    for i in 0..5 {
        for j in 0..5 {
            let expected = if i == 2 && j == 2 {
                999.0
            } else {
                data[[i, j]]
            };
            assert_eq!(loaded_array[[i, j]], expected);
        }
    }
}
1677
// Writes through a copy-on-write mapping and checks that the modifications are visible
// through that mapping while the file on disk keeps its original contents.
#[test]
fn test_copy_on_write_mode() {
    // Scratch directory that is removed when `dir` goes out of scope
    let dir = tempdir().expect("Operation failed");
    let filepath = dir.path().join("test_cow.bin");

    // 10x10 array whose values encode their row-major position
    let data = Array2::<f64>::from_shape_fn((10, 10), |(i, j)| (i * 10 + j) as f64);

    // Write it out without metadata
    MemoryMappedArray::<f64>::save_array(&data, &filepath, None).expect("Operation failed");

    // Open the file in copy-on-write mode
    let mut cow_mmap =
        MemoryMappedArray::<f64>::open_zero_copy(&filepath, AccessMode::CopyOnWrite)
            .expect("Operation failed");

    // Overwrite the diagonal through the copy-on-write view
    {
        let mut diagonal_view = cow_mmap
            .as_array_mut::<crate::ndarray::Ix2>()
            .expect("Operation failed");
        for d in 0..10 {
            diagonal_view[[d, d]] = 100.0;
        }
    }

    // A fresh read-only open of the same file must still see the original values
    let on_disk = MemoryMappedArray::<f64>::open_zero_copy(&filepath, AccessMode::ReadOnly)
        .expect("Operation failed");
    let on_disk_array = on_disk
        .readonlyarray::<crate::ndarray::Ix2>()
        .expect("Operation failed");

    for i in 0..10 {
        for j in 0..10 {
            assert_eq!(on_disk_array[[i, j]], data[[i, j]]);
        }
    }

    // The copy-on-write handle itself reports the modified diagonal
    let cow_array = cow_mmap
        .as_array::<crate::ndarray::Ix2>()
        .expect("Operation failed");
    for i in 0..10 {
        for j in 0..10 {
            let expected = if i == j { 100.0 } else { data[[i, j]] };
            assert_eq!(cow_array[[i, j]], expected);
        }
    }
}
1734}