1use std::fs::{File, OpenOptions};
7use std::io::{BufReader, Read};
8use std::marker::PhantomData;
9use std::path::Path;
10
11use memmap2::{Mmap, MmapMut, MmapOptions};
12
13use ferray_core::Array;
14use ferray_core::dimension::IxDyn;
15use ferray_core::dtype::Element;
16use ferray_core::error::{FerrayError, FerrayResult};
17
18use crate::format::MemmapMode;
19use crate::npy::NpyElement;
20use crate::npy::header::{self, NpyHeader};
21
22pub struct MemmapArray<T: Element> {
27 _mmap: Mmap,
29 data_ptr: *const T,
31 shape: Vec<usize>,
33 len: usize,
35 _marker: PhantomData<T>,
37}
38
39unsafe impl<T: Element> Send for MemmapArray<T> {}
42unsafe impl<T: Element> Sync for MemmapArray<T> {}
43
44impl<T: Element> MemmapArray<T> {
45 pub fn shape(&self) -> &[usize] {
47 &self.shape
48 }
49
50 pub fn as_slice(&self) -> &[T] {
52 unsafe { std::slice::from_raw_parts(self.data_ptr, self.len) }
55 }
56
57 pub fn to_array(&self) -> FerrayResult<Array<T, IxDyn>> {
59 let data = self.as_slice().to_vec();
60 Array::from_vec(IxDyn::new(&self.shape), data)
61 }
62}
63
64pub struct MemmapArrayMut<T: Element> {
68 _mmap: MmapMut,
70 data_ptr: *mut T,
72 shape: Vec<usize>,
74 len: usize,
76 _marker: PhantomData<T>,
78}
79
80unsafe impl<T: Element> Send for MemmapArrayMut<T> {}
81unsafe impl<T: Element> Sync for MemmapArrayMut<T> {}
82
83impl<T: Element> MemmapArrayMut<T> {
84 pub fn shape(&self) -> &[usize] {
86 &self.shape
87 }
88
89 pub fn as_slice(&self) -> &[T] {
91 unsafe { std::slice::from_raw_parts(self.data_ptr, self.len) }
92 }
93
94 pub fn as_slice_mut(&mut self) -> &mut [T] {
99 unsafe { std::slice::from_raw_parts_mut(self.data_ptr, self.len) }
100 }
101
102 pub fn to_array(&self) -> FerrayResult<Array<T, IxDyn>> {
104 let data = self.as_slice().to_vec();
105 Array::from_vec(IxDyn::new(&self.shape), data)
106 }
107
108 pub fn flush(&self) -> FerrayResult<()> {
110 self._mmap
111 .flush()
112 .map_err(|e| FerrayError::io_error(format!("failed to flush mmap: {e}")))
113 }
114}
115
116pub fn memmap_readonly<T: Element + NpyElement, P: AsRef<Path>>(
124 path: P,
125) -> FerrayResult<MemmapArray<T>> {
126 let (header, data_offset) = read_npy_header_with_offset(path.as_ref())?;
127 validate_dtype::<T>(&header)?;
128 validate_native_endian(&header)?;
129
130 let file = File::open(path.as_ref())?;
131 let mmap = unsafe {
132 MmapOptions::new()
133 .offset(data_offset as u64)
134 .len(header.shape.iter().product::<usize>() * std::mem::size_of::<T>())
135 .map(&file)
136 .map_err(|e| FerrayError::io_error(format!("mmap failed: {e}")))?
137 };
138
139 let len: usize = header.shape.iter().product();
140 let data_ptr = mmap.as_ptr() as *const T;
141
142 if (data_ptr as usize) % std::mem::align_of::<T>() != 0 {
144 return Err(FerrayError::io_error(
145 "memory-mapped data is not properly aligned for the element type",
146 ));
147 }
148
149 Ok(MemmapArray {
150 _mmap: mmap,
151 data_ptr,
152 shape: header.shape,
153 len,
154 _marker: PhantomData,
155 })
156}
157
158pub fn memmap_mut<T: Element + NpyElement, P: AsRef<Path>>(
169 path: P,
170 mode: MemmapMode,
171) -> FerrayResult<MemmapArrayMut<T>> {
172 if mode == MemmapMode::ReadOnly {
173 return Err(FerrayError::invalid_value(
174 "use memmap_readonly for read-only access",
175 ));
176 }
177
178 let (header, data_offset) = read_npy_header_with_offset(path.as_ref())?;
179 validate_dtype::<T>(&header)?;
180 validate_native_endian(&header)?;
181
182 let len: usize = header.shape.iter().product();
183 let data_bytes = len * std::mem::size_of::<T>();
184
185 let mmap = match mode {
186 MemmapMode::ReadWrite => {
187 let file = OpenOptions::new()
188 .read(true)
189 .write(true)
190 .open(path.as_ref())?;
191 unsafe {
192 MmapOptions::new()
193 .offset(data_offset as u64)
194 .len(data_bytes)
195 .map_mut(&file)
196 .map_err(|e| FerrayError::io_error(format!("mmap_mut failed: {e}")))?
197 }
198 }
199 MemmapMode::CopyOnWrite => {
200 let file = File::open(path.as_ref())?;
201 unsafe {
202 MmapOptions::new()
203 .offset(data_offset as u64)
204 .len(data_bytes)
205 .map_copy(&file)
206 .map_err(|e| FerrayError::io_error(format!("mmap copy-on-write failed: {e}")))?
207 }
208 }
209 MemmapMode::ReadOnly => unreachable!(),
210 };
211
212 let data_ptr = mmap.as_ptr() as *mut T;
213
214 if (data_ptr as usize) % std::mem::align_of::<T>() != 0 {
215 return Err(FerrayError::io_error(
216 "memory-mapped data is not properly aligned for the element type",
217 ));
218 }
219
220 Ok(MemmapArrayMut {
221 _mmap: mmap,
222 data_ptr,
223 shape: header.shape,
224 len,
225 _marker: PhantomData,
226 })
227}
228
229pub fn open_memmap<T: Element + NpyElement, P: AsRef<Path>>(
238 path: P,
239 mode: MemmapMode,
240) -> FerrayResult<Array<T, IxDyn>> {
241 match mode {
242 MemmapMode::ReadOnly => {
243 let mapped = memmap_readonly::<T, _>(path)?;
244 mapped.to_array()
245 }
246 _ => {
247 let mapped = memmap_mut::<T, _>(path, mode)?;
248 mapped.to_array()
249 }
250 }
251}
252
253fn read_npy_header_with_offset(path: &Path) -> FerrayResult<(NpyHeader, usize)> {
255 let file = File::open(path)?;
256 let mut reader = BufReader::new(file);
257 let hdr = header::read_header(&mut reader)?;
258
259 let preamble_len = crate::format::NPY_MAGIC_LEN + 2; let header_len_field_size = if hdr.version.0 == 1 { 2 } else { 4 };
263
264 let file2 = File::open(path)?;
267 let mut reader2 = BufReader::new(file2);
268 let mut skip = vec![0u8; preamble_len + header_len_field_size];
269 reader2.read_exact(&mut skip)?;
270
271 let header_len = if hdr.version.0 == 1 {
272 u16::from_le_bytes([skip[preamble_len], skip[preamble_len + 1]]) as usize
273 } else {
274 u32::from_le_bytes([
275 skip[preamble_len],
276 skip[preamble_len + 1],
277 skip[preamble_len + 2],
278 skip[preamble_len + 3],
279 ]) as usize
280 };
281
282 let data_offset = preamble_len + header_len_field_size + header_len;
283
284 Ok((hdr, data_offset))
285}
286
287fn validate_dtype<T: Element>(header: &NpyHeader) -> FerrayResult<()> {
288 if header.dtype != T::dtype() {
289 return Err(FerrayError::invalid_dtype(format!(
290 "expected dtype {:?} for type {}, but file has {:?}",
291 T::dtype(),
292 std::any::type_name::<T>(),
293 header.dtype,
294 )));
295 }
296 Ok(())
297}
298
299fn validate_native_endian(header: &NpyHeader) -> FerrayResult<()> {
300 if header.endianness.needs_swap() {
301 return Err(FerrayError::io_error(
302 "memory-mapped arrays require native byte order; file has non-native endianness",
303 ));
304 }
305 Ok(())
306}
307
#[cfg(test)]
mod tests {
    use super::*;
    use crate::npy;
    use ferray_core::dimension::Ix1;

    /// Scratch directory for these tests, keyed by process id; created on
    /// demand (creation errors are ignored).
    fn test_dir() -> std::path::PathBuf {
        let mut dir = std::env::temp_dir();
        dir.push(format!("ferray_io_mmap_{}", std::process::id()));
        let _ = std::fs::create_dir_all(&dir);
        dir
    }

    /// Path of `name` inside the scratch directory.
    fn test_file(name: &str) -> std::path::PathBuf {
        let mut path = test_dir();
        path.push(name);
        path
    }

    /// A read-only mapping exposes the saved shape and values.
    #[test]
    fn memmap_readonly_f64() {
        let expected = [1.0_f64, 2.0, 3.0, 4.0, 5.0];
        let source = Array::<f64, Ix1>::from_vec(Ix1::new([5]), expected.to_vec()).unwrap();

        let npy_path = test_file("mm_ro_f64.npy");
        npy::save(&npy_path, &source).unwrap();

        let view = memmap_readonly::<f64, _>(&npy_path).unwrap();
        assert_eq!(view.shape(), &[5]);
        assert_eq!(view.as_slice(), &expected[..]);
        let _ = std::fs::remove_file(&npy_path);
    }

    /// `to_array` yields an owned copy with the same shape and values.
    #[test]
    fn memmap_to_array() {
        let expected = [10i32, 20, 30];
        let source = Array::<i32, Ix1>::from_vec(Ix1::new([3]), expected.to_vec()).unwrap();

        let npy_path = test_file("mm_to_arr.npy");
        npy::save(&npy_path, &source).unwrap();

        let view = memmap_readonly::<i32, _>(&npy_path).unwrap();
        let copied = view.to_array().unwrap();
        assert_eq!(copied.shape(), &[3]);
        assert_eq!(copied.as_slice().unwrap(), &expected[..]);
        let _ = std::fs::remove_file(&npy_path);
    }

    /// Writes through a read-write mapping are visible when the file is
    /// loaded again.
    #[test]
    fn memmap_readwrite_persist() {
        let source = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0_f64, 2.0, 3.0]).unwrap();

        let npy_path = test_file("mm_rw.npy");
        npy::save(&npy_path, &source).unwrap();

        let mut view = memmap_mut::<f64, _>(&npy_path, MemmapMode::ReadWrite).unwrap();
        view.as_slice_mut()[0] = 999.0;
        view.flush().unwrap();
        // Release the mapping before reading the file back.
        drop(view);

        let reloaded: Array<f64, Ix1> = npy::load(&npy_path).unwrap();
        let values = reloaded.as_slice().unwrap();
        assert_eq!(values[0], 999.0);
        assert_eq!(values[1], 2.0);
        assert_eq!(values[2], 3.0);
        let _ = std::fs::remove_file(&npy_path);
    }

    /// Writes through a copy-on-write mapping are visible in the mapping but
    /// do not reach the file.
    #[test]
    fn memmap_copy_on_write() {
        let source = Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0_f64, 2.0, 3.0]).unwrap();

        let npy_path = test_file("mm_cow.npy");
        npy::save(&npy_path, &source).unwrap();

        let mut view = memmap_mut::<f64, _>(&npy_path, MemmapMode::CopyOnWrite).unwrap();
        view.as_slice_mut()[0] = 999.0;
        assert_eq!(view.as_slice()[0], 999.0);
        // Release the mapping before reading the file back.
        drop(view);

        let reloaded: Array<f64, Ix1> = npy::load(&npy_path).unwrap();
        assert_eq!(reloaded.as_slice().unwrap()[0], 1.0);
        let _ = std::fs::remove_file(&npy_path);
    }

    /// Mapping an `f64` file as `f32` is rejected.
    #[test]
    fn memmap_wrong_dtype_error() {
        let source = Array::<f64, Ix1>::from_vec(Ix1::new([2]), vec![1.0_f64, 2.0]).unwrap();

        let npy_path = test_file("mm_wrong_dt.npy");
        npy::save(&npy_path, &source).unwrap();

        let outcome = memmap_readonly::<f32, _>(&npy_path);
        assert!(outcome.is_err());
        let _ = std::fs::remove_file(&npy_path);
    }

    /// `open_memmap` in read-only mode returns the saved shape and values.
    #[test]
    fn open_memmap_readonly() {
        let expected = [1.0_f64, 2.0, 3.0];
        let source = Array::<f64, Ix1>::from_vec(Ix1::new([3]), expected.to_vec()).unwrap();

        let npy_path = test_file("mm_open_ro.npy");
        npy::save(&npy_path, &source).unwrap();

        let reloaded = open_memmap::<f64, _>(&npy_path, MemmapMode::ReadOnly).unwrap();
        assert_eq!(reloaded.shape(), &[3]);
        assert_eq!(reloaded.as_slice().unwrap(), &expected[..]);
        let _ = std::fs::remove_file(&npy_path);
    }
}