1use std::fs::{File, OpenOptions};
7use std::io::{BufReader, Seek};
8use std::marker::PhantomData;
9use std::path::Path;
10
11use memmap2::{Mmap, MmapMut, MmapOptions};
12
13use ferray_core::Array;
14use ferray_core::array::view::ArrayView;
15use ferray_core::dimension::IxDyn;
16use ferray_core::dtype::Element;
17use ferray_core::error::{FerrayError, FerrayResult};
18
19use crate::format::MemmapMode;
20use crate::npy::NpyElement;
21use crate::npy::checked_total_elements;
22use crate::npy::header::{self, NpyHeader};
23
24pub struct MemmapArray<T: Element> {
29 _mmap: Mmap,
31 data_ptr: *const T,
33 shape: Vec<usize>,
35 len: usize,
37 _marker: PhantomData<T>,
39}
40
41unsafe impl<T: Element> Send for MemmapArray<T> {}
44unsafe impl<T: Element> Sync for MemmapArray<T> {}
45
46impl<T: Element> MemmapArray<T> {
47 #[must_use]
49 pub fn shape(&self) -> &[usize] {
50 &self.shape
51 }
52
53 #[must_use]
55 pub const fn as_slice(&self) -> &[T] {
56 unsafe { std::slice::from_raw_parts(self.data_ptr, self.len) }
59 }
60
61 pub fn to_array(&self) -> FerrayResult<Array<T, IxDyn>> {
63 let data = self.as_slice().to_vec();
64 Array::from_vec(IxDyn::new(&self.shape), data)
65 }
66
67 #[must_use]
72 pub fn view(&self) -> ArrayView<'_, T, IxDyn> {
73 let ndim = self.shape.len();
75 let mut strides = vec![1usize; ndim];
76 for i in (0..ndim.saturating_sub(1)).rev() {
77 strides[i] = strides[i + 1] * self.shape[i + 1];
78 }
79 unsafe { ArrayView::from_shape_ptr(self.data_ptr, &self.shape, &strides) }
84 }
85}
86
87pub struct MemmapArrayMut<T: Element> {
91 mmap: MmapMut,
93 data_ptr: *mut T,
95 shape: Vec<usize>,
97 len: usize,
99 _marker: PhantomData<T>,
101}
102
103unsafe impl<T: Element> Send for MemmapArrayMut<T> {}
104unsafe impl<T: Element> Sync for MemmapArrayMut<T> {}
105
106impl<T: Element> MemmapArrayMut<T> {
107 #[must_use]
109 pub fn shape(&self) -> &[usize] {
110 &self.shape
111 }
112
113 #[must_use]
115 pub const fn as_slice(&self) -> &[T] {
116 unsafe { std::slice::from_raw_parts(self.data_ptr, self.len) }
117 }
118
119 pub const fn as_slice_mut(&mut self) -> &mut [T] {
124 unsafe { std::slice::from_raw_parts_mut(self.data_ptr, self.len) }
125 }
126
127 pub fn to_array(&self) -> FerrayResult<Array<T, IxDyn>> {
129 let data = self.as_slice().to_vec();
130 Array::from_vec(IxDyn::new(&self.shape), data)
131 }
132
133 #[must_use]
137 pub fn view(&self) -> ArrayView<'_, T, IxDyn> {
138 let ndim = self.shape.len();
139 let mut strides = vec![1usize; ndim];
140 for i in (0..ndim.saturating_sub(1)).rev() {
141 strides[i] = strides[i + 1] * self.shape[i + 1];
142 }
143 unsafe { ArrayView::from_shape_ptr(self.data_ptr.cast_const(), &self.shape, &strides) }
146 }
147
148 pub fn flush(&self) -> FerrayResult<()> {
150 self.mmap
151 .flush()
152 .map_err(|e| FerrayError::io_error(format!("failed to flush mmap: {e}")))
153 }
154}
155
156pub fn memmap_readonly<T: Element + NpyElement, P: AsRef<Path>>(
164 path: P,
165) -> FerrayResult<MemmapArray<T>> {
166 let (header, data_offset) = read_npy_header_with_offset(path.as_ref())?;
167 validate_dtype::<T>(&header)?;
168 validate_native_endian(&header)?;
169
170 let len = checked_total_elements(&header.shape)?;
171 let file = File::open(path.as_ref())?;
172 let mmap = unsafe {
173 MmapOptions::new()
174 .offset(data_offset as u64)
175 .len(len * std::mem::size_of::<T>())
176 .map(&file)
177 .map_err(|e| FerrayError::io_error(format!("mmap failed: {e}")))?
178 };
179 let data_ptr = mmap.as_ptr().cast::<T>();
180
181 if (data_ptr as usize) % std::mem::align_of::<T>() != 0 {
183 return Err(FerrayError::io_error(
184 "memory-mapped data is not properly aligned for the element type",
185 ));
186 }
187
188 Ok(MemmapArray {
189 _mmap: mmap,
190 data_ptr,
191 shape: header.shape,
192 len,
193 _marker: PhantomData,
194 })
195}
196
197pub fn memmap_mut<T: Element + NpyElement, P: AsRef<Path>>(
208 path: P,
209 mode: MemmapMode,
210) -> FerrayResult<MemmapArrayMut<T>> {
211 if mode == MemmapMode::ReadOnly {
212 return Err(FerrayError::invalid_value(
213 "use memmap_readonly for read-only access",
214 ));
215 }
216
217 let (header, data_offset) = read_npy_header_with_offset(path.as_ref())?;
218 validate_dtype::<T>(&header)?;
219 validate_native_endian(&header)?;
220
221 let len = checked_total_elements(&header.shape)?;
222 let data_bytes = len * std::mem::size_of::<T>();
223
224 let mmap = match mode {
225 MemmapMode::ReadWrite => {
226 let file = OpenOptions::new()
227 .read(true)
228 .write(true)
229 .open(path.as_ref())?;
230 unsafe {
231 MmapOptions::new()
232 .offset(data_offset as u64)
233 .len(data_bytes)
234 .map_mut(&file)
235 .map_err(|e| FerrayError::io_error(format!("mmap_mut failed: {e}")))?
236 }
237 }
238 MemmapMode::CopyOnWrite => {
239 let file = File::open(path.as_ref())?;
240 unsafe {
241 MmapOptions::new()
242 .offset(data_offset as u64)
243 .len(data_bytes)
244 .map_copy(&file)
245 .map_err(|e| FerrayError::io_error(format!("mmap copy-on-write failed: {e}")))?
246 }
247 }
248 MemmapMode::ReadOnly => unreachable!(),
249 };
250
251 let data_ptr = mmap.as_ptr().cast::<T>().cast_mut();
252
253 if (data_ptr as usize) % std::mem::align_of::<T>() != 0 {
254 return Err(FerrayError::io_error(
255 "memory-mapped data is not properly aligned for the element type",
256 ));
257 }
258
259 Ok(MemmapArrayMut {
260 mmap,
261 data_ptr,
262 shape: header.shape,
263 len,
264 _marker: PhantomData,
265 })
266}
267
268pub fn open_memmap<T: Element + NpyElement, P: AsRef<Path>>(
278 path: P,
279 mode: MemmapMode,
280) -> FerrayResult<Array<T, IxDyn>> {
281 if mode == MemmapMode::ReadOnly {
282 let mapped = memmap_readonly::<T, _>(path)?;
283 mapped.to_array()
284 } else {
285 let mapped = memmap_mut::<T, _>(path, mode)?;
286 mapped.to_array()
287 }
288}
289
290fn read_npy_header_with_offset(path: &Path) -> FerrayResult<(NpyHeader, usize)> {
295 let file = File::open(path)?;
296 let mut reader = BufReader::new(file);
297 let hdr = header::read_header(&mut reader)?;
298
299 let data_offset = reader
302 .stream_position()
303 .map_err(|e| FerrayError::io_error(format!("failed to get stream position: {e}")))?
304 as usize;
305
306 Ok((hdr, data_offset))
307}
308
309fn validate_dtype<T: Element>(header: &NpyHeader) -> FerrayResult<()> {
310 if header.dtype != T::dtype() {
311 return Err(FerrayError::invalid_dtype(format!(
312 "expected dtype {:?} for type {}, but file has {:?}",
313 T::dtype(),
314 std::any::type_name::<T>(),
315 header.dtype,
316 )));
317 }
318 Ok(())
319}
320
321fn validate_native_endian(header: &NpyHeader) -> FerrayResult<()> {
322 if header.endianness.needs_swap() {
323 return Err(FerrayError::io_error(
324 "memory-mapped arrays require native byte order; file has non-native endianness",
325 ));
326 }
327 Ok(())
328}
329
#[cfg(test)]
#[allow(clippy::float_cmp)] // values are written and read back unchanged, so exact equality is intended
mod tests {
    use super::*;
    use crate::npy;
    use ferray_core::dimension::Ix1;

    /// Per-process scratch directory under the system temp dir, created on demand.
    fn test_dir() -> std::path::PathBuf {
        let mut scratch = std::env::temp_dir();
        scratch.push(format!("ferray_io_mmap_{}", std::process::id()));
        // Best effort: a failure here surfaces later when a test saves its file.
        let _ = std::fs::create_dir_all(&scratch);
        scratch
    }

    /// Path of a fixture file called `name` inside the scratch directory.
    fn test_file(name: &str) -> std::path::PathBuf {
        let mut full = test_dir();
        full.push(name);
        full
    }

    /// A read-only mapping exposes the saved shape and values.
    #[test]
    fn memmap_readonly_f64() {
        let expected = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0];
        let source = Array::<f64, Ix1>::from_vec(Ix1::new([5]), expected.clone()).unwrap();

        let path = test_file("mm_ro_f64.npy");
        npy::save(&path, &source).unwrap();

        let mapped = memmap_readonly::<f64, _>(&path).unwrap();
        assert_eq!(mapped.shape(), &[5]);
        assert_eq!(mapped.as_slice(), &expected[..]);
        let _ = std::fs::remove_file(&path);
    }

    /// `to_array` yields an owned array equal to the saved one.
    #[test]
    fn memmap_to_array() {
        let expected = vec![10i32, 20, 30];
        let source = Array::<i32, Ix1>::from_vec(Ix1::new([3]), expected.clone()).unwrap();

        let path = test_file("mm_to_arr.npy");
        npy::save(&path, &source).unwrap();

        let mapped = memmap_readonly::<i32, _>(&path).unwrap();
        let owned = mapped.to_array().unwrap();
        assert_eq!(owned.shape(), &[3]);
        assert_eq!(owned.as_slice().unwrap(), &expected[..]);
        let _ = std::fs::remove_file(&path);
    }

    /// Writes made through a read-write mapping are visible after reloading.
    #[test]
    fn memmap_readwrite_persist() {
        let source =
            Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0_f64, 2.0, 3.0]).unwrap();

        let path = test_file("mm_rw.npy");
        npy::save(&path, &source).unwrap();

        let mut mapped = memmap_mut::<f64, _>(&path, MemmapMode::ReadWrite).unwrap();
        mapped.as_slice_mut()[0] = 999.0;
        mapped.flush().unwrap();
        // Release the mapping before the file is read back.
        drop(mapped);

        let reloaded: Array<f64, Ix1> = npy::load(&path).unwrap();
        let values = reloaded.as_slice().unwrap();
        assert_eq!(values[0], 999.0);
        assert_eq!(values[1], 2.0);
        assert_eq!(values[2], 3.0);
        let _ = std::fs::remove_file(&path);
    }

    /// Writes made through a copy-on-write mapping never reach the file.
    #[test]
    fn memmap_copy_on_write() {
        let source =
            Array::<f64, Ix1>::from_vec(Ix1::new([3]), vec![1.0_f64, 2.0, 3.0]).unwrap();

        let path = test_file("mm_cow.npy");
        npy::save(&path, &source).unwrap();

        let mut mapped = memmap_mut::<f64, _>(&path, MemmapMode::CopyOnWrite).unwrap();
        mapped.as_slice_mut()[0] = 999.0;
        // The change is visible through the mapping itself...
        assert_eq!(mapped.as_slice()[0], 999.0);
        drop(mapped);

        // ...but the file on disk still holds the original value.
        let reloaded: Array<f64, Ix1> = npy::load(&path).unwrap();
        assert_eq!(reloaded.as_slice().unwrap()[0], 1.0);
        let _ = std::fs::remove_file(&path);
    }

    /// Requesting a different element type than the file stores is an error.
    #[test]
    fn memmap_wrong_dtype_error() {
        let source = Array::<f64, Ix1>::from_vec(Ix1::new([2]), vec![1.0_f64, 2.0]).unwrap();

        let path = test_file("mm_wrong_dt.npy");
        npy::save(&path, &source).unwrap();

        let outcome = memmap_readonly::<f32, _>(&path);
        assert!(outcome.is_err());
        let _ = std::fs::remove_file(&path);
    }

    /// `open_memmap` in read-only mode returns the saved contents as an owned array.
    #[test]
    fn open_memmap_readonly() {
        let expected = vec![1.0_f64, 2.0, 3.0];
        let source = Array::<f64, Ix1>::from_vec(Ix1::new([3]), expected.clone()).unwrap();

        let path = test_file("mm_open_ro.npy");
        npy::save(&path, &source).unwrap();

        let owned = open_memmap::<f64, _>(&path, MemmapMode::ReadOnly).unwrap();
        assert_eq!(owned.shape(), &[3]);
        assert_eq!(owned.as_slice().unwrap(), &expected[..]);
        let _ = std::fs::remove_file(&path);
    }

    /// A view over a 2x3 mapping reports that shape and iterates in saved order.
    #[test]
    fn memmap_view_borrows_underlying_data() {
        use ferray_core::dimension::Ix2;

        let expected = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0];
        let source = Array::<f64, Ix2>::from_vec(Ix2::new([2, 3]), expected.clone()).unwrap();

        let path = test_file("mm_view.npy");
        npy::save(&path, &source).unwrap();

        let mapped = memmap_readonly::<f64, _>(&path).unwrap();
        let view = mapped.view();
        assert_eq!(view.shape(), &[2, 3]);
        let flattened: Vec<f64> = view.iter().copied().collect();
        assert_eq!(flattened, expected);
        let _ = std::fs::remove_file(&path);
    }
}