ferray_io/format.rs
1// ferray-io: Format constants, shared types, and `numpy.lib.format`-equivalent helpers.
2
3use ferray_core::DynArray;
4use ferray_core::dtype::DType;
5use ferray_core::error::FerrayResult;
6
7use crate::npy::dtype_parse::{Endianness, dtype_to_descr, parse_dtype_str};
8
/// Magic prefix that opens every `.npy` file: byte `0x93` followed by ASCII `NUMPY`.
pub const NPY_MAGIC: &[u8] = b"\x93NUMPY";

/// Number of bytes in [`NPY_MAGIC`] (derived from the constant so the two cannot drift).
pub const NPY_MAGIC_LEN: usize = NPY_MAGIC.len();

/// `.npy` format version 1.0 — the header length field is 2 bytes wide.
pub const VERSION_1_0: (u8, u8) = (1, 0);

/// `.npy` format version 2.0 — the header length field is 4 bytes wide.
pub const VERSION_2_0: (u8, u8) = (2, 0);

/// `.npy` format version 3.0 — 4-byte header length field, header text encoded as UTF-8.
pub const VERSION_3_0: (u8, u8) = (3, 0);

/// Byte boundary to which the preamble plus header is aligned.
pub const HEADER_ALIGNMENT: usize = 64;

/// Largest header length representable in a version 1.0 file (`u16::MAX`).
pub const MAX_HEADER_LEN_V1: usize = u16::MAX as usize;
29
/// Access mode used when memory-mapping a file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MemmapMode {
    /// Map the file for reading only; the mapped slice is immutable.
    ReadOnly,
    /// Map the file for reading and writing; changes are written back
    /// to the underlying file.
    ReadWrite,
    /// Map the file copy-on-write; changes stay in memory and are not
    /// written back to the file.
    CopyOnWrite,
}
43
44// ---------------------------------------------------------------------------
45// numpy.lib.format-equivalent helpers
46// ---------------------------------------------------------------------------
47
/// Typed form of the header dictionary of an `.npy` file.
///
/// Carries the same three entries as the dict returned by NumPy's
/// `numpy.lib.format.header_data_from_array_1_0` — `descr`,
/// `fortran_order` and `shape` — as named fields instead of an untyped map.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HeaderData {
    /// NumPy dtype descriptor string, for example `"<f8"` or `"|b1"`.
    pub descr: String,
    /// Whether the data is laid out in Fortran (column-major) order.
    pub fortran_order: bool,
    /// Extent of the array along each dimension.
    pub shape: Vec<usize>,
}
62
63/// Parse a NumPy dtype descriptor string into a [`DType`].
64///
65/// Equivalent to `numpy.lib.format.descr_to_dtype` for the basic primitive
66/// dtype family. Endianness is dropped — call
67/// [`crate::npy::dtype_parse::parse_dtype_str`] directly to retain it.
68///
69/// # Errors
70/// Returns `FerrayError::InvalidDtype` for an unsupported or malformed
71/// descriptor.
72pub fn descr_to_dtype(descr: &str) -> FerrayResult<DType> {
73 let (dt, _) = parse_dtype_str(descr)?;
74 Ok(dt)
75}
76
77/// Build a [`HeaderData`] dictionary from a [`DynArray`].
78///
79/// Equivalent to `numpy.lib.format.header_data_from_array_1_0`. Always
80/// emits a little-endian descriptor (matching NumPy's convention for
81/// freshly-saved `.npy` files) and `fortran_order = false` (ferray
82/// arrays are always C-contiguous at the public boundary).
83///
84/// # Errors
85/// Returns `FerrayError::InvalidDtype` if the array's dtype cannot be
86/// rendered as a NumPy descriptor.
87pub fn header_data_from_array_1_0(array: &DynArray) -> FerrayResult<HeaderData> {
88 let descr = dtype_to_descr(array.dtype(), Endianness::Little)?;
89 Ok(HeaderData {
90 descr,
91 fortran_order: false,
92 shape: array.shape().to_vec(),
93 })
94}
95
96/// Read an `.npy`-formatted array from `reader` and return it as a
97/// [`DynArray`].
98///
99/// Standalone equivalent of `numpy.lib.format.read_array`. Internally
100/// delegates to [`crate::npy::load_dynamic_from_reader`].
101///
102/// # Errors
103/// Returns errors from the underlying NPY parser (bad magic, dtype
104/// mismatch, truncated stream, etc.).
105pub fn read_array<R: std::io::Read>(reader: &mut R) -> FerrayResult<DynArray> {
106 crate::npy::load_dynamic_from_reader(reader)
107}
108
109/// Write a [`DynArray`] to `writer` as an `.npy` v1.0 stream.
110///
111/// Standalone equivalent of `numpy.lib.format.write_array`. Internally
112/// delegates to [`crate::npy::save_dynamic_to_writer`].
113///
114/// # Errors
115/// Returns errors from the underlying NPY writer (I/O failure, dtype
116/// not encodable as a NumPy descriptor, etc.).
117pub fn write_array<W: std::io::Write>(writer: &mut W, array: &DynArray) -> FerrayResult<()> {
118 crate::npy::save_dynamic_to_writer(writer, array)
119}
120
#[cfg(test)]
mod tests {
    use super::*;
    use ferray_core::{Array, IxDyn};

    /// The magic constant and its advertised length agree, and the bytes
    /// are `0x93` followed by `NUMPY`.
    #[test]
    fn magic_bytes() {
        assert_eq!(NPY_MAGIC.len(), NPY_MAGIC_LEN);
        assert_eq!(NPY_MAGIC[0], 0x93);
        assert_eq!(&NPY_MAGIC[1..], b"NUMPY");
    }

    /// All three mapping modes exist and compare as distinct values.
    #[test]
    fn memmap_mode_variants() {
        let all_modes = [
            MemmapMode::ReadOnly,
            MemmapMode::ReadWrite,
            MemmapMode::CopyOnWrite,
        ];
        assert_eq!(all_modes.len(), 3);
        assert_ne!(MemmapMode::ReadOnly, MemmapMode::ReadWrite);
    }

    /// Common primitive descriptors map to the expected dtypes.
    #[test]
    fn descr_to_dtype_basic() {
        let cases = [
            ("<f8", DType::F64),
            ("<f4", DType::F32),
            ("<i4", DType::I32),
            ("|b1", DType::Bool),
        ];
        for (descr, expected) in cases {
            assert_eq!(descr_to_dtype(descr).unwrap(), expected);
        }
    }

    /// A malformed descriptor is rejected rather than silently accepted.
    #[test]
    fn descr_to_dtype_invalid_errs() {
        let outcome = descr_to_dtype("<garbage");
        assert!(outcome.is_err());
    }

    /// A 2x3 `f64` array yields a little-endian descriptor, C order and
    /// the matching shape.
    #[test]
    fn header_data_from_array_basic() {
        let typed: Array<f64, IxDyn> =
            Array::from_vec(IxDyn::new(&[2, 3]), vec![1.0; 6]).unwrap();
        let erased: DynArray = typed.into();
        let header = header_data_from_array_1_0(&erased).unwrap();
        assert_eq!(header.descr, "<f8");
        assert!(!header.fortran_order);
        assert_eq!(header.shape, vec![2, 3]);
    }

    /// Writing an array and reading it back preserves shape and dtype.
    #[test]
    fn read_array_write_array_roundtrip() {
        let typed: Array<i32, IxDyn> =
            Array::from_vec(IxDyn::new(&[3]), vec![10, 20, 30]).unwrap();
        let erased: DynArray = typed.into();

        let mut bytes = Vec::<u8>::new();
        write_array(&mut bytes, &erased).unwrap();

        // Feed the serialized bytes back through `read_array`.
        let mut source = std::io::Cursor::new(bytes);
        let decoded = read_array(&mut source).unwrap();
        assert_eq!(decoded.shape(), &[3]);
        assert_eq!(decoded.dtype(), DType::I32);
    }
}