Skip to main content

ferray_io/
format.rs

1// ferray-io: Format constants, shared types, and `numpy.lib.format`-equivalent helpers.
2
3use ferray_core::DynArray;
4use ferray_core::dtype::DType;
5use ferray_core::error::FerrayResult;
6
7use crate::npy::dtype_parse::{Endianness, dtype_to_descr, parse_dtype_str};
8
/// Magic byte sequence that opens every `.npy` file: `0x93` followed by
/// the ASCII text `NUMPY`.
pub const NPY_MAGIC: &[u8] = b"\x93NUMPY";

/// Number of bytes in [`NPY_MAGIC`].
///
/// Computed from the magic string itself so the two can never disagree.
pub const NPY_MAGIC_LEN: usize = NPY_MAGIC.len();

/// `.npy` format version 1.0 as `(major, minor)`; the header length is
/// stored in 2 bytes.
pub const VERSION_1_0: (u8, u8) = (1, 0);

/// `.npy` format version 2.0 as `(major, minor)`; the header length is
/// stored in 4 bytes.
pub const VERSION_2_0: (u8, u8) = (2, 0);

/// `.npy` format version 3.0 as `(major, minor)`; the header length is
/// stored in 4 bytes and the header is UTF-8 encoded.
pub const VERSION_3_0: (u8, u8) = (3, 0);

/// Byte alignment of the header + preamble.
pub const HEADER_ALIGNMENT: usize = 64;

/// Largest header length representable in a version 1.0 file
/// (`u16::MAX`). The `as` cast widens `u16` to `usize` and is lossless.
pub const MAX_HEADER_LEN_V1: usize = u16::MAX as usize;
29
/// Access mode used when memory-mapping a file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemmapMode {
    /// Map the file read-only: it is opened for reading and the resulting
    /// slice is immutable.
    ReadOnly,
    /// Map the file read-write: modifications are written back to the
    /// underlying file.
    ReadWrite,
    /// Map the file copy-on-write: modifications stay in memory and are
    /// not written back to the file.
    CopyOnWrite,
}
43
44// ---------------------------------------------------------------------------
45// numpy.lib.format-equivalent helpers
46// ---------------------------------------------------------------------------
47
/// Typed header dictionary describing an `.npy` v1.0 stream.
///
/// Carries the same three entries as the dict returned by NumPy's
/// `numpy.lib.format.header_data_from_array_1_0` — `descr`,
/// `fortran_order`, and `shape` — as a small struct, so callers do not
/// have to work with an untyped map.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HeaderData {
    /// The dtype descriptor string, for example `"<f8"` or `"|b1"`.
    pub descr: String,
    /// Whether the underlying data is laid out in Fortran (column-major)
    /// order.
    pub fortran_order: bool,
    /// The shape of the array.
    pub shape: Vec<usize>,
}
62
63/// Parse a NumPy dtype descriptor string into a [`DType`].
64///
65/// Equivalent to `numpy.lib.format.descr_to_dtype` for the basic primitive
66/// dtype family. Endianness is dropped — call
67/// [`crate::npy::dtype_parse::parse_dtype_str`] directly to retain it.
68///
69/// # Errors
70/// Returns `FerrayError::InvalidDtype` for an unsupported or malformed
71/// descriptor.
72pub fn descr_to_dtype(descr: &str) -> FerrayResult<DType> {
73    let (dt, _) = parse_dtype_str(descr)?;
74    Ok(dt)
75}
76
77/// Build a [`HeaderData`] dictionary from a [`DynArray`].
78///
79/// Equivalent to `numpy.lib.format.header_data_from_array_1_0`. Always
80/// emits a little-endian descriptor (matching NumPy's convention for
81/// freshly-saved `.npy` files) and `fortran_order = false` (ferray
82/// arrays are always C-contiguous at the public boundary).
83///
84/// # Errors
85/// Returns `FerrayError::InvalidDtype` if the array's dtype cannot be
86/// rendered as a NumPy descriptor.
87pub fn header_data_from_array_1_0(array: &DynArray) -> FerrayResult<HeaderData> {
88    let descr = dtype_to_descr(array.dtype(), Endianness::Little)?;
89    Ok(HeaderData {
90        descr,
91        fortran_order: false,
92        shape: array.shape().to_vec(),
93    })
94}
95
96/// Read an `.npy`-formatted array from `reader` and return it as a
97/// [`DynArray`].
98///
99/// Standalone equivalent of `numpy.lib.format.read_array`. Internally
100/// delegates to [`crate::npy::load_dynamic_from_reader`].
101///
102/// # Errors
103/// Returns errors from the underlying NPY parser (bad magic, dtype
104/// mismatch, truncated stream, etc.).
105pub fn read_array<R: std::io::Read>(reader: &mut R) -> FerrayResult<DynArray> {
106    crate::npy::load_dynamic_from_reader(reader)
107}
108
109/// Write a [`DynArray`] to `writer` as an `.npy` v1.0 stream.
110///
111/// Standalone equivalent of `numpy.lib.format.write_array`. Internally
112/// delegates to [`crate::npy::save_dynamic_to_writer`].
113///
114/// # Errors
115/// Returns errors from the underlying NPY writer (I/O failure, dtype
116/// not encodable as a NumPy descriptor, etc.).
117pub fn write_array<W: std::io::Write>(writer: &mut W, array: &DynArray) -> FerrayResult<()> {
118    crate::npy::save_dynamic_to_writer(writer, array)
119}
120
#[cfg(test)]
mod tests {
    use super::*;
    use ferray_core::{Array, IxDyn};

    /// The magic constant and its advertised length must agree, and the
    /// bytes must be `0x93` followed by `NUMPY`.
    #[test]
    fn magic_bytes() {
        assert_eq!(NPY_MAGIC.len(), NPY_MAGIC_LEN);
        let (lead, rest) = NPY_MAGIC.split_first().expect("magic is non-empty");
        assert_eq!(*lead, 0x93);
        assert_eq!(rest, b"NUMPY");
    }

    /// All three mapping modes exist and compare as distinct values.
    #[test]
    fn memmap_mode_variants() {
        let all_modes = [
            MemmapMode::ReadOnly,
            MemmapMode::ReadWrite,
            MemmapMode::CopyOnWrite,
        ];
        assert_eq!(all_modes.len(), 3);
        assert_ne!(all_modes[0], all_modes[1]);
    }

    /// Common primitive descriptors map to the expected dtypes.
    #[test]
    fn descr_to_dtype_basic() {
        let cases = [
            ("<f8", DType::F64),
            ("<f4", DType::F32),
            ("<i4", DType::I32),
            ("|b1", DType::Bool),
        ];
        for (descr, expected) in cases {
            assert_eq!(descr_to_dtype(descr).unwrap(), expected);
        }
    }

    /// A nonsense descriptor is rejected rather than mapped to a dtype.
    #[test]
    fn descr_to_dtype_invalid_errs() {
        let outcome = descr_to_dtype("<garbage");
        assert!(outcome.is_err());
    }

    /// A 2x3 `f64` array yields a little-endian, C-order header.
    #[test]
    fn header_data_from_array_basic() {
        let source: Array<f64, IxDyn> =
            Array::from_vec(IxDyn::new(&[2, 3]), vec![1.0; 6]).unwrap();
        let erased: DynArray = source.into();
        let header = header_data_from_array_1_0(&erased).unwrap();
        assert_eq!(
            header,
            HeaderData {
                descr: String::from("<f8"),
                fortran_order: false,
                shape: vec![2, 3],
            }
        );
    }

    /// Writing an array and reading it back preserves shape and dtype.
    #[test]
    fn read_array_write_array_roundtrip() {
        let source: Array<i32, IxDyn> =
            Array::from_vec(IxDyn::new(&[3]), vec![10, 20, 30]).unwrap();
        let erased: DynArray = source.into();

        let mut encoded = Vec::<u8>::new();
        write_array(&mut encoded, &erased).unwrap();

        // Feed the encoded bytes straight back through `read_array`.
        let mut input = std::io::Cursor::new(encoded);
        let decoded = read_array(&mut input).unwrap();
        assert_eq!(decoded.shape(), &[3]);
        assert_eq!(decoded.dtype(), DType::I32);
    }
}