sane_array/
read.rs

1use std::io::{prelude::Read, ErrorKind};
2use std::num::TryFromIntError;
3
4use ndarray::{IxDyn, ArrayView, ArrayD, Array, Dimension, ShapeError};
5use crate::data::{DataType, SaneData, Sane, Header, parse_data_type};
6
// This is a macro rather than a generic function because the element width is
// needed as a constant array length (`[u8; COUNT]`), and
// `std::mem::size_of::<T>()` cannot be used in that position for a generic `T`.
//
// Decodes the bytes of `$e` as consecutive little-endian values of type `$t`
// and evaluates to a `Vec<$t>`. `$e` is evaluated exactly once and is only
// borrowed. Trailing bytes that do not form a complete element are ignored.
macro_rules! sane_from_le_bytes {
    ($t:ty, $e:expr) => {
        {
            const COUNT: usize = std::mem::size_of::<$t>();
            // Bind the argument once so an arbitrary expression is not
            // re-evaluated for every element.
            let source: &[u8] = &$e;
            let mut result = Vec::with_capacity(source.len() / COUNT);
            for chunk in source.chunks_exact(COUNT) {
                let mut elem_bytes = [0u8; COUNT];
                // `chunks_exact` only yields slices of exactly `COUNT` bytes,
                // so the lengths always match here.
                elem_bytes.copy_from_slice(chunk);
                result.push(<$t>::from_le_bytes(elem_bytes));
            }
            result
        }
    }
}
24
/// To be able to read SANE-encoded data we need to be able to convert a `Vec<u8>` of
/// little-endian data to the corresponding vector of values.
pub trait ReadSane: SaneData {
    /// Decodes `bytes`, interpreted as consecutive little-endian values, into a
    /// vector of `Self`.
    fn from_le_bytes(bytes: Vec<u8>) -> Vec<Self>;
}
30
31impl ReadSane for f32 {
32    fn from_le_bytes(bytes: Vec<u8>) -> Vec<f32> {
33        return sane_from_le_bytes!(f32, bytes);
34    }
35}
36
37impl ReadSane for i32 {
38    fn from_le_bytes(bytes: Vec<u8>) -> Vec<i32> {
39        return sane_from_le_bytes!(i32, bytes);
40    }
41}
42
43impl ReadSane for u32 {
44    fn from_le_bytes(bytes: Vec<u8>) -> Vec<u32> {
45        return sane_from_le_bytes!(u32, bytes);
46    }
47}
48
49impl ReadSane for f64 {
50    fn from_le_bytes(bytes: Vec<u8>) -> Vec<f64> {
51        return sane_from_le_bytes!(f64, bytes);
52    }
53}
54
55impl ReadSane for i64 {
56    fn from_le_bytes(bytes: Vec<u8>) -> Vec<i64> {
57        return sane_from_le_bytes!(i64, bytes);
58    }
59}
60
61impl ReadSane for u64 {
62    fn from_le_bytes(bytes: Vec<u8>) -> Vec<u64> {
63        return sane_from_le_bytes!(u64, bytes);
64    }
65}
66
67impl ReadSane for i8 {
68    fn from_le_bytes(bytes: Vec<u8>) -> Vec<i8> {
69        return sane_from_le_bytes!(i8, bytes);
70    }
71}
72
73impl ReadSane for u8 {
74    fn from_le_bytes(bytes: Vec<u8>) -> Vec<u8> {
75        return bytes;
76    }
77}
78
79
/// Errors that can occur while parsing SANE-encoded data.
#[derive(Debug)]
pub enum ParseError {
    /// The stream ended while the magic bytes of a header were being read.
    /// The multi-array readers in this file treat this as the end of input.
    EOF,
    /// The first four bytes of the header were not `SANE`.
    NotSANE,
    /// The data type byte in the header was rejected by `parse_data_type`;
    /// carries the value it reported (displayed as the offending code).
    InvalidDataType(u8),
    /// A read of header fields or of the array payload failed; wraps the
    /// underlying I/O error.
    NotEnoughBytes(std::io::Error),
    /// A size stored in the header does not fit into `usize` on this platform.
    CannotConvertToUSize(TryFromIntError),
    /// A read failed; wraps the underlying I/O error. Not constructed by the
    /// readers in this file.
    ReadError(std::io::Error),
    /// ndarray rejected the shape for the decoded data, or the array could not
    /// be converted to the requested dimensionality.
    ShapeError(ShapeError),
    /// The data type in the header differs from the element type requested by
    /// the caller; carries the data type found in the header.
    WrongDataType(DataType),
}
91
92impl std::fmt::Display for ParseError {
93    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94        use ParseError::*;
95        match self {
96            EOF => write!(f, "End of file"),
97            NotSANE => write!(f, "Not a SANE array"),
98            InvalidDataType(code) => write!(f, "Invalid data type code: {}", code),
99            NotEnoughBytes(err) => write!(f, "Not enough bytes: {}", err),
100            CannotConvertToUSize(err) => write!(f, "Cannot convert to size: {}", err),
101            ReadError(err) => write!(f, "Failed to read: {}", err),
102            ShapeError(err) => write!(f, "{}", err),
103            WrongDataType(t) => write!(f, "unexpected data type {:?}", t),
104        }
105    }
106}
107
108fn parse_u32_size(bytes: [u8; 4]) -> Result<usize, ParseError> {
109    usize::try_from(u32::from_le_bytes(bytes)).map_err(ParseError::CannotConvertToUSize)
110}
111
112fn parse_u64_size(bytes: [u8; 8]) -> Result<usize, ParseError> {
113    usize::try_from(u64::from_le_bytes(bytes)).map_err(ParseError::CannotConvertToUSize)
114}
115
116fn read_header<F: Read>(file: &mut F) -> Result<Header, ParseError> {
117    let mut magic_bytes = [0; 4];
118    file.read_exact(&mut magic_bytes).map_err(|err|
119        match err.kind() {
120            ErrorKind::UnexpectedEof => ParseError::EOF,
121            _ => ParseError::NotEnoughBytes(err),
122        }
123    )?;
124    let sane_bytes = "SANE".as_bytes();
125    if magic_bytes != sane_bytes {
126        return Err(ParseError::NotSANE);
127    }
128    let mut shape_length_bytes = [0; 4];
129    file.read_exact(&mut shape_length_bytes).map_err(ParseError::NotEnoughBytes)?;
130    let shape_length = parse_u32_size(shape_length_bytes)?;
131    let mut shape_bytes = vec![0u8; shape_length * 8];
132    file.read_exact(&mut shape_bytes).map_err(ParseError::NotEnoughBytes)?;
133    let mut shape = vec![];
134    for dim in 0..shape_length {
135        let mut dim_bytes = [0; 8];
136        dim_bytes.copy_from_slice(&shape_bytes[dim * 8..(dim+1)*8]);
137        let dimension = parse_u64_size(dim_bytes)?;
138        shape.push(dimension);
139    }
140    shape.reverse();
141    let mut data_type_bytes = [0; 1];
142    file.read_exact(&mut data_type_bytes).map_err(ParseError::NotEnoughBytes)?;
143    let data_type = parse_data_type(data_type_bytes[0]).map_err(ParseError::InvalidDataType)?;
144    let mut data_length_bytes = [0; 8];
145    file.read_exact(&mut data_length_bytes).map_err(ParseError::NotEnoughBytes)?;
146    let data_length = parse_u64_size(data_length_bytes)?;
147    Ok(Header {
148        shape,
149        data_type,
150        data_length,
151    })
152}
153
154fn read_array<T: ReadSane>(dims: IxDyn, byte_data: Vec<u8>) -> Result<ArrayD<T>, ParseError> {
155    if cfg!(endianness = "little") {
156        // If we're on a little-endian system we can just cast the bytes to our type
157        // as the SANE spec guarantees that the data is in little-endian byte order
158        let values = unsafe {
159            byte_data.align_to::<T>().1
160        };
161        let array_view = ArrayView::from_shape(dims, &values).map_err(ParseError::ShapeError)?;
162        Ok(array_view.to_owned())
163    } else {
164        let vec = T::from_le_bytes(byte_data);
165        let array_view = ArrayView::from_shape(dims, &vec).map_err(ParseError::ShapeError)?;
166        Ok(array_view.to_owned())
167    }
168}
169
170fn read_array_with_shape<T: ReadSane, D: Dimension>(shape: Vec<usize>, byte_data: Vec<u8>) -> Result<Array<T,D>, ParseError> {
171    let dyn_dims = IxDyn(&shape);
172    if cfg!(endianness = "little") {
173        let values = unsafe {
174            // If we're on a little-endian system we can just cast the bytes to our type
175            // as the SANE spec guarantees that the data is in little-endian byte order
176            byte_data.align_to::<T>().1
177        };
178        let array_view = ArrayView::from_shape(dyn_dims, &values).map_err(ParseError::ShapeError)?;
179        let shaped_array = array_view.into_dimensionality().map_err(ParseError::ShapeError)?;
180        Ok(shaped_array.to_owned())
181    } else {
182        let values = T::from_le_bytes(byte_data);
183        let array_view = ArrayView::from_shape(dyn_dims, &values).map_err(ParseError::ShapeError)?;
184        let shaped_array = array_view.into_dimensionality().map_err(ParseError::ShapeError)?;
185        Ok(shaped_array.to_owned())
186    }
187}
188
189/// Parse a SANE-encoded file into an array with known type and rank
190pub fn read_sane<F: Read, A: ReadSane, D: Dimension>(
191    file: &mut F,
192) -> Result<Array<A, D>, ParseError> {
193    let header = read_header(file)?;
194    let mut sane_data = vec![0u8; header.data_length];
195    file.read_exact(&mut sane_data).map_err(ParseError::NotEnoughBytes)?;
196    if header.data_type != A::sane_data_type() {
197        Err(ParseError::WrongDataType(header.data_type))?;
198    }
199    let sane = read_array_with_shape(header.shape, sane_data)?;
200    Ok(sane)
201}
202
203
204/// Parse a SANE-encoded file into an array with dynamic type and rank
205pub fn read_sane_dyn<F: Read>(
206    file: &mut F,
207) -> Result<Sane, ParseError> {
208    let header = read_header(file)?;
209    let mut sane_data = vec![0u8; header.data_length];
210    file.read_exact(&mut sane_data).map_err(ParseError::NotEnoughBytes)?;
211    let dims: IxDyn = IxDyn(&header.shape);
212    let sane = match header.data_type {
213        DataType::F32 => read_array(dims, sane_data).map(Sane::ArrayF32),
214        DataType::I32 => read_array(dims, sane_data).map(Sane::ArrayI32),
215        DataType::U32 => read_array(dims, sane_data).map(Sane::ArrayU32),
216        DataType::F64 => read_array(dims, sane_data).map(Sane::ArrayF64),
217        DataType::I64 => read_array(dims, sane_data).map(Sane::ArrayI64),
218        DataType::U64 => read_array(dims, sane_data).map(Sane::ArrayU64),
219        DataType::I8 => read_array(dims, sane_data).map(Sane::ArrayI8),
220        DataType::U8 => read_array(dims, sane_data).map(Sane::ArrayU8),
221    }?;
222    Ok(sane)
223}
224
225/// Parse multiple SANE-encoded arrays from a file
226pub fn read_sane_arrays<F: Read, A: ReadSane, D: Dimension>(
227    file: &mut F,
228) -> Result<Vec<Array<A, D>>, ParseError> {
229    let mut arrays = vec![];
230    loop {
231        match read_sane(file) {
232            Ok(array) => arrays.push(array),
233            Err(e) => match e {
234                ParseError::EOF => return Ok(arrays),
235                _ => return Err(e),
236            },
237        }
238    }
239}
240
241/// Parse multiple SANE-encoded arrays each with dynamic data type and rank
242pub fn read_sane_arrays_dyn<F: Read>(
243    file: &mut F,
244) -> Result<Vec<Sane>, ParseError> {
245    let mut arrays = vec![];
246    loop {
247        match read_sane_dyn(file) {
248            Ok(array) => arrays.push(array),
249            Err(e) => match e {
250                ParseError::EOF => return Ok(arrays),
251                _ => return Err(e),
252            },
253        }
254    }
255}