npy/
npy_data.rs

1
2use nom::*;
3use std::io::{Result, ErrorKind, Error};
4use std::marker::PhantomData;
5
6use header::{Value, DType, parse_header};
7use serializable::Serializable;
8
9
/// A borrowed, lazily decoded view of the records stored in an `npy` file.
///
/// The records are kept in their serialized form as a byte slice and are only converted to
/// `T` when they are asked for, so constructing the view does not allocate per record.
/// Use [`to_vec`](#method.to_vec) to decode every record in one go.
pub struct NpyData<'a, T> {
    // Serialized records, laid out back to back.
    data: &'a [u8],
    // Number of records this view exposes.
    n_records: usize,
    // Ties the view to its record type without storing a `T`.
    _t: PhantomData<T>,
}
21
22impl<'a, T: Serializable> NpyData<'a, T> {
23    /// Deserialize a NPY file represented as bytes
24    pub fn from_bytes(bytes: &'a [u8]) -> ::std::io::Result<NpyData<'a, T>> {
25        let (data_slice, ns) = Self::get_data_slice(bytes)?;
26        Ok(NpyData { data: data_slice, n_records: ns as usize, _t: PhantomData })
27    }
28
29    /// Gets a single data-record with the specified index. Returns None, if the index is
30    /// out of bounds
31    pub fn get(&self, i: usize) -> Option<T> {
32        if i < self.n_records {
33            Some(self.get_unchecked(i))
34        } else {
35            None
36        }
37    }
38
39    /// Returns the total number of records
40    pub fn len(&self) -> usize {
41        self.n_records
42    }
43
44    /// Returns whether there are zero records in this NpyData structure
45    pub fn is_empty(&self) -> bool {
46        self.n_records == 0
47    }
48
49    /// Gets a single data-record wit the specified index. Panics, if the index is out of bounds.
50    pub fn get_unchecked(&self, i: usize) -> T {
51        T::read(&self.data[i * T::n_bytes()..])
52    }
53
54    /// Construct a vector with the deserialized contents of the whole file
55    pub fn to_vec(&self) -> Vec<T> {
56        let mut v = Vec::with_capacity(self.n_records);
57        for i in 0..self.n_records {
58            v.push(self.get_unchecked(i));
59        }
60        v
61    }
62
63    fn get_data_slice(bytes: &[u8]) -> Result<(&[u8], i64)> {
64        let (data, header) = match parse_header(bytes) {
65            IResult::Done(data, header) => {
66                Ok((data, header))
67            },
68            IResult::Incomplete(needed) => {
69                Err(Error::new(ErrorKind::InvalidData, format!("{:?}", needed)))
70            },
71            IResult::Error(err) => {
72                Err(Error::new(ErrorKind::InvalidData, format!("{:?}", err)))
73            }
74        }?;
75
76
77        let ns: i64 =
78            if let Value::Map(ref map) = header {
79                if let Some(&Value::List(ref l)) = map.get("shape") {
80                    if l.len() == 1 {
81                        if let Some(&Value::Integer(ref n)) = l.get(0) {
82                            Some(*n)
83                        } else { None }
84                    } else { None }
85                } else { None }
86            } else { None }
87            .ok_or_else(|| Error::new(ErrorKind::InvalidData,
88                    "\'shape\' field is not present or doesn't consist of a tuple of length 1."))?;
89
90        let descr: &Value =
91            if let Value::Map(ref map) = header {
92                map.get("descr")
93            } else { None }
94            .ok_or_else(|| Error::new(ErrorKind::InvalidData,
95                    "\'descr\' field is not present or doesn't contain a list."))?;
96
97        if let Ok(dtype) = DType::from_descr(descr.clone()) {
98            let expected_dtype = T::dtype();
99            if dtype != expected_dtype {
100                return Err(Error::new(ErrorKind::InvalidData,
101                    format!("Types don't match! found: {:?}, expected: {:?}", dtype, expected_dtype)
102                ));
103            }
104        } else {
105            return Err(Error::new(ErrorKind::InvalidData, format!("fail?!?")));
106        }
107
108        Ok((data, ns))
109    }
110}
111
112/// A result of NPY file deserialization.
113///
114/// It is an iterator to offer a lazy interface in case the data don't fit into memory.
115pub struct IntoIter<'a, T: 'a> {
116    data: NpyData<'a, T>,
117    i: usize,
118}
119
120impl<'a, T> IntoIter<'a, T> {
121    fn new(data: NpyData<'a, T>) -> Self {
122        IntoIter { data, i: 0 }
123    }
124}
125
126impl<'a, T: 'a + Serializable> IntoIterator for NpyData<'a, T> {
127    type Item = T;
128    type IntoIter = IntoIter<'a, T>;
129
130    fn into_iter(self) -> Self::IntoIter {
131        IntoIter::new(self)
132    }
133}
134
135impl<'a, T> Iterator for IntoIter<'a, T> where T: Serializable {
136    type Item = T;
137
138    fn next(&mut self) -> Option<Self::Item> {
139        self.i += 1;
140        self.data.get(self.i - 1)
141    }
142
143    fn size_hint(&self) -> (usize, Option<usize>) {
144        (self.data.len() - self.i, Some(self.data.len() - self.i))
145    }
146}
147
148impl<'a, T> ExactSizeIterator for IntoIter<'a, T> where T: Serializable {}