Skip to main content

hdf5_reader/
io.rs

1use crate::error::{ByteOrder, Error, Result};
2
/// Endian-aware sequential reader over a borrowed byte slice.
///
/// Every piece of HDF5 file parsing is funnelled through a `Cursor`. It pairs
/// the borrowed `&[u8]` with an offset into it; read methods move that offset
/// forward when they succeed.
#[derive(Clone)]
pub struct Cursor<'a> {
    /// The borrowed bytes being read.
    data: &'a [u8],
    /// Index into `data` of the next byte to be read.
    pos: usize,
}
12
13impl<'a> Cursor<'a> {
14    /// Create a new cursor at position 0.
15    pub fn new(data: &'a [u8]) -> Self {
16        Self { data, pos: 0 }
17    }
18
19    /// Current byte position.
20    #[inline]
21    pub fn position(&self) -> u64 {
22        self.pos as u64
23    }
24
25    /// Set the position.
26    pub fn set_position(&mut self, pos: u64) {
27        self.pos = pos as usize;
28    }
29
30    /// Total length of the underlying data.
31    #[inline]
32    pub fn len(&self) -> u64 {
33        self.data.len() as u64
34    }
35
36    /// Returns `true` if the underlying data is empty.
37    #[inline]
38    pub fn is_empty(&self) -> bool {
39        self.data.is_empty()
40    }
41
42    /// Remaining bytes from current position.
43    #[inline]
44    pub fn remaining(&self) -> u64 {
45        self.data.len().saturating_sub(self.pos) as u64
46    }
47
48    /// Get the underlying data slice.
49    #[inline]
50    pub fn data(&self) -> &'a [u8] {
51        self.data
52    }
53
54    /// Get a slice starting from an absolute offset.
55    pub fn slice_from(&self, offset: u64) -> Result<&'a [u8]> {
56        let offset = offset as usize;
57        if offset > self.data.len() {
58            return Err(Error::OffsetOutOfBounds(offset as u64));
59        }
60        Ok(&self.data[offset..])
61    }
62
63    /// Create a new cursor at a given absolute offset.
64    pub fn at_offset(&self, offset: u64) -> Result<Cursor<'a>> {
65        if offset as usize > self.data.len() {
66            return Err(Error::OffsetOutOfBounds(offset));
67        }
68        Ok(Cursor {
69            data: self.data,
70            pos: offset as usize,
71        })
72    }
73
74    /// Read exactly `n` bytes and advance.
75    pub fn read_bytes(&mut self, n: usize) -> Result<&'a [u8]> {
76        let end = self.pos.checked_add(n).ok_or(Error::UnexpectedEof {
77            offset: self.pos as u64,
78            needed: n as u64,
79            available: self.remaining(),
80        })?;
81        if end > self.data.len() {
82            return Err(Error::UnexpectedEof {
83                offset: self.pos as u64,
84                needed: n as u64,
85                available: self.remaining(),
86            });
87        }
88        let slice = &self.data[self.pos..end];
89        self.pos = end;
90        Ok(slice)
91    }
92
93    /// Peek at the next `n` bytes without advancing.
94    pub fn peek_bytes(&self, n: usize) -> Result<&'a [u8]> {
95        let end = self.pos.checked_add(n).ok_or(Error::UnexpectedEof {
96            offset: self.pos as u64,
97            needed: n as u64,
98            available: self.remaining(),
99        })?;
100        if end > self.data.len() {
101            return Err(Error::UnexpectedEof {
102                offset: self.pos as u64,
103                needed: n as u64,
104                available: self.remaining(),
105            });
106        }
107        Ok(&self.data[self.pos..end])
108    }
109
110    /// Skip `n` bytes.
111    pub fn skip(&mut self, n: usize) -> Result<()> {
112        let end = self.pos.checked_add(n).ok_or(Error::UnexpectedEof {
113            offset: self.pos as u64,
114            needed: n as u64,
115            available: self.remaining(),
116        })?;
117        if end > self.data.len() {
118            return Err(Error::UnexpectedEof {
119                offset: self.pos as u64,
120                needed: n as u64,
121                available: self.remaining(),
122            });
123        }
124        self.pos = end;
125        Ok(())
126    }
127
128    /// Align position to `alignment` boundary.
129    pub fn align(&mut self, alignment: usize) -> Result<()> {
130        if alignment == 0 || alignment == 1 {
131            return Ok(());
132        }
133        let remainder = self.pos % alignment;
134        if remainder != 0 {
135            self.skip(alignment - remainder)?;
136        }
137        Ok(())
138    }
139
140    // ---- Single-byte reads ----
141
142    pub fn read_u8(&mut self) -> Result<u8> {
143        let b = self.read_bytes(1)?;
144        Ok(b[0])
145    }
146
147    pub fn read_i8(&mut self) -> Result<i8> {
148        Ok(self.read_u8()? as i8)
149    }
150
151    // ---- Little-endian reads (HDF5 default) ----
152
153    pub fn read_u16_le(&mut self) -> Result<u16> {
154        let b = self.read_bytes(2)?;
155        Ok(u16::from_le_bytes([b[0], b[1]]))
156    }
157
158    pub fn read_u32_le(&mut self) -> Result<u32> {
159        let b = self.read_bytes(4)?;
160        Ok(u32::from_le_bytes([b[0], b[1], b[2], b[3]]))
161    }
162
163    pub fn read_u64_le(&mut self) -> Result<u64> {
164        let b = self.read_bytes(8)?;
165        Ok(u64::from_le_bytes([
166            b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
167        ]))
168    }
169
170    pub fn read_i16_le(&mut self) -> Result<i16> {
171        let b = self.read_bytes(2)?;
172        Ok(i16::from_le_bytes([b[0], b[1]]))
173    }
174
175    pub fn read_i32_le(&mut self) -> Result<i32> {
176        let b = self.read_bytes(4)?;
177        Ok(i32::from_le_bytes([b[0], b[1], b[2], b[3]]))
178    }
179
180    pub fn read_i64_le(&mut self) -> Result<i64> {
181        let b = self.read_bytes(8)?;
182        Ok(i64::from_le_bytes([
183            b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
184        ]))
185    }
186
187    pub fn read_f32_le(&mut self) -> Result<f32> {
188        let b = self.read_bytes(4)?;
189        Ok(f32::from_le_bytes([b[0], b[1], b[2], b[3]]))
190    }
191
192    pub fn read_f64_le(&mut self) -> Result<f64> {
193        let b = self.read_bytes(8)?;
194        Ok(f64::from_le_bytes([
195            b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
196        ]))
197    }
198
199    // ---- Big-endian reads ----
200
201    pub fn read_u16_be(&mut self) -> Result<u16> {
202        let b = self.read_bytes(2)?;
203        Ok(u16::from_be_bytes([b[0], b[1]]))
204    }
205
206    pub fn read_u32_be(&mut self) -> Result<u32> {
207        let b = self.read_bytes(4)?;
208        Ok(u32::from_be_bytes([b[0], b[1], b[2], b[3]]))
209    }
210
211    pub fn read_u64_be(&mut self) -> Result<u64> {
212        let b = self.read_bytes(8)?;
213        Ok(u64::from_be_bytes([
214            b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
215        ]))
216    }
217
218    // ---- Endian-dispatched reads ----
219
220    pub fn read_u16(&mut self, order: ByteOrder) -> Result<u16> {
221        match order {
222            ByteOrder::LittleEndian => self.read_u16_le(),
223            ByteOrder::BigEndian => self.read_u16_be(),
224        }
225    }
226
227    pub fn read_u32(&mut self, order: ByteOrder) -> Result<u32> {
228        match order {
229            ByteOrder::LittleEndian => self.read_u32_le(),
230            ByteOrder::BigEndian => self.read_u32_be(),
231        }
232    }
233
234    pub fn read_u64(&mut self, order: ByteOrder) -> Result<u64> {
235        match order {
236            ByteOrder::LittleEndian => self.read_u64_le(),
237            ByteOrder::BigEndian => self.read_u64_be(),
238        }
239    }
240
241    // ---- Variable-size offset/length reads ----
242
243    /// Read an offset (address) of `size` bytes (little-endian).
244    /// HDF5 uses 2/4/8-byte offsets depending on superblock configuration.
245    pub fn read_offset(&mut self, size: u8) -> Result<u64> {
246        match size {
247            2 => self.read_u16_le().map(u64::from),
248            4 => self.read_u32_le().map(u64::from),
249            8 => self.read_u64_le(),
250            _ => Err(Error::UnsupportedOffsetSize(size)),
251        }
252    }
253
254    /// Read a length of `size` bytes (little-endian).
255    pub fn read_length(&mut self, size: u8) -> Result<u64> {
256        match size {
257            2 => self.read_u16_le().map(u64::from),
258            4 => self.read_u32_le().map(u64::from),
259            8 => self.read_u64_le(),
260            _ => Err(Error::UnsupportedLengthSize(size)),
261        }
262    }
263
264    /// Read a variable-size unsigned integer of 1..=8 bytes (little-endian).
265    pub fn read_uvar(&mut self, size: usize) -> Result<u64> {
266        match size {
267            1 => self.read_u8().map(u64::from),
268            2 => self.read_u16_le().map(u64::from),
269            4 => self.read_u32_le().map(u64::from),
270            8 => self.read_u64_le(),
271            3 | 5..=7 => {
272                let bytes = self.read_bytes(size)?;
273                let mut value = 0u64;
274                for (shift, byte) in bytes.iter().enumerate() {
275                    value |= (*byte as u64) << (shift * 8);
276                }
277                Ok(value)
278            }
279            _ => Err(Error::InvalidData(format!(
280                "unsupported variable integer size: {}",
281                size
282            ))),
283        }
284    }
285
286    /// Check if an offset value represents the "undefined" address.
287    pub fn is_undefined_offset(val: u64, offset_size: u8) -> bool {
288        match offset_size {
289            2 => val == 0xFFFF,
290            4 => val == 0xFFFF_FFFF,
291            8 => val == 0xFFFF_FFFF_FFFF_FFFF,
292            _ => false,
293        }
294    }
295
296    /// Read a null-terminated UTF-8 string.
297    pub fn read_null_terminated_string(&mut self) -> Result<String> {
298        let start = self.pos;
299        while self.pos < self.data.len() {
300            if self.data[self.pos] == 0 {
301                let s = std::str::from_utf8(&self.data[start..self.pos])
302                    .map_err(|e| Error::InvalidData(format!("invalid UTF-8 string: {e}")))?;
303                self.pos += 1; // skip null terminator
304                return Ok(s.to_string());
305            }
306            self.pos += 1;
307        }
308        Err(Error::UnexpectedEof {
309            offset: start as u64,
310            needed: 1,
311            available: 0,
312        })
313    }
314
315    /// Read a fixed-length string, trimming null padding.
316    pub fn read_fixed_string(&mut self, len: usize) -> Result<String> {
317        let bytes = self.read_bytes(len)?;
318        // Trim trailing nulls
319        let end = bytes.iter().rposition(|&b| b != 0).map_or(0, |i| i + 1);
320        let s = std::str::from_utf8(&bytes[..end])
321            .map_err(|e| Error::InvalidData(format!("invalid UTF-8 string: {e}")))?;
322        Ok(s.to_string())
323    }
324}
325
#[cfg(test)]
mod tests {
    use super::*;

    /// A single byte comes back verbatim and the cursor moves forward by one.
    #[test]
    fn test_read_u8() {
        let bytes = [0x42];
        let mut cursor = Cursor::new(&bytes);
        let value = cursor.read_u8().unwrap();
        assert_eq!(value, 0x42);
        assert_eq!(cursor.position(), 1);
    }

    /// Two bytes are combined least-significant first.
    #[test]
    fn test_read_u16_le() {
        let bytes = [0x01, 0x02];
        let value = Cursor::new(&bytes).read_u16_le().unwrap();
        assert_eq!(value, 0x0201);
    }

    /// Four bytes are combined least-significant first.
    #[test]
    fn test_read_u32_le() {
        let bytes = [0x01, 0x02, 0x03, 0x04];
        let value = Cursor::new(&bytes).read_u32_le().unwrap();
        assert_eq!(value, 0x04030201);
    }

    /// Eight bytes are combined least-significant first.
    #[test]
    fn test_read_u64_le() {
        let bytes = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
        let value = Cursor::new(&bytes).read_u64_le().unwrap();
        assert_eq!(value, 0x0807060504030201);
    }

    /// `read_offset` honours both the 4-byte and the 8-byte width.
    #[test]
    fn test_read_offset() {
        let four_wide = [0x00, 0x01, 0x00, 0x00];
        let mut narrow = Cursor::new(&four_wide);
        assert_eq!(narrow.read_offset(4).unwrap(), 256);

        let eight_wide = [0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00];
        let mut wide = Cursor::new(&eight_wide);
        assert_eq!(wide.read_offset(8).unwrap(), 0x100000000);
    }

    /// Reading stops at the first null byte and the cursor lands just after it.
    #[test]
    fn test_null_terminated_string() {
        let bytes = b"hello\0world";
        let mut cursor = Cursor::new(bytes);
        let text = cursor.read_null_terminated_string().unwrap();
        assert_eq!(text, "hello");
        assert_eq!(cursor.position(), 6);
    }

    /// Trailing null padding is stripped from fixed-length strings.
    #[test]
    fn test_fixed_string() {
        let bytes = b"hi\0\0\0";
        let mut cursor = Cursor::new(bytes);
        let text = cursor.read_fixed_string(5).unwrap();
        assert_eq!(text, "hi");
    }

    /// Aligning rounds the position up, and is a no-op once aligned.
    #[test]
    fn test_align() {
        let bytes = [0u8; 16];
        let mut cursor = Cursor::new(&bytes);
        cursor.skip(3).unwrap();

        // Position 3 rounds up to 8; the second call finds it already aligned.
        for _ in 0..2 {
            cursor.align(8).unwrap();
            assert_eq!(cursor.position(), 8);
        }
    }

    /// Asking for more bytes than are available is an error.
    #[test]
    fn test_eof_error() {
        let bytes = [0u8; 2];
        let mut cursor = Cursor::new(&bytes);
        let outcome = cursor.read_u32_le();
        assert!(outcome.is_err());
    }

    /// All-ones values of the matching width count as the undefined address.
    #[test]
    fn test_is_undefined_offset() {
        assert!(Cursor::is_undefined_offset(0xFFFFFFFF, 4));
        assert!(Cursor::is_undefined_offset(0xFFFFFFFFFFFFFFFF, 8));
        assert!(!Cursor::is_undefined_offset(0, 4));
    }
}
413}