Skip to main content

rustyhdf5_io/
lib.rs

1//! I/O abstraction layer for HDF5 file access.
2//!
3//! Provides traits and adapters for reading and writing HDF5 data
4//! from files, memory buffers, and optionally memory-mapped files.
5
6use std::io::{self, Read, Seek, SeekFrom, Write};
7
8pub use rustyhdf5_format;
9
/// Read-only view over the bytes of an HDF5 file.
///
/// An implementor exposes the complete file content as one contiguous
/// byte slice, which is the interface `rustyhdf5-format` expects.
pub trait HDF5Read {
    /// Borrows the full file content.
    fn as_bytes(&self) -> &[u8];

    /// Number of bytes in the content, as reported by [`as_bytes`](Self::as_bytes).
    fn len(&self) -> usize {
        let bytes = self.as_bytes();
        bytes.len()
    }

    /// `true` when [`as_bytes`](Self::as_bytes) yields no bytes at all.
    fn is_empty(&self) -> bool {
        let bytes = self.as_bytes();
        bytes.is_empty()
    }
}
28
29/// Read-write access to HDF5 data.
30///
31/// Implementors can both read existing data and write new data.
32pub trait HDF5ReadWrite: HDF5Read {
33    /// Write the given bytes to the underlying storage, replacing all content.
34    fn write_all_bytes(&mut self, data: &[u8]) -> io::Result<()>;
35}
36
// ---------------------------------------------------------------------------
// MemoryReader — owns a Vec<u8> (optionally copied from a &[u8]) for in-memory access
// ---------------------------------------------------------------------------
40
/// Owned, in-memory byte store for HDF5 content.
///
/// The whole file is held as a `Vec<u8>`, mirroring how `rustyhdf5-format`
/// currently works with file data.
#[derive(Debug, Clone)]
pub struct MemoryReader {
    data: Vec<u8>,
}

impl MemoryReader {
    /// Wraps an existing byte vector, taking ownership of it.
    pub fn new(data: Vec<u8>) -> Self {
        MemoryReader { data }
    }

    /// Builds a reader that holds its own copy of `data`.
    pub fn from_slice(data: &[u8]) -> Self {
        Self::new(data.to_owned())
    }

    /// Unwraps the reader, handing back the byte vector it owned.
    pub fn into_inner(self) -> Vec<u8> {
        let MemoryReader { data } = self;
        data
    }
}
68
69impl HDF5Read for MemoryReader {
70    fn as_bytes(&self) -> &[u8] {
71        &self.data
72    }
73}
74
75impl HDF5ReadWrite for MemoryReader {
76    fn write_all_bytes(&mut self, data: &[u8]) -> io::Result<()> {
77        self.data = data.to_vec();
78        Ok(())
79    }
80}
81
82// ---------------------------------------------------------------------------
83// BorrowedReader — wraps &[u8] without copying
84// ---------------------------------------------------------------------------
85
/// Reader over a byte slice it only borrows; the bytes are never copied.
#[derive(Debug, Clone, Copy)]
pub struct BorrowedReader<'a> {
    data: &'a [u8],
}

impl<'a> BorrowedReader<'a> {
    /// Wraps `data`, keeping the borrow for the lifetime `'a`.
    pub fn new(data: &'a [u8]) -> Self {
        BorrowedReader { data }
    }
}
98
99impl HDF5Read for BorrowedReader<'_> {
100    fn as_bytes(&self) -> &[u8] {
101        self.data
102    }
103}
104
// ---------------------------------------------------------------------------
// FileReader — reads a std::fs::File fully into memory for read access
// ---------------------------------------------------------------------------
108
/// File-backed reader that loads the entire file into memory.
///
/// The content is read once, at construction time, into a `Vec<u8>`; the
/// file handle is dropped before the constructor returns.
#[derive(Debug)]
pub struct FileReader {
    data: Vec<u8>,
}

impl FileReader {
    /// Open the file at `path` and read its entire contents into memory.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while opening, rewinding, or reading
    /// the file.
    pub fn open<P: AsRef<std::path::Path>>(path: P) -> io::Result<Self> {
        Self::from_file(std::fs::File::open(path)?)
    }

    /// Create a reader from an already-opened file.
    ///
    /// The file is rewound to its start first, so the whole content is read
    /// no matter where the handle's cursor was.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while rewinding or reading the file.
    pub fn from_file(mut file: std::fs::File) -> io::Result<Self> {
        file.seek(SeekFrom::Start(0))?;

        // Read until end-of-file rather than trusting a length measured up
        // front: no `u64 -> usize` cast that could silently truncate on
        // 32-bit targets, and files whose reported size is stale or zero
        // are still read completely.
        let mut data = Vec::new();
        file.read_to_end(&mut data)?;
        Ok(Self { data })
    }

    /// Consume the reader and return the underlying bytes.
    pub fn into_inner(self) -> Vec<u8> {
        self.data
    }
}
142
143impl HDF5Read for FileReader {
144    fn as_bytes(&self) -> &[u8] {
145        &self.data
146    }
147}
148
// ---------------------------------------------------------------------------
// FileWriter — buffers bytes in memory and writes them to a file path
// ---------------------------------------------------------------------------
152
/// Writer that keeps its content in memory and persists it to a file path.
#[derive(Debug)]
pub struct FileWriter {
    path: std::path::PathBuf,
    data: Vec<u8>,
}

impl FileWriter {
    /// Builds a writer targeting `path`, starting with empty content.
    ///
    /// Nothing on disk is touched here; the target file is only created
    /// when the content is flushed.
    pub fn create<P: AsRef<std::path::Path>>(path: P) -> io::Result<Self> {
        let writer = FileWriter {
            path: path.as_ref().to_owned(),
            data: Vec::new(),
        };
        Ok(writer)
    }

    /// Writes the buffered content to the target path.
    ///
    /// The target is opened with `File::create`, so it is created when
    /// missing and truncated when it already exists.
    pub fn flush_to_disk(&self) -> io::Result<()> {
        let mut out = std::fs::File::create(self.path.as_path())?;
        out.write_all(self.data.as_slice())?;
        out.flush()
    }

    /// The path this writer persists to.
    pub fn path(&self) -> &std::path::Path {
        self.path.as_path()
    }
}
182
183impl HDF5Read for FileWriter {
184    fn as_bytes(&self) -> &[u8] {
185        &self.data
186    }
187}
188
189impl HDF5ReadWrite for FileWriter {
190    fn write_all_bytes(&mut self, data: &[u8]) -> io::Result<()> {
191        self.data = data.to_vec();
192        self.flush_to_disk()
193    }
194}
195
196// ---------------------------------------------------------------------------
197// Optional modules
198// ---------------------------------------------------------------------------
199
200#[cfg(feature = "async")]
201pub mod async_read;
202
203#[cfg(feature = "hsds")]
204pub mod hsds;
205
206#[cfg(feature = "mmap")]
207pub mod mmap;
208
209#[cfg(feature = "mmap")]
210pub use mmap::{MmapReader, MmapReadWrite};
211
212pub mod prefetch;
213pub mod sweep;
214
/// Settings for lane-partitioned parallel decompression.
///
/// Determines how chunks are spread over threads during parallel reads.
/// Under the lane partitioning scheme every thread receives a deterministic,
/// disjoint subset of chunks, so no locks or runtime coordination are needed.
#[derive(Debug, Clone)]
pub struct ParallelConfig {
    /// How many parallel lanes (threads) to use; `None` means auto-detect
    /// from the available CPU cores.
    pub num_lanes: Option<usize>,
    /// Whether work-stealing rebalancing is enabled.  When `true`, the
    /// partitioner moves excess items from overloaded lanes to underloaded
    /// ones so that no lane differs by more than 1 chunk.  Default: `true`.
    pub work_stealing: bool,
}

impl Default for ParallelConfig {
    /// Auto-detected lane count with work-stealing switched on.
    fn default() -> Self {
        ParallelConfig {
            work_stealing: true,
            num_lanes: None,
        }
    }
}

impl ParallelConfig {
    /// Builds a config with an explicit lane count; work-stealing stays on.
    pub fn with_lanes(num_lanes: usize) -> Self {
        let lanes = Some(num_lanes);
        ParallelConfig {
            num_lanes: lanes,
            work_stealing: true,
        }
    }
}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// Signature bytes the tests expect at the start of an HDF5 file.
    const HDF5_SIGNATURE: &[u8; 8] = b"\x89HDF\r\n\x1a\n";

    /// Location of a scratch file called `name` inside the system temp directory.
    fn scratch(name: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(name)
    }

    // --- MemoryReader -------------------------------------------------------

    #[test]
    fn memory_reader_from_vec() {
        let bytes = vec![1u8, 2, 3, 4, 5];
        let reader = MemoryReader::new(bytes.clone());
        assert_eq!(reader.as_bytes(), bytes.as_slice());
        assert_eq!(reader.len(), 5);
        assert!(!reader.is_empty());
    }

    #[test]
    fn memory_reader_from_slice() {
        let raw = [10u8, 20, 30];
        let reader = MemoryReader::from_slice(&raw);
        assert_eq!(reader.as_bytes(), &raw[..]);
    }

    #[test]
    fn memory_reader_empty() {
        let reader = MemoryReader::new(Vec::new());
        assert_eq!(reader.len(), 0);
        assert!(reader.is_empty());
    }

    #[test]
    fn memory_reader_into_inner() {
        let bytes = vec![7u8, 8, 9];
        let recovered = MemoryReader::new(bytes.clone()).into_inner();
        assert_eq!(recovered, bytes);
    }

    #[test]
    fn memory_reader_write_replaces_content() {
        let mut reader = MemoryReader::new(vec![1, 2, 3]);
        reader.write_all_bytes(&[4, 5]).unwrap();
        assert_eq!(reader.as_bytes(), &[4, 5]);
    }

    // --- BorrowedReader -----------------------------------------------------

    #[test]
    fn borrowed_reader_basic() {
        let raw = [42u8, 43, 44];
        let reader = BorrowedReader::new(&raw);
        assert_eq!(reader.as_bytes(), &raw[..]);
        assert_eq!(reader.len(), 3);
        assert!(!reader.is_empty());
    }

    #[test]
    fn borrowed_reader_empty() {
        let reader = BorrowedReader::new(&[]);
        assert!(reader.is_empty());
    }

    // --- FileReader ---------------------------------------------------------

    #[test]
    fn file_reader_roundtrip() {
        let path = scratch("rustyhdf5_io_test_file_reader.bin");

        // Seed the file with known content.
        std::fs::write(&path, [0x89u8, 0x48, 0x44, 0x46]).unwrap();

        let reader = FileReader::open(&path).unwrap();
        assert_eq!(reader.as_bytes(), &[0x89, 0x48, 0x44, 0x46]);
        assert_eq!(reader.len(), 4);

        let recovered = reader.into_inner();
        assert_eq!(recovered, vec![0x89, 0x48, 0x44, 0x46]);

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn file_reader_from_file() {
        let path = scratch("rustyhdf5_io_test_from_file.bin");

        std::fs::write(&path, [1u8, 2, 3, 4, 5, 6]).unwrap();

        let handle = std::fs::File::open(&path).unwrap();
        let reader = FileReader::from_file(handle).unwrap();
        assert_eq!(reader.as_bytes(), &[1, 2, 3, 4, 5, 6]);

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn file_reader_nonexistent() {
        let outcome = FileReader::open("/tmp/rustyhdf5_io_does_not_exist_12345.bin");
        assert!(outcome.is_err());
    }

    // --- FileWriter ---------------------------------------------------------

    #[test]
    fn file_writer_create_and_write() {
        let path = scratch("rustyhdf5_io_test_writer.bin");

        let mut writer = FileWriter::create(&path).unwrap();
        assert!(writer.as_bytes().is_empty());

        writer.write_all_bytes(&[10, 20, 30]).unwrap();
        assert_eq!(writer.as_bytes(), &[10, 20, 30]);

        // The same bytes must also have reached the file.
        let persisted = std::fs::read(&path).unwrap();
        assert_eq!(persisted, vec![10, 20, 30]);

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn file_writer_overwrite() {
        let path = scratch("rustyhdf5_io_test_writer_overwrite.bin");

        let mut writer = FileWriter::create(&path).unwrap();
        writer.write_all_bytes(&[1, 2, 3]).unwrap();
        writer.write_all_bytes(&[4, 5, 6, 7]).unwrap();

        let persisted = std::fs::read(&path).unwrap();
        assert_eq!(persisted, vec![4, 5, 6, 7]);

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn file_writer_path() {
        let path = scratch("rustyhdf5_io_test_path.bin");
        let writer = FileWriter::create(&path).unwrap();
        assert_eq!(writer.path(), path.as_path());
    }

    #[test]
    fn file_writer_flush_to_disk() {
        let path = scratch("rustyhdf5_io_test_flush.bin");

        let mut writer = FileWriter::create(&path).unwrap();
        writer.write_all_bytes(&[0xDE, 0xAD]).unwrap();
        writer.flush_to_disk().unwrap();

        let persisted = std::fs::read(&path).unwrap();
        assert_eq!(persisted, vec![0xDE, 0xAD]);

        std::fs::remove_file(&path).ok();
    }

    // --- Integration with rustyhdf5-format ----------------------------------

    #[test]
    fn hdf5_file_via_memory_reader() {
        // Feed MemoryReader an HDF5 image produced by rustyhdf5-format.
        use rustyhdf5_format::file_writer::FileWriter as FmtWriter;

        let mut fw = FmtWriter::new();
        fw.create_dataset("test").with_f64_data(&[1.0, 2.0, 3.0]);
        let bytes = fw.finish().unwrap();

        let reader = MemoryReader::new(bytes);
        let content = reader.as_bytes();

        // The image must be longer than, and start with, the signature.
        assert!(content.len() > 8);
        assert_eq!(&content[..8], HDF5_SIGNATURE);
    }

    #[test]
    fn hdf5_file_via_file_reader_writer() {
        use rustyhdf5_format::file_writer::FileWriter as FmtWriter;

        let path = scratch("rustyhdf5_io_test_hdf5_roundtrip.h5");

        // Produce an HDF5 image and persist it through this crate's FileWriter.
        let mut fw = FmtWriter::new();
        fw.create_dataset("values").with_i32_data(&[10, 20, 30]);
        let bytes = fw.finish().unwrap();

        let mut writer = FileWriter::create(&path).unwrap();
        writer.write_all_bytes(&bytes).unwrap();

        // Load it again through FileReader and compare.
        let reader = FileReader::open(&path).unwrap();
        assert_eq!(reader.as_bytes(), &bytes);
        assert_eq!(&reader.as_bytes()[..8], HDF5_SIGNATURE);

        std::fs::remove_file(&path).ok();
    }
}