Skip to main content

hdf5_reader/
storage.rs

1use std::fs::File;
2use std::ops::Deref;
3use std::path::Path;
4use std::sync::Arc;
5
6use memmap2::Mmap;
7
8use crate::error::{Error, Result};
9
10#[derive(Clone)]
11enum StorageBacking {
12    Bytes(Arc<[u8]>),
13    Mmap(Arc<Mmap>),
14}
15
16/// An immutable byte range returned by a storage backend.
17#[derive(Clone)]
18pub struct StorageBuffer {
19    backing: StorageBacking,
20    start: usize,
21    len: usize,
22}
23
24impl StorageBuffer {
25    pub fn from_vec(bytes: Vec<u8>) -> Self {
26        let len = bytes.len();
27        Self {
28            backing: StorageBacking::Bytes(Arc::<[u8]>::from(bytes)),
29            start: 0,
30            len,
31        }
32    }
33
34    pub(crate) fn from_arc_bytes(bytes: Arc<[u8]>, start: usize, len: usize) -> Self {
35        Self {
36            backing: StorageBacking::Bytes(bytes),
37            start,
38            len,
39        }
40    }
41
42    pub(crate) fn from_arc_mmap(mmap: Arc<Mmap>, start: usize, len: usize) -> Self {
43        Self {
44            backing: StorageBacking::Mmap(mmap),
45            start,
46            len,
47        }
48    }
49
50    pub fn len(&self) -> usize {
51        self.len
52    }
53
54    pub fn is_empty(&self) -> bool {
55        self.len == 0
56    }
57}
58
59impl AsRef<[u8]> for StorageBuffer {
60    fn as_ref(&self) -> &[u8] {
61        self
62    }
63}
64
65impl Deref for StorageBuffer {
66    type Target = [u8];
67
68    fn deref(&self) -> &Self::Target {
69        match &self.backing {
70            StorageBacking::Bytes(bytes) => &bytes[self.start..self.start + self.len],
71            StorageBacking::Mmap(mmap) => &mmap[self.start..self.start + self.len],
72        }
73    }
74}
75
76/// Random-access, immutable byte storage for HDF5 parsing and reads.
77pub trait Storage: Send + Sync {
78    /// Total length in bytes.
79    fn len(&self) -> u64;
80
81    /// Returns `true` if the storage is empty.
82    fn is_empty(&self) -> bool {
83        self.len() == 0
84    }
85
86    /// Read a byte range from `offset..offset+len`.
87    fn read_range(&self, offset: u64, len: usize) -> Result<StorageBuffer>;
88}
89
90pub type DynStorage = Arc<dyn Storage>;
91
/// Storage that keeps its entire contents in memory as shared bytes.
pub struct BytesStorage {
    data: Arc<[u8]>,
}

impl BytesStorage {
    /// Takes ownership of `data` and moves it into a reference-counted slice.
    pub fn new(data: Vec<u8>) -> Self {
        Self { data: data.into() }
    }
}
104
105impl Storage for BytesStorage {
106    fn len(&self) -> u64 {
107        self.data.len() as u64
108    }
109
110    fn read_range(&self, offset: u64, len: usize) -> Result<StorageBuffer> {
111        let start = usize::try_from(offset).map_err(|_| Error::OffsetOutOfBounds(offset))?;
112        let end = start
113            .checked_add(len)
114            .ok_or(Error::OffsetOutOfBounds(offset))?;
115        if end > self.data.len() {
116            return Err(Error::UnexpectedEof {
117                offset,
118                needed: len as u64,
119                available: self.len().saturating_sub(offset),
120            });
121        }
122        Ok(StorageBuffer::from_arc_bytes(self.data.clone(), start, len))
123    }
124}
125
126/// In-memory storage backed by a read-only memory map.
127pub struct MmapStorage {
128    mmap: Arc<Mmap>,
129}
130
131impl MmapStorage {
132    pub fn new(mmap: Mmap) -> Self {
133        Self {
134            mmap: Arc::new(mmap),
135        }
136    }
137}
138
139impl Storage for MmapStorage {
140    fn len(&self) -> u64 {
141        self.mmap.len() as u64
142    }
143
144    fn read_range(&self, offset: u64, len: usize) -> Result<StorageBuffer> {
145        let start = usize::try_from(offset).map_err(|_| Error::OffsetOutOfBounds(offset))?;
146        let end = start
147            .checked_add(len)
148            .ok_or(Error::OffsetOutOfBounds(offset))?;
149        if end > self.mmap.len() {
150            return Err(Error::UnexpectedEof {
151                offset,
152                needed: len as u64,
153                available: self.len().saturating_sub(offset),
154            });
155        }
156        Ok(StorageBuffer::from_arc_mmap(self.mmap.clone(), start, len))
157    }
158}
159
160/// File-backed storage that serves explicit byte ranges via positional reads.
161pub struct FileStorage {
162    file: Arc<File>,
163    len: u64,
164}
165
166impl FileStorage {
167    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
168        let file = File::open(path)?;
169        let len = file.metadata()?.len();
170        Ok(Self {
171            file: Arc::new(file),
172            len,
173        })
174    }
175}
176
177impl Storage for FileStorage {
178    fn len(&self) -> u64 {
179        self.len
180    }
181
182    fn read_range(&self, offset: u64, len: usize) -> Result<StorageBuffer> {
183        let needed = u64::try_from(len).map_err(|_| Error::OffsetOutOfBounds(offset))?;
184        let end = offset
185            .checked_add(needed)
186            .ok_or(Error::OffsetOutOfBounds(offset))?;
187        if end > self.len {
188            return Err(Error::UnexpectedEof {
189                offset,
190                needed,
191                available: self.len.saturating_sub(offset),
192            });
193        }
194
195        let mut buf = vec![0u8; len];
196        read_exact_at(self.file.as_ref(), &mut buf, offset)?;
197        Ok(StorageBuffer::from_vec(buf))
198    }
199}
200
/// Fills `buf` completely with the bytes of `file` starting at `offset`.
///
/// Delegates to the standard library's `FileExt::read_exact_at`, which keeps
/// issuing positional reads until the buffer is full and transparently retries
/// reads that fail with `ErrorKind::Interrupted`. The previous hand-written
/// loop surfaced such interruptions as hard errors.
///
/// # Errors
///
/// Returns `ErrorKind::UnexpectedEof` ("failed to fill whole buffer") if the
/// file ends before `buf` is full, or any other I/O error from the read.
#[cfg(unix)]
fn read_exact_at(file: &File, buf: &mut [u8], offset: u64) -> std::io::Result<()> {
    use std::os::unix::fs::FileExt;

    FileExt::read_exact_at(file, buf, offset)
}
218
/// Fills `buf` completely with the bytes of `file` starting at `offset`.
///
/// The Windows `FileExt` trait has no "read exact" helper, so this loops over
/// `seek_read`, advancing past each partial read. Reads that fail with
/// `ErrorKind::Interrupted` are retried, which matches the behaviour of the
/// standard library's own exact-read helpers.
///
/// Note: per the standard library documentation, `seek_read` also moves the
/// file cursor; this function always passes an explicit offset and never
/// relies on the cursor.
///
/// # Errors
///
/// Returns `ErrorKind::UnexpectedEof` ("failed to fill whole buffer") if the
/// file ends before `buf` is full, or any other I/O error from the read.
#[cfg(windows)]
fn read_exact_at(file: &File, mut buf: &mut [u8], mut offset: u64) -> std::io::Result<()> {
    use std::io::ErrorKind;
    use std::os::windows::fs::FileExt;

    while !buf.is_empty() {
        match file.seek_read(buf, offset) {
            // A zero-length read with space left in `buf` means end of file.
            Ok(0) => {
                return Err(std::io::Error::new(
                    ErrorKind::UnexpectedEof,
                    "failed to fill whole buffer",
                ));
            }
            Ok(n) => {
                offset += n as u64;
                buf = &mut buf[n..];
            }
            // An interrupted read made no progress; try the same range again.
            Err(err) if err.kind() == ErrorKind::Interrupted => {}
            Err(err) => return Err(err),
        }
    }
    Ok(())
}