1pub mod checksum;
2pub mod error;
3pub mod io;
4
5pub mod superblock;
7
8pub mod btree_v1;
10pub mod btree_v2;
11pub mod chunk_index;
12pub mod extensible_array;
13pub mod fixed_array;
14pub mod fractal_heap;
15pub mod global_heap;
16pub mod local_heap;
17pub mod symbol_table;
18
19pub mod messages;
21pub mod object_header;
22
23pub mod attribute_api;
25pub mod dataset;
26pub mod datatype_api;
27pub mod group;
28pub mod reference;
29pub mod storage;
30
31pub mod filters;
33
34pub mod cache;
36
37use std::collections::HashMap;
38use std::path::Path;
39use std::sync::{Arc, OnceLock};
40
41use memmap2::Mmap;
42use cache::ChunkCache;
45use error::{Error, Result};
46use group::Group;
47use object_header::ObjectHeader;
48use storage::DynStorage;
49use superblock::Superblock;
50
51pub use attribute_api::Attribute;
53use dataset::DatasetTemplate;
54pub use dataset::{Dataset, SliceInfo, SliceInfoElem};
55pub use datatype_api::{
56 dtype_element_size, CompoundField, EnumMember, H5Type, ReferenceType, StringEncoding,
57 StringPadding, StringSize,
58};
59pub use error::ByteOrder;
60pub use filters::FilterRegistry;
61pub use messages::datatype::Datatype;
62pub use storage::{BytesStorage, FileStorage, MmapStorage, Storage, StorageBuffer};
63
64pub struct OpenOptions {
66 pub chunk_cache_bytes: usize,
68 pub chunk_cache_slots: usize,
70 pub filter_registry: Option<FilterRegistry>,
72}
73
74impl Default for OpenOptions {
75 fn default() -> Self {
76 OpenOptions {
77 chunk_cache_bytes: 64 * 1024 * 1024,
78 chunk_cache_slots: 521,
79 filter_registry: None,
80 }
81 }
82}
83
84pub type HeaderCache = Arc<parking_lot::Mutex<HashMap<u64, Arc<ObjectHeader>>>>;
86
87pub struct Hdf5File {
93 context: Arc<FileContext>,
94}
95
96pub(crate) struct FileContext {
97 pub(crate) storage: DynStorage,
98 pub(crate) superblock: Superblock,
99 pub(crate) chunk_cache: Arc<ChunkCache>,
100 pub(crate) header_cache: HeaderCache,
101 pub(crate) dataset_path_cache: Arc<parking_lot::Mutex<HashMap<String, Arc<DatasetTemplate>>>>,
102 pub(crate) filter_registry: Arc<FilterRegistry>,
103 full_file_cache: OnceLock<StorageBuffer>,
104}
105
106impl FileContext {
107 pub(crate) fn read_range(&self, offset: u64, len: usize) -> Result<StorageBuffer> {
108 self.storage.read_range(offset, len)
109 }
110
111 pub(crate) fn full_file_data(&self) -> Result<StorageBuffer> {
112 if let Some(buffer) = self.full_file_cache.get() {
113 return Ok(buffer.clone());
114 }
115
116 let len = usize::try_from(self.storage.len()).map_err(|_| {
117 Error::InvalidData("file size exceeds platform usize capacity".to_string())
118 })?;
119 let buffer = self.storage.read_range(0, len)?;
120 let _ = self.full_file_cache.set(buffer);
121 Ok(self
122 .full_file_cache
123 .get()
124 .expect("full-file buffer must exist after successful initialization")
125 .clone())
126 }
127
128 pub(crate) fn get_or_parse_header(&self, addr: u64) -> Result<Arc<ObjectHeader>> {
129 {
130 let cache = self.header_cache.lock();
131 if let Some(hdr) = cache.get(&addr) {
132 return Ok(Arc::clone(hdr));
133 }
134 }
135
136 let mut hdr = ObjectHeader::parse_at_storage(
137 self.storage.as_ref(),
138 addr,
139 self.superblock.offset_size,
140 self.superblock.length_size,
141 )?;
142 hdr.resolve_shared_messages_storage(
143 self.storage.as_ref(),
144 self.superblock.offset_size,
145 self.superblock.length_size,
146 )?;
147 let arc = Arc::new(hdr);
148 let mut cache = self.header_cache.lock();
149 cache.insert(addr, Arc::clone(&arc));
150 Ok(arc)
151 }
152}
153
154impl Hdf5File {
155 fn from_storage_impl(storage: DynStorage, options: OpenOptions) -> Result<Self> {
156 let superblock = Superblock::parse_from_storage(storage.as_ref())?;
157 let cache = Arc::new(ChunkCache::new(
158 options.chunk_cache_bytes,
159 options.chunk_cache_slots,
160 ));
161 let registry = options.filter_registry.unwrap_or_default();
162
163 Ok(Hdf5File {
164 context: Arc::new(FileContext {
165 storage,
166 superblock,
167 chunk_cache: cache,
168 header_cache: Arc::new(parking_lot::Mutex::new(HashMap::new())),
169 dataset_path_cache: Arc::new(parking_lot::Mutex::new(HashMap::new())),
170 filter_registry: Arc::new(registry),
171 full_file_cache: OnceLock::new(),
172 }),
173 })
174 }
175
176 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
178 Self::open_with_options(path, OpenOptions::default())
179 }
180
181 pub fn open_with_options(path: impl AsRef<Path>, options: OpenOptions) -> Result<Self> {
183 Self::from_storage_with_options(Arc::new(FileStorage::open(path)?), options)
184 }
185
186 pub fn from_bytes(data: &[u8]) -> Result<Self> {
190 Self::from_bytes_with_options(data, OpenOptions::default())
191 }
192
193 pub fn from_bytes_with_options(data: &[u8], options: OpenOptions) -> Result<Self> {
197 Self::from_vec_with_options(data.to_vec(), options)
198 }
199
200 pub fn from_vec(data: Vec<u8>) -> Result<Self> {
202 Self::from_vec_with_options(data, OpenOptions::default())
203 }
204
205 pub fn from_vec_with_options(data: Vec<u8>, options: OpenOptions) -> Result<Self> {
207 Self::from_storage_with_options(Arc::new(BytesStorage::new(data)), options)
208 }
209
210 pub fn from_mmap_with_options(mmap: Mmap, options: OpenOptions) -> Result<Self> {
214 Self::from_storage_with_options(Arc::new(MmapStorage::new(mmap)), options)
215 }
216
217 pub fn from_storage(storage: DynStorage) -> Result<Self> {
219 Self::from_storage_with_options(storage, OpenOptions::default())
220 }
221
222 pub fn from_storage_with_options(storage: DynStorage, options: OpenOptions) -> Result<Self> {
224 Self::from_storage_impl(storage, options)
225 }
226
227 pub fn superblock(&self) -> &Superblock {
229 &self.context.superblock
230 }
231
232 pub fn storage(&self) -> &dyn Storage {
234 self.context.storage.as_ref()
235 }
236
237 pub fn get_or_parse_header(&self, addr: u64) -> Result<Arc<ObjectHeader>> {
241 self.context.get_or_parse_header(addr)
242 }
243
244 pub fn root_group(&self) -> Result<Group> {
246 let addr = self.context.superblock.root_object_header_address()?;
247
248 Ok(Group::new(
249 self.context.clone(),
250 addr,
251 "/".to_string(),
252 addr, ))
254 }
255
256 pub fn dataset(&self, path: &str) -> Result<Dataset> {
258 let parts: Vec<&str> = path
259 .trim_start_matches('/')
260 .split('/')
261 .filter(|s| !s.is_empty())
262 .collect();
263 let normalized_path = format!("/{}", parts.join("/"));
264
265 if parts.is_empty() {
266 return Err(Error::DatasetNotFound(path.to_string()).with_context(path));
267 }
268
269 if let Some(template) = self
270 .context
271 .dataset_path_cache
272 .lock()
273 .get(&normalized_path)
274 .cloned()
275 {
276 return Ok(Dataset::from_template(self.context.clone(), template));
277 }
278
279 let mut group = self.root_group()?;
280 for &part in &parts[..parts.len() - 1] {
281 group = group.group(part).map_err(|e| e.with_context(path))?;
282 }
283
284 let dataset = group
285 .dataset(parts[parts.len() - 1])
286 .map_err(|e| e.with_context(path))?;
287 self.context
288 .dataset_path_cache
289 .lock()
290 .insert(normalized_path, dataset.template());
291 Ok(dataset)
292 }
293
294 pub fn group(&self, path: &str) -> Result<Group> {
296 let parts: Vec<&str> = path
297 .trim_start_matches('/')
298 .split('/')
299 .filter(|s| !s.is_empty())
300 .collect();
301
302 let mut group = self.root_group()?;
303 for &part in &parts {
304 group = group.group(part)?;
305 }
306
307 Ok(group)
308 }
309}
310
#[cfg(test)]
mod tests {
    use super::*;

    /// The default options keep the stock chunk cache geometry.
    #[test]
    fn test_open_options_default() {
        let OpenOptions {
            chunk_cache_bytes,
            chunk_cache_slots,
            ..
        } = OpenOptions::default();

        assert_eq!(chunk_cache_bytes, 64 * 1024 * 1024);
        assert_eq!(chunk_cache_slots, 521);
    }

    /// Bytes that are not an HDF5 image must be rejected at open time.
    #[test]
    fn test_invalid_file() {
        let outcome = Hdf5File::from_bytes(b"this is not an HDF5 file");
        assert!(outcome.is_err());
    }
}