1pub mod checksum;
2pub mod error;
3pub mod io;
4
5pub mod superblock;
7
8pub mod btree_v1;
10pub mod btree_v2;
11pub mod chunk_index;
12pub mod extensible_array;
13pub mod fixed_array;
14pub mod fractal_heap;
15pub mod global_heap;
16pub mod local_heap;
17pub mod symbol_table;
18
19pub mod messages;
21pub mod object_header;
22
23pub mod attribute_api;
25pub mod dataset;
26pub mod datatype_api;
27pub mod group;
28pub mod reference;
29
30pub mod filters;
32
33pub mod cache;
35
36use std::collections::HashMap;
37use std::path::Path;
38use std::sync::Arc;
39
40use memmap2::Mmap;
41use cache::ChunkCache;
44use error::{Error, Result};
45use group::Group;
46use io::Cursor;
47use object_header::ObjectHeader;
48use superblock::Superblock;
49
50pub use attribute_api::Attribute;
52use dataset::DatasetTemplate;
53pub use dataset::{Dataset, SliceInfo, SliceInfoElem};
54pub use datatype_api::{
55 dtype_element_size, CompoundField, EnumMember, H5Type, ReferenceType, StringEncoding,
56 StringPadding, StringSize,
57};
58pub use error::ByteOrder;
59pub use filters::FilterRegistry;
60pub use messages::datatype::Datatype;
61
/// Tunable settings applied when an [`Hdf5File`] is opened.
///
/// `OpenOptions::default()` yields a 64 MiB chunk cache with 521 slots and no
/// custom filter registry.
pub struct OpenOptions {
    /// Byte budget handed to the chunk cache as its first constructor argument.
    pub chunk_cache_bytes: usize,
    /// Slot count handed to the chunk cache as its second constructor argument.
    // NOTE(review): presumably the number of hash slots in the cache — confirm
    // against `cache::ChunkCache::new`.
    pub chunk_cache_slots: usize,
    /// Filter registry to use for this file. When `None`, the file is opened
    /// with `FilterRegistry::default()`.
    pub filter_registry: Option<FilterRegistry>,
}
71
72impl Default for OpenOptions {
73 fn default() -> Self {
74 OpenOptions {
75 chunk_cache_bytes: 64 * 1024 * 1024,
76 chunk_cache_slots: 521,
77 filter_registry: None,
78 }
79 }
80}
81
/// Shared, mutex-guarded map of parsed object headers.
///
/// Keys are the `u64` addresses passed to `Hdf5File::get_or_parse_header`;
/// values are reference-counted so a cached header can be handed out without
/// re-parsing. The outer `Arc` lets the same cache be cloned into groups
/// created from the file.
pub type HeaderCache = Arc<parking_lot::Mutex<HashMap<u64, Arc<ObjectHeader>>>>;
84
/// An opened HDF5 file, backed either by a memory map or by an in-memory
/// byte buffer.
///
/// Groups and datasets obtained from this handle borrow its bytes and share
/// its caches and filter registry.
pub struct Hdf5File {
    /// Raw bytes of the file (memory-mapped or owned).
    data: FileData,
    /// Superblock parsed from the start of `data` when the file was opened.
    superblock: Superblock,
    /// Chunk cache, sized from `OpenOptions`, shared with groups and datasets.
    chunk_cache: Arc<ChunkCache>,
    /// Parsed object headers keyed by address.
    header_cache: HeaderCache,
    /// Dataset templates keyed by normalized absolute path (`"/a/b"`), filled
    /// in by `dataset()` so repeated lookups skip the group traversal.
    dataset_path_cache: Arc<parking_lot::Mutex<HashMap<String, Arc<DatasetTemplate>>>>,
    /// Filter registry shared with groups and datasets created from this file.
    filter_registry: Arc<FilterRegistry>,
}
103
/// Backing storage for the bytes of an opened file.
enum FileData {
    /// A memory-mapped file (`memmap2::Mmap`).
    Mmap(Mmap),
    /// An owned in-memory buffer.
    Bytes(Vec<u8>),
}
108
109impl FileData {
110 fn as_slice(&self) -> &[u8] {
111 match self {
112 FileData::Mmap(m) => m,
113 FileData::Bytes(b) => b,
114 }
115 }
116}
117
118impl Hdf5File {
119 fn from_file_data(data: FileData, options: OpenOptions) -> Result<Self> {
120 let mut cursor = Cursor::new(data.as_slice());
121 let superblock = Superblock::parse(&mut cursor)?;
122 let cache = Arc::new(ChunkCache::new(
123 options.chunk_cache_bytes,
124 options.chunk_cache_slots,
125 ));
126 let registry = options.filter_registry.unwrap_or_default();
127
128 Ok(Hdf5File {
129 data,
130 superblock,
131 chunk_cache: cache,
132 header_cache: Arc::new(parking_lot::Mutex::new(HashMap::new())),
133 dataset_path_cache: Arc::new(parking_lot::Mutex::new(HashMap::new())),
134 filter_registry: Arc::new(registry),
135 })
136 }
137
138 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
140 Self::open_with_options(path, OpenOptions::default())
141 }
142
143 pub fn open_with_options(path: impl AsRef<Path>, options: OpenOptions) -> Result<Self> {
145 let file = std::fs::File::open(path.as_ref())?;
146 let mmap = unsafe { Mmap::map(&file)? };
150 Self::from_mmap_with_options(mmap, options)
151 }
152
153 pub fn from_bytes(data: &[u8]) -> Result<Self> {
157 Self::from_bytes_with_options(data, OpenOptions::default())
158 }
159
160 pub fn from_bytes_with_options(data: &[u8], options: OpenOptions) -> Result<Self> {
164 Self::from_vec_with_options(data.to_vec(), options)
165 }
166
167 pub fn from_vec(data: Vec<u8>) -> Result<Self> {
169 Self::from_vec_with_options(data, OpenOptions::default())
170 }
171
172 pub fn from_vec_with_options(data: Vec<u8>, options: OpenOptions) -> Result<Self> {
174 Self::from_file_data(FileData::Bytes(data), options)
175 }
176
177 pub fn from_mmap_with_options(mmap: Mmap, options: OpenOptions) -> Result<Self> {
181 Self::from_file_data(FileData::Mmap(mmap), options)
182 }
183
184 pub fn superblock(&self) -> &Superblock {
186 &self.superblock
187 }
188
189 pub fn get_or_parse_header(&self, addr: u64) -> Result<Arc<ObjectHeader>> {
193 {
194 let cache = self.header_cache.lock();
195 if let Some(hdr) = cache.get(&addr) {
196 return Ok(Arc::clone(hdr));
197 }
198 }
199 let data = self.data.as_slice();
200 let mut hdr = ObjectHeader::parse_at(
201 data,
202 addr,
203 self.superblock.offset_size,
204 self.superblock.length_size,
205 )?;
206 hdr.resolve_shared_messages(
207 data,
208 self.superblock.offset_size,
209 self.superblock.length_size,
210 )?;
211 let arc = Arc::new(hdr);
212 let mut cache = self.header_cache.lock();
213 cache.insert(addr, Arc::clone(&arc));
214 Ok(arc)
215 }
216
217 pub fn root_group(&self) -> Result<Group<'_>> {
219 let data = self.data.as_slice();
220 let addr = self.superblock.root_object_header_address()?;
221
222 Ok(Group::new(
223 data,
224 addr,
225 "/".to_string(),
226 self.superblock.offset_size,
227 self.superblock.length_size,
228 addr, self.chunk_cache.clone(),
230 self.header_cache.clone(),
231 self.filter_registry.clone(),
232 ))
233 }
234
235 pub fn dataset(&self, path: &str) -> Result<Dataset<'_>> {
237 let parts: Vec<&str> = path
238 .trim_start_matches('/')
239 .split('/')
240 .filter(|s| !s.is_empty())
241 .collect();
242 let normalized_path = format!("/{}", parts.join("/"));
243
244 if parts.is_empty() {
245 return Err(Error::DatasetNotFound(path.to_string()).with_context(path));
246 }
247
248 if let Some(template) = self
249 .dataset_path_cache
250 .lock()
251 .get(&normalized_path)
252 .cloned()
253 {
254 return Ok(Dataset::from_template(
255 self.data.as_slice(),
256 self.superblock.offset_size,
257 self.superblock.length_size,
258 template,
259 self.chunk_cache.clone(),
260 self.filter_registry.clone(),
261 ));
262 }
263
264 let mut group = self.root_group()?;
265 for &part in &parts[..parts.len() - 1] {
266 group = group.group(part).map_err(|e| e.with_context(path))?;
267 }
268
269 let dataset = group
270 .dataset(parts[parts.len() - 1])
271 .map_err(|e| e.with_context(path))?;
272 self.dataset_path_cache
273 .lock()
274 .insert(normalized_path, dataset.template());
275 Ok(dataset)
276 }
277
278 pub fn group(&self, path: &str) -> Result<Group<'_>> {
280 let parts: Vec<&str> = path
281 .trim_start_matches('/')
282 .split('/')
283 .filter(|s| !s.is_empty())
284 .collect();
285
286 let mut group = self.root_group()?;
287 for &part in &parts {
288 group = group.group(part)?;
289 }
290
291 Ok(group)
292 }
293}
294
#[cfg(test)]
mod tests {
    use super::{Hdf5File, OpenOptions};

    /// The default options must keep the documented cache sizing.
    #[test]
    fn test_open_options_default() {
        let OpenOptions {
            chunk_cache_bytes,
            chunk_cache_slots,
            ..
        } = OpenOptions::default();

        assert_eq!(chunk_cache_bytes, 64 * 1024 * 1024);
        assert_eq!(chunk_cache_slots, 521);
    }

    /// Bytes that do not form an HDF5 file must be rejected, not accepted.
    #[test]
    fn test_invalid_file() {
        let bogus: &[u8] = b"this is not an HDF5 file";
        assert!(Hdf5File::from_bytes(bogus).is_err());
    }
}