1pub mod checksum;
2pub mod error;
3pub mod io;
4
5pub mod superblock;
7
8pub mod btree_v1;
10pub mod btree_v2;
11pub mod chunk_index;
12pub mod extensible_array;
13pub mod fixed_array;
14pub mod fractal_heap;
15pub mod global_heap;
16pub mod local_heap;
17pub mod symbol_table;
18
19pub mod messages;
21pub mod object_header;
22
23pub mod attribute_api;
25pub mod dataset;
26pub mod datatype_api;
27pub mod group;
28pub mod reference;
29
30pub mod filters;
32
33pub mod cache;
35
36use std::collections::HashMap;
37use std::path::Path;
38use std::sync::Arc;
39
40use memmap2::Mmap;
41use cache::ChunkCache;
44use error::{Error, Result};
45use group::Group;
46use io::Cursor;
47use object_header::ObjectHeader;
48use superblock::Superblock;
49
50pub use attribute_api::Attribute;
52use dataset::DatasetTemplate;
53pub use dataset::{Dataset, SliceInfo, SliceInfoElem};
54pub use datatype_api::{
55 dtype_element_size, CompoundField, EnumMember, H5Type, ReferenceType, StringEncoding,
56 StringPadding, StringSize,
57};
58pub use error::ByteOrder;
59pub use filters::FilterRegistry;
60pub use messages::datatype::Datatype;
61
62pub struct OpenOptions {
64 pub chunk_cache_bytes: usize,
66 pub chunk_cache_slots: usize,
68 pub filter_registry: Option<FilterRegistry>,
70}
71
72impl Default for OpenOptions {
73 fn default() -> Self {
74 OpenOptions {
75 chunk_cache_bytes: 64 * 1024 * 1024,
76 chunk_cache_slots: 521,
77 filter_registry: None,
78 }
79 }
80}
81
82pub type HeaderCache = Arc<parking_lot::Mutex<HashMap<u64, Arc<ObjectHeader>>>>;
84
85pub struct Hdf5File {
90 data: FileData,
92 superblock: Superblock,
94 chunk_cache: Arc<ChunkCache>,
96 header_cache: HeaderCache,
98 dataset_path_cache: Arc<parking_lot::Mutex<HashMap<String, Arc<DatasetTemplate>>>>,
100 filter_registry: Arc<FilterRegistry>,
102}
103
104enum FileData {
105 Mmap(Mmap),
106 Bytes(Vec<u8>),
107}
108
109impl FileData {
110 fn as_slice(&self) -> &[u8] {
111 match self {
112 FileData::Mmap(m) => m,
113 FileData::Bytes(b) => b,
114 }
115 }
116}
117
118impl Hdf5File {
119 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
121 Self::open_with_options(path, OpenOptions::default())
122 }
123
124 pub fn open_with_options(path: impl AsRef<Path>, options: OpenOptions) -> Result<Self> {
126 let file = std::fs::File::open(path.as_ref())?;
127 let mmap = unsafe { Mmap::map(&file)? };
131
132 let mut cursor = Cursor::new(&mmap);
133 let superblock = Superblock::parse(&mut cursor)?;
134
135 let cache = Arc::new(ChunkCache::new(
136 options.chunk_cache_bytes,
137 options.chunk_cache_slots,
138 ));
139
140 let registry = options.filter_registry.unwrap_or_default();
141
142 Ok(Hdf5File {
143 data: FileData::Mmap(mmap),
144 superblock,
145 chunk_cache: cache,
146 header_cache: Arc::new(parking_lot::Mutex::new(HashMap::new())),
147 dataset_path_cache: Arc::new(parking_lot::Mutex::new(HashMap::new())),
148 filter_registry: Arc::new(registry),
149 })
150 }
151
152 pub fn from_bytes(data: &[u8]) -> Result<Self> {
156 Self::from_vec(data.to_vec())
157 }
158
159 pub fn from_vec(data: Vec<u8>) -> Result<Self> {
161 let mut cursor = Cursor::new(&data);
162 let superblock = Superblock::parse(&mut cursor)?;
163
164 Ok(Hdf5File {
165 data: FileData::Bytes(data),
166 superblock,
167 chunk_cache: Arc::new(ChunkCache::default()),
168 header_cache: Arc::new(parking_lot::Mutex::new(HashMap::new())),
169 dataset_path_cache: Arc::new(parking_lot::Mutex::new(HashMap::new())),
170 filter_registry: Arc::new(FilterRegistry::default()),
171 })
172 }
173
174 pub fn superblock(&self) -> &Superblock {
176 &self.superblock
177 }
178
179 pub fn get_or_parse_header(&self, addr: u64) -> Result<Arc<ObjectHeader>> {
183 {
184 let cache = self.header_cache.lock();
185 if let Some(hdr) = cache.get(&addr) {
186 return Ok(Arc::clone(hdr));
187 }
188 }
189 let data = self.data.as_slice();
190 let mut hdr = ObjectHeader::parse_at(
191 data,
192 addr,
193 self.superblock.offset_size,
194 self.superblock.length_size,
195 )?;
196 hdr.resolve_shared_messages(
197 data,
198 self.superblock.offset_size,
199 self.superblock.length_size,
200 )?;
201 let arc = Arc::new(hdr);
202 let mut cache = self.header_cache.lock();
203 cache.insert(addr, Arc::clone(&arc));
204 Ok(arc)
205 }
206
207 pub fn root_group(&self) -> Result<Group<'_>> {
209 let data = self.data.as_slice();
210 let addr = self.superblock.root_object_header_address()?;
211
212 Ok(Group::new(
213 data,
214 addr,
215 "/".to_string(),
216 self.superblock.offset_size,
217 self.superblock.length_size,
218 addr, self.chunk_cache.clone(),
220 self.header_cache.clone(),
221 self.filter_registry.clone(),
222 ))
223 }
224
225 pub fn dataset(&self, path: &str) -> Result<Dataset<'_>> {
227 let parts: Vec<&str> = path
228 .trim_start_matches('/')
229 .split('/')
230 .filter(|s| !s.is_empty())
231 .collect();
232 let normalized_path = format!("/{}", parts.join("/"));
233
234 if parts.is_empty() {
235 return Err(Error::DatasetNotFound(path.to_string()).with_context(path));
236 }
237
238 if let Some(template) = self
239 .dataset_path_cache
240 .lock()
241 .get(&normalized_path)
242 .cloned()
243 {
244 return Ok(Dataset::from_template(
245 self.data.as_slice(),
246 self.superblock.offset_size,
247 self.superblock.length_size,
248 template,
249 self.chunk_cache.clone(),
250 self.filter_registry.clone(),
251 ));
252 }
253
254 let mut group = self.root_group()?;
255 for &part in &parts[..parts.len() - 1] {
256 group = group.group(part).map_err(|e| e.with_context(path))?;
257 }
258
259 let dataset = group
260 .dataset(parts[parts.len() - 1])
261 .map_err(|e| e.with_context(path))?;
262 self.dataset_path_cache
263 .lock()
264 .insert(normalized_path, dataset.template());
265 Ok(dataset)
266 }
267
268 pub fn group(&self, path: &str) -> Result<Group<'_>> {
270 let parts: Vec<&str> = path
271 .trim_start_matches('/')
272 .split('/')
273 .filter(|s| !s.is_empty())
274 .collect();
275
276 let mut group = self.root_group()?;
277 for &part in &parts {
278 group = group.group(part)?;
279 }
280
281 Ok(group)
282 }
283}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// The default options carry the documented cache sizing.
    #[test]
    fn test_open_options_default() {
        let OpenOptions {
            chunk_cache_bytes,
            chunk_cache_slots,
            ..
        } = OpenOptions::default();

        assert_eq!(chunk_cache_bytes, 64 * 1024 * 1024);
        assert_eq!(chunk_cache_slots, 521);
    }

    /// Bytes that do not form an HDF5 file must be rejected.
    #[test]
    fn test_invalid_file() {
        let not_hdf5: &[u8] = b"this is not an HDF5 file";

        assert!(Hdf5File::from_bytes(not_hdf5).is_err());
    }
}
302}