Skip to main content

oxiphysics_io/hdf5_io/
file.rs

1// Copyright 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! HDF5 file: root container, superblock, named types, path utilities.
5
6#![allow(dead_code)]
7
8use std::collections::HashMap;
9
10use super::dataset::Hdf5Dataset;
11use super::group::Hdf5Group;
12use super::types::{AttrValue, Hdf5Dtype, Hdf5Error, Hdf5Result, LockState, ParallelHdf5Meta};
13
14// ---------------------------------------------------------------------------
15// Superblock & object-header simulation
16// ---------------------------------------------------------------------------
17
/// In-memory stand-in for the HDF5 superblock, i.e. the file-format metadata.
///
/// A real HDF5 file keeps its (v2) superblock in bytes 0-511, where it records
/// the root-group offset, free-space information and so on.  This mock keeps
/// the same kind of information in an ordinary struct instead of raw bytes.
#[derive(Clone, Debug)]
pub struct Hdf5Superblock {
    /// Superblock format version (0, 1, 2 or 3).
    pub version: u8,
    /// Simulated total size of the file, in bytes.
    pub file_size: u64,
    /// Byte offset at which the root group's object header starts.
    pub root_obj_header_offset: u64,
    /// Byte offset of the end-of-file marker.
    pub eof_address: u64,
    /// Width of a length field in bytes (4 or 8, for 32/64-bit files).
    pub size_of_lengths: u8,
    /// Width of an offset field in bytes.
    pub size_of_offsets: u8,
}
38
39impl Default for Hdf5Superblock {
40    fn default() -> Self {
41        Self {
42            version: 2,
43            file_size: 0,
44            root_obj_header_offset: 512,
45            eof_address: 0,
46            size_of_lengths: 8,
47            size_of_offsets: 8,
48        }
49    }
50}
51
/// In-memory stand-in for an HDF5 object header, the metadata record that
/// accompanies each individual object.
#[derive(Clone, Debug)]
pub struct Hdf5ObjectHeader {
    /// Kind of object described: `"group"` or `"dataset"`.
    pub object_type: String,
    /// Simulated byte address at which this header lives.
    pub address: u64,
    /// How many messages the header contains.
    pub n_messages: u32,
    /// Overall size of the header, in simulated bytes.
    pub header_size: u32,
}
64
65// ---------------------------------------------------------------------------
66// Named datatype registry
67// ---------------------------------------------------------------------------
68
69/// Registry for named (committed) datatypes.
70#[derive(Debug, Clone, Default)]
71pub struct NamedDatatypeRegistry {
72    /// Registered types: name -> dtype.
73    types: HashMap<String, Hdf5Dtype>,
74}
75
76impl NamedDatatypeRegistry {
77    /// Register a named type.  Returns an error if the name is already taken.
78    pub fn register(&mut self, name: &str, dtype: Hdf5Dtype) -> Hdf5Result<()> {
79        if self.types.contains_key(name) {
80            return Err(Hdf5Error::AlreadyExists(name.to_string()));
81        }
82        self.types.insert(name.to_string(), dtype);
83        Ok(())
84    }
85
86    /// Look up a named type by name.
87    pub fn get(&self, name: &str) -> Hdf5Result<&Hdf5Dtype> {
88        self.types
89            .get(name)
90            .ok_or_else(|| Hdf5Error::NotFound(format!("named type '{name}'")))
91    }
92
93    /// List all registered type names.
94    pub fn names(&self) -> Vec<String> {
95        let mut v: Vec<String> = self.types.keys().cloned().collect();
96        v.sort();
97        v
98    }
99}
100
101// ---------------------------------------------------------------------------
102// HDF5 File (root container)
103// ---------------------------------------------------------------------------
104
105/// Top-level HDF5 mock file containing a group hierarchy, named-type registry
106/// and file-level metadata.
107#[derive(Debug, Clone)]
108pub struct Hdf5File {
109    /// Virtual filename.
110    pub filename: String,
111    /// Root group ("/").
112    pub root: Hdf5Group,
113    /// Simulated superblock.
114    pub superblock: Hdf5Superblock,
115    /// Named (committed) datatype registry.
116    pub named_types: NamedDatatypeRegistry,
117    /// Current file lock state.
118    pub lock_state: LockState,
119    /// Optional parallel write metadata.
120    pub parallel_meta: Option<ParallelHdf5Meta>,
121    /// Simulation clock tick (incremented on each write).
122    pub write_tick: u64,
123}
124
125impl Hdf5File {
126    /// Create a new empty in-memory HDF5 file.
127    pub fn new(filename: &str) -> Self {
128        Self {
129            filename: filename.to_string(),
130            root: Hdf5Group::new("/"),
131            superblock: Hdf5Superblock::default(),
132            named_types: NamedDatatypeRegistry::default(),
133            lock_state: LockState::Unlocked,
134            parallel_meta: None,
135            write_tick: 0,
136        }
137    }
138
139    // -- lock / unlock --
140
141    /// Acquire an exclusive write lock (simulated).
142    ///
143    /// Returns `Err(Hdf5Error::FileLocked)` if the file is already locked.
144    pub fn lock_write(&mut self, owner_id: u64) -> Hdf5Result<()> {
145        match self.lock_state {
146            LockState::Unlocked => {
147                self.lock_state = LockState::WriteLocked { owner_id };
148                Ok(())
149            }
150            _ => Err(Hdf5Error::FileLocked),
151        }
152    }
153
154    /// Release the write lock.
155    pub fn unlock(&mut self) {
156        self.lock_state = LockState::Unlocked;
157    }
158
159    /// Acquire a shared read lock (multiple readers allowed).
160    pub fn lock_read(&mut self) -> Hdf5Result<()> {
161        match self.lock_state {
162            LockState::Unlocked => {
163                self.lock_state = LockState::ReadLocked { n_readers: 1 };
164                Ok(())
165            }
166            LockState::ReadLocked { n_readers } => {
167                self.lock_state = LockState::ReadLocked {
168                    n_readers: n_readers + 1,
169                };
170                Ok(())
171            }
172            LockState::WriteLocked { .. } => Err(Hdf5Error::FileLocked),
173        }
174    }
175
176    /// Return `true` if the file is write-locked.
177    pub fn is_locked(&self) -> bool {
178        matches!(self.lock_state, LockState::WriteLocked { .. })
179    }
180
181    // -- superblock helpers --
182
183    /// Simulate updating the superblock's EOF address after a write.
184    pub fn update_eof(&mut self, new_eof: u64) {
185        self.superblock.eof_address = new_eof;
186        self.superblock.file_size = new_eof;
187    }
188
189    // -- group traversal --
190
191    /// Create a group at the given slash-separated path (relative to root).
192    ///
193    /// Intermediate groups are created as needed (like `mkdir -p`).
194    pub fn create_group(&mut self, path: &str) -> Hdf5Result<()> {
195        if self.is_locked() {
196            return Err(Hdf5Error::FileLocked);
197        }
198        let parts = split_path(path);
199        let mut current = &mut self.root;
200        for part in parts {
201            if !current.groups.contains_key(part) {
202                current
203                    .groups
204                    .insert(part.to_string(), Hdf5Group::new(part));
205            }
206            // SAFETY: We just ensured the key exists above.
207            current = current
208                .groups
209                .get_mut(part)
210                .unwrap_or_else(|| unreachable!());
211        }
212        Ok(())
213    }
214
215    /// Return a shared reference to the group at `path`.
216    pub fn open_group(&self, path: &str) -> Hdf5Result<&Hdf5Group> {
217        let parts = split_path(path);
218        let mut current = &self.root;
219        for part in parts {
220            current = current
221                .groups
222                .get(part)
223                .ok_or_else(|| Hdf5Error::NotFound(format!("group '{path}'")))?;
224        }
225        Ok(current)
226    }
227
228    /// Return a mutable reference to the group at `path`.
229    pub fn open_group_mut(&mut self, path: &str) -> Hdf5Result<&mut Hdf5Group> {
230        if self.is_locked() {
231            return Err(Hdf5Error::FileLocked);
232        }
233        let parts = split_path(path);
234        let mut current = &mut self.root;
235        for part in parts {
236            current = current
237                .groups
238                .get_mut(part)
239                .ok_or_else(|| Hdf5Error::NotFound(format!("group '{path}'")))?;
240        }
241        Ok(current)
242    }
243
244    // -- dataset access --
245
246    /// Create a dataset at `group_path/dataset_name`.
247    #[allow(clippy::too_many_arguments)]
248    pub fn create_dataset(
249        &mut self,
250        group_path: &str,
251        name: &str,
252        shape: Vec<usize>,
253        dtype: Hdf5Dtype,
254    ) -> Hdf5Result<()> {
255        if self.is_locked() {
256            return Err(Hdf5Error::FileLocked);
257        }
258        self.write_tick += 1;
259        let group = self.open_group_mut(group_path)?;
260        group.create_dataset(name, shape, dtype)
261    }
262
263    /// Return a shared reference to a dataset at `group_path/dataset_name`.
264    pub fn open_dataset(&self, group_path: &str, name: &str) -> Hdf5Result<&Hdf5Dataset> {
265        let group = self.open_group(group_path)?;
266        group.open_dataset(name)
267    }
268
269    /// Return a mutable reference to a dataset.
270    pub fn open_dataset_mut(
271        &mut self,
272        group_path: &str,
273        name: &str,
274    ) -> Hdf5Result<&mut Hdf5Dataset> {
275        if self.is_locked() {
276            return Err(Hdf5Error::FileLocked);
277        }
278        let group = self.open_group_mut(group_path)?;
279        group.open_dataset_mut(name)
280    }
281
282    // -- attribute helpers --
283
284    /// Set an attribute on a dataset.
285    pub fn set_dataset_attr(
286        &mut self,
287        group_path: &str,
288        dataset: &str,
289        attr_name: &str,
290        value: AttrValue,
291    ) -> Hdf5Result<()> {
292        if self.is_locked() {
293            return Err(Hdf5Error::FileLocked);
294        }
295        let ds = self.open_dataset_mut(group_path, dataset)?;
296        ds.set_attr(attr_name, value);
297        Ok(())
298    }
299
300    /// Get an attribute from a dataset.
301    pub fn get_dataset_attr(
302        &self,
303        group_path: &str,
304        dataset: &str,
305        attr_name: &str,
306    ) -> Hdf5Result<&AttrValue> {
307        let ds = self.open_dataset(group_path, dataset)?;
308        ds.get_attr(attr_name)
309    }
310
311    // -- named types --
312
313    /// Register a named (committed) datatype.
314    pub fn commit_datatype(&mut self, name: &str, dtype: Hdf5Dtype) -> Hdf5Result<()> {
315        self.named_types.register(name, dtype)
316    }
317
318    /// Look up a named datatype.
319    pub fn find_named_type(&self, name: &str) -> Hdf5Result<&Hdf5Dtype> {
320        self.named_types.get(name)
321    }
322
323    // -- links --
324
325    /// Create a soft link inside `group_path`.
326    pub fn create_soft_link(
327        &mut self,
328        group_path: &str,
329        link_name: &str,
330        target: &str,
331    ) -> Hdf5Result<()> {
332        if self.is_locked() {
333            return Err(Hdf5Error::FileLocked);
334        }
335        let group = self.open_group_mut(group_path)?;
336        group.create_soft_link(link_name, target)
337    }
338
339    /// Create a hard link inside `group_path`.
340    pub fn create_hard_link(
341        &mut self,
342        group_path: &str,
343        link_name: &str,
344        target: &str,
345    ) -> Hdf5Result<()> {
346        if self.is_locked() {
347            return Err(Hdf5Error::FileLocked);
348        }
349        let group = self.open_group_mut(group_path)?;
350        group.create_hard_link(link_name, target)
351    }
352
353    // -- parallel metadata --
354
355    /// Attach parallel HDF5 metadata for an N-rank job.
356    pub fn init_parallel(&mut self, n_ranks: usize) {
357        self.parallel_meta = Some(ParallelHdf5Meta::new(n_ranks));
358    }
359
360    /// Record bytes written by MPI rank `rank`.
361    pub fn record_rank_bytes(&mut self, rank: usize, bytes: u64) {
362        if let Some(ref mut meta) = self.parallel_meta {
363            meta.record_rank_bytes(rank, bytes);
364        }
365    }
366}
367
368// ---------------------------------------------------------------------------
369// Path utilities
370// ---------------------------------------------------------------------------
371
/// Break a slash-separated HDF5 path into its non-empty components.
///
/// Leading, trailing and repeated slashes yield empty segments, which are
/// dropped; `""` and `"/"` therefore both produce an empty vector.
pub(crate) fn split_path(path: &str) -> Vec<&str> {
    let mut components = Vec::new();
    for segment in path.split('/') {
        if segment.is_empty() {
            continue;
        }
        components.push(segment);
    }
    components
}