Skip to main content

oxiphysics_io/hdf5_io/
file_image.rs

1// Copyright 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! HDF5 file-image serialisation, region references, file statistics,
5//! ring-buffer trajectories, merge helpers, snapshot utilities, and
6//! extendable datasets.
7
8#![allow(dead_code)]
9
10use super::convenience::{
11    list_datasets_recursive, read_vlen_strings, write_f64_dataset, write_matrix_f64,
12    write_vlen_strings,
13};
14use super::file::Hdf5File;
15use super::group::Hdf5Group;
16use super::types::{AttrValue, Hdf5Dtype, Hdf5Error, Hdf5Result, Hyperslab};
17
18// ===========================================================================
19// HDF5 file-image serialisation (binary round-trip simulation)
20// ===========================================================================
21
22/// A byte-oriented image of an in-memory HDF5 file.
23///
24/// Encodes groups, datasets and attributes into a compact binary
25/// representation for testing checkpoint / restart round-trip workflows.
26#[derive(Debug, Clone)]
27pub struct Hdf5FileImage {
28    /// Version tag.
29    pub version: u8,
30    /// Serialised group segments: `(path, attr_json)`.
31    pub group_segments: Vec<(String, String)>,
32    /// Serialised dataset segments: `(group_path, name, shape, flat_f64)`.
33    pub dataset_segments: Vec<(String, String, Vec<usize>, Vec<f64>)>,
34    /// Serialised attribute segments: `(group, name, attr_name, value_str)`.
35    pub attr_segments: Vec<(String, String, String, String)>,
36}
37
38impl Hdf5FileImage {
39    /// Build an image from an `Hdf5File`.
40    ///
41    /// Only datasets containing f64 data are captured; other dtypes are skipped.
42    pub fn from_file(file: &Hdf5File) -> Self {
43        let mut img = Self {
44            version: 1,
45            group_segments: Vec::new(),
46            dataset_segments: Vec::new(),
47            attr_segments: Vec::new(),
48        };
49        Self::capture_group(file, &file.root, "", &mut img);
50        img
51    }
52
53    fn capture_group(_file: &Hdf5File, group: &Hdf5Group, prefix: &str, img: &mut Self) {
54        let path = if prefix.is_empty() {
55            "/".to_string()
56        } else {
57            prefix.to_string()
58        };
59        let attrs_str = format!("{:?}", group.attributes.keys().collect::<Vec<_>>());
60        img.group_segments.push((path.clone(), attrs_str));
61
62        for (ds_name, ds) in &group.datasets {
63            if let Ok(flat) = ds.read_f64() {
64                img.dataset_segments
65                    .push((path.clone(), ds_name.clone(), ds.shape.clone(), flat));
66            }
67            for (attr_name, attr_val) in &ds.attributes {
68                let val_str = format!("{attr_val:?}");
69                img.attr_segments
70                    .push((path.clone(), ds_name.clone(), attr_name.clone(), val_str));
71            }
72        }
73        for (child_name, child) in &group.groups {
74            let child_prefix = if prefix.is_empty() {
75                child_name.clone()
76            } else {
77                format!("{prefix}/{child_name}")
78            };
79            Self::capture_group(_file, child, &child_prefix, img);
80        }
81    }
82
83    /// Restore datasets from this image into a (possibly empty) `Hdf5File`.
84    pub fn restore_to_file(&self, file: &mut Hdf5File) -> Hdf5Result<()> {
85        for (grp, name, shape, data) in &self.dataset_segments {
86            let grp_path = if grp == "/" { "" } else { grp.as_str() };
87            // Ensure the group exists.
88            if !grp_path.is_empty() {
89                file.create_group(grp_path)?;
90            }
91            let _ = file.create_dataset(grp_path, name, shape.clone(), Hdf5Dtype::Float64);
92            let ds = file.open_dataset_mut(grp_path, name)?;
93            ds.write_f64(data)?;
94        }
95        Ok(())
96    }
97
98    /// Total number of dataset segments in the image.
99    pub fn n_datasets(&self) -> usize {
100        self.dataset_segments.len()
101    }
102}
103
104// ===========================================================================
105// HDF5 region reference
106// ===========================================================================
107
108/// A region reference: points to a hyperslab inside a specific dataset.
109#[derive(Debug, Clone)]
110pub struct RegionReference {
111    /// Group path containing the target dataset.
112    pub group: String,
113    /// Dataset name.
114    pub dataset: String,
115    /// Hyperslab selection.
116    pub slab: Hyperslab,
117}
118
119impl RegionReference {
120    /// Create a region reference.
121    pub fn new(group: &str, dataset: &str, slab: Hyperslab) -> Self {
122        Self {
123            group: group.to_string(),
124            dataset: dataset.to_string(),
125            slab,
126        }
127    }
128
129    /// Dereference: read the selected data from the file.
130    pub fn dereference_f64(&self, file: &Hdf5File) -> Hdf5Result<Vec<f64>> {
131        let ds = file.open_dataset(&self.group, &self.dataset)?;
132        ds.read_hyperslab_f64(&self.slab)
133    }
134}
135
136// ===========================================================================
137// 3-D grid dataset helper
138// ===========================================================================
139
140/// Write a 3-D scalar field (e.g. electron density) to a dataset.
141///
142/// `data` has shape `[nx x ny x nz]` stored flat in row-major order.
143pub fn write_3d_grid_f64(
144    file: &mut Hdf5File,
145    group: &str,
146    name: &str,
147    nx: usize,
148    ny: usize,
149    nz: usize,
150    data: &[f64],
151) -> Hdf5Result<()> {
152    assert_eq!(data.len(), nx * ny * nz, "3D grid size mismatch");
153    file.create_group(group)?;
154    let _ = file.create_dataset(group, name, vec![nx, ny, nz], Hdf5Dtype::Float64);
155    let ds = file.open_dataset_mut(group, name)?;
156    ds.write_f64(data)?;
157    ds.set_attr("grid_type", AttrValue::String("scalar_3d".to_string()));
158    Ok(())
159}
160
161/// Read a 3-D scalar field, returning `(nx, ny, nz, data)`.
162pub fn read_3d_grid_f64(
163    file: &Hdf5File,
164    group: &str,
165    name: &str,
166) -> Hdf5Result<(usize, usize, usize, Vec<f64>)> {
167    let ds = file.open_dataset(group, name)?;
168    if ds.shape.len() != 3 {
169        return Err(Hdf5Error::Generic(format!(
170            "expected 3D dataset, got {}D",
171            ds.shape.len()
172        )));
173    }
174    let data = ds.read_f64()?;
175    Ok((ds.shape[0], ds.shape[1], ds.shape[2], data))
176}
177
178// ===========================================================================
179// Force file (MD output)
180// ===========================================================================
181
182/// Write atomic forces `[n_atoms x n_frames x 3]` to a group.
183pub fn write_forces(
184    file: &mut Hdf5File,
185    group: &str,
186    forces: &[f64],
187    n_frames: usize,
188    n_atoms: usize,
189) -> Hdf5Result<()> {
190    file.create_group(group)?;
191    let _ = file.create_dataset(
192        group,
193        "forces",
194        vec![n_frames, n_atoms, 3],
195        Hdf5Dtype::Float64,
196    );
197    let ds = file.open_dataset_mut(group, "forces")?;
198    ds.write_f64(forces)?;
199    ds.set_attr("units", AttrValue::String("kJ/mol/nm".to_string()));
200    Ok(())
201}
202
203// ===========================================================================
204// Energy dataset helper
205// ===========================================================================
206
207/// Write a per-frame energy array to a group.
208pub fn write_energies(file: &mut Hdf5File, group: &str, energies: &[f64]) -> Hdf5Result<()> {
209    write_f64_dataset(file, group, "potential_energy", energies)?;
210    file.set_dataset_attr(
211        group,
212        "potential_energy",
213        "units",
214        AttrValue::String("kJ/mol".to_string()),
215    )?;
216    Ok(())
217}
218
219// ===========================================================================
220// Atom types string dataset
221// ===========================================================================
222
223/// Write atom-type strings (e.g. `["H", "H", "O"]`) to a dataset.
224pub fn write_atom_types(file: &mut Hdf5File, group: &str, atom_types: &[String]) -> Hdf5Result<()> {
225    write_vlen_strings(file, group, "atom_types", atom_types)
226}
227
228/// Read atom-type strings from a dataset.
229pub fn read_atom_types(file: &Hdf5File, group: &str) -> Hdf5Result<Vec<String>> {
230    read_vlen_strings(file, group, "atom_types")
231}
232
233// ===========================================================================
234// Pairwise distance matrix helper
235// ===========================================================================
236
237/// Compute and write a pairwise distance matrix for `n_atoms` positions.
238pub fn write_distance_matrix(
239    file: &mut Hdf5File,
240    group: &str,
241    positions: &[[f64; 3]],
242) -> Hdf5Result<()> {
243    let n = positions.len();
244    let mut mat = vec![0.0_f64; n * n];
245    for i in 0..n {
246        for j in 0..n {
247            let dx = positions[i][0] - positions[j][0];
248            let dy = positions[i][1] - positions[j][1];
249            let dz = positions[i][2] - positions[j][2];
250            mat[i * n + j] = (dx * dx + dy * dy + dz * dz).sqrt();
251        }
252    }
253    write_matrix_f64(file, group, "distance_matrix", n, n, &mat)
254}
255
256// ===========================================================================
257// HDF5 iterator: depth-first walk
258// ===========================================================================
259
260/// Collect all dataset paths under a file root using depth-first traversal.
261pub fn walk_datasets(file: &Hdf5File) -> Vec<String> {
262    list_datasets_recursive(&file.root, "")
263}
264
265/// Collect all group paths using depth-first traversal.
266pub fn walk_groups(group: &Hdf5Group, prefix: &str) -> Vec<String> {
267    let mut paths = Vec::new();
268    for (name, child) in &group.groups {
269        let p = format!("{prefix}/{name}");
270        paths.push(p.clone());
271        paths.extend(walk_groups(child, &p));
272    }
273    paths.sort();
274    paths
275}
276
277// ===========================================================================
278// Metadata header for physics files
279// ===========================================================================
280
281/// Standard metadata written to the root group of a physics output file.
282#[derive(Debug, Clone)]
283pub struct PhysicsFileHeader {
284    /// Name of the simulation code.
285    pub code_name: String,
286    /// Code version string.
287    pub code_version: String,
288    /// Title of the simulation.
289    pub title: String,
290    /// ISO-8601 creation timestamp (simulated).
291    pub created: String,
292    /// Number of atoms.
293    pub n_atoms: usize,
294    /// Integration time step in picoseconds.
295    pub dt_ps: f64,
296    /// Total simulation time in picoseconds.
297    pub total_time_ps: f64,
298}
299
300impl PhysicsFileHeader {
301    /// Write all metadata as group-level attributes on the root group.
302    pub fn write_to_file(&self, file: &mut Hdf5File) -> Hdf5Result<()> {
303        // Write metadata dataset under "metadata"
304        file.create_group("metadata")?;
305        let grp = file.open_group_mut("metadata")?;
306        grp.set_attr("code_name", AttrValue::String(self.code_name.clone()));
307        grp.set_attr("code_version", AttrValue::String(self.code_version.clone()));
308        grp.set_attr("title", AttrValue::String(self.title.clone()));
309        grp.set_attr("created", AttrValue::String(self.created.clone()));
310        grp.set_attr("n_atoms", AttrValue::Int32(self.n_atoms as i32));
311        grp.set_attr("dt_ps", AttrValue::Float64(self.dt_ps));
312        grp.set_attr("total_time_ps", AttrValue::Float64(self.total_time_ps));
313        Ok(())
314    }
315
316    /// Read the metadata back from the file.
317    pub fn read_from_file(file: &Hdf5File) -> Hdf5Result<Self> {
318        let grp = file.open_group("metadata")?;
319        let code_name = match grp.get_attr("code_name")? {
320            AttrValue::String(s) => s.clone(),
321            _ => String::new(),
322        };
323        let n_atoms = match grp.get_attr("n_atoms")? {
324            AttrValue::Int32(v) => *v as usize,
325            _ => 0,
326        };
327        let dt_ps = match grp.get_attr("dt_ps")? {
328            AttrValue::Float64(v) => *v,
329            _ => 0.0,
330        };
331        let total_time_ps = match grp.get_attr("total_time_ps")? {
332            AttrValue::Float64(v) => *v,
333            _ => 0.0,
334        };
335        Ok(Self {
336            code_name,
337            code_version: String::new(),
338            title: String::new(),
339            created: String::new(),
340            n_atoms,
341            dt_ps,
342            total_time_ps,
343        })
344    }
345}
346
347// ===========================================================================
348// Ring-buffer trajectory store
349// ===========================================================================
350
351/// A ring-buffer that stores only the last N frames of a trajectory.
352#[derive(Debug, Clone)]
353pub struct RingTrajectory {
354    /// Maximum number of frames.
355    pub capacity: usize,
356    /// Flat position storage: `[capacity * n_atoms * 3]`.
357    pub storage: Vec<f64>,
358    /// Number of atoms.
359    pub n_atoms: usize,
360    /// Write head (index of next slot to write).
361    pub head: usize,
362    /// Total frames ever appended (for determining full/empty state).
363    pub total_appended: usize,
364}
365
366impl RingTrajectory {
367    /// Create a new ring-buffer trajectory.
368    pub fn new(capacity: usize, n_atoms: usize) -> Self {
369        Self {
370            capacity,
371            storage: vec![0.0_f64; capacity * n_atoms * 3],
372            n_atoms,
373            head: 0,
374            total_appended: 0,
375        }
376    }
377
378    /// Append a frame (positions `[n_atoms * 3]`).
379    pub fn append(&mut self, positions: &[f64]) {
380        assert_eq!(positions.len(), self.n_atoms * 3);
381        let base = self.head * self.n_atoms * 3;
382        self.storage[base..base + self.n_atoms * 3].copy_from_slice(positions);
383        self.head = (self.head + 1) % self.capacity;
384        self.total_appended += 1;
385    }
386
387    /// Number of frames currently stored (min of capacity and total appended).
388    pub fn n_stored(&self) -> usize {
389        self.total_appended.min(self.capacity)
390    }
391
392    /// Read the frame at logical index `i` (0 = oldest, n_stored-1 = newest).
393    pub fn read_frame(&self, i: usize) -> Hdf5Result<Vec<[f64; 3]>> {
394        let n = self.n_stored();
395        if i >= n {
396            return Err(Hdf5Error::NotFound(format!("ring frame {i}")));
397        }
398        let oldest_slot = if self.total_appended < self.capacity {
399            0
400        } else {
401            self.head
402        };
403        let slot = (oldest_slot + i) % self.capacity;
404        let base = slot * self.n_atoms * 3;
405        let out: Vec<[f64; 3]> = (0..self.n_atoms)
406            .map(|a| {
407                let p = base + a * 3;
408                [self.storage[p], self.storage[p + 1], self.storage[p + 2]]
409            })
410            .collect();
411        Ok(out)
412    }
413}
414
415// ===========================================================================
416// HDF5 file merge helper
417// ===========================================================================
418
419/// Merge all datasets from `src` into `dst`, skipping duplicates.
420pub fn merge_files(src: &Hdf5File, dst: &mut Hdf5File) -> Hdf5Result<usize> {
421    let paths = list_datasets_recursive(&src.root, "");
422    let mut merged = 0;
423    for path in paths {
424        // path = "/group/name" or "/name"
425        let parts: Vec<&str> = path.trim_start_matches('/').rsplitn(2, '/').collect();
426        let (name, group) = if parts.len() == 2 {
427            (parts[0], parts[1])
428        } else {
429            (parts[0], "")
430        };
431        // Locate source dataset
432        let src_grp = if group.is_empty() {
433            &src.root
434        } else {
435            match src.open_group(group) {
436                Ok(g) => g,
437                Err(_) => continue,
438            }
439        };
440        let src_ds = match src_grp.open_dataset(name) {
441            Ok(ds) => ds.clone(),
442            Err(_) => continue,
443        };
444        // Create in destination (skip if already exists)
445        if !group.is_empty() {
446            dst.create_group(group)?;
447        }
448        let dst_grp = if group.is_empty() {
449            &mut dst.root
450        } else {
451            match dst.open_group_mut(group) {
452                Ok(g) => g,
453                Err(_) => continue,
454            }
455        };
456        if dst_grp.datasets.contains_key(name) {
457            continue;
458        }
459        dst_grp.datasets.insert(name.to_string(), src_ds);
460        merged += 1;
461    }
462    Ok(merged)
463}
464
465// ===========================================================================
466// Atom position round-trip helper
467// ===========================================================================
468
469/// Write atom positions and types together, then read them back.
470pub fn write_snapshot(
471    file: &mut Hdf5File,
472    group: &str,
473    positions: &[[f64; 3]],
474    atom_types: &[String],
475) -> Hdf5Result<()> {
476    let n = positions.len();
477    assert_eq!(n, atom_types.len());
478    let flat: Vec<f64> = positions.iter().flat_map(|p| p.iter().cloned()).collect();
479    file.create_group(group)?;
480    file.create_dataset(group, "positions", vec![n, 3], Hdf5Dtype::Float64)?;
481    file.open_dataset_mut(group, "positions")?
482        .write_f64(&flat)?;
483    write_vlen_strings(file, group, "atom_types", atom_types)?;
484    Ok(())
485}
486
487/// Read snapshot positions and types back.
488pub fn read_snapshot(file: &Hdf5File, group: &str) -> Hdf5Result<(Vec<[f64; 3]>, Vec<String>)> {
489    let ds = file.open_dataset(group, "positions")?;
490    let flat = ds.read_f64()?;
491    let n = ds.shape[0];
492    let positions: Vec<[f64; 3]> = (0..n)
493        .map(|i| [flat[i * 3], flat[i * 3 + 1], flat[i * 3 + 2]])
494        .collect();
495    let types = read_vlen_strings(file, group, "atom_types")?;
496    Ok((positions, types))
497}
498
499// ===========================================================================
500// B-factor / temperature factor dataset
501// ===========================================================================
502
503/// Write per-atom B-factors (crystallographic temperature factors) to the file.
504pub fn write_bfactors(file: &mut Hdf5File, group: &str, bfactors: &[f64]) -> Hdf5Result<()> {
505    write_f64_dataset(file, group, "bfactor", bfactors)?;
506    file.set_dataset_attr(
507        group,
508        "bfactor",
509        "units",
510        AttrValue::String("Angstrom^2".to_string()),
511    )?;
512    Ok(())
513}
514
515// ===========================================================================
516// HDF5 file statistics
517// ===========================================================================
518
519/// Compute basic statistics over all f64 datasets in a file.
520#[derive(Debug, Clone)]
521pub struct FileStats {
522    /// Total number of datasets.
523    pub n_datasets: usize,
524    /// Total number of f64 elements across all datasets.
525    pub total_elements: usize,
526    /// Global minimum value.
527    pub global_min: f64,
528    /// Global maximum value.
529    pub global_max: f64,
530    /// Global mean value.
531    pub global_mean: f64,
532}
533
534impl FileStats {
535    /// Compute statistics from a file.
536    pub fn compute(file: &Hdf5File) -> Self {
537        let paths = list_datasets_recursive(&file.root, "");
538        let mut all_data: Vec<f64> = Vec::new();
539        let mut n_ds = 0;
540
541        for path in &paths {
542            let parts: Vec<&str> = path.trim_start_matches('/').rsplitn(2, '/').collect();
543            let (name, group) = if parts.len() == 2 {
544                (parts[0], parts[1])
545            } else {
546                (parts[0], "")
547            };
548            let src_grp: &Hdf5Group = if group.is_empty() {
549                &file.root
550            } else {
551                match file.open_group(group) {
552                    Ok(g) => g,
553                    Err(_) => continue,
554                }
555            };
556            if let Ok(ds) = src_grp.open_dataset(name)
557                && let Ok(data) = ds.read_f64()
558            {
559                all_data.extend_from_slice(&data);
560                n_ds += 1;
561            }
562        }
563
564        if all_data.is_empty() {
565            return Self {
566                n_datasets: n_ds,
567                total_elements: 0,
568                global_min: 0.0,
569                global_max: 0.0,
570                global_mean: 0.0,
571            };
572        }
573        let min = all_data.iter().cloned().fold(f64::INFINITY, f64::min);
574        let max = all_data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
575        let mean = all_data.iter().sum::<f64>() / all_data.len() as f64;
576
577        Self {
578            n_datasets: n_ds,
579            total_elements: all_data.len(),
580            global_min: min,
581            global_max: max,
582            global_mean: mean,
583        }
584    }
585}
586
587// ===========================================================================
588// Incremental dataset append (simulated extendable datasets)
589// ===========================================================================
590
591/// An incrementally-extendable 1-D dataset.
592///
593/// Simulates HDF5 unlimited-dimension datasets by re-creating the backing
594/// vector on each extension.
595#[derive(Debug, Clone)]
596pub struct ExtendableDataset {
597    /// Dataset name.
598    pub name: String,
599    /// Current data values.
600    pub data: Vec<f64>,
601    /// Chunk size for the underlying buffer.
602    pub chunk_size: usize,
603}
604
605impl ExtendableDataset {
606    /// Create a new extendable dataset.
607    pub fn new(name: &str, chunk_size: usize) -> Self {
608        Self {
609            name: name.to_string(),
610            data: Vec::new(),
611            chunk_size: chunk_size.max(1),
612        }
613    }
614
615    /// Append values to the dataset.
616    pub fn extend(&mut self, values: &[f64]) {
617        self.data.extend_from_slice(values);
618    }
619
620    /// Flush the current data to a group in an HDF5 file.
621    pub fn flush(&self, file: &mut Hdf5File, group: &str) -> Hdf5Result<()> {
622        write_f64_dataset(file, group, &self.name, &self.data)
623    }
624
625    /// Current length.
626    pub fn len(&self) -> usize {
627        self.data.len()
628    }
629
630    /// Return true if empty.
631    pub fn is_empty(&self) -> bool {
632        self.data.is_empty()
633    }
634}