1#![allow(dead_code)]
9
10use super::convenience::{
11 list_datasets_recursive, read_vlen_strings, write_f64_dataset, write_matrix_f64,
12 write_vlen_strings,
13};
14use super::file::Hdf5File;
15use super::group::Hdf5Group;
16use super::types::{AttrValue, Hdf5Dtype, Hdf5Error, Hdf5Result, Hyperslab};
17
18#[derive(Debug, Clone)]
27pub struct Hdf5FileImage {
28 pub version: u8,
30 pub group_segments: Vec<(String, String)>,
32 pub dataset_segments: Vec<(String, String, Vec<usize>, Vec<f64>)>,
34 pub attr_segments: Vec<(String, String, String, String)>,
36}
37
38impl Hdf5FileImage {
39 pub fn from_file(file: &Hdf5File) -> Self {
43 let mut img = Self {
44 version: 1,
45 group_segments: Vec::new(),
46 dataset_segments: Vec::new(),
47 attr_segments: Vec::new(),
48 };
49 Self::capture_group(file, &file.root, "", &mut img);
50 img
51 }
52
53 fn capture_group(_file: &Hdf5File, group: &Hdf5Group, prefix: &str, img: &mut Self) {
54 let path = if prefix.is_empty() {
55 "/".to_string()
56 } else {
57 prefix.to_string()
58 };
59 let attrs_str = format!("{:?}", group.attributes.keys().collect::<Vec<_>>());
60 img.group_segments.push((path.clone(), attrs_str));
61
62 for (ds_name, ds) in &group.datasets {
63 if let Ok(flat) = ds.read_f64() {
64 img.dataset_segments
65 .push((path.clone(), ds_name.clone(), ds.shape.clone(), flat));
66 }
67 for (attr_name, attr_val) in &ds.attributes {
68 let val_str = format!("{attr_val:?}");
69 img.attr_segments
70 .push((path.clone(), ds_name.clone(), attr_name.clone(), val_str));
71 }
72 }
73 for (child_name, child) in &group.groups {
74 let child_prefix = if prefix.is_empty() {
75 child_name.clone()
76 } else {
77 format!("{prefix}/{child_name}")
78 };
79 Self::capture_group(_file, child, &child_prefix, img);
80 }
81 }
82
83 pub fn restore_to_file(&self, file: &mut Hdf5File) -> Hdf5Result<()> {
85 for (grp, name, shape, data) in &self.dataset_segments {
86 let grp_path = if grp == "/" { "" } else { grp.as_str() };
87 if !grp_path.is_empty() {
89 file.create_group(grp_path)?;
90 }
91 let _ = file.create_dataset(grp_path, name, shape.clone(), Hdf5Dtype::Float64);
92 let ds = file.open_dataset_mut(grp_path, name)?;
93 ds.write_f64(data)?;
94 }
95 Ok(())
96 }
97
98 pub fn n_datasets(&self) -> usize {
100 self.dataset_segments.len()
101 }
102}
103
104#[derive(Debug, Clone)]
110pub struct RegionReference {
111 pub group: String,
113 pub dataset: String,
115 pub slab: Hyperslab,
117}
118
119impl RegionReference {
120 pub fn new(group: &str, dataset: &str, slab: Hyperslab) -> Self {
122 Self {
123 group: group.to_string(),
124 dataset: dataset.to_string(),
125 slab,
126 }
127 }
128
129 pub fn dereference_f64(&self, file: &Hdf5File) -> Hdf5Result<Vec<f64>> {
131 let ds = file.open_dataset(&self.group, &self.dataset)?;
132 ds.read_hyperslab_f64(&self.slab)
133 }
134}
135
136pub fn write_3d_grid_f64(
144 file: &mut Hdf5File,
145 group: &str,
146 name: &str,
147 nx: usize,
148 ny: usize,
149 nz: usize,
150 data: &[f64],
151) -> Hdf5Result<()> {
152 assert_eq!(data.len(), nx * ny * nz, "3D grid size mismatch");
153 file.create_group(group)?;
154 let _ = file.create_dataset(group, name, vec![nx, ny, nz], Hdf5Dtype::Float64);
155 let ds = file.open_dataset_mut(group, name)?;
156 ds.write_f64(data)?;
157 ds.set_attr("grid_type", AttrValue::String("scalar_3d".to_string()));
158 Ok(())
159}
160
161pub fn read_3d_grid_f64(
163 file: &Hdf5File,
164 group: &str,
165 name: &str,
166) -> Hdf5Result<(usize, usize, usize, Vec<f64>)> {
167 let ds = file.open_dataset(group, name)?;
168 if ds.shape.len() != 3 {
169 return Err(Hdf5Error::Generic(format!(
170 "expected 3D dataset, got {}D",
171 ds.shape.len()
172 )));
173 }
174 let data = ds.read_f64()?;
175 Ok((ds.shape[0], ds.shape[1], ds.shape[2], data))
176}
177
178pub fn write_forces(
184 file: &mut Hdf5File,
185 group: &str,
186 forces: &[f64],
187 n_frames: usize,
188 n_atoms: usize,
189) -> Hdf5Result<()> {
190 file.create_group(group)?;
191 let _ = file.create_dataset(
192 group,
193 "forces",
194 vec![n_frames, n_atoms, 3],
195 Hdf5Dtype::Float64,
196 );
197 let ds = file.open_dataset_mut(group, "forces")?;
198 ds.write_f64(forces)?;
199 ds.set_attr("units", AttrValue::String("kJ/mol/nm".to_string()));
200 Ok(())
201}
202
203pub fn write_energies(file: &mut Hdf5File, group: &str, energies: &[f64]) -> Hdf5Result<()> {
209 write_f64_dataset(file, group, "potential_energy", energies)?;
210 file.set_dataset_attr(
211 group,
212 "potential_energy",
213 "units",
214 AttrValue::String("kJ/mol".to_string()),
215 )?;
216 Ok(())
217}
218
219pub fn write_atom_types(file: &mut Hdf5File, group: &str, atom_types: &[String]) -> Hdf5Result<()> {
225 write_vlen_strings(file, group, "atom_types", atom_types)
226}
227
228pub fn read_atom_types(file: &Hdf5File, group: &str) -> Hdf5Result<Vec<String>> {
230 read_vlen_strings(file, group, "atom_types")
231}
232
233pub fn write_distance_matrix(
239 file: &mut Hdf5File,
240 group: &str,
241 positions: &[[f64; 3]],
242) -> Hdf5Result<()> {
243 let n = positions.len();
244 let mut mat = vec![0.0_f64; n * n];
245 for i in 0..n {
246 for j in 0..n {
247 let dx = positions[i][0] - positions[j][0];
248 let dy = positions[i][1] - positions[j][1];
249 let dz = positions[i][2] - positions[j][2];
250 mat[i * n + j] = (dx * dx + dy * dy + dz * dz).sqrt();
251 }
252 }
253 write_matrix_f64(file, group, "distance_matrix", n, n, &mat)
254}
255
256pub fn walk_datasets(file: &Hdf5File) -> Vec<String> {
262 list_datasets_recursive(&file.root, "")
263}
264
265pub fn walk_groups(group: &Hdf5Group, prefix: &str) -> Vec<String> {
267 let mut paths = Vec::new();
268 for (name, child) in &group.groups {
269 let p = format!("{prefix}/{name}");
270 paths.push(p.clone());
271 paths.extend(walk_groups(child, &p));
272 }
273 paths.sort();
274 paths
275}
276
277#[derive(Debug, Clone)]
283pub struct PhysicsFileHeader {
284 pub code_name: String,
286 pub code_version: String,
288 pub title: String,
290 pub created: String,
292 pub n_atoms: usize,
294 pub dt_ps: f64,
296 pub total_time_ps: f64,
298}
299
300impl PhysicsFileHeader {
301 pub fn write_to_file(&self, file: &mut Hdf5File) -> Hdf5Result<()> {
303 file.create_group("metadata")?;
305 let grp = file.open_group_mut("metadata")?;
306 grp.set_attr("code_name", AttrValue::String(self.code_name.clone()));
307 grp.set_attr("code_version", AttrValue::String(self.code_version.clone()));
308 grp.set_attr("title", AttrValue::String(self.title.clone()));
309 grp.set_attr("created", AttrValue::String(self.created.clone()));
310 grp.set_attr("n_atoms", AttrValue::Int32(self.n_atoms as i32));
311 grp.set_attr("dt_ps", AttrValue::Float64(self.dt_ps));
312 grp.set_attr("total_time_ps", AttrValue::Float64(self.total_time_ps));
313 Ok(())
314 }
315
316 pub fn read_from_file(file: &Hdf5File) -> Hdf5Result<Self> {
318 let grp = file.open_group("metadata")?;
319 let code_name = match grp.get_attr("code_name")? {
320 AttrValue::String(s) => s.clone(),
321 _ => String::new(),
322 };
323 let n_atoms = match grp.get_attr("n_atoms")? {
324 AttrValue::Int32(v) => *v as usize,
325 _ => 0,
326 };
327 let dt_ps = match grp.get_attr("dt_ps")? {
328 AttrValue::Float64(v) => *v,
329 _ => 0.0,
330 };
331 let total_time_ps = match grp.get_attr("total_time_ps")? {
332 AttrValue::Float64(v) => *v,
333 _ => 0.0,
334 };
335 Ok(Self {
336 code_name,
337 code_version: String::new(),
338 title: String::new(),
339 created: String::new(),
340 n_atoms,
341 dt_ps,
342 total_time_ps,
343 })
344 }
345}
346
347#[derive(Debug, Clone)]
353pub struct RingTrajectory {
354 pub capacity: usize,
356 pub storage: Vec<f64>,
358 pub n_atoms: usize,
360 pub head: usize,
362 pub total_appended: usize,
364}
365
366impl RingTrajectory {
367 pub fn new(capacity: usize, n_atoms: usize) -> Self {
369 Self {
370 capacity,
371 storage: vec![0.0_f64; capacity * n_atoms * 3],
372 n_atoms,
373 head: 0,
374 total_appended: 0,
375 }
376 }
377
378 pub fn append(&mut self, positions: &[f64]) {
380 assert_eq!(positions.len(), self.n_atoms * 3);
381 let base = self.head * self.n_atoms * 3;
382 self.storage[base..base + self.n_atoms * 3].copy_from_slice(positions);
383 self.head = (self.head + 1) % self.capacity;
384 self.total_appended += 1;
385 }
386
387 pub fn n_stored(&self) -> usize {
389 self.total_appended.min(self.capacity)
390 }
391
392 pub fn read_frame(&self, i: usize) -> Hdf5Result<Vec<[f64; 3]>> {
394 let n = self.n_stored();
395 if i >= n {
396 return Err(Hdf5Error::NotFound(format!("ring frame {i}")));
397 }
398 let oldest_slot = if self.total_appended < self.capacity {
399 0
400 } else {
401 self.head
402 };
403 let slot = (oldest_slot + i) % self.capacity;
404 let base = slot * self.n_atoms * 3;
405 let out: Vec<[f64; 3]> = (0..self.n_atoms)
406 .map(|a| {
407 let p = base + a * 3;
408 [self.storage[p], self.storage[p + 1], self.storage[p + 2]]
409 })
410 .collect();
411 Ok(out)
412 }
413}
414
415pub fn merge_files(src: &Hdf5File, dst: &mut Hdf5File) -> Hdf5Result<usize> {
421 let paths = list_datasets_recursive(&src.root, "");
422 let mut merged = 0;
423 for path in paths {
424 let parts: Vec<&str> = path.trim_start_matches('/').rsplitn(2, '/').collect();
426 let (name, group) = if parts.len() == 2 {
427 (parts[0], parts[1])
428 } else {
429 (parts[0], "")
430 };
431 let src_grp = if group.is_empty() {
433 &src.root
434 } else {
435 match src.open_group(group) {
436 Ok(g) => g,
437 Err(_) => continue,
438 }
439 };
440 let src_ds = match src_grp.open_dataset(name) {
441 Ok(ds) => ds.clone(),
442 Err(_) => continue,
443 };
444 if !group.is_empty() {
446 dst.create_group(group)?;
447 }
448 let dst_grp = if group.is_empty() {
449 &mut dst.root
450 } else {
451 match dst.open_group_mut(group) {
452 Ok(g) => g,
453 Err(_) => continue,
454 }
455 };
456 if dst_grp.datasets.contains_key(name) {
457 continue;
458 }
459 dst_grp.datasets.insert(name.to_string(), src_ds);
460 merged += 1;
461 }
462 Ok(merged)
463}
464
465pub fn write_snapshot(
471 file: &mut Hdf5File,
472 group: &str,
473 positions: &[[f64; 3]],
474 atom_types: &[String],
475) -> Hdf5Result<()> {
476 let n = positions.len();
477 assert_eq!(n, atom_types.len());
478 let flat: Vec<f64> = positions.iter().flat_map(|p| p.iter().cloned()).collect();
479 file.create_group(group)?;
480 file.create_dataset(group, "positions", vec![n, 3], Hdf5Dtype::Float64)?;
481 file.open_dataset_mut(group, "positions")?
482 .write_f64(&flat)?;
483 write_vlen_strings(file, group, "atom_types", atom_types)?;
484 Ok(())
485}
486
487pub fn read_snapshot(file: &Hdf5File, group: &str) -> Hdf5Result<(Vec<[f64; 3]>, Vec<String>)> {
489 let ds = file.open_dataset(group, "positions")?;
490 let flat = ds.read_f64()?;
491 let n = ds.shape[0];
492 let positions: Vec<[f64; 3]> = (0..n)
493 .map(|i| [flat[i * 3], flat[i * 3 + 1], flat[i * 3 + 2]])
494 .collect();
495 let types = read_vlen_strings(file, group, "atom_types")?;
496 Ok((positions, types))
497}
498
499pub fn write_bfactors(file: &mut Hdf5File, group: &str, bfactors: &[f64]) -> Hdf5Result<()> {
505 write_f64_dataset(file, group, "bfactor", bfactors)?;
506 file.set_dataset_attr(
507 group,
508 "bfactor",
509 "units",
510 AttrValue::String("Angstrom^2".to_string()),
511 )?;
512 Ok(())
513}
514
515#[derive(Debug, Clone)]
521pub struct FileStats {
522 pub n_datasets: usize,
524 pub total_elements: usize,
526 pub global_min: f64,
528 pub global_max: f64,
530 pub global_mean: f64,
532}
533
534impl FileStats {
535 pub fn compute(file: &Hdf5File) -> Self {
537 let paths = list_datasets_recursive(&file.root, "");
538 let mut all_data: Vec<f64> = Vec::new();
539 let mut n_ds = 0;
540
541 for path in &paths {
542 let parts: Vec<&str> = path.trim_start_matches('/').rsplitn(2, '/').collect();
543 let (name, group) = if parts.len() == 2 {
544 (parts[0], parts[1])
545 } else {
546 (parts[0], "")
547 };
548 let src_grp: &Hdf5Group = if group.is_empty() {
549 &file.root
550 } else {
551 match file.open_group(group) {
552 Ok(g) => g,
553 Err(_) => continue,
554 }
555 };
556 if let Ok(ds) = src_grp.open_dataset(name)
557 && let Ok(data) = ds.read_f64()
558 {
559 all_data.extend_from_slice(&data);
560 n_ds += 1;
561 }
562 }
563
564 if all_data.is_empty() {
565 return Self {
566 n_datasets: n_ds,
567 total_elements: 0,
568 global_min: 0.0,
569 global_max: 0.0,
570 global_mean: 0.0,
571 };
572 }
573 let min = all_data.iter().cloned().fold(f64::INFINITY, f64::min);
574 let max = all_data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
575 let mean = all_data.iter().sum::<f64>() / all_data.len() as f64;
576
577 Self {
578 n_datasets: n_ds,
579 total_elements: all_data.len(),
580 global_min: min,
581 global_max: max,
582 global_mean: mean,
583 }
584 }
585}
586
587#[derive(Debug, Clone)]
596pub struct ExtendableDataset {
597 pub name: String,
599 pub data: Vec<f64>,
601 pub chunk_size: usize,
603}
604
605impl ExtendableDataset {
606 pub fn new(name: &str, chunk_size: usize) -> Self {
608 Self {
609 name: name.to_string(),
610 data: Vec::new(),
611 chunk_size: chunk_size.max(1),
612 }
613 }
614
615 pub fn extend(&mut self, values: &[f64]) {
617 self.data.extend_from_slice(values);
618 }
619
620 pub fn flush(&self, file: &mut Hdf5File, group: &str) -> Hdf5Result<()> {
622 write_f64_dataset(file, group, &self.name, &self.data)
623 }
624
625 pub fn len(&self) -> usize {
627 self.data.len()
628 }
629
630 pub fn is_empty(&self) -> bool {
632 self.data.is_empty()
633 }
634}