Skip to main content

oxiphysics_io/
scientific_formats.rs

1// Copyright 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! Scientific data format support for OxiPhysics.
5//!
6//! Provides read/write support for Gaussian Cube files, Molden geometry files,
7//! extended XYZ (extxyz) trajectory frames, CIF cell parameter lines, and a
8//! simplified parser for Gaussian formatted checkpoint (fchk) arrays.
9
10use std::collections::HashMap;
11use std::io::{BufRead, Write};
12
13// ── Cube file ─────────────────────────────────────────────────────────────────
14
15/// A Gaussian Cube volumetric data file.
16#[derive(Debug, Clone)]
17pub struct CubeFile {
18    /// Number of atoms in the molecule.
19    pub n_atoms: usize,
20    /// Origin of the volumetric grid in Bohr.
21    pub origin: [f64; 3],
22    /// Axis vectors `axes[i]` define the step along axis `i`.
23    pub axes: [[f64; 3]; 3],
24    /// Number of grid points along the X axis.
25    pub nx: usize,
26    /// Number of grid points along the Y axis.
27    pub ny: usize,
28    /// Number of grid points along the Z axis.
29    pub nz: usize,
30    /// Volumetric data in row-major order `[x][y][z]`.
31    pub data: Vec<f64>,
32    /// Atomic positions in Bohr, one per atom.
33    pub atom_positions: Vec<[f64; 3]>,
34    /// Atomic numbers, one per atom.
35    pub atom_numbers: Vec<u32>,
36}
37
38/// Write a [`CubeFile`] to `path` in Gaussian Cube format.
39pub fn write_cube(path: &str, cf: &CubeFile) -> Result<(), std::io::Error> {
40    let file = std::fs::File::create(path)?;
41    let mut w = std::io::BufWriter::new(file);
42
43    writeln!(w, " Cube file generated by OxiPhysics")?;
44    writeln!(w, " Volumetric data")?;
45    writeln!(
46        w,
47        " {:5} {:12.6} {:12.6} {:12.6}",
48        cf.n_atoms as i64, cf.origin[0], cf.origin[1], cf.origin[2]
49    )?;
50    writeln!(
51        w,
52        " {:5} {:12.6} {:12.6} {:12.6}",
53        cf.nx, cf.axes[0][0], cf.axes[0][1], cf.axes[0][2]
54    )?;
55    writeln!(
56        w,
57        " {:5} {:12.6} {:12.6} {:12.6}",
58        cf.ny, cf.axes[1][0], cf.axes[1][1], cf.axes[1][2]
59    )?;
60    writeln!(
61        w,
62        " {:5} {:12.6} {:12.6} {:12.6}",
63        cf.nz, cf.axes[2][0], cf.axes[2][1], cf.axes[2][2]
64    )?;
65    for i in 0..cf.n_atoms {
66        let z = cf.atom_numbers.get(i).copied().unwrap_or(0);
67        let pos = cf.atom_positions.get(i).copied().unwrap_or([0.0; 3]);
68        writeln!(
69            w,
70            " {:5} {:12.6} {:12.6} {:12.6} {:12.6}",
71            z, z as f64, pos[0], pos[1], pos[2]
72        )?;
73    }
74    // Write volumetric data, 6 values per line
75    for (idx, &val) in cf.data.iter().enumerate() {
76        if idx > 0 && idx % 6 == 0 {
77            writeln!(w)?;
78        }
79        write!(w, " {:12.5E}", val)?;
80    }
81    writeln!(w)?;
82    Ok(())
83}
84
85/// Read a [`CubeFile`] from `path`.
86pub fn read_cube(path: &str) -> Result<CubeFile, std::io::Error> {
87    let file = std::fs::File::open(path)?;
88    let reader = std::io::BufReader::new(file);
89    let mut lines = reader.lines();
90
91    // Skip two comment lines
92    lines.next();
93    lines.next();
94
95    let parse_err = |s: &str| std::io::Error::new(std::io::ErrorKind::InvalidData, s.to_string());
96
97    let read_line = |lines: &mut dyn Iterator<Item = Result<String, std::io::Error>>| {
98        lines
99            .next()
100            .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "EOF"))
101            .and_then(|r| r)
102    };
103
104    // Line: n_atoms  ox oy oz
105    let atom_line = read_line(&mut lines)?;
106    let toks: Vec<&str> = atom_line.split_whitespace().collect();
107    let n_atoms = toks
108        .first()
109        .ok_or_else(|| parse_err("missing n_atoms"))?
110        .parse::<i64>()
111        .map_err(|e| parse_err(&e.to_string()))?
112        .unsigned_abs() as usize;
113    let ox: f64 = toks.get(1).unwrap_or(&"0").parse().unwrap_or(0.0);
114    let oy: f64 = toks.get(2).unwrap_or(&"0").parse().unwrap_or(0.0);
115    let oz: f64 = toks.get(3).unwrap_or(&"0").parse().unwrap_or(0.0);
116
117    let mut axes = [[0.0_f64; 3]; 3];
118    let mut dims = [0usize; 3];
119    for i in 0..3 {
120        let l = read_line(&mut lines)?;
121        let t: Vec<&str> = l.split_whitespace().collect();
122        dims[i] = t.first().unwrap_or(&"0").parse().unwrap_or(0);
123        axes[i][0] = t.get(1).unwrap_or(&"0").parse().unwrap_or(0.0);
124        axes[i][1] = t.get(2).unwrap_or(&"0").parse().unwrap_or(0.0);
125        axes[i][2] = t.get(3).unwrap_or(&"0").parse().unwrap_or(0.0);
126    }
127
128    let mut atom_numbers = Vec::with_capacity(n_atoms);
129    let mut atom_positions = Vec::with_capacity(n_atoms);
130    for _ in 0..n_atoms {
131        let l = read_line(&mut lines)?;
132        let t: Vec<&str> = l.split_whitespace().collect();
133        let z: u32 = t.first().unwrap_or(&"0").parse().unwrap_or(0);
134        let x: f64 = t.get(2).unwrap_or(&"0").parse().unwrap_or(0.0);
135        let y: f64 = t.get(3).unwrap_or(&"0").parse().unwrap_or(0.0);
136        let zc: f64 = t.get(4).unwrap_or(&"0").parse().unwrap_or(0.0);
137        atom_numbers.push(z);
138        atom_positions.push([x, y, zc]);
139    }
140
141    let total = dims[0] * dims[1] * dims[2];
142    let mut data = Vec::with_capacity(total);
143    for line in lines {
144        let l = line?;
145        for tok in l.split_whitespace() {
146            if let Ok(v) = tok.parse::<f64>() {
147                data.push(v);
148            }
149        }
150    }
151
152    Ok(CubeFile {
153        n_atoms,
154        origin: [ox, oy, oz],
155        axes,
156        nx: dims[0],
157        ny: dims[1],
158        nz: dims[2],
159        data,
160        atom_positions,
161        atom_numbers,
162    })
163}
164
165// ── Molden file ───────────────────────────────────────────────────────────────
166
167/// A simplified Molden file containing atomic geometry and vibrational data.
168#[derive(Debug, Clone)]
169pub struct MoldenFile {
170    /// Atoms as `(element_symbol, [x, y, z])` in Ångström.
171    pub atoms: Vec<(String, [f64; 3])>,
172    /// Vibrational frequencies in cm⁻¹.
173    pub frequencies: Vec<f64>,
174    /// IR intensities in km/mol.
175    pub intensities: Vec<f64>,
176}
177
178/// Write Molden geometry section to `path`.
179pub fn write_molden_geometry(path: &str, mf: &MoldenFile) -> Result<(), std::io::Error> {
180    let file = std::fs::File::create(path)?;
181    let mut w = std::io::BufWriter::new(file);
182    writeln!(w, "[Molden Format]")?;
183    writeln!(w, "[Atoms] Angs")?;
184    for (i, (sym, pos)) in mf.atoms.iter().enumerate() {
185        writeln!(
186            w,
187            " {:<4} {:5} {:3} {:14.8} {:14.8} {:14.8}",
188            sym,
189            i + 1,
190            1, // dummy atomic number
191            pos[0],
192            pos[1],
193            pos[2]
194        )?;
195    }
196    if !mf.frequencies.is_empty() {
197        writeln!(w, "[FREQ]")?;
198        for &f in &mf.frequencies {
199            writeln!(w, " {:14.6}", f)?;
200        }
201    }
202    if !mf.intensities.is_empty() {
203        writeln!(w, "[INT]")?;
204        for &ir in &mf.intensities {
205            writeln!(w, " {:14.6}", ir)?;
206        }
207    }
208    Ok(())
209}
210
211// ── Extended XYZ ──────────────────────────────────────────────────────────────
212
213/// A single frame from an extended XYZ trajectory.
214#[derive(Debug, Clone)]
215pub struct XyzFrame {
216    /// Simulation step index.
217    pub step: usize,
218    /// Simulation time in ps.
219    pub time: f64,
220    /// Atoms as `(element_symbol, [x, y, z])`.
221    pub atoms: Vec<(String, [f64; 3])>,
222}
223
224/// Parse key=value pairs from an extXYZ comment line.
225///
226/// Handles quoted string values and bare numeric values.
227pub fn extxyz_parse_comment(line: &str) -> HashMap<String, String> {
228    let mut map = HashMap::new();
229    // Simple tokeniser: split on spaces that are not inside quotes
230    let mut chars = line.chars().peekable();
231    loop {
232        // Skip whitespace
233        while chars.peek() == Some(&' ') {
234            chars.next();
235        }
236        if chars.peek().is_none() {
237            break;
238        }
239        // Read key
240        let key: String = chars.by_ref().take_while(|&c| c != '=').collect();
241        let key = key.trim().to_string();
242        if key.is_empty() {
243            break;
244        }
245        // Read value
246        let value = if chars.peek() == Some(&'"') {
247            chars.next(); // skip opening quote
248            let v: String = chars.by_ref().take_while(|&c| c != '"').collect();
249            v
250        } else {
251            // Read until space
252            let v: String = chars.by_ref().take_while(|&c| c != ' ').collect();
253            v
254        };
255        if !key.is_empty() {
256            map.insert(key, value);
257        }
258    }
259    map
260}
261
262/// Write an extXYZ frame to `path`.
263pub fn write_extxyz_frame(
264    path: &str,
265    frame: &XyzFrame,
266    properties: &HashMap<String, String>,
267) -> Result<(), std::io::Error> {
268    let file = std::fs::File::create(path)?;
269    let mut w = std::io::BufWriter::new(file);
270    writeln!(w, "{}", frame.atoms.len())?;
271    // Comment line with step, time, and extra properties
272    let mut comment = format!("step={} time={:.6}", frame.step, frame.time);
273    for (k, v) in properties {
274        comment.push_str(&format!(" {}={}", k, v));
275    }
276    writeln!(w, "{}", comment)?;
277    for (sym, pos) in &frame.atoms {
278        writeln!(w, "{} {:14.8} {:14.8} {:14.8}", sym, pos[0], pos[1], pos[2])?;
279    }
280    Ok(())
281}
282
283// ── CIF helpers ───────────────────────────────────────────────────────────────
284
285/// Format a CIF `_cell_*` section for the given unit cell parameters.
286///
287/// Angles are in degrees; lengths in Ångström.
288pub fn cif_cell_parameters_line(
289    a: f64,
290    b: f64,
291    c: f64,
292    alpha: f64,
293    beta: f64,
294    gamma: f64,
295) -> String {
296    format!(
297        "_cell_length_a                  {:.4}\n\
298         _cell_length_b                  {:.4}\n\
299         _cell_length_c                  {:.4}\n\
300         _cell_angle_alpha               {:.4}\n\
301         _cell_angle_beta                {:.4}\n\
302         _cell_angle_gamma               {:.4}",
303        a, b, c, alpha, beta, gamma
304    )
305}
306
307// ── Gaussian fchk parser ──────────────────────────────────────────────────────
308
309/// Read a float-valued array from a Gaussian formatted checkpoint file.
310///
311/// Searches for a section matching `keyword` and reads `N` values where `N`
312/// is declared on the keyword line.  Returns an empty `Vec` if the keyword is
313/// not found.
314pub fn fchk_read_array(path: &str, keyword: &str) -> Result<Vec<f64>, std::io::Error> {
315    let file = std::fs::File::open(path)?;
316    let reader = std::io::BufReader::new(file);
317    let lines: Vec<String> = reader.lines().collect::<Result<_, _>>()?;
318
319    let mut reading = false;
320    let mut count = 0usize;
321    let mut values: Vec<f64> = Vec::new();
322
323    for line in &lines {
324        if !reading {
325            if line.contains(keyword) && line.contains("N=") {
326                // Extract count after "N="
327                if let Some(pos) = line.find("N=") {
328                    let rest = line[pos + 2..].trim();
329                    if let Ok(n) = rest.parse::<usize>() {
330                        count = n;
331                        reading = true;
332                        values.reserve(count);
333                    }
334                }
335            }
336        } else {
337            for tok in line.split_whitespace() {
338                if let Ok(v) = tok.parse::<f64>() {
339                    values.push(v);
340                    if values.len() >= count {
341                        return Ok(values);
342                    }
343                }
344            }
345        }
346    }
347    Ok(values)
348}
349
350// ── Tests ─────────────────────────────────────────────────────────────────────
351
352#[cfg(test)]
353mod tests {
354    use super::*;
355
356    fn sample_cube() -> CubeFile {
357        CubeFile {
358            n_atoms: 2,
359            origin: [0.0, 0.0, 0.0],
360            axes: [[0.2, 0.0, 0.0], [0.0, 0.2, 0.0], [0.0, 0.0, 0.2]],
361            nx: 2,
362            ny: 2,
363            nz: 2,
364            data: vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
365            atom_positions: vec![[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]],
366            atom_numbers: vec![6, 1],
367        }
368    }
369
370    #[test]
371    fn test_write_read_cube_roundtrip() {
372        let path = "/tmp/test_oxiphysics_cube.cube";
373        let cf = sample_cube();
374        write_cube(path, &cf).unwrap();
375        let cf2 = read_cube(path).unwrap();
376        assert_eq!(cf2.n_atoms, 2);
377        assert_eq!(cf2.nx, 2);
378        assert_eq!(cf2.ny, 2);
379        assert_eq!(cf2.nz, 2);
380        assert_eq!(cf2.data.len(), 8);
381    }
382
383    #[test]
384    fn test_cube_data_values() {
385        let path = "/tmp/test_oxiphysics_cube_data.cube";
386        let cf = sample_cube();
387        write_cube(path, &cf).unwrap();
388        let cf2 = read_cube(path).unwrap();
389        assert!((cf2.data[0] - 1.0).abs() < 1e-4);
390        assert!((cf2.data[7] - 8.0).abs() < 1e-4);
391    }
392
393    #[test]
394    fn test_cube_atom_numbers() {
395        let path = "/tmp/test_oxiphysics_cube_atoms.cube";
396        let cf = sample_cube();
397        write_cube(path, &cf).unwrap();
398        let cf2 = read_cube(path).unwrap();
399        assert_eq!(cf2.atom_numbers[0], 6);
400        assert_eq!(cf2.atom_numbers[1], 1);
401    }
402
403    #[test]
404    fn test_cube_atom_positions() {
405        let path = "/tmp/test_oxiphysics_cube_pos.cube";
406        let cf = sample_cube();
407        write_cube(path, &cf).unwrap();
408        let cf2 = read_cube(path).unwrap();
409        assert!((cf2.atom_positions[1][0] - 1.0).abs() < 1e-4);
410    }
411
412    #[test]
413    fn test_cube_origin() {
414        let path = "/tmp/test_oxiphysics_cube_origin.cube";
415        let cf = sample_cube();
416        write_cube(path, &cf).unwrap();
417        let cf2 = read_cube(path).unwrap();
418        assert!((cf2.origin[0]).abs() < 1e-6);
419    }
420
421    #[test]
422    fn test_write_molden_creates_file() {
423        let path = "/tmp/test_oxiphysics_molden.mld";
424        let mf = MoldenFile {
425            atoms: vec![
426                ("C".to_string(), [0.0, 0.0, 0.0]),
427                ("H".to_string(), [1.0, 0.0, 0.0]),
428            ],
429            frequencies: vec![1000.0, 2000.0],
430            intensities: vec![50.0, 100.0],
431        };
432        write_molden_geometry(path, &mf).unwrap();
433        let content = std::fs::read_to_string(path).unwrap();
434        assert!(content.contains("[Molden Format]"));
435        assert!(content.contains("[Atoms]"));
436        assert!(content.contains("[FREQ]"));
437    }
438
439    #[test]
440    fn test_molden_no_freq() {
441        let path = "/tmp/test_oxiphysics_molden_nofreq.mld";
442        let mf = MoldenFile {
443            atoms: vec![("O".to_string(), [0.0, 0.0, 0.0])],
444            frequencies: vec![],
445            intensities: vec![],
446        };
447        write_molden_geometry(path, &mf).unwrap();
448        let content = std::fs::read_to_string(path).unwrap();
449        assert!(!content.contains("[FREQ]"));
450    }
451
452    #[test]
453    fn test_extxyz_parse_simple() {
454        let line =
455            "Lattice=\"5.0 0 0 0 5 0 0 0 5\" Properties=species:S:1:pos:R:3 step=10 time=0.5";
456        let map = extxyz_parse_comment(line);
457        assert_eq!(map.get("step").map(|s| s.as_str()), Some("10"));
458        assert_eq!(map.get("time").map(|s| s.as_str()), Some("0.5"));
459    }
460
461    #[test]
462    fn test_extxyz_parse_empty() {
463        let map = extxyz_parse_comment("");
464        assert!(map.is_empty());
465    }
466
467    #[test]
468    fn test_extxyz_parse_quoted_value() {
469        let line = r#"Lattice="1 0 0 0 1 0 0 0 1" step=5"#;
470        let map = extxyz_parse_comment(line);
471        assert!(map.contains_key("Lattice"));
472        assert_eq!(map.get("step").map(|s| s.as_str()), Some("5"));
473    }
474
475    #[test]
476    fn test_write_extxyz_frame() {
477        let path = "/tmp/test_oxiphysics_extxyz.xyz";
478        let frame = XyzFrame {
479            step: 10,
480            time: 0.1,
481            atoms: vec![
482                ("C".to_string(), [0.0, 0.0, 0.0]),
483                ("H".to_string(), [1.1, 0.0, 0.0]),
484            ],
485        };
486        let mut props = HashMap::new();
487        props.insert("energy".to_string(), "-100.5".to_string());
488        write_extxyz_frame(path, &frame, &props).unwrap();
489        let content = std::fs::read_to_string(path).unwrap();
490        assert!(content.starts_with("2\n"));
491        assert!(content.contains("step=10"));
492        assert!(content.contains("energy=-100.5"));
493    }
494
495    #[test]
496    fn test_write_extxyz_atom_count() {
497        let path = "/tmp/test_oxiphysics_extxyz_count.xyz";
498        let frame = XyzFrame {
499            step: 0,
500            time: 0.0,
501            atoms: vec![("N".to_string(), [0.0, 0.0, 0.0])],
502        };
503        write_extxyz_frame(path, &frame, &HashMap::new()).unwrap();
504        let content = std::fs::read_to_string(path).unwrap();
505        assert!(content.starts_with("1\n"));
506    }
507
508    #[test]
509    fn test_cif_cell_line_format() {
510        let line = cif_cell_parameters_line(5.0, 5.0, 5.0, 90.0, 90.0, 90.0);
511        assert!(line.contains("_cell_length_a"));
512        assert!(line.contains("5.0000"));
513        assert!(line.contains("90.0000"));
514    }
515
516    #[test]
517    fn test_cif_cell_line_non_cubic() {
518        let line = cif_cell_parameters_line(3.0, 4.0, 5.0, 80.0, 100.0, 120.0);
519        assert!(line.contains("3.0000"));
520        assert!(line.contains("4.0000"));
521        assert!(line.contains("120.0000"));
522    }
523
524    #[test]
525    fn test_fchk_read_array_not_found() {
526        let path = "/tmp/test_fchk_empty.fchk";
527        std::fs::write(path, "Some data\n").unwrap();
528        let arr = fchk_read_array(path, "NonExistentKeyword").unwrap();
529        assert!(arr.is_empty());
530    }
531
532    #[test]
533    fn test_fchk_read_array_found() {
534        let path = "/tmp/test_fchk_data.fchk";
535        let content = "Total Energy                       R   N=           3\n   1.0000000E+00   2.0000000E+00   3.0000000E+00\n";
536        std::fs::write(path, content).unwrap();
537        let arr = fchk_read_array(path, "Total Energy").unwrap();
538        assert_eq!(arr.len(), 3);
539        assert!((arr[0] - 1.0).abs() < 1e-6);
540        assert!((arr[2] - 3.0).abs() < 1e-6);
541    }
542
543    #[test]
544    fn test_cube_file_no_atoms() {
545        let path = "/tmp/test_oxiphysics_cube_zero.cube";
546        let cf = CubeFile {
547            n_atoms: 0,
548            origin: [0.0; 3],
549            axes: [[0.1, 0.0, 0.0], [0.0, 0.1, 0.0], [0.0, 0.0, 0.1]],
550            nx: 1,
551            ny: 1,
552            nz: 1,
553            data: vec![0.5],
554            atom_positions: vec![],
555            atom_numbers: vec![],
556        };
557        write_cube(path, &cf).unwrap();
558        let cf2 = read_cube(path).unwrap();
559        assert_eq!(cf2.n_atoms, 0);
560    }
561
562    #[test]
563    fn test_molden_atom_symbols() {
564        let path = "/tmp/test_oxiphysics_molden_sym.mld";
565        let mf = MoldenFile {
566            atoms: vec![("Fe".to_string(), [0.5, 0.5, 0.5])],
567            frequencies: vec![],
568            intensities: vec![],
569        };
570        write_molden_geometry(path, &mf).unwrap();
571        let content = std::fs::read_to_string(path).unwrap();
572        assert!(content.contains("Fe"));
573    }
574
575    #[test]
576    fn test_extxyz_parse_multiple_keys() {
577        let line = "a=1 b=2 c=3";
578        let map = extxyz_parse_comment(line);
579        assert_eq!(map.len(), 3);
580        assert_eq!(map.get("a").map(|s| s.as_str()), Some("1"));
581    }
582
583    #[test]
584    fn test_write_extxyz_time() {
585        let path = "/tmp/test_oxiphysics_extxyz_time.xyz";
586        let frame = XyzFrame {
587            step: 5,
588            time: 1.23456,
589            atoms: vec![("H".to_string(), [0.0, 0.0, 0.0])],
590        };
591        write_extxyz_frame(path, &frame, &HashMap::new()).unwrap();
592        let content = std::fs::read_to_string(path).unwrap();
593        assert!(content.contains("time=1.234560"));
594    }
595
596    #[test]
597    fn test_cif_cell_contains_all_fields() {
598        let line = cif_cell_parameters_line(1.0, 2.0, 3.0, 91.0, 92.0, 93.0);
599        for field in &[
600            "_cell_length_a",
601            "_cell_length_b",
602            "_cell_length_c",
603            "_cell_angle_alpha",
604            "_cell_angle_beta",
605            "_cell_angle_gamma",
606        ] {
607            assert!(line.contains(field), "Missing field: {}", field);
608        }
609    }
610}