gchemol-readwrite 0.1.12

Reading/writing chemical objects for gchemol
Documentation
// [[file:../../gchemol-readwrite.note::574001a8][574001a8]]
use super::*;
// 574001a8 ends here

// [[file:../../gchemol-readwrite.note::7636c8a8][7636c8a8]]
/// The extended XYZ format
#[derive(Copy, Clone, Debug)]
pub struct ExtxyzFile();
// 7636c8a8 ends here

// [[file:../../gchemol-readwrite.note::b83394d9][b83394d9]]
fn get_lattice(flat_vector: Vec<f64>) -> Option<Lattice> {
    if flat_vector.len() >= 9 {
        let va: [f64; 3] = flat_vector[..3].try_into().ok()?;
        let vb: [f64; 3] = flat_vector[3..6].try_into().ok()?;
        let vc: [f64; 3] = flat_vector[6..9].try_into().ok()?;
        let lat = Lattice::new([va, vb, vc]);
        Some(lat)
    } else {
        None
    }
}
// b83394d9 ends here

// [[file:../../gchemol-readwrite.note::9078bfde][9078bfde]]
impl ExtxyzFile {
    /// Return Lattice object by reading data from `line` in ase [extxyz](https://wiki.fysik.dtu.dk/ase/ase/io/formatoptions.html#xyz) format.
    ///
    /// Lattice="13.5142 0.0 0.0 0.0 14.9833 0.0 0.0 0.0 20.0" Properties=species:S:1:pos:R:3 pbc="T T T"
    pub fn read_lattice(line: &str) -> Option<Lattice> {
        if line.starts_with("Lattice=") {
            if let Some(pos) = &line[9..].find("\"") {
                let lattice_str = &line[9..pos + 9];
                let lattice_numbers: Vec<f64> = lattice_str.split_ascii_whitespace().filter_map(|value| value.parse().ok()).collect();
                return get_lattice(lattice_numbers);
            }
        }
        None
    }

    fn format_lattice(lat: &Lattice) -> String {
        let [va, vb, vc] = lat.vectors();
        let va_s: String = va.iter().map(|x| format!("{x} ")).collect();
        let vb_s: String = vb.iter().map(|x| format!("{x} ")).collect();
        let vc_s: String = vc.iter().map(|x| format!("{x} ")).collect();
        let lattice_data = ([va_s, vb_s, vc_s]).join(" ");
        format!(r#"Lattice="{lattice_data}""#)
    }
}

#[test]
fn test_lattice_extxyz() {
    let line = "Lattice=\"13.5142 0.0 0.0 0.0 14.9833 0.0 0.0 0.0 20.0\" Properties=species:S:1:pos:R:3 pbc=\"T T T\"";
    let lat = ExtxyzFile::read_lattice(line);
    assert!(lat.is_some());
    assert_eq!(lat.unwrap().lengths(), [13.5142, 14.9833, 20.0]);

    let s = ExtxyzFile::format_lattice(&lat.unwrap());
    let lat = ExtxyzFile::read_lattice(&s);
    assert!(lat.is_some());
    assert_eq!(lat.unwrap().lengths(), [13.5142, 14.9833, 20.0]);
}
// 9078bfde ends here

// [[file:../../gchemol-readwrite.note::8ac5d7e7][8ac5d7e7]]
use ::extxyz::{read_xyz_frames_direct, Info, RawAtoms};

fn get_array(value: serde_json::Value) -> Option<Vec<f64>> {
    let array = value.as_array()?;
    array.iter().map(|x| x.as_f64()).collect()
}

fn get_molecule_from_extxyz_atoms(raw_atoms: RawAtoms) -> Result<Molecule> {
    ensure!(raw_atoms.natoms == raw_atoms.atoms.len());

    let mut mol = Molecule::default();
    if let Ok(mut info) = raw_atoms.comment.parse::<Info>() {
        // get atom's properties
        for (i, a) in raw_atoms.atoms.into_iter().enumerate() {
            let mut atom = Atom::new(a.element, a.position);
            // parse extra data for each atom
            let mut atom_properties = info.parse_extra_columns(&a.extra)?;
            atom.properties.raw_map_mut().append(&mut atom_properties);
            mol.add_atom(i + 1, atom);
        }
        // get molecule's properties
        if let Some(lat) = info.pop("Lattice").and_then(get_array).and_then(get_lattice) {
            mol.set_lattice(lat);
        }
        mol.properties.raw_map_mut().append(info.raw_map_mut());
    } else {
        mol.set_title(raw_atoms.comment);
    }

    Ok(mol)
}

impl ExtxyzFile {
    /// Parse `Molecule` from str `s` in extxyz format.
    pub fn parse_molecule(s: &str) -> Result<Molecule> {
        let atoms = RawAtoms::parse_from(&s)?;
        let mol = get_molecule_from_extxyz_atoms(atoms)?;
        Ok(mol)
    }

    /// Read `Molecule` from file in `path` in extxyz format. Invalid
    /// frames will be discarded silently.
    pub fn read_molecules_from(path: impl AsRef<Path>) -> Result<impl Iterator<Item = Molecule>> {
        let frames = read_xyz_frames_direct(path)?;
        let mols = frames.filter_map(|frame| Self::parse_molecule(&frame).ok());
        Ok(mols)
    }
}
// 8ac5d7e7 ends here

// [[file:../../gchemol-readwrite.note::ec30581c][ec30581c]]
impl ExtxyzFile {
    /// Returns a string representation of `mol` in extxyz
    /// format. Properties such as "energy" for Molecule and "forces"
    /// for atoms, will be also write if available in `mol`.
    pub fn format_molecule(mol: &Molecule) -> String {
        let mut s = String::new();
        let natoms = mol.natoms();
        let _ = writeln!(s, "{natoms}");

        // Check if energy/forces properties. If available, write these attributes
        let energy: Option<f64> = mol.properties.load("energy").ok();
        let forces: Option<Vec<[f64; 3]>> = mol.atoms().map(|(_, a)| a.properties.load("forces").ok()).collect();
        let has_forces = forces.as_ref().map(|forces| forces.len() == natoms) == Some(true);

        let mut title = String::new();
        if let Some(lat) = mol.get_lattice() {
            title = Self::format_lattice(lat)
        };

        if has_forces {
            title.push_str(" Properties=species:S:1:pos:R:3:forces:R:3");
        } else {
            title.push_str(" Properties=species:S:1:pos:R:3");
        }
        if let Some(energy) = energy {
            title.push_str(&format!(" energy={energy}"));
        }
        let _ = writeln!(s, "{title}");
        for (i, (_, a)) in mol.atoms().enumerate() {
            let sym = a.symbol();
            let [x, y, z] = a.position();
            // write atom force vector
            let mut extra = String::new();
            if has_forces {
                let [x, y, z] = forces.as_ref().unwrap()[i];
                extra = format!("{x:18.9} {y:18.8} {z:18.8}");
            }
            let _ = writeln!(s, "{sym} {x:18.8} {y:18.8} {z:18.8} {extra}");
        }

        s
    }

    /// Write molecules into `path` in extxyz format.
    pub fn write_molecules<'a>(path: impl AsRef<Path>, mols: impl IntoIterator<Item = &'a Molecule>) -> Result<()> {
        let path = path.as_ref();
        let mut fp = File::create(path).with_context(|| format!("Failed to create file: {path:?}"))?;
        for mol in mols {
            let s = Self::format_molecule(mol);
            fp.write(s.as_bytes());
        }

        Ok(())
    }
}
// ec30581c ends here

// [[file:../../gchemol-readwrite.note::8b8bce94][8b8bce94]]
impl ExtxyzFile {
    pub(crate) fn parsable(path: &Path) -> Result<bool> {
        use std::fs::File;
        use std::io;
        use std::io::prelude::*;
        use std::io::BufReader;

        let possible_extensions = [".extxyz", ".xyz"];
        let filename = path.display().to_string().to_lowercase();
        for s in &possible_extensions {
            if filename.ends_with(s) {
                let mut f = File::open(path)?;
                let mut reader = BufReader::new(f);
                if let Some(Ok(line)) = reader.lines().skip(1).take(1).next() {
                    return Ok(line.contains("Properties=species:S:1"));
                }
            }
        }

        Ok(false)
    }
}

#[test]
fn test_extxyz_parsable() {
    let f = "./tests/files/extxyz/cu.xyz";
    let parsable = ExtxyzFile::parsable(f.as_ref()).unwrap();
    assert!(parsable);

    let f = "./tests/files/xyz/H2O.xyz";
    let parsable = ExtxyzFile::parsable(f.as_ref()).unwrap();
    assert!(!parsable);
}
// 8b8bce94 ends here

// [[file:../../gchemol-readwrite.note::55a4e567][55a4e567]]
#[test]
fn test_read_extxyz() -> Result<()> {
    let f = "tests/files/extxyz/cu.xyz";
    let mols = ExtxyzFile::read_molecules_from(f)?;
    let mol = mols.last().unwrap();
    assert_eq!(mol.natoms(), 107);
    assert!(mol.get_lattice().is_some());

    // test molecule's properties
    let energy: f64 = mol.properties.load("energy")?;
    assert_eq!(energy, 0.63);
    let user_data: Vec<usize> = mol.properties.load("user-data")?;
    assert_eq!(user_data.len(), 3);

   // test atom's properties
    let atom = mol.get_atom(1).unwrap();
    let energy: f64 = atom.properties.load("energy")?;
    assert_eq!(energy, 0.08400641);

    // format molecule and then parse it
    let s = ExtxyzFile::format_molecule(&mol);
    let mol = ExtxyzFile::parse_molecule(dbg!(&s))?;
    assert_eq!(mol.natoms(), 107);

    let energy: f64 = mol.properties.load("energy")?;
    assert_eq!(energy, 0.63);
    let force: [f64; 3] = atom.properties.load("forces")?;
    assert_eq!(force.len(), 3);

    Ok(())
}
// 55a4e567 ends here