use gut::prelude::*;
use gchemol_core::{Atom, Lattice, Molecule};
use roxmltree::Node;
use std::collections::HashMap;
/// Builds an [`Atom`] from a CML `<atom>` element.
///
/// Each coordinate is taken from the Cartesian attribute (`x3`, `y3`, `z3`)
/// and, when that attribute is absent, from the fractional one (`xFract`,
/// `yFract`, `zFract`). The element symbol is read from `elementType`.
///
/// Returns `None` when a coordinate or `elementType` is missing, or when a
/// coordinate value does not parse as a number.
///
/// NOTE(review): fractional values are handed to `Atom::new` unchanged; no
/// conversion against a lattice happens in this function.
fn parse_atom_from(node: Node) -> Option<Atom> {
    // Index the attributes by local name so each lookup below is a map access.
    let attrs: HashMap<_, _> = node.attributes().map(|a| (a.name(), a.value())).collect();

    // Cartesian attribute first, fractional attribute as the fallback.
    let coord = |cart: &str, frac: &str| -> Option<f64> {
        attrs.get(cart).or_else(|| attrs.get(frac))?.parse().ok()
    };

    let position = [
        coord("x3", "xFract")?,
        coord("y3", "yFract")?,
        coord("z3", "zFract")?,
    ];
    let symbol = *attrs.get("elementType")?;

    Some(Atom::new(symbol, position))
}
fn parse_lattice_from(lattice: Node) -> Lattice {
use std::collections::HashMap;
let params: HashMap<_, f64> = lattice
.descendants()
.filter_map(|n| {
if n.has_tag_name("scalar") && n.has_attribute("title") {
if let Some(value) = n.text() {
let title = n.attribute("title")?;
let value = value.parse().ok()?;
return Some((title, value));
}
}
None
})
.collect();
Lattice::from_params(params["a"], params["b"], params["c"], params["alpha"], params["beta"], params["gamma"])
}
fn parse_molecule_from(molecule: Node) -> Molecule {
let atoms = molecule.descendants().filter(|n| n.has_tag_name("atom"));
let atoms = atoms.filter_map(|node| parse_atom_from(node));
let title = molecule.attribute("id").unwrap_or("untitled cml");
let mut mol = Molecule::from_atoms(atoms);
mol.set_title(title);
if let Some(node) = molecule.children().find(|n| n.has_tag_name("crystal")) {
let lattice = parse_lattice_from(node);
mol.set_lattice(lattice);
}
if let Some(node) = molecule.children().find(|n| n.has_tag_name("bondArray")) {
}
mol
}
pub(self) fn parse_molecules(s: &str) -> Result<Vec<Molecule>> {
use roxmltree::Document;
let doc = Document::parse(s)?;
let nodes_mol = doc.root_element().descendants().filter(|n| n.has_tag_name("molecule"));
let mols = nodes_mol.map(|node| parse_molecule_from(node)).collect();
Ok(mols)
}
fn write_molecule(s: &mut String, mol: &Molecule) {
let title = mol.title();
writeln!(s, " <molecule id='{title}'>");
writeln!(s, " <atomArray>");
for (i, a) in mol.atoms() {
let sym = a.symbol();
let [x, y, z] = a.position();
writeln!(s, " <atom id='a{i}' elementType='sym' x3='{x}' y3='{y}' z3='{z}' />");
}
writeln!(s, " </atomArray>");
if let Some(lat) = mol.get_lattice() {
writeln!(s, " <crystal>");
let [a, b, c] = lat.lengths();
writeln!(s, "<scalar title='a' units='units:angstrom'>{a}</scalar>");
writeln!(s, "<scalar title='b' units='units:angstrom'>{b}</scalar>");
writeln!(s, "<scalar title='c' units='units:angstrom'>{c}</scalar>");
let [alpha, beta, gamma] = lat.angles();
writeln!(s, "<scalar title='alpha' units='units:degree'>{alpha}</scalar>");
writeln!(s, "<scalar title='beta' units='units:degree'>{beta}</scalar>");
writeln!(s, "<scalar title='gamma' units='units:degree'>{gamma}</scalar>");
writeln!(s, " </crystal>");
}
writeln!(s, " <bondArray>");
for (u, v, _) in mol.bonds() {
writeln!(s, " <bond atomRefs2='a{u} a{v} />\n");
}
writeln!(s, " </bondArray>");
writeln!(s, "</molecule>");
}
/// Renders `mols` as one CML document.
///
/// The output is an XML declaration followed by a `<list>` root element in
/// the CML schema namespace, holding one `<molecule>` entry per input
/// molecule in iteration order.
pub(self) fn format_molecules<'a>(mols: impl IntoIterator<Item = &'a Molecule>) -> String {
    let mut xml = String::from("<?xml version='1.0'?>\n");
    xml.push_str("<list xmlns='http://www.xml-cml.org/schema'>\n");

    for mol in mols {
        write_molecule(&mut xml, mol);
    }

    xml.push_str("</list>\n");
    xml
}
use super::ChemicalFile;
use super::ParseMolecule;
/// Handle for the Chemical Markup Language (CML) file format.
///
/// The type carries no data; it exists to hang the `ChemicalFile` and
/// `ParseMolecule` implementations on.
#[derive(Clone, Copy, Debug)]
pub struct CmlFile();
impl ChemicalFile for CmlFile {
    /// Identifier of this file format.
    fn ftype(&self) -> &str {
        "xml/cml"
    }

    /// File name extensions associated with this format.
    fn possible_extensions(&self) -> Vec<&str> {
        [".cml"].to_vec()
    }

    /// Formats a single molecule as a complete CML document.
    ///
    /// # Errors
    ///
    /// Fails when `mol` is periodic.
    fn format_molecule(&self, mol: &Molecule) -> Result<String> {
        // NOTE(review): `write_molecule` does emit a `<crystal>` element for
        // molecules with a lattice, so this guard makes that branch
        // unreachable through this method. Confirm whether the restriction is
        // still intended before lifting it.
        ensure!(!mol.is_periodic(), "cannot render Lattice in cml format!");

        let xml = format_molecules(std::iter::once(mol));
        Ok(xml)
    }
}
impl ParseMolecule for CmlFile {
    /// Parses `input` as CML and returns the last molecule it contains.
    ///
    /// # Errors
    ///
    /// Fails when `input` is not well-formed XML or holds no `<molecule>`
    /// element.
    fn parse_molecule(&self, input: &str) -> Result<Molecule> {
        let mut mols = parse_molecules(input)?;
        ensure!(!mols.is_empty(), "parse cml failed");

        // Non-empty was checked just above, so there is a last element to take.
        let last = mols.pop().expect("molecule list checked to be non-empty");
        Ok(last)
    }
}
// NOTE(review): `cf_impl_partitions!` is defined elsewhere in the crate and is
// not visible here; presumably it generates the per-format partition/reader
// glue for `CmlFile` -- confirm against the macro definition.
crate::cf_impl_partitions!(CmlFile);
/// Round-trips a multi-molecule file through the writer and the parser, then
/// checks that a crystal file is recognised as periodic.
#[test]
fn test_parse_mol_from_cml() -> Result<()> {
    // Parse, re-format and parse again: molecule and atom counts must survive.
    let original = gut::fs::read_file("tests/files/cml/1LJL_Cys10.cml")?;
    let formatted = format_molecules(&parse_molecules(&original)?);
    let mols = parse_molecules(&formatted)?;
    assert_eq!(mols.len(), 7);

    let expected_natoms = [1, 3, 7, 3, 207, 33, 13];
    for (mol, natoms) in mols.iter().zip(expected_natoms) {
        assert_eq!(mol.natoms(), natoms);
    }

    // A file with a `<crystal>` element yields a periodic molecule.
    let crystal = gut::fs::read_file("tests/files/cml/Fe.cml")?;
    let mols = parse_molecules(&crystal)?;
    assert_eq!(mols.len(), 1);
    assert!(mols[0].is_periodic());

    Ok(())
}