use chemrust_core::data::lattice::{LatticeModel, LatticeVectors};
use nalgebra::{Matrix3, Point3};
use nom::{
branch::alt,
bytes::complete::{tag, take_until},
character::complete::{alpha1, alphanumeric1, line_ending, space0, space1},
combinator::{peek, recognize},
multi::{many0, many1},
sequence::{delimited, preceded, terminated, tuple},
IResult,
};
use std::{fmt::Debug, marker::PhantomData};
use crate::decimal;
use self::{
atom_parser::*,
helper::FromRawAttrs,
model_attributes_parser::{
hashmap_attrs, parse_cry_tolerance, parse_periodic_type, parse_space_group, parse_vector,
},
};
mod atom_parser;
mod helper;
mod model_attributes_parser;
/// Marker trait for the type-level states of [`MsiParser`].
///
/// It declares no methods; it is only used as the bound on the `S` parameter
/// of [`MsiParser`], and requires implementors to be `Debug`.
pub trait MsiParserState: Debug {}
/// Parser over the text of an `.msi` file, parameterised by its current state `S`.
///
/// The state is carried only at the type level (`PhantomData<S>`); every
/// transition consumes the parser and returns one tagged with the next state.
/// All string slices borrow from the input passed to `MsiParser::new`.
#[derive(Debug)]
pub struct MsiParser<'a, S: MsiParserState> {
/// Input that has not been consumed yet; `analyze` sets it to `None` when done.
to_parse: Option<&'a str>,
/// Raw text of each model-level attribute collected by `analyze`.
model_attributes: Vec<&'a str>,
/// Raw body of each object whose type name is `Atom`.
atoms: Vec<&'a str>,
/// Raw body of each object whose type name is anything other than `Atom`.
bonds: Vec<&'a str>,
/// Number of entries pushed to `model_attributes`.
num_attr: usize,
/// Number of entries pushed to `atoms`.
num_atom: usize,
/// Number of entries pushed to `bonds`.
num_bond: usize,
/// Zero-sized tag recording the current parser state.
state: PhantomData<S>,
}
/// Low-level `nom` parsers that are available in every parser state.
impl<'a, S: MsiParserState> MsiParser<'a, S> {
    /// Recognises either an opening `(` preceded by whitespace, or a closing
    /// `)` preceded by whitespace and followed by a line ending.
    ///
    /// Returns the matched parenthesis.
    fn next_field(input: &str) -> IResult<&str, &str> {
        let opening = preceded(space1, tag("("));
        let closing = delimited(space1, tag(")"), line_ending);
        alt((opening, closing))(input)
    }

    /// Parses one attribute line of the form `(A <content>)` + line ending
    /// and returns `<content>`.
    ///
    /// The content runs up to the first `)` that is directly followed by a
    /// line ending, so parentheses nested inside the content are kept.
    fn take_attribute(input: &str) -> IResult<&str, &str> {
        let header = tuple((space0, tag("("), tag("A"), space1));
        // Try the CRLF terminator first, then fall back to a bare LF.
        let content = alt((take_until(")\r\n"), take_until(")\n")));
        let footer = terminated(tag(")"), line_ending);
        preceded(header, terminated(content, footer))(input)
    }

    /// Parses one object of the form `(<decimal> <body> )` + line ending and
    /// returns `<body>`, i.e. everything up to the first occurrence of `" )"`.
    fn take_object(input: &str) -> IResult<&str, &str> {
        let header = tuple((space0, tag("("), decimal, space1));
        let body = take_until(" )");
        let footer = tuple((space0, tag(")"), line_ending));
        delimited(header, body, footer)(input)
    }

    /// Splits an object body into its alphabetic type name (which must be
    /// directly followed by a line ending) and the remaining text.
    fn get_object_type(object_input: &str) -> IResult<&str, &str> {
        let (after_name, type_name) = alpha1(object_input)?;
        let (object_fields, _) = line_ending(after_name)?;
        Ok((object_fields, type_name))
    }

    /// Looks ahead, without consuming input, at the name of an attribute.
    ///
    /// The name follows an alphabetic token and whitespace, consists of
    /// alphanumeric characters and `/`, and must be followed by whitespace.
    fn get_attribute_type(attr_input: &str) -> IResult<&str, &str> {
        let leading_token = tuple((alpha1, space1));
        let attr_name = recognize(many1(alt((alphanumeric1, tag("/")))));
        peek(delimited(leading_token, attr_name, space1))(attr_input)
    }

    /// Parses the next field inside the model: an attribute if possible,
    /// otherwise an object.
    fn get_field(inside_model_input: &str) -> IResult<&str, &str> {
        alt((Self::take_attribute, Self::take_object))(inside_model_input)
    }

    /// Recognises the `)` that closes the model.
    fn model_end(input: &str) -> IResult<&str, &str> {
        let closing_paren = tag(")");
        closing_paren(input)
    }
}
/// State marker: the parser has just been created by `MsiParser::new` and
/// nothing has been parsed yet.
#[derive(Debug)]
pub(crate) struct Loaded;
impl MsiParserState for Loaded {}
/// Construction of the parser and entry into the model section.
impl<'a> MsiParser<'a, Loaded> {
    /// Creates a parser over `input` with empty collections and zeroed counters.
    pub fn new(input: &'a str) -> Self {
        Self {
            to_parse: Some(input),
            num_atom: 0,
            num_bond: 0,
            state: PhantomData,
            model_attributes: Vec::new(),
            atoms: Vec::new(),
            bonds: Vec::new(),
            num_attr: 0,
        }
    }

    /// Consumes everything that precedes the `(1 Model` header.
    fn get_to_model(input: &str) -> IResult<&str, &str> {
        take_until("(1 Model")(input)
    }

    /// Consumes the `(1 Model` header together with its line ending.
    fn enter_model(input: &str) -> IResult<&str, &str> {
        recognize(tuple((tag("(1 Model"), line_ending)))(input)
    }

    /// Skips to the `(1 Model` header, steps past it and returns the parser
    /// in the `Start` state, positioned on the first field of the model.
    ///
    /// # Panics
    ///
    /// Panics when the input contains no `(1 Model` header, or when the
    /// header is not directly followed by a line ending. The messages name
    /// the failing step, matching the `expect` used by `analyze`.
    pub fn starts(self) -> MsiParser<'a, Start> {
        let input = self
            .to_parse
            .expect("Error: parser holds no input to parse!");
        let (rest, _): (&'a str, &'a str) =
            Self::get_to_model(input).expect("Error: `(1 Model` header not found!");
        let (rest, _) = Self::enter_model(rest)
            .expect("Error: `(1 Model` header is not followed by a line ending!");
        MsiParser {
            to_parse: Some(rest),
            model_attributes: self.model_attributes,
            atoms: self.atoms,
            bonds: self.bonds,
            num_attr: 0,
            num_atom: 0,
            num_bond: 0,
            state: PhantomData,
        }
    }
}
/// State marker: produced by `starts`, once the `(1 Model` header line has
/// been consumed and the model fields are next in the input.
#[derive(Debug)]
pub(crate) struct Start {}
impl MsiParserState for Start {}
/// Collection of the raw model fields.
impl<'a> MsiParser<'a, Start> {
    /// Stores the raw body of one atom object and counts it.
    fn push_atom(&mut self, atom_fields: &'a str) {
        self.num_atom += 1;
        self.atoms.push(atom_fields);
    }

    /// Stores the raw body of one bond object and counts it.
    fn push_bond(&mut self, bond_fields: &'a str) {
        self.num_bond += 1;
        self.bonds.push(bond_fields);
    }

    /// Stores the raw text of one model-level attribute and counts it.
    fn push_model_attribute(&mut self, attribute_field: &'a str) {
        self.num_attr += 1;
        self.model_attributes.push(attribute_field);
    }

    /// Walks over every field of the model, sorts each one into atoms, bonds
    /// or model attributes, and returns the parser in the `Analyzed` state
    /// with `to_parse` cleared.
    ///
    /// # Panics
    ///
    /// Panics when `to_parse` is `None`, or when the text left after the last
    /// recognised field does not start with the closing `)` of the model.
    pub fn analyze(mut self) -> MsiParser<'a, Analyzed> {
        let mut remaining = self.to_parse.unwrap();
        while let Ok((rest, field)) = Self::get_field(remaining) {
            match Self::get_object_type(field) {
                Ok((body, "Atom")) => self.push_atom(body),
                // NOTE(review): every object that is not an `Atom` is stored
                // as a bond — confirm no other object kinds can occur here.
                Ok((body, _)) => self.push_bond(body),
                // No `<name>` + line ending prefix: this is a model attribute.
                Err(_) => self.push_model_attribute(field),
            }
            remaining = rest;
        }
        let _ = Self::model_end(remaining).expect("Error: end of model not found!");
        MsiParser {
            to_parse: None,
            model_attributes: self.model_attributes,
            atoms: self.atoms,
            bonds: self.bonds,
            num_attr: self.num_attr,
            num_atom: self.num_atom,
            num_bond: self.num_bond,
            state: PhantomData,
        }
    }
}
/// State marker: produced by `analyze`, once all model fields have been
/// collected into the parser's vectors.
#[derive(Debug)]
pub(crate) struct Analyzed {}
impl MsiParserState for Analyzed {}
impl<'a> MsiParser<'a, Analyzed> {
fn parse_attributes(&self) -> ModelParameters<Msi> {
if self.model_attributes.is_empty() {
ModelParameters::default()
} else {
let attr_table = hashmap_attrs(self.model_attributes.as_ref());
let (_, periodic_type) =
parse_periodic_type(attr_table.get("PeriodicType").expect("No `PeriodicType`"))
.unwrap();
let (_, cry_tolerance) =
parse_cry_tolerance(attr_table.get("CRY/TOLERANCE").expect("No `CRY/TOLERANCE`"))
.unwrap();
let (_, space_group) =
parse_space_group(attr_table.get("SpaceGroup").expect("No `SpaceGroup`")).unwrap();
let periodic_type = PeriodicType::new(periodic_type);
let space_group = SpaceGroup::new(space_group.into());
let cry_tolerance = CryTolerance::new(cry_tolerance);
ModelParameters::new_msi_settings(periodic_type, space_group, cry_tolerance)
}
}
fn parse_lattice_vectors(&self) -> Option<LatticeVectors<Msi>> {
if self.model_attributes.is_empty() {
None
} else {
let attr_table = hashmap_attrs(self.model_attributes.as_ref());
let (_, vec_a) = parse_vector(attr_table.get("A3").unwrap()).unwrap();
let (_, vec_b) = parse_vector(attr_table.get("B3").unwrap()).unwrap();
let (_, vec_c) = parse_vector(attr_table.get("C3").unwrap()).unwrap();
let lattice_vector = Matrix3::from_columns(&[vec_a, vec_b, vec_c]);
Some(LatticeVectors::new(lattice_vector))
}
}
fn parse_atoms(&self) -> AtomCollection<Msi> {
let mut element_symbols: Vec<&str> = Vec::with_capacity(self.num_atom);
let mut atomic_numbers: Vec<u8> = Vec::with_capacity(self.num_atom);
let mut xyz_coords: Vec<Point3<f64>> = Vec::with_capacity(self.num_atom);
let mut atom_ids: Vec<u32> = Vec::with_capacity(self.num_atom);
self.atoms.iter().for_each(|atom_fields| {
let (_, atom_attrs) = many0(Self::take_attribute)(atom_fields).unwrap();
atom_attrs.iter().for_each(|item| {
if let Ok((_, acl)) = parse_acl(item) {
let (num, symbol) = acl;
atomic_numbers.push(num);
element_symbols.push(symbol);
} else if let Ok((_, xyz)) = parse_xyz(item) {
xyz_coords.push(xyz);
} else if let Ok((_, id)) = parse_id(item) {
atom_ids.push(id);
} else {
}
})
});
let builder = AtomCollection::builder(self.num_atom);
builder
.with_atom_ids(&atom_ids.convert())
.unwrap()
.with_symbols(&element_symbols.convert())
.unwrap()
.with_atomic_number(&atomic_numbers.convert())
.unwrap()
.with_xyz(&xyz_coords.convert())
.unwrap()
.finish()
.build()
}
pub fn build_lattice_model(&self) -> LatticeModel<Msi> {
let settings = self.parse_attributes();
let lattice_vector = self.parse_lattice_vectors();
let atoms = self.parse_atoms();
LatticeModel::builder()
.with_vectors(lattice_vector)
.with_settings(Some(settings))
.with_atoms(atoms)
.build()
}
}
#[cfg(test)]
mod test {
    use super::MsiParser;
    use std::fs::read_to_string;

    /// Smoke test: the whole pipeline runs on `SAC_GDY_V.msi` without panicking.
    /// The file is looked up relative to the directory the test runs in.
    #[test]
    fn msi_parser() {
        let content = read_to_string("SAC_GDY_V.msi").unwrap();
        let loaded = MsiParser::new(&content);
        let analyzed = loaded.starts().analyze();
        let _msi_model = analyzed.build_lattice_model();
    }
}