use std::{fmt::Debug, marker::PhantomData};
use nalgebra::{Matrix3, Point3};
use nom::{
branch::alt,
bytes::complete::{tag, take_until},
character::complete::{alpha1, alphanumeric1, line_ending, space0, space1},
combinator::{peek, recognize},
multi::{many0, many1},
sequence::{delimited, preceded, terminated, tuple},
IResult,
};
use crate::{
atom::{AtomCollection, AtomCollectionBuilder},
builder_typestate::No,
lattice::LatticeVectors,
model_type::Settings,
parser::{
decimal,
msi_parser::state_machine::model_attributes_parser::{
parse_cry_tolerance, parse_space_group,
},
},
LatticeModel, MsiModel,
};
use self::{
atom_parser::{parse_acl, parse_id, parse_xyz},
model_attributes_parser::{hashmap_attrs, parse_periodic_type, parse_vector},
};
mod atom_parser;
mod helper;
mod model_attributes_parser;
/// Marker trait for the typestates an [`MsiParser`] can be in.
///
/// It declares no methods of its own; implementors only have to be [`Debug`].
pub trait ParserState: Debug {}
/// Parser over borrowed MSI text whose current stage is tracked in the type
/// parameter `S`.
///
/// The stages visible in this file are `Loaded` -> `Start` -> `Analyzed`; each
/// transition consumes the parser and returns it in the next state.
#[derive(Debug)]
pub struct MsiParser<'a, S: ParserState> {
/// Input that has not been consumed yet; set to `None` once analysis is done.
to_parse: Option<&'a str>,
/// Raw text of the model-level fields that were not recognized as objects.
model_attributes: Vec<&'a str>,
/// Body text of every object whose type name is `Atom`.
atoms: Vec<&'a str>,
/// Body text of every object whose type name is anything other than `Atom`.
bonds: Vec<&'a str>,
/// Number of entries pushed into `model_attributes`.
num_attr: usize,
/// Number of entries pushed into `atoms`.
num_atom: usize,
/// Number of entries pushed into `bonds`.
num_bond: usize,
/// Zero-sized marker carrying the current state `S`.
state: PhantomData<S>,
}
impl<'a, S: ParserState> MsiParser<'a, S> {
fn next_field(input: &str) -> IResult<&str, &str> {
alt((
preceded(space1, tag("(")),
delimited(space1, tag(")"), line_ending),
))(input)
}
fn take_attribute(input: &str) -> IResult<&str, &str> {
delimited(
tuple((space0, tag("("), tag("A"), space1)),
alt((take_until(")\r\n"), take_until(")\n"))),
tuple((tag(")"), line_ending)),
)(input)
}
fn take_object(input: &str) -> IResult<&str, &str> {
delimited(
tuple((space0, tag("("), decimal, space1)),
take_until(" )"),
tuple((space0, tag(")"), line_ending)),
)(input)
}
fn get_object_type(object_input: &str) -> IResult<&str, &str> {
terminated(alpha1, line_ending)(object_input)
}
fn get_attribute_type(attr_input: &str) -> IResult<&str, &str> {
peek(delimited(
tuple((alpha1, space1)),
recognize(many1(alt((alphanumeric1, tag("/"))))),
space1,
))(attr_input)
}
fn get_field(inside_model_input: &str) -> IResult<&str, &str> {
alt((Self::take_attribute, Self::take_object))(inside_model_input)
}
fn model_end(input: &str) -> IResult<&str, &str> {
tag(")")(input)
}
}
/// Typestate of a freshly constructed parser: the input text is stored but
/// nothing has been consumed yet.
#[derive(Debug)]
pub(crate) struct Loaded;
impl ParserState for Loaded {}
impl<'a> MsiParser<'a, Loaded> {
    /// Creates a parser over `input` with empty field lists and zeroed
    /// counters. Nothing is parsed until [`starts`](Self::starts) is called.
    pub fn new(input: &'a str) -> Self {
        Self {
            to_parse: Some(input),
            model_attributes: Vec::new(),
            atoms: Vec::new(),
            bonds: Vec::new(),
            num_attr: 0,
            num_atom: 0,
            num_bond: 0,
            state: PhantomData,
        }
    }

    /// Skips everything in front of the first `(1 Model` marker.
    fn get_to_model(input: &str) -> IResult<&str, &str> {
        take_until("(1 Model")(input)
    }

    /// Consumes the `(1 Model` marker together with the line ending that must
    /// directly follow it.
    fn enter_model(input: &str) -> IResult<&str, &str> {
        recognize(tuple((tag("(1 Model"), line_ending)))(input)
    }

    /// Moves past the model header and hands the parser over to the `Start`
    /// state, with all counters reset to zero.
    ///
    /// # Panics
    ///
    /// Panics if the input contains no `(1 Model` marker, or if that marker
    /// is not immediately followed by a line ending.
    pub fn starts(self) -> MsiParser<'a, Start> {
        // `new` always stores `Some(input)`, so a missing input is a bug.
        let input = self
            .to_parse
            .expect("Error: parser holds no input to parse!");
        let (rest, _) =
            Self::get_to_model(input).expect("Error: start of model `(1 Model` not found!");
        let (rest, _) = Self::enter_model(rest)
            .expect("Error: `(1 Model` is not followed by a line ending!");
        MsiParser {
            to_parse: Some(rest),
            model_attributes: self.model_attributes,
            atoms: self.atoms,
            bonds: self.bonds,
            num_attr: 0,
            num_atom: 0,
            num_bond: 0,
            state: PhantomData,
        }
    }
}
/// Typestate reached after `starts`: the remaining input begins right after
/// the `(1 Model` header line.
#[derive(Debug)]
pub(crate) struct Start {}
impl ParserState for Start {}
impl<'a> MsiParser<'a, Start> {
    /// Stores the body of an `Atom` object and bumps the atom counter.
    fn push_atom(&mut self, atom_fields: &'a str) {
        self.num_atom += 1;
        self.atoms.push(atom_fields);
    }

    /// Stores the body of a non-`Atom` object and bumps the bond counter.
    fn push_bond(&mut self, bond_fields: &'a str) {
        self.num_bond += 1;
        self.bonds.push(bond_fields);
    }

    /// Stores a model-level attribute and bumps the attribute counter.
    fn push_model_attribute(&mut self, attribute_field: &'a str) {
        self.num_attr += 1;
        self.model_attributes.push(attribute_field);
    }

    /// Walks over every field of the model and sorts it into attributes,
    /// atoms or bonds, then moves the parser into the `Analyzed` state.
    ///
    /// A field whose body starts with an alphabetic type name on its own line
    /// is an object: `Atom` objects are stored as atoms, every other object
    /// as a bond. Any other field is stored as a model attribute.
    ///
    /// # Panics
    ///
    /// Panics if the input left after the last field does not start with the
    /// closing `)` of the model.
    pub fn analyze(mut self) -> MsiParser<'a, Analyzed> {
        let mut remaining: &'a str = self.to_parse.unwrap();
        while let Ok((rest, parsed_field)) = Self::get_field(remaining) {
            match Self::get_object_type(parsed_field) {
                Ok((object_fields, "Atom")) => self.push_atom(object_fields),
                Ok((object_fields, _)) => self.push_bond(object_fields),
                Err(_) => self.push_model_attribute(parsed_field),
            }
            remaining = rest;
        }
        let _ = Self::model_end(remaining).expect("Error: end of model not found!");
        // The input is fully consumed, so the next state carries no text.
        MsiParser {
            to_parse: None,
            model_attributes: self.model_attributes,
            atoms: self.atoms,
            bonds: self.bonds,
            num_attr: self.num_attr,
            num_atom: self.num_atom,
            num_bond: self.num_bond,
            state: PhantomData,
        }
    }
}
/// Typestate reached after `analyze`: the model's fields have been sorted
/// into attributes, atoms and bonds, and no unparsed input remains.
#[derive(Debug)]
pub(crate) struct Analyzed {}
impl ParserState for Analyzed {}
impl<'a> MsiParser<'a, Analyzed> {
fn parse_attributes(&self) -> Settings<MsiModel> {
if self.model_attributes.is_empty() {
Settings::default()
} else {
let attr_table = hashmap_attrs(self.model_attributes.as_ref());
let (_, periodic_type) =
parse_periodic_type(attr_table.get("PeriodicType").expect("No `PeriodicType`"))
.unwrap();
let (_, cry_tolerance) =
parse_cry_tolerance(attr_table.get("CRY/TOLERANCE").expect("No `CRY/TOLERANCE`"))
.unwrap();
let (_, space_group) =
parse_space_group(attr_table.get("SpaceGroup").expect("No `SpaceGroup`")).unwrap();
Settings::new_msi_settings(periodic_type, space_group, cry_tolerance)
}
}
fn parse_lattice_vectors(&self) -> Option<LatticeVectors<MsiModel>> {
if self.model_attributes.is_empty() {
None
} else {
let attr_table = hashmap_attrs(self.model_attributes.as_ref());
let (_, vec_a) = parse_vector(attr_table.get("A3").unwrap()).unwrap();
let (_, vec_b) = parse_vector(attr_table.get("B3").unwrap()).unwrap();
let (_, vec_c) = parse_vector(attr_table.get("C3").unwrap()).unwrap();
let lattice_vector = Matrix3::from_columns(&[vec_a, vec_b, vec_c]);
Some(LatticeVectors::new(lattice_vector))
}
}
fn parse_atoms(&self) -> AtomCollection<MsiModel> {
let mut element_symbols: Vec<String> = Vec::with_capacity(self.num_atom);
let mut atomic_numbers: Vec<u8> = Vec::with_capacity(self.num_atom);
let mut xyz_coords: Vec<Point3<f64>> = Vec::with_capacity(self.num_atom);
let mut atom_ids: Vec<u32> = Vec::with_capacity(self.num_atom);
let frac_xyz: Vec<Option<Point3<f64>>> =
(0..self.num_atom).into_iter().map(|_| None).collect();
self.atoms.iter().for_each(|atom_fields| {
let (_, atom_attrs) = many0(Self::take_attribute)(atom_fields).unwrap();
atom_attrs.iter().for_each(|item| {
if let Ok((_, acl)) = parse_acl(item) {
let (num, symbol) = acl;
atomic_numbers.push(num);
element_symbols.push(symbol.into());
} else if let Ok((_, xyz)) = parse_xyz(item) {
xyz_coords.push(xyz);
} else if let Ok((_, id)) = parse_id(item) {
atom_ids.push(id);
} else {
}
})
});
let builder = AtomCollectionBuilder::<MsiModel, No>::new(self.num_atom);
builder
.with_atom_ids(&atom_ids)
.unwrap()
.with_element_symbols(&element_symbols)
.unwrap()
.with_atomic_nums(&atomic_numbers)
.unwrap()
.with_xyz_coords(&xyz_coords)
.unwrap()
.with_fractional_xyz(&frac_xyz)
.unwrap()
.finish()
.unwrap()
.build()
}
pub fn build_lattice_model(&self) -> LatticeModel<MsiModel> {
let settings = self.parse_attributes();
let lattice_vector = self.parse_lattice_vectors();
let atoms = self.parse_atoms();
LatticeModel::new(lattice_vector, atoms, settings)
}
}
mod error;
#[cfg(test)]
mod test {
    use std::fs::read_to_string;

    use crate::parser::msi_parser::state_machine::Analyzed;

    use super::MsiParser;

    /// Runs the whole parsing pipeline on the file at `path` and prints the
    /// atoms, the lattice vectors and the final model.
    fn parse_and_print(path: &str) {
        let file_content = read_to_string(path).unwrap();
        let mut parser = MsiParser::new(&file_content).starts().analyze();
        println!("{:?}", parser.parse_atoms());
        // Order the attributes by name before the remaining steps run.
        parser.model_attributes.sort_by_key(|item| {
            let (_, key) = MsiParser::<Analyzed>::get_attribute_type(item).unwrap();
            key
        });
        println!("{:?}", parser.parse_lattice_vectors());
        println!("{:?}", parser.build_lattice_model());
    }

    #[test]
    fn parsing_lattice() {
        parse_and_print("SAC_GDY_V.msi");
    }

    #[test]
    fn parsing_ads() {
        parse_and_print("C2H4.msi");
    }
}