use crate::io::error::Error;
use crate::model::template::Template;
use crate::model::types::BondOrder;
use std::collections::{BTreeMap, HashSet};
use std::io::BufRead;
const FORMAT: &str = "MOL2";
pub fn read<R: BufRead>(reader: R) -> Result<Template, Error> {
let mut section = Section::None;
let mut molecule_lines_seen = 0usize;
let mut template_name: Option<String> = None;
let mut expected_atoms: Option<usize> = None;
let mut expected_bonds: Option<usize> = None;
let mut atom_names: BTreeMap<usize, String> = BTreeMap::new();
let mut seen_atom_names: HashSet<String> = HashSet::new();
let mut bond_records: Vec<(usize, usize, BondOrder)> = Vec::new();
for (idx, line_res) in reader.lines().enumerate() {
let line = line_res.map_err(|e| Error::from_io(e, None))?;
let line_number = idx + 1;
let trimmed = line.trim();
if trimmed.is_empty() || trimmed.starts_with('#') {
continue;
}
if let Some(header) = trimmed.strip_prefix("@<TRIPOS>") {
section = match header {
"MOLECULE" => {
molecule_lines_seen = 0;
Section::Molecule
}
"ATOM" => Section::Atom,
"BOND" => Section::Bond,
_ => Section::None,
};
continue;
}
match section {
Section::Molecule => {
molecule_lines_seen += 1;
match molecule_lines_seen {
1 => {
if trimmed == "****" {
return Err(Error::parse(
FORMAT,
None,
line_number,
"MOL2 molecule name must not be empty",
));
}
template_name = Some(trimmed.to_string());
}
2 => {
let (atoms, bonds) = parse_expected_counts(trimmed, line_number)?;
expected_atoms = Some(atoms);
expected_bonds = Some(bonds);
}
_ => {}
}
}
Section::Atom => {
let tokens: Vec<&str> = trimmed.split_whitespace().collect();
if tokens.len() < 2 {
return Err(Error::parse(
FORMAT,
None,
line_number,
"ATOM record must include at least id and name",
));
}
let atom_id = tokens[0]
.parse::<usize>()
.map_err(|_| Error::parse(FORMAT, None, line_number, "Invalid atom id"))?;
if atom_id == 0 {
return Err(Error::parse(
FORMAT,
None,
line_number,
"Atom id must be positive",
));
}
if atom_names.contains_key(&atom_id) {
return Err(Error::parse(
FORMAT,
None,
line_number,
format!("Duplicate atom id {atom_id}"),
));
}
let atom_name = tokens[1];
if atom_name == "****" {
return Err(Error::parse(
FORMAT,
None,
line_number,
"Atom name must not be ****",
));
}
if !seen_atom_names.insert(atom_name.to_string()) {
return Err(Error::parse(
FORMAT,
None,
line_number,
format!("Duplicate atom name '{atom_name}'"),
));
}
atom_names.insert(atom_id, atom_name.to_string());
}
Section::Bond => {
let tokens: Vec<&str> = trimmed.split_whitespace().collect();
if tokens.len() < 4 {
return Err(Error::parse(
FORMAT,
None,
line_number,
"BOND record must include id, endpoints, and bond type",
));
}
let origin = tokens[1].parse::<usize>().map_err(|_| {
Error::parse(FORMAT, None, line_number, "Invalid origin atom id")
})?;
let target = tokens[2].parse::<usize>().map_err(|_| {
Error::parse(FORMAT, None, line_number, "Invalid target atom id")
})?;
if origin == 0 || target == 0 {
return Err(Error::parse(
FORMAT,
None,
line_number,
"Bond endpoints must reference positive atom ids",
));
}
let order = parse_bond_order_token(tokens[3], line_number)?;
bond_records.push((origin, target, order));
}
Section::None => {}
}
}
let name = template_name.ok_or_else(|| {
Error::parse(
FORMAT,
None,
0,
"Missing @<TRIPOS>MOLECULE section with molecule name",
)
})?;
if atom_names.is_empty() {
return Err(Error::parse(
FORMAT,
None,
0,
"Missing or empty @<TRIPOS>ATOM section",
));
}
if let Some(expected) = expected_atoms.filter(|&expected| expected != atom_names.len()) {
return Err(Error::inconsistent_data(
FORMAT,
None,
format!("Declared {expected} atoms but parsed {}", atom_names.len()),
));
}
if let Some(expected) = expected_bonds.filter(|&expected| expected != bond_records.len()) {
return Err(Error::inconsistent_data(
FORMAT,
None,
format!(
"Declared {expected} bonds but parsed {}",
bond_records.len()
),
));
}
let mut bonds = Vec::with_capacity(bond_records.len());
for (origin, target, order) in bond_records {
let a1 = atom_names.get(&origin).ok_or_else(|| {
Error::inconsistent_data(
FORMAT,
None,
format!("Bond references unknown atom id {origin}"),
)
})?;
let a2 = atom_names.get(&target).ok_or_else(|| {
Error::inconsistent_data(
FORMAT,
None,
format!("Bond references unknown atom id {target}"),
)
})?;
bonds.push((a1.clone(), a2.clone(), order));
}
let atom_name_list: Vec<String> = atom_names.values().cloned().collect();
Ok(Template::new(name, atom_name_list, bonds))
}
enum Section {
None,
Molecule,
Atom,
Bond,
}
fn parse_expected_counts(line: &str, line_number: usize) -> Result<(usize, usize), Error> {
let fields: Vec<&str> = line.split_whitespace().collect();
if fields.len() < 2 {
return Err(Error::parse(
FORMAT,
None,
line_number,
"Molecule counts line must specify atom and bond counts",
));
}
let atoms = fields[0]
.parse::<usize>()
.map_err(|_| Error::parse(FORMAT, None, line_number, "Invalid atom count"))?;
let bonds = fields[1]
.parse::<usize>()
.map_err(|_| Error::parse(FORMAT, None, line_number, "Invalid bond count"))?;
Ok((atoms, bonds))
}
fn parse_bond_order_token(token: &str, line_number: usize) -> Result<BondOrder, Error> {
match token.to_ascii_lowercase().as_str() {
"1" | "single" => Ok(BondOrder::Single),
"2" | "double" => Ok(BondOrder::Double),
"3" | "triple" => Ok(BondOrder::Triple),
"ar" | "aromatic" => Ok(BondOrder::Aromatic),
other => Err(Error::parse(
FORMAT,
None,
line_number,
format!("Unsupported bond type '{other}'"),
)),
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::io::error::Error;
use std::io::Cursor;
const BENZENE: &str = "\
@<TRIPOS>MOLECULE\n\
BNZ\n\
12 12 0 0 0\n\
SMALL\n\
NO_CHARGES\n\
****\n\
\n\
@<TRIPOS>ATOM\n\
1 C1 4.5700 21.9710 1.5490 C.ar 1 BNZ 0.000\n\
2 C2 3.9410 23.1640 1.8820 C.ar 1 BNZ 0.000\n\
3 C3 4.6620 24.3520 1.8890 C.ar 1 BNZ 0.000\n\
4 C4 6.0260 24.3190 1.5870 C.ar 1 BNZ 0.000\n\
5 C5 6.6430 23.1280 1.2540 C.ar 1 BNZ 0.000\n\
6 C6 5.9080 21.9520 1.2180 C.ar 1 BNZ 0.000\n\
7 H1 4.0060 21.0500 1.5490 H 1 BNZ 0.000\n\
8 H2 2.8910 23.1680 2.1360 H 1 BNZ 0.000\n\
9 H3 4.1750 25.2870 2.1240 H 1 BNZ 0.000\n\
10 H4 6.6030 25.2320 1.6140 H 1 BNZ 0.000\n\
11 H5 7.6980 23.1130 1.0220 H 1 BNZ 0.000\n\
12 H6 6.3820 21.0250 0.9320 H 1 BNZ 0.000\n\
\n\
@<TRIPOS>BOND\n\
1 1 2 ar\n\
2 2 3 ar\n\
3 3 4 ar\n\
4 4 5 ar\n\
5 5 6 ar\n\
6 6 1 ar\n\
7 1 7 1\n\
8 2 8 1\n\
9 3 9 1\n\
10 4 10 1\n\
11 5 11 1\n\
12 6 12 1\n";
#[test]
fn read_parses_benzene_template() {
let mut cursor = Cursor::new(BENZENE.as_bytes());
let template = read(&mut cursor).expect("benzene should parse");
assert_eq!(template.name, "BNZ");
assert_eq!(template.atom_count(), 12);
assert_eq!(template.bond_count(), 12);
assert!(template.has_atom("C1"));
assert!(template.has_atom("H6"));
assert!(template.has_bond("C1", "C2"));
assert!(template.has_bond("C3", "H3"));
}
#[test]
fn read_errors_on_duplicate_atom_names() {
const DUPLICATE: &str = "\
@<TRIPOS>MOLECULE\n\
LIG\n\
2 1 0 0 0\n\
SMALL\n\
NO_CHARGES\n\
@<TRIPOS>ATOM\n\
1 C1 0 0 0 C.3\n\
2 C1 1 0 0 C.3\n\
@<TRIPOS>BOND\n\
1 1 2 1\n";
let mut cursor = Cursor::new(DUPLICATE.as_bytes());
let err = read(&mut cursor).expect_err("duplicate atom names should fail");
match err {
Error::Parse { details, .. } => {
assert!(details.contains("Duplicate atom name"));
}
other => panic!("unexpected error: {other:?}"),
}
}
#[test]
fn read_errors_on_unknown_bond_atom() {
const UNKNOWN: &str = "\
@<TRIPOS>MOLECULE\n\
LIG\n\
2 1 0 0 0\n\
SMALL\n\
NO_CHARGES\n\
@<TRIPOS>ATOM\n\
1 C1 0 0 0 C.3\n\
2 C2 1 0 0 C.3\n\
@<TRIPOS>BOND\n\
1 1 3 1\n";
let mut cursor = Cursor::new(UNKNOWN.as_bytes());
let err = read(&mut cursor).expect_err("unknown atom id should fail");
match err {
Error::InconsistentData { details, .. } => {
assert!(details.contains("unknown atom id"));
}
other => panic!("unexpected error: {other:?}"),
}
}
#[test]
fn read_errors_on_count_mismatch() {
const COUNT: &str = "\
@<TRIPOS>MOLECULE\n\
LIG\n\
3 0 0 0 0\n\
SMALL\n\
NO_CHARGES\n\
@<TRIPOS>ATOM\n\
1 C1 0 0 0 C.3\n\
2 C2 1 0 0 C.3\n";
let mut cursor = Cursor::new(COUNT.as_bytes());
let err = read(&mut cursor).expect_err("count mismatch should fail");
match err {
Error::InconsistentData { details, .. } => {
assert!(details.contains("Declared 3 atoms"));
}
other => panic!("unexpected error: {other:?}"),
}
}
}