assembly_theory/
loader.rs1use std::{error::Error, fmt::Display};
17
18use clap::error::Result;
19
20use crate::molecule::{Atom, Bond, Element::Hydrogen, MGraph, Molecule};
21
/// Everything that can go wrong while reading a molfile.
///
/// Variants carrying a `usize` hold the index of the offending line as handed
/// over by the parser, which numbers lines from zero.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParserError {
    /// The atom-count field of the counts line could not be read as an integer.
    AtomCountNotInt(usize),
    /// The bond-count field of the counts line could not be read as an integer.
    BondCountNotInt(usize),
    /// The version tag on the counts line is something other than `V2000`.
    FileVersionIsNotV2000(usize),
    /// An atom line holds a symbol that is not a recognised element; the
    /// rejected text is kept alongside the line index.
    BadElementSymbol(usize, String),
    /// One of the two atom numbers on a bond line is not an integer.
    BondNumberNotInt(usize),
    /// The bond-type field on a bond line is not an integer.
    BondTypeNotInt(usize),
    /// The bond type is an integer, but not one of 1, 2, or 3.
    BadBondType(usize),
    /// The input ended before all required lines were seen.
    NotEnoughLines,
    /// Internal invariant violation; seeing this indicates a bug.
    ThisShouldNotHappen,
}
45
46impl Error for ParserError {}
47
48impl Display for ParserError {
49 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50 match self {
51 Self::AtomCountNotInt(line) => {
52 write!(f, "Line {line}: Atom count is not an integer")
53 }
54 Self::BondCountNotInt(line) => {
55 write!(f, "Line {line}: Bond count is not an integer")
56 }
57 Self::FileVersionIsNotV2000(line) => {
58 write!(f, "Line {line}: File version is not V2000")
59 }
60 Self::BadElementSymbol(line, sym) => {
61 write!(f, "Line {line}: Bad element symbol '{sym}'")
62 }
63 Self::BondNumberNotInt(line) => {
64 write!(f, "Line {line}: Bond number is not an integer")
65 }
66 Self::BondTypeNotInt(line) => {
67 write!(f, "Line {line}: Bond type is not an integer")
68 }
69 Self::BadBondType(line) => {
70 write!(f, "Line {line}: Bond type is not 1, 2, or 3")
71 }
72 Self::NotEnoughLines => {
73 write!(f, "File does not have enough lines")
74 }
75 Self::ThisShouldNotHappen => {
76 write!(f, "This should not happen, report it as a bug")
77 }
78 }
79 }
80}
81
82pub fn parse_molfile_str(input: &str) -> Result<Molecule, ParserError> {
99 let mut lines = input.lines().enumerate().skip(3); let (ix, counts_line) = lines.next().ok_or(ParserError::NotEnoughLines)?;
101 let (n_atoms, n_bonds) = parse_counts_line(ix, counts_line)?;
102
103 let mut graph = MGraph::new_undirected();
104 let mut atom_indices = Vec::with_capacity(n_atoms); lines.by_ref().take(n_atoms).try_for_each(|(i, line)| {
108 let atom = parse_atom_line(i, line)?;
109 if atom.element() == Hydrogen {
110 atom_indices.push(None); } else {
112 let idx = graph.add_node(atom);
113 atom_indices.push(Some(idx));
114 }
115 Ok(())
116 })?;
117
118 lines.by_ref().take(n_bonds).try_for_each(|(i, line)| {
120 let (first, second, bond) = parse_bond_line(i, line)?;
121 let a = atom_indices.get(first - 1).copied().flatten();
122 let b = atom_indices.get(second - 1).copied().flatten();
123 if let (Some(ai), Some(bi)) = (a, b) {
124 graph.add_edge(ai, bi, bond);
125 }
126 Ok(())
127 })?;
128
129 Ok(Molecule::from_graph(graph))
130}
131
132fn parse_counts_line(line_ix: usize, counts_line: &str) -> Result<(usize, usize), ParserError> {
133 let n_atoms = counts_line[0..3]
134 .trim()
135 .parse()
136 .map_err(|_| ParserError::AtomCountNotInt(line_ix))?;
137 let n_bonds = counts_line[3..6]
138 .trim()
139 .parse()
140 .map_err(|_| ParserError::BondCountNotInt(line_ix))?;
141 let version_number = counts_line[33..39].trim().to_uppercase();
142 if version_number != "V2000" {
143 Err(ParserError::FileVersionIsNotV2000(line_ix))
144 } else {
145 Ok((n_atoms, n_bonds))
146 }
147}
148
149fn parse_atom_line(line_ix: usize, atom_line: &str) -> Result<Atom, ParserError> {
150 let elem_str = atom_line[31..34].trim();
151 let element = elem_str
152 .parse()
153 .map_err(|_| ParserError::BadElementSymbol(line_ix, elem_str.to_owned()))?;
154 let capacity = atom_line[44..47].trim().parse::<u32>().unwrap_or(0);
155 Ok(Atom::new(element, capacity))
156}
157
158fn parse_bond_line(line_ix: usize, bond_line: &str) -> Result<(usize, usize, Bond), ParserError> {
159 let first_atom = bond_line[0..3]
160 .trim()
161 .parse()
162 .map_err(|_| ParserError::BondNumberNotInt(line_ix))?;
163 let second_atom = bond_line[3..6]
164 .trim()
165 .parse()
166 .map_err(|_| ParserError::BondNumberNotInt(line_ix))?;
167 let bond = bond_line[6..9]
168 .trim()
169 .parse::<usize>()
170 .map_err(|_| ParserError::BondTypeNotInt(line_ix))?
171 .try_into()
172 .map_err(|_| ParserError::BadBondType(line_ix))?;
173 Ok((first_atom, second_atom, bond))
174}