assembly_theory/
loader.rs1use crate::molecule::{Atom, Bond, MGraph, Molecule};
18use clap::error::Result;
19use pyo3::exceptions::PyOSError;
20use pyo3::PyErr;
21use std::error::Error;
22use std::fmt::Display;
23
/// Errors produced while parsing a molfile.
///
/// Every `usize` payload is the index of the offending line as produced by
/// `input.lines().enumerate()`, i.e. it is zero-based.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParserError {
    /// The atom-count field of the counts line could not be read as an
    /// integer.
    AtomCountNotInt(usize),
    /// The bond-count field of the counts line could not be read as an
    /// integer.
    BondCountNotInt(usize),
    /// The version field of the counts line is not `V2000`.
    FileVersionIsNotV2000(usize),
    /// The element symbol on an atom line was not recognised; the `String`
    /// holds the trimmed symbol text that failed to parse.
    BadElementSymbol(usize, String),
    /// An atom number on a bond line is invalid.
    BadBondNumber(usize),
    /// The bond-type field of a bond line could not be read as an integer.
    BondTypeNotInt(usize),
    /// The bond-type integer could not be converted into a `Bond`
    /// (reported to users as "not 1, 2, or 3").
    BondTypeOutOfBounds(usize),
    /// Placeholder for an internal invariant violation; reported to users as
    /// a bug.
    ThisShouldNotHappen,
    /// The input ended before all expected lines were read.
    NotEnoughLines,
}
51
// Registers `ParserError` as a standard error type. The impl body is empty,
// so every `Error` method keeps its default implementation; the required
// `Debug` and `Display` impls are provided elsewhere in this file.
impl Error for ParserError {}
53
54impl From<ParserError> for PyErr {
56 fn from(err: ParserError) -> PyErr {
57 PyOSError::new_err(err.to_string())
58 }
59}
60
61pub fn parse_sdfile_str(_input: &str) -> Result<Molecule, ParserError> {
64 todo!("SDfile parser unimplemented!")
65}
66
67pub fn parse_molfile_str(input: &str) -> Result<Molecule, ParserError> {
87 let mut lines = input.lines().enumerate().skip(3); let (ix, counts_line) = lines.next().ok_or(ParserError::NotEnoughLines)?;
89 let (n_atoms, n_bonds) = parse_counts_line(ix, counts_line)?;
90
91 let mut graph = MGraph::new_undirected();
92 let mut atom_indices = Vec::new();
93
94 lines
95 .by_ref()
96 .take(n_atoms)
97 .try_fold(&mut graph, |g, (i, l)| {
98 parse_atom_line(i, l).map(|atom| {
99 atom_indices.push(g.add_node(atom));
100 g
101 })
102 })?;
103
104 lines
105 .by_ref()
106 .take(n_bonds)
107 .try_fold(&mut graph, |g, (i, l)| {
108 parse_bond_line(i, l).map(|(first, second, bond)| {
109 g.add_edge(atom_indices[first - 1], atom_indices[second - 1], bond);
110 g
111 })
112 })?;
113
114 Ok(Molecule::from_graph(graph))
115}
116
117fn parse_counts_line(line_ix: usize, counts_line: &str) -> Result<(usize, usize), ParserError> {
118 let n_atoms = counts_line[0..3]
119 .trim()
120 .parse()
121 .map_err(|_| ParserError::AtomCountNotInt(line_ix))?;
122 let n_bonds = counts_line[3..6]
123 .trim()
124 .parse()
125 .map_err(|_| ParserError::BondCountNotInt(line_ix))?;
126 let version_number = counts_line[33..39].trim().to_uppercase();
127 if version_number != "V2000" {
128 Err(ParserError::FileVersionIsNotV2000(line_ix))
129 } else {
130 Ok((n_atoms, n_bonds))
131 }
132}
133
134fn parse_atom_line(line_ix: usize, atom_line: &str) -> Result<Atom, ParserError> {
135 let elem_str = atom_line[31..34].trim();
136 let element = elem_str
137 .parse()
138 .map_err(|_| ParserError::BadElementSymbol(line_ix, elem_str.to_owned()))?;
139 let capacity = atom_line[44..47].trim().parse::<u32>().unwrap_or(0);
140 Ok(Atom::new(element, capacity))
141}
142
143fn parse_bond_line(line_ix: usize, bond_line: &str) -> Result<(usize, usize, Bond), ParserError> {
144 let first_atom = bond_line[0..3]
145 .trim()
146 .parse()
147 .map_err(|_| ParserError::BadBondNumber(line_ix))?;
148 let second_atom = bond_line[3..6]
149 .trim()
150 .parse()
151 .map_err(|_| ParserError::BadBondNumber(line_ix))?;
152 let bond = bond_line[6..9]
153 .trim()
154 .parse::<usize>()
155 .map_err(|_| ParserError::BondTypeNotInt(line_ix))?
156 .try_into()
157 .map_err(|_| ParserError::BondTypeOutOfBounds(line_ix))?;
158 Ok((first_atom, second_atom, bond))
159}
160
161impl Display for ParserError {
162 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
163 match self {
164 Self::AtomCountNotInt(line) => {
165 write!(f, "Line {line}: Atom count is not an integer")
166 }
167 Self::BondCountNotInt(line) => {
168 write!(f, "Line {line}: Bond count is not an integer")
169 }
170 Self::FileVersionIsNotV2000(line) => {
171 write!(f, "Line {line}: File version is not V2000")
172 }
173 Self::BondTypeNotInt(line) => {
174 write!(f, "Line {line}: Bond type is not an integer")
175 }
176 Self::BondTypeOutOfBounds(line) => {
177 write!(f, "Line {line}: Bond type is not 1, 2, or 3")
178 }
179 Self::BadElementSymbol(line, sym) => {
180 write!(f, "Line {line}: Bad element symbol {sym}")
181 }
182 Self::BadBondNumber(line) => {
183 write!(f, "Line {line}: Bad bond number")
184 }
185 Self::NotEnoughLines => {
186 write!(f, "File does not have enough lines")
187 }
188 Self::ThisShouldNotHappen => {
189 write!(f, "This should not happen, report it as a bug")
190 }
191 }
192 }
193}