chem_eq/parse/
mod.rs

1#[cfg(test)]
2mod tests;
3pub mod util;
4
5use std::str::FromStr;
6
7use nom::{
8    bytes::complete::{tag, take_till1, take_while, take_while1},
9    character::complete::{anychar, digit0, multispace0},
10    combinator::{map, map_opt, map_res, opt, peek, verify},
11    error::{context, Error as NomError, ErrorKind as NomErrorKind},
12    multi::many1,
13    sequence::{delimited, preceded, terminated, tuple},
14};
15
16use crate::{
17    element::SimpleElement,
18    parse::util::{Error, ErrorKind, Input, Result},
19    Compound, Direction, Element, Equation, State,
20};
21
22/// Parse an [`Equation`] from a str
23pub fn parse_equation(orig_i: Input) -> Result<Equation> {
24    // get the left side of the equals
25    let (i, lhs) = context(
26        "splitting equation",
27        take_till1(|c: char| c == '<' || c == '-'),
28    )(orig_i)?;
29
30    // get the direction of reaction
31    let (rhs, tag) = context(
32        "direction of equation",
33        take_while1(|c: char| c == '<' || c == '-' || c == '>'),
34    )(i)?;
35    let direction = Direction::from_str(tag)
36        .map_err(|_| nom::Err::Error(NomError::new(i, NomErrorKind::Verify).into()))?;
37
38    // parse either side
39    let (_, left_cmp) = context("left side", parse_side)(lhs)?;
40    let (i, right_cmp) = context("right side", parse_side)(rhs)?;
41
42    // clear trailing whitespace
43    let mut orig_i = orig_i.to_string();
44    orig_i.truncate(orig_i.trim_end().len());
45    Ok((
46        i,
47        Equation {
48            left: left_cmp,
49            right: right_cmp,
50            direction,
51            equation: orig_i,
52            ..Default::default()
53        },
54    ))
55}
56
57/// Parse one side of the equation into [`Compound`]
58fn parse_side(i: Input) -> Result<Vec<Compound>> {
59    // collect as many compounds as possible skipping leading whitespace
60    preceded(multispace0, many1(compound_and_plus))(i)
61}
62
63/// Parse an [`Element`]
64pub fn parse_element(orig_i: Input) -> Result<Element> {
65    let (i, (c, name)) = context(
66        "element name",
67        tuple((
68            context(
69                "starting element letter",
70                verify(anychar, |c| c.is_uppercase() && c.is_alphabetic()),
71            ),
72            context(
73                "rest of element name",
74                take_while(|i: char| i.is_alphabetic() && i.is_lowercase()),
75            ),
76        )),
77    )(orig_i)?;
78
79    let mut c = c.to_string();
80    c.push_str(name);
81
82    // capture the number at the end of the element
83    map_res(
84        map_opt(opt(digit0::<_, Error<&str>>), |s| {
85            s.map(str::parse::<usize>)
86        }),
87        move |num| {
88            SimpleElement {
89                // map expects FnMut which theoretically can be called multiple times, so we can't move
90                // out of c
91                name: c.clone(),
92                count: num.unwrap_or(1),
93            }
94            .into_element()
95        },
96    )(i)
97    .map_err(|e| match e {
98        nom::Err::Error(inner)
99            if inner
100                .errors
101                .first()
102                .map(|e| matches!(e.1, ErrorKind::InvalidElement(_)))
103                .unwrap_or_default() =>
104        {
105            nom::Err::Failure(inner)
106        }
107        e => e,
108    })
109}
110
111/// Parse a [`Compound`] from an input
112pub fn parse_compound(i: Input) -> Result<Compound> {
113    // get prefix of compound and extra elements
114    let (i, (num, elements)) = tuple((
115        // optional coefficient
116        context(
117            "compound coefficient",
118            map_opt(opt(digit0), |s: Option<&str>| s.map(str::parse::<usize>)),
119        ),
120        // get all the elements
121        context(
122            "optionally bracketed elements",
123            map(many1(bracketed_elements), |v| {
124                v.into_iter().flatten().collect::<Vec<_>>()
125            }),
126        ),
127    ))(i)?;
128
129    // get state of compound
130    let (i, state) = match delimited(
131        context(
132            "leading bracket for compound state",
133            tag::<_, _, NomError<&str>>("("),
134        ),
135        context(
136            "compound state",
137            map_res(take_while(char::is_alphabetic), State::from_str),
138        ),
139        context("closing bracket for compound state", tag(")")),
140    )(i)
141    {
142        Ok((i, state)) => (i, Some(state)),
143        Err(e) => {
144            match e {
145                // if state couldn't be parsed
146                nom::Err::Error(inner) if inner.code == NomErrorKind::MapRes => {
147                    return Err(nom::Err::Error(inner.into()))
148                }
149                _ => {}
150            }
151            (i, None)
152        }
153    };
154
155    Ok((
156        i,
157        Compound {
158            elements,
159            coefficient: num.unwrap_or(1),
160            state,
161            concentration: 0.0,
162        },
163    ))
164}
165
166/// Parse elements that are bracketed with a coefficient on the end
167fn bracketed_elements(orig_i: Input) -> Result<Vec<Element>> {
168    // get bracket
169    let (i, b) = peek(anychar)(orig_i)?;
170
171    // this isn't an element, but a state
172    if i.chars().next().unwrap_or('a').is_lowercase() {
173        return Err(nom::Err::Error(
174            NomError::new(orig_i, NomErrorKind::Verify).into(),
175        ));
176    }
177
178    // keep track of if we're in brackets
179    let (i, deep) = if b == '(' {
180        // should be good considering we peeked it earlier
181        let (i, _) = anychar::<_, Error<&str>>(i).unwrap();
182        (i, true)
183    } else {
184        (i, false)
185    };
186
187    // get list of elements
188    let (i, mut elements) = context("elements in compound", many1(parse_element))(i)?;
189
190    // see if there's a second bracket, and if there is, get the coefficient
191    //
192    // also, check if we've run out of input, and if so, return what we've got so far
193    let (i, b) = match peek(anychar::<_, NomError<&str>>)(i) {
194        Ok((i, b)) => (i, b),
195        Err(e) => match e {
196            nom::Err::Error(e) if e.code == NomErrorKind::Eof => return Ok((i, elements)),
197            nom::Err::Error(e) => return Err(nom::Err::Error(e.into())),
198            nom::Err::Failure(e) => return Err(nom::Err::Failure(e.into())),
199            // not using streaming parsers
200            nom::Err::Incomplete(_) => unreachable!(),
201        },
202    };
203
204    let (i, coef) = if b == ')' && deep {
205        // same logic as above with peeking
206        let (i, _) = anychar::<_, Error<&str>>(i).unwrap();
207        let opt_num = opt(digit0);
208        context(
209            "coefficient for brackets",
210            map_opt(opt_num, |s: Option<&str>| s.map(str::parse::<usize>)),
211        )(i)?
212    } else {
213        (i, Ok(1))
214    };
215
216    // multiply each element's count by the coefficient
217    for el in &mut elements {
218        el.count *= coef.as_ref().unwrap_or(&1);
219    }
220
221    Ok((i, elements))
222}
223
224/// Parse a compound and an optional "+"
225fn compound_and_plus(i: Input) -> Result<Compound> {
226    terminated(
227        context("compound", parse_compound),
228        take_while(|c: char| c.is_whitespace() || c == '+'),
229    )(i)
230}