// chem_parse/lexer.rs

1use crate::token_types::{ParenType, TokenMetadata, Tokens};
2use std::{
3    iter::{Enumerate, Iterator, Peekable},
4    str::Chars,
5};
6
/// A lazily-evaluated token stream over a formula string.
///
/// Characters are consumed and turned into tokens only as the iterator
/// is advanced, so no work is done until a token is requested.
pub struct LazyTokenStream<'a> {
    // Peekable, index-tracking character iterator over the source text;
    // the `(usize, char)` pairs supply each token's character offset.
    string_iter: Peekable<Enumerate<Chars<'a>>>,
}
11
12impl<'a> LazyTokenStream<'a> {
13    /// Construct a new `LazyTokenStream` from the string
14    pub fn new(string: &'a String) -> Self {
15        Self {
16            string_iter: string.chars().enumerate().peekable(),
17        }
18    }
19}
20
21impl From<LazyTokenStream<'_>> for Result<Vec<Tokens>, String> {
22    fn from(other: LazyTokenStream<'_>) -> Self {
23        let mut vec = Vec::new();
24        for token in other {
25            if token.is_err() {
26                return Err(token.err().unwrap());
27            };
28            vec.push(token.unwrap());
29        }
30        return Ok(vec);
31    }
32}
33
34impl Iterator for LazyTokenStream<'_> {
35    type Item = Result<Tokens, String>;
36
37    fn next(&mut self) -> Option<Self::Item> {
38        match self.string_iter.next() {
39            // numbers
40            Some((loc, val @ '0'..='9')) => {
41                let mut temp = String::new();
42                temp.push(val);
43                loop {
44                    match self.string_iter.peek() {
45                        Some((_, inner_val @ '0'..='9')) => {
46                            temp.push(*inner_val);
47                            self.string_iter.next();
48                        }
49                        Some(_) | None => {
50                            break Some(
51                                temp.parse::<u16>()
52                                    .map_err(|e| format!("Could not parse number: {e}"))
53                                    .map(|value| Tokens::Number {
54                                        data: value,
55                                        meta: TokenMetadata::new(&temp, loc),
56                                    }),
57                            );
58                        }
59                    }
60                }
61            }
62
63            // parens
64            Some((loc, raw @ '(')) => Some(Ok(Tokens::Paren {
65                data: ParenType::OPEN,
66                meta: TokenMetadata::new(&raw.to_string(), loc),
67            })),
68            Some((loc, raw @ ')')) => Some(Ok(Tokens::Paren {
69                data: ParenType::CLOSE,
70                meta: TokenMetadata::new(&raw.to_string(), loc),
71            })),
72
73            // plus
74            Some((loc, raw @ '+')) => Some(Ok(Tokens::Plus {
75                meta: TokenMetadata::new(&raw.to_string(), loc),
76            })),
77
78            // yields
79            Some((loc, '-')) => match self.string_iter.peek() {
80                Some((_, '>')) => {
81                    self.string_iter.next();
82                    Some(Ok(Tokens::Yields {
83                        meta: TokenMetadata::new("->", loc),
84                    }))
85                }
86                Some(_) => Some(Err("Yield sign (->) unfinished".to_owned())),
87                None => None,
88            },
89
90            // elements
91            Some((loc, val @ 'A'..='Z')) => {
92                let mut temp = String::new();
93                temp.push(val);
94
95                if let Some((_, inner_val @ 'a'..='z')) = self.string_iter.peek() {
96                    temp.push(*inner_val);
97                    self.string_iter.next();
98                    if let Some((_, 'a'..='z')) = self.string_iter.peek() {
99                        // should not have 3 letter element names
100                        return Some(Err(
101                            "Formula should not have 3 letter element names".to_owned()
102                        ));
103                    };
104                }
105
106                Some(Ok(Tokens::Element {
107                    data: temp.clone(),
108                    meta: TokenMetadata::new(&temp, loc),
109                }))
110            }
111
112            Some((_, c)) => Some(Err(format!("Invalid Character: {}", c))),
113            None => None,
114        }
115    }
116}
117
#[cfg(test)]
mod tests {
    use super::*;

    /// A single two-letter element lexes to exactly one `Element` token.
    #[test]
    fn can_parse_simple_element() {
        let to_parse = &String::from("Fe");
        let stream = LazyTokenStream::new(to_parse);

        let res = Result::from(stream);

        assert!(res.is_ok(), "An error occurred while parsing");

        let exp = vec![Tokens::Element {
            data: "Fe".to_owned(),
            meta: TokenMetadata::new("Fe", 0),
        }];

        assert_eq!(exp, res.unwrap());
    }

    /// A coefficient, several elements, and a subscript all tokenize with
    /// the correct character offsets. (Typo fix: was "coeffiecient".)
    #[test]
    fn can_parse_compound_with_subscript_and_coefficient() {
        let to_parse = &String::from("2FeCO3");
        let stream = LazyTokenStream::new(to_parse);

        let res = Result::from(stream);

        assert!(res.is_ok(), "An error occurred while parsing");

        let exp = vec![
            Tokens::Number {
                data: 2,
                meta: TokenMetadata::new("2", 0),
            },
            Tokens::Element {
                data: "Fe".to_owned(),
                meta: TokenMetadata::new("Fe", 1),
            },
            Tokens::Element {
                data: "C".to_owned(),
                meta: TokenMetadata::new("C", 3),
            },
            Tokens::Element {
                data: "O".to_owned(),
                meta: TokenMetadata::new("O", 4),
            },
            Tokens::Number {
                data: 3,
                meta: TokenMetadata::new("3", 5),
            },
        ];

        assert_eq!(exp, res.unwrap());
    }

    /// A full equation — coefficients, plus signs, and a yields arrow —
    /// tokenizes end to end. (Typo fix: was "can_parse_forumula".)
    #[test]
    fn can_parse_formula() {
        let to_parse = &String::from("2Fe+Na2F3->2FeNa+F3");
        let stream = LazyTokenStream::new(to_parse);

        let res = Result::from(stream);

        assert!(
            res.is_ok(),
            "An error occurred while parsing: {}",
            res.err().unwrap()
        );

        let exp = vec![
            Tokens::Number {
                data: 2,
                meta: TokenMetadata::new("2", 0),
            },
            Tokens::Element {
                data: "Fe".to_owned(),
                meta: TokenMetadata::new("Fe", 1),
            },
            Tokens::Plus {
                meta: TokenMetadata::new("+", 3),
            },
            Tokens::Element {
                data: "Na".to_owned(),
                meta: TokenMetadata::new("Na", 4),
            },
            Tokens::Number {
                data: 2,
                meta: TokenMetadata::new("2", 6),
            },
            Tokens::Element {
                data: "F".to_owned(),
                meta: TokenMetadata::new("F", 7),
            },
            Tokens::Number {
                data: 3,
                meta: TokenMetadata::new("3", 8),
            },
            Tokens::Yields {
                meta: TokenMetadata::new("->", 9),
            },
            Tokens::Number {
                data: 2,
                meta: TokenMetadata::new("2", 11),
            },
            Tokens::Element {
                data: "Fe".to_owned(),
                meta: TokenMetadata::new("Fe", 12),
            },
            Tokens::Element {
                data: "Na".to_owned(),
                meta: TokenMetadata::new("Na", 14),
            },
            Tokens::Plus {
                meta: TokenMetadata::new("+", 16),
            },
            Tokens::Element {
                data: "F".to_owned(),
                meta: TokenMetadata::new("F", 17),
            },
            Tokens::Number {
                data: 3,
                // Offset fix: in "2Fe+Na2F3->2FeNa+F3" the final '3' sits
                // at index 18 ('F' is at 17); the expectation previously
                // said 17. NOTE(review): if this test passed before, then
                // TokenMetadata's PartialEq likely ignores the offset —
                // confirm against token_types.rs.
                meta: TokenMetadata::new("3", 18),
            },
        ];

        assert_eq!(exp, res.unwrap());
    }
}