tc/lex.rs

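//! Lexer: turns a stream of characters into [`Token`]s (numbers, symbols,
//! operators, newlines, whitespace runs, and comments).
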
use std::{fmt::Display, iter::Filter, num::ParseFloatError};

use crate::input::{Cursor, HasSpan, Pos, Span};

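/// An error produced while tokenizing: an unexpected character, or a number
/// literal that failed to parse.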
#[derive(Debug, Clone)]
pub enum Error {
    InvalidChar(Span, char),
    InvalidNum(Span, String, ParseFloatError),
}

impl Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::InvalidChar(_, c) => {
                write!(f, "Invalid character: {c}")
            }
            Error::InvalidNum(_, s, err) => {
                write!(f, "Invalid number: {s}: {err}")
            }
        }
    }
}

impl HasSpan for Error {
    fn span(&self) -> Span {
        match self {
            Error::InvalidChar(span, _) => *span,
            Error::InvalidNum(span, _, _) => *span,
        }
    }
}

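/// A `Result` specialized to lexer errors.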
pub type Result<T> = std::result::Result<T, Error>;

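/// A lexed token: its kind together with the input span it covers.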
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    pub kind: TokenKind,
    pub span: Span,
}

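/// The kinds of tokens the lexer can produce. `Space` and `Comment` are
/// "out-of-band"; see [`Tokenizer::in_band`].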
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    Num(f64),
    Symbol(String),
    OpenPar,
    ClosePar,
    Equal,
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Hat,
    Comma,
    NewLine,
    Space,
    Comment(String),
}

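/// Creates a [`Tokenizer`] over any source of characters.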
pub fn tokenize<I>(chars: I) -> Tokenizer<I::IntoIter>
where
    I: IntoIterator<Item = char>,
{
    Tokenizer::new(Cursor::new(chars.into_iter()))
}

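/// An iterator adapter that lexes a character stream into [`Token`]s.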
#[derive(Debug, Clone)]
pub struct Tokenizer<I> {
    cursor: Cursor<I>,
}

impl<I> Tokenizer<I> {
    pub fn new(cursor: Cursor<I>) -> Tokenizer<I> {
        Tokenizer { cursor }
    }
}

impl<I> Tokenizer<I>
where
    I: Iterator<Item = char> + Clone,
{
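    /// Filters out the tokens a parser typically ignores: whitespace runs
    /// and comments. Errors and all other tokens pass through unchanged.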
    pub fn in_band(self) -> Filter<Tokenizer<I>, fn(&Result<Token>) -> bool> {
        self.filter(|tok| {
            !matches!(
                tok,
                Ok(Token {
                    kind: TokenKind::Space | TokenKind::Comment(..),
                    ..
                })
            )
        })
    }

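    /// Consumes the remaining digits and `.`s of a number literal, given its
    /// first character, and parses the accumulated text as an `f64`.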
    fn parse_num(&mut self, pos: Pos, first: char) -> Result<f64> {
        let mut s = String::from(first);
        loop {
            let c = self.cursor.first();
            match c {
                Some(c) if c.is_ascii_digit() || c == '.' => {
                    self.cursor.next();
                    s.push(c)
                }
                _ => break,
            }
        }
        match s.parse::<f64>() {
            Ok(n) => Ok(n),
            Err(err) => Err(Error::InvalidNum((pos, pos + s.len() as u32), s, err)),
        }
    }

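    /// Reads the next token kind from the cursor, returning `Ok(None)` at
    /// end of input. `pos` is the position of the token's first character.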
    fn next_token_kind(&mut self, pos: Pos) -> Result<Option<TokenKind>> {
        let c = match self.cursor.next() {
            None => return Ok(None),
            Some(c) => c,
        };

        let kind = match c {
            '(' => TokenKind::OpenPar,
            ')' => TokenKind::ClosePar,
            '=' => TokenKind::Equal,
            '+' => TokenKind::Plus,
            '-' => TokenKind::Minus,
            '*' => TokenKind::Star,
            '/' => TokenKind::Slash,
            '%' => TokenKind::Percent,
            '^' => TokenKind::Hat,
            ',' => TokenKind::Comma,
            '\n' => TokenKind::NewLine,
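            // `#` starts a comment that runs to the end of the line.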
            '#' => {
                let mut s = String::new();
                loop {
                    let c = self.cursor.first();
                    match c {
                        Some(c) if c != '\n' => {
                            self.cursor.next();
                            s.push(c)
                        }
                        _ => break,
                    }
                }
                TokenKind::Comment(s)
            }
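            // A digit or `.` starts a number literal; `parse_num` consumes
            // the rest.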
            '0'..='9' | '.' => {
                let num = self.parse_num(pos, c)?;
                TokenKind::Num(num)
            }
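            // A letter or `_` starts a symbol made of alphanumerics and `_`.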
            'a'..='z' | 'A'..='Z' | '_' => {
                let mut sym = String::new();
                sym.push(c);
                loop {
                    let c = self.cursor.first();
                    match c {
                        Some(c @ ('0'..='9' | 'a'..='z' | 'A'..='Z' | '_')) => {
                            self.cursor.next();
                            sym.push(c)
                        }
                        _ => break,
                    }
                }
                TokenKind::Symbol(sym)
            }
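            // Other ASCII whitespace collapses into a single Space token;
            // the run stops before '\n' so newlines keep their own token.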
            c if c.is_ascii_whitespace() => {
                loop {
                    let c = self.cursor.first();
                    match c {
                        Some(c) if c.is_ascii_whitespace() && c != '\n' => {
                            self.cursor.next();
                        }
                        _ => break,
                    }
                }
                TokenKind::Space
            }
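            // Any other character cannot start a token.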
            _ => return Err(Error::InvalidChar((pos, pos + 1), c)),
        };
        Ok(Some(kind))
    }
}

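// Lexing errors are yielded in-band as `Err` items; iteration stops only at
// end of input.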
impl<I> Iterator for Tokenizer<I>
where
    I: Iterator<Item = char> + Clone,
{
    type Item = Result<Token>;

    fn next(&mut self) -> Option<Result<Token>> {
        let pos = self.cursor.pos();
        let kind = match self.next_token_kind(pos) {
            Ok(Some(kind)) => kind,
            Ok(None) => return None,
            Err(err) => return Some(Err(err)),
        };
        let end = self.cursor.pos();
        Some(Ok(Token {
            kind,
            span: (pos, end),
        }))
    }
}

#[test]
fn test_tokenize() {
    let tokens: Vec<_> = tokenize("1 + 2 # a comment".chars())
        .map(Result::unwrap)
        .collect();
    assert_eq!(
        tokens,
        vec![
            Token {
                span: (0, 1),
                kind: TokenKind::Num(1.0),
            },
            Token {
                span: (1, 2),
                kind: TokenKind::Space,
            },
            Token {
                span: (2, 3),
                kind: TokenKind::Plus,
            },
            Token {
                span: (3, 4),
                kind: TokenKind::Space,
            },
            Token {
                span: (4, 5),
                kind: TokenKind::Num(2.0),
            },
            Token {
                span: (5, 6),
                kind: TokenKind::Space,
            },
            Token {
                span: (6, 17),
                kind: TokenKind::Comment(" a comment".to_string()),
            },
        ]
    );
}

#[test]
fn test_tokenize_in_band() {
    let tokens: Vec<_> = tokenize("1 + 2 # a comment".chars())
        .in_band()
        .map(Result::unwrap)
        .collect();
    assert_eq!(
        tokens,
        vec![
            Token {
                span: (0, 1),
                kind: TokenKind::Num(1.0),
            },
            Token {
                span: (2, 3),
                kind: TokenKind::Plus,
            },
            Token {
                span: (4, 5),
                kind: TokenKind::Num(2.0),
            },
        ]
    );
}

#[test]
fn test_tokenize_sin_pi() {
    let tokens: Vec<_> = tokenize("sin(pi)".chars()).map(Result::unwrap).collect();
    assert_eq!(
        tokens,
        vec![
            Token {
                span: (0, 3),
                kind: TokenKind::Symbol("sin".to_string()),
            },
            Token {
                span: (3, 4),
                kind: TokenKind::OpenPar,
            },
            Token {
                span: (4, 6),
                kind: TokenKind::Symbol("pi".to_string()),
            },
            Token {
                span: (6, 7),
                kind: TokenKind::ClosePar,
            },
        ]
    );
}