1use std::{fmt::Display, iter::Filter, num::ParseFloatError};
2
3use crate::input::{Cursor, HasSpan, Pos, Span};
4
/// A lexical error, carrying the source [`Span`] where it occurred.
#[derive(Debug, Clone)]
pub enum Error {
    /// A character that cannot start any token.
    InvalidChar(Span, char),
    /// A numeric literal (the captured `String`) that failed `f64` parsing.
    InvalidNum(Span, String, ParseFloatError),
}
10
11impl Display for Error {
12 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
13 match self {
14 Error::InvalidChar(_, c) => {
15 write!(f, "Invalid character: {c}")
16 }
17 Error::InvalidNum(_, s, err) => {
18 write!(f, "Invalid number: {s}: {err}")
19 }
20 }
21 }
22}
23
24impl HasSpan for Error {
25 fn span(&self) -> Span {
26 match self {
27 Error::InvalidChar(span, _) => *span,
28 Error::InvalidNum(span, _, _) => *span,
29 }
30 }
31}
32
/// Shorthand for results whose error type is this lexer's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
34
/// A single lexeme: what was recognized plus where it sits in the input.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    /// The kind of lexeme recognized.
    pub kind: TokenKind,
    /// Start/end positions covered by this token in the source.
    pub span: Span,
}
40
/// The different kinds of lexemes produced by [`Tokenizer`].
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    /// A numeric literal (digits and `.`), parsed as `f64`.
    Num(f64),
    /// An identifier: ASCII letters, digits, and `_`, starting with a
    /// letter or `_` (e.g. `sin`, `pi`).
    Symbol(String),
    /// `(`
    OpenPar,
    /// `)`
    ClosePar,
    /// `=`
    Equal,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `^`
    Hat,
    /// `,`
    Comma,
    /// A `\n` that begins a token.
    NewLine,
    /// A run of ASCII whitespace.
    Space,
    /// `#` to end of line; the payload excludes the leading `#`.
    Comment(String),
}
59
60pub fn tokenize<I>(chars: I) -> Tokenizer<I::IntoIter>
61where
62 I: IntoIterator<Item = char>,
63{
64 Tokenizer::new(Cursor::new(chars.into_iter()))
65}
66
/// An [`Iterator`] yielding `Result<Token>`s read from a character [`Cursor`].
#[derive(Debug, Clone)]
pub struct Tokenizer<I> {
    /// Character source; also supplies the current input position for spans.
    cursor: Cursor<I>,
}
71
72impl<I> Tokenizer<I> {
73 pub fn new(cursor: Cursor<I>) -> Tokenizer<I> {
74 Tokenizer { cursor }
75 }
76}
77
78impl<I> Tokenizer<I>
79where
80 I: Iterator<Item = char> + Clone,
81{
82 pub fn in_band(self) -> Filter<Tokenizer<I>, fn(&Result<Token>) -> bool> {
83 self.filter(|tok| {
84 !matches!(
85 tok,
86 Ok(Token {
87 kind: TokenKind::Space | TokenKind::Comment(..),
88 ..
89 })
90 )
91 })
92 }
93
94 fn parse_num(&mut self, pos: Pos, first: char) -> Result<f64> {
95 let mut s = String::from(first);
96 loop {
97 let c = self.cursor.first();
98 match c {
99 Some(c) if c.is_ascii_digit() || c == '.' => {
100 self.cursor.next();
101 s.push(c)
102 }
103 _ => break,
104 }
105 }
106 match s.parse::<f64>() {
107 Ok(n) => Ok(n),
108 Err(err) => Err(Error::InvalidNum((pos, pos + s.len() as u32), s, err)),
109 }
110 }
111
112 fn next_token_kind(&mut self, pos: Pos) -> Result<Option<TokenKind>> {
113 let c = match self.cursor.next() {
114 None => return Ok(None),
115 Some(c) => c,
116 };
117
118 let kind = match c {
119 '(' => TokenKind::OpenPar,
120 ')' => TokenKind::ClosePar,
121 '=' => TokenKind::Equal,
122 '+' => TokenKind::Plus,
123 '-' => TokenKind::Minus,
124 '*' => TokenKind::Star,
125 '/' => TokenKind::Slash,
126 '%' => TokenKind::Percent,
127 '^' => TokenKind::Hat,
128 ',' => TokenKind::Comma,
129 '\n' => TokenKind::NewLine,
130 '#' => {
131 let mut s = String::new();
132 loop {
133 let c = self.cursor.first();
134 match c {
135 Some(c) if c != '\n' => {
136 self.cursor.next();
137 s.push(c)
138 }
139 _ => break,
140 }
141 }
142 TokenKind::Comment(s)
143 }
144 '0'..='9' | '.' => {
145 let num = self.parse_num(pos, c)?;
146 TokenKind::Num(num)
147 }
148 'a'..='z' | 'A'..='Z' | '_' => {
149 let mut sym = String::new();
150 sym.push(c);
151 loop {
152 let c = self.cursor.first();
153 match c {
154 Some(c @ ('0'..='9' | 'a'..='z' | 'A'..='Z' | '_')) => {
155 self.cursor.next();
156 sym.push(c)
157 }
158 _ => break,
159 }
160 }
161 TokenKind::Symbol(sym)
162 }
163 c if c.is_ascii_whitespace() => {
164 loop {
165 let c = self.cursor.first();
166 match c {
167 Some(c) if c.is_ascii_whitespace() => {
168 self.cursor.next();
169 }
170 _ => break,
171 }
172 }
173 TokenKind::Space
174 }
175 _ => return Err(Error::InvalidChar((pos, pos + 1), c)),
176 };
177 Ok(Some(kind))
178 }
179}
180
181impl<I> Iterator for Tokenizer<I>
182where
183 I: Iterator<Item = char> + Clone,
184{
185 type Item = Result<Token>;
186
187 fn next(&mut self) -> Option<Result<Token>> {
188 let pos = self.cursor.pos();
189 let kind = match self.next_token_kind(pos) {
190 Ok(Some(kind)) => kind,
191 Ok(None) => return None,
192 Err(err) => return Some(Err(err)),
193 };
194 let end = self.cursor.pos();
195 Some(Ok(Token {
196 kind,
197 span: (pos, end),
198 }))
199 }
200}
201
#[test]
fn test_tokenize() {
    // Lex a small expression and check every token, spans included.
    let tok = |span, kind| Token { span, kind };
    let actual: Vec<_> = tokenize("1 + 2 # a comment".chars())
        .map(Result::unwrap)
        .collect();
    let expected = vec![
        tok((0, 1), TokenKind::Num(1.0)),
        tok((1, 2), TokenKind::Space),
        tok((2, 3), TokenKind::Plus),
        tok((3, 4), TokenKind::Space),
        tok((4, 5), TokenKind::Num(2.0)),
        tok((5, 6), TokenKind::Space),
        tok((6, 17), TokenKind::Comment(" a comment".to_string())),
    ];
    assert_eq!(actual, expected);
}
241
#[test]
fn test_tokenize_in_band() {
    // Same input as `test_tokenize`, but `in_band` must drop the Space and
    // Comment tokens while leaving the rest (and their spans) untouched.
    let tok = |span, kind| Token { span, kind };
    let actual: Vec<_> = tokenize("1 + 2 # a comment".chars())
        .in_band()
        .map(Result::unwrap)
        .collect();
    let expected = vec![
        tok((0, 1), TokenKind::Num(1.0)),
        tok((2, 3), TokenKind::Plus),
        tok((4, 5), TokenKind::Num(2.0)),
    ];
    assert_eq!(actual, expected);
}
266
#[test]
fn test_tokenize_sin_pi() {
    // Symbols and parentheses: a function-call-shaped input.
    let tok = |span, kind| Token { span, kind };
    let actual: Vec<_> = tokenize("sin(pi)".chars()).map(Result::unwrap).collect();
    let expected = vec![
        tok((0, 3), TokenKind::Symbol("sin".to_string())),
        tok((3, 4), TokenKind::OpenPar),
        tok((4, 6), TokenKind::Symbol("pi".to_string())),
        tok((6, 7), TokenKind::ClosePar),
    ];
    assert_eq!(actual, expected);
}