1use crate::token::*;
2use std::io;
3use std::iter;
4use std::result;
5use std::string;
6use thiserror::Error;
7
8#[derive(Error, Debug, Clone, PartialEq)]
9pub enum Error {
10 #[error("io: {0}")]
11 IOError(String),
12 #[error("invalid identifier: {0}")]
13 UTF8Error(#[from] string::FromUtf8Error),
14 #[error("unexpected eof")]
15 UnexpectedEOF,
16 #[error("invalid {0}")]
17 InvalidToken(String),
18 #[error("unknown token: {0}")]
19 UnknownToken(u8),
20 #[error("invalid int: {0}")]
21 IntError(String),
22}
23
24impl From<&io::Error> for Error {
25 fn from(err: &io::Error) -> Error {
26 Error::IOError(format!("{}", err))
27 }
28}
29
/// Shorthand for results whose error type is this module's [`Error`].
type Result<T> = result::Result<T, Error>;
31
/// Tokenizer over the bytes of a buffered reader.
///
/// Tokens are obtained by iterating the scanner; each item is either a
/// `Token` or the `Error` that prevented one from being produced.
pub struct Scanner<R: io::BufRead> {
    /// Input bytes wrapped so that one byte can be inspected before it is consumed.
    bytes: iter::Peekable<io::Bytes<R>>,
}
35
/// Returns `true` when `b` cannot be part of an identifier or integer, i.e.
/// when it is anything other than an ASCII letter, an ASCII digit, or `_`.
fn is_delim(b: u8) -> bool {
    !matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
39
40impl<R: io::BufRead> Scanner<R> {
41 fn peek(&mut self) -> Option<Result<u8>> {
42 let peeked = self.bytes.peek()?;
43 let result = peeked.as_ref().map_err(Error::from).map(|ch| *ch);
44 Some(result)
45 }
46
47 fn advance(&mut self) -> Result<u8> {
48 let result = self.bytes.next().ok_or(Error::UnexpectedEOF)?;
49 result.map_err(|err| (&err).into())
50 }
51
52 fn advance_while(&mut self, f: impl Fn(u8) -> bool) -> Result<String> {
53 let mut buf = Vec::new();
54 while let Some(value) = self.peek() {
55 if !f(value?) {
56 break;
57 }
58 buf.push(self.advance().unwrap());
59 }
60 Ok(String::from_utf8(buf)?)
61 }
62
63 fn eat(&mut self, got: &[u8], want: impl ToString) -> Result<()> {
64 for ch in got {
65 if self.peek() != Some(Ok(*ch)) {
66 return Err(Error::InvalidToken(want.to_string()));
67 }
68 self.advance().unwrap();
69 }
70 Ok(())
71 }
72
73 fn eat_as(&mut self, s: &[u8], tok: Token) -> Result<Token> {
74 self.eat(s, &tok)?;
75 Ok(tok)
76 }
77
78 fn str(&mut self) -> Result<Token> {
79 self.eat(b"\"", "Str")?;
80 let text = self.advance_while(|b| b != b'"')?;
81 self.eat(b"\"", "Str")?;
82 Ok(Token::Str(text))
83 }
84
85 fn int(&mut self) -> Result<Token> {
86 let text = self.advance_while(|b| !is_delim(b))?;
87 let int = text.parse::<i64>().map_err(|_| Error::IntError(text))?;
88 Ok(Token::Int(int))
89 }
90
91 fn keyword_or_ident(&mut self) -> Result<Token> {
92 let text = self.advance_while(|b| !is_delim(b))?;
93 let tok = match text.as_str() {
94 "fn" => Token::Fn,
95 "let" => Token::Let,
96 _ => Token::Ident(text),
97 };
98 Ok(tok)
99 }
100
101 fn skip_whitespace(&mut self) {
102 while matches!(self.peek(), Some(Ok(b' ' | b'\n'))) {
103 self.advance().unwrap();
104 }
105 }
106}
107
108impl<R: io::BufRead> iter::Iterator for Scanner<R> {
109 type Item = result::Result<Token, Error>;
110
111 fn next(&mut self) -> Option<Self::Item> {
112 self.skip_whitespace();
113 let result = self.peek()?.and_then(|b| match b {
114 b'+' => self.eat_as(b"+", Token::Op(Op::Plus)),
115 b'=' => self.eat_as(b"=", Token::Eq),
116 b'(' => self.eat_as(b"(", Token::Lparen),
117 b')' => self.eat_as(b")", Token::Rparen),
118 b'{' => self.eat_as(b"{", Token::Lbrace),
119 b'}' => self.eat_as(b"}", Token::Rbrace),
120 b',' => self.eat_as(b",", Token::Comma),
121 b';' => self.eat_as(b";", Token::Semi),
122 b':' => self.eat_as(b":", Token::Colon),
123 b'"' => self.str(),
124 b if b.is_ascii_digit() => self.int(),
125 b if b.is_ascii_alphabetic() => self.keyword_or_ident(),
126 b => Err(Error::UnknownToken(b)),
127 });
128 Some(result)
129 }
130}
131
132pub fn scan<R: io::BufRead>(r: R) -> Scanner<R> {
133 Scanner { bytes: r.bytes().peekable() }
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `b` to completion, returning every token or the first error.
    fn scan_all(b: &[u8]) -> Result<Vec<Token>> {
        let mut toks = Vec::new();
        for tok in scan(b) {
            toks.push(tok?);
        }
        Ok(toks)
    }

    /// Shorthand for an identifier token named `name`.
    fn ident(name: &str) -> Token {
        Token::Ident(name.to_string())
    }

    /// Whitespace-only input produces no tokens.
    #[test]
    fn test_empty() {
        let input = b"
        ";
        let toks = scan_all(input).unwrap();
        assert_eq!(Vec::<Token>::new(), toks);
    }

    /// A small program exercising keywords, identifiers, punctuation,
    /// a string literal and integer literals.
    #[test]
    fn test() {
        let input = b"
        fn foo(bar: int, baz: str) {
            println(\"hello, world\", 27);
            {
                let foo: int = 7;
            }
        }
        ";
        let toks = scan_all(input).unwrap();
        let expected = vec![
            // fn foo(bar: int, baz: str) {
            Token::Fn,
            ident("foo"),
            Token::Lparen,
            ident("bar"),
            Token::Colon,
            ident("int"),
            Token::Comma,
            ident("baz"),
            Token::Colon,
            ident("str"),
            Token::Rparen,
            Token::Lbrace,
            // println("hello, world", 27);
            ident("println"),
            Token::Lparen,
            Token::Str("hello, world".to_string()),
            Token::Comma,
            Token::Int(27),
            Token::Rparen,
            Token::Semi,
            // { let foo: int = 7; }
            Token::Lbrace,
            Token::Let,
            ident("foo"),
            Token::Colon,
            ident("int"),
            Token::Eq,
            Token::Int(7),
            Token::Semi,
            Token::Rbrace,
            // }
            Token::Rbrace,
        ];
        assert_eq!(expected, toks);
    }
}