1use super::span::Span;
2
/// Classification of a lexical token produced by [`Token::parse_tokens`].
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum TokenType {
    /// An opening delimiter: `(` or `[`.
    OpenParen,
    /// A closing delimiter: `)` or `]`.
    CloseParen,
    /// A double-quoted string literal, including both quote characters.
    String,
    /// A string literal whose closing `"` was never found; it runs to the end of the source.
    UnterminatedString,
    /// A line comment that starts at `;`.
    Comment,
    /// Any other run of characters, delimited by whitespace or one of `(`, `)`, `[`, `]`.
    Other,
}
19
/// A single lexical token: its classification plus its location in the source text.
///
/// A token stores no text of its own; pass the source it was parsed from to
/// [`Token::as_str`] to recover the text.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct Token {
    /// What kind of token this is.
    pub token_type: TokenType,
    /// Where the token sits in the source, built from byte offsets into that source.
    pub span: Span,
}
28
29impl Token {
30 pub fn as_str<'a>(&self, src: &'a str) -> &'a str {
32 self.span.with_src(src).as_str()
33 }
34
35 pub fn parse_tokens(input_source: &str) -> impl '_ + Iterator<Item = Token> {
37 let mut start = 0;
38 std::iter::from_fn(move || {
39 Token::parse_next(input_source, start).inspect(|t| start = t.span.end as usize)
40 })
41 }
42
43 #[cfg(test)]
46 pub fn parse_tokens_to_vec<'a>(input_src: &'a str) -> Vec<(TokenType, &'a str)> {
47 let tokens =
48 Token::parse_tokens(input_src).map(|token| (token.token_type, token.as_str(input_src)));
49 tokens.collect()
50 }
51
52 fn parse_next(src: &str, start: usize) -> Option<Token> {
55 let input_src = &src[start..].trim_start();
56 let start = src.len() - input_src.len();
57 match input_src.chars().next() {
58 None => return None,
59 Some(';') => {
60 return Some(Token {
61 token_type: TokenType::Comment,
62 span: Token::parse_comment(src, start),
63 })
64 }
65 Some('"') => return Some(Token::parse_next_string(src, start)),
66 Some('(') | Some('[') => {
67 return Some(Token {
68 token_type: TokenType::OpenParen,
69 span: Span::new(start as u32, start as u32 + 1),
70 })
71 }
72 Some(')') | Some(']') => {
73 return Some(Token {
74 token_type: TokenType::CloseParen,
75 span: Span::new(start as u32, start as u32 + 1),
76 })
77 }
78 _ => {}
79 }
80 for (idx, ch) in input_src.char_indices() {
81 let is_end = match ch {
82 '(' | ')' | '[' | ']' => true,
83 _ => ch.is_whitespace(),
84 };
85 if is_end {
86 return Some(Token {
87 token_type: TokenType::Other,
88 span: Span::new(start as u32, start as u32 + idx as u32),
89 });
90 }
91 }
92 Some(Token {
93 token_type: TokenType::Other,
94 span: Span::new(start as u32, src.len() as u32),
95 })
96 }
97
98 fn parse_comment(src: &str, start: usize) -> Span {
101 for (idx, ch) in (start + 1..src.len()).zip(src[start..].chars()) {
102 if ch == '\n' {
103 return Span::new(start as u32, idx as u32);
104 }
105 }
106 Span::new(start as u32, src.len() as u32)
107 }
108
109 fn parse_next_string(src: &str, start: usize) -> Token {
112 let input_src = &src[start..];
113 let mut is_escaped = false;
114 for (idx, ch) in input_src.char_indices() {
115 if idx == 0 {
116 debug_assert_eq!(ch, '"');
117 continue;
118 };
119 match ch {
120 '\\' => {
121 is_escaped = !is_escaped;
122 }
123 '"' => {
124 if !is_escaped {
125 return Token {
126 token_type: TokenType::String,
127 span: Span::new(start as u32, start as u32 + idx as u32 + 1),
128 };
129 }
130 is_escaped = false;
131 }
132 _ => {
133 is_escaped = false;
134 }
135 };
136 }
137 Token {
138 token_type: TokenType::UnterminatedString,
139 span: Span::new(start as u32, src.len() as u32),
140 }
141 }
142}
143
/// Tokenizer tests; each one checks the `(TokenType, text)` pairs produced for a small input.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_str_produces_empty_stream() {
        let actual = Token::parse_tokens_to_vec("");
        assert_eq!(actual, Vec::new());
    }

    #[test]
    fn whitespace_only_produces_empty_stream() {
        let actual = Token::parse_tokens_to_vec(" \n\t");
        assert_eq!(actual, Vec::new());
    }

    #[test]
    fn whitespace_separated_values_produce_item_for_each() {
        let src = "\t1 two\n3.0\n";
        let actual = Token::parse_tokens_to_vec(src);
        assert_eq!(
            actual,
            vec![
                (TokenType::Other, "1"),
                (TokenType::Other, "two"),
                (TokenType::Other, "3.0"),
            ]
        );
    }

    #[test]
    fn phrase_in_quotes_is_string() {
        let actual = Token::parse_tokens_to_vec("\"hello world!\"not-text");
        assert_eq!(
            actual,
            vec![
                (TokenType::String, "\"hello world!\""),
                (TokenType::Other, "not-text")
            ]
        );
    }

    #[test]
    fn backslash_quote_in_quote_escapes_quote_as_part_of_string() {
        // Outside a string, `\"` is just an `Other` token; inside one it does not close it.
        let actual = Token::parse_tokens_to_vec(r#" \" "\"quotes\"" "#);
        assert_eq!(
            actual,
            vec![
                (TokenType::Other, "\\\""),
                (TokenType::String, "\"\\\"quotes\\\"\"")
            ]
        );
    }

    #[test]
    fn unclosed_string_is_unterminated_string() {
        let actual = Token::parse_tokens_to_vec("\"I am not closed");
        assert_eq!(
            actual,
            vec![(TokenType::UnterminatedString, "\"I am not closed")]
        );
    }

    #[test]
    fn parenthesis_are_parsed_into_own_tokens() {
        let actual = Token::parse_tokens_to_vec("(left right)");
        assert_eq!(
            actual,
            vec![
                (TokenType::OpenParen, "("),
                (TokenType::Other, "left"),
                (TokenType::Other, "right"),
                (TokenType::CloseParen, ")")
            ]
        );
    }

    #[test]
    fn square_brackets_are_parsed_as_parens() {
        let actual = Token::parse_tokens_to_vec("[left right]");
        assert_eq!(
            actual,
            vec![
                (TokenType::OpenParen, "["),
                (TokenType::Other, "left"),
                (TokenType::Other, "right"),
                (TokenType::CloseParen, "]")
            ]
        );
    }

    #[test]
    fn semicolon_denotes_start_of_line_comment() {
        // A comment keeps its terminating newline; the last one has none to keep.
        let actual = Token::parse_tokens_to_vec("(code) ; comment\n;other comment");
        assert_eq!(
            actual,
            vec![
                (TokenType::OpenParen, "("),
                (TokenType::Other, "code"),
                (TokenType::CloseParen, ")"),
                (TokenType::Comment, "; comment\n"),
                (TokenType::Comment, ";other comment"),
            ]
        );
    }
}