1use super::core::Lexer;
20use super::core::Token;
21use super::core::TokenId;
22use super::core::WordContext;
23use super::core::WordLexer;
24use super::core::is_blank;
25use super::op::is_operator_char;
26use crate::parser::core::Result;
27use crate::syntax::MaybeLiteral;
28use crate::syntax::TextUnit;
29use crate::syntax::Word;
30use crate::syntax::WordUnit;
31
32pub fn is_token_delimiter_char(c: char) -> bool {
36 is_operator_char(c) || is_blank(c)
37}
38
39impl Lexer<'_> {
40 async fn token_id(&mut self, word: &Word) -> Result<TokenId> {
45 if word.units.is_empty() {
46 return Ok(TokenId::EndOfInput);
47 }
48
49 if let Some(literal) = word.to_string_if_literal() {
50 if let Ok(keyword) = literal.parse() {
52 return Ok(TokenId::Token(Some(keyword)));
53 }
54
55 if literal.chars().all(|c| c.is_ascii_digit())
57 && matches!(self.peek_char().await?, Some('<' | '>'))
58 {
59 return Ok(TokenId::IoNumber);
60 }
61 }
62
63 if word.units.first() == Some(&WordUnit::Unquoted(TextUnit::Literal('{'))) {
65 let braced = match word.units.last() {
66 Some(WordUnit::Unquoted(TextUnit::Literal('}'))) => word.units.len() >= 3,
67 Some(WordUnit::Unquoted(TextUnit::Backslashed('}'))) => true,
68 Some(WordUnit::Unquoted(TextUnit::BracedParam(_))) => true,
69 _ => false,
70 };
71 if braced && matches!(self.peek_char().await?, Some('<' | '>')) {
72 return Ok(TokenId::IoLocation);
73 }
74 }
75
76 Ok(TokenId::Token(None))
77 }
78
79 pub async fn token(&mut self) -> Result<Token> {
84 if let Some(op) = self.operator().await? {
85 return Ok(op);
86 }
87
88 let index = self.index();
89
90 let mut word_lexer = WordLexer {
91 lexer: self,
92 context: WordContext::Word,
93 };
94 let mut word = word_lexer.word(is_token_delimiter_char).await?;
95 word.parse_tilde_front();
96
97 let id = self.token_id(&word).await?;
98
99 Ok(Token { word, id, index })
100 }
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::Source;
    use futures_util::FutureExt as _;

    /// Lexes one token, panicking if the future is not immediately ready or if
    /// the lexer returns an error.
    #[track_caller]
    fn next_token(lexer: &mut Lexer<'_>) -> Token {
        lexer.token().now_or_never().unwrap().unwrap()
    }

    /// Asserts that the next character the lexer would read is `expected`.
    #[track_caller]
    fn assert_next_char(lexer: &mut Lexer<'_>, expected: char) {
        assert_eq!(
            lexer.peek_char().now_or_never().unwrap(),
            Ok(Some(expected))
        );
    }

    /// Asserts the location of the token's word: the whole source code string,
    /// a start line number of 1, an unknown source, and the given range.
    #[track_caller]
    fn assert_word_location(token: &Token, code: &str, range: std::ops::Range<usize>) {
        let location = &token.word.location;
        assert_eq!(*location.code.value.borrow(), code);
        assert_eq!(location.code.start_line_number.get(), 1);
        assert_eq!(*location.code.source, Source::Unknown);
        assert_eq!(location.range, range);
    }

    /// Builds the word units for a run of unquoted literal characters.
    fn literal_units(text: &str) -> Vec<WordUnit> {
        text.chars()
            .map(|c| WordUnit::Unquoted(TextUnit::Literal(c)))
            .collect()
    }

    #[test]
    fn lexer_token_empty() {
        let mut lexer = Lexer::with_code("");

        let token = next_token(&mut lexer);
        assert_word_location(&token, "", 0..0);
        assert_eq!(token.id, TokenId::EndOfInput);
        assert_eq!(token.index, 0);
    }

    #[test]
    fn lexer_token_non_empty() {
        let mut lexer = Lexer::with_code("abc ");

        let token = next_token(&mut lexer);
        assert_eq!(token.word.units[..], literal_units("abc")[..]);
        assert_word_location(&token, "abc ", 0..3);
        assert_eq!(token.id, TokenId::Token(None));
        assert_eq!(token.index, 0);

        assert_next_char(&mut lexer, ' ');
    }

    #[test]
    fn lexer_token_tilde() {
        let mut lexer = Lexer::with_code("~a:~");

        let token = next_token(&mut lexer);
        let expected = [WordUnit::Tilde {
            name: "a:~".to_string(),
            followed_by_slash: false,
        }];
        assert_eq!(token.word.units, expected);
    }

    #[test]
    fn lexer_token_io_number_delimited_by_less() {
        let mut lexer = Lexer::with_code("12<");

        let token = next_token(&mut lexer);
        assert_eq!(token.word.units[..], literal_units("12")[..]);
        assert_word_location(&token, "12<", 0..2);
        assert_eq!(token.id, TokenId::IoNumber);
        assert_eq!(token.index, 0);

        assert_next_char(&mut lexer, '<');
    }

    #[test]
    fn lexer_token_io_number_delimited_by_greater() {
        let mut lexer = Lexer::with_code("0>>");

        let token = next_token(&mut lexer);
        assert_eq!(token.word.units[..], literal_units("0")[..]);
        assert_word_location(&token, "0>>", 0..1);
        assert_eq!(token.id, TokenId::IoNumber);
        assert_eq!(token.index, 0);

        // The lexer must be positioned right after the digit.
        let next_location = lexer.location().now_or_never().unwrap().unwrap();
        assert_eq!(next_location.range, 1..2);
    }

    #[test]
    fn lexer_token_digit_not_followed_by_less_or_greater() {
        let mut lexer = Lexer::with_code("12;");

        let token = next_token(&mut lexer);
        assert_eq!(token.word.units[..], literal_units("12")[..]);
        assert_word_location(&token, "12;", 0..2);
        assert_eq!(token.id, TokenId::Token(None));
        assert_eq!(token.index, 0);

        assert_next_char(&mut lexer, ';');
    }

    #[test]
    fn lexer_token_io_location_delimited_by_less() {
        let mut lexer = Lexer::with_code("{n}<");

        let token = next_token(&mut lexer);
        assert_eq!(token.word.units[..], literal_units("{n}")[..]);
        assert_word_location(&token, "{n}<", 0..3);
        assert_eq!(token.id, TokenId::IoLocation);
        assert_eq!(token.index, 0);

        assert_next_char(&mut lexer, '<');
    }

    #[test]
    fn lexer_token_io_location_delimited_by_greater() {
        let mut lexer = Lexer::with_code("{n}>");

        let token = next_token(&mut lexer);
        assert_eq!(token.id, TokenId::IoLocation);

        assert_next_char(&mut lexer, '>');
    }

    #[test]
    fn lexer_token_io_location_ending_with_backslashed_brace() {
        let mut lexer = Lexer::with_code(r"{\}<");

        let token = next_token(&mut lexer);
        assert_eq!(token.id, TokenId::IoLocation);

        assert_next_char(&mut lexer, '<');
    }

    #[test]
    fn lexer_token_io_location_ending_with_braced_parameter() {
        let mut lexer = Lexer::with_code("{${n}<");

        let token = next_token(&mut lexer);
        assert_eq!(token.id, TokenId::IoLocation);

        assert_next_char(&mut lexer, '<');
    }

    #[test]
    fn lexer_token_empty_braces_followed_by_less() {
        let mut lexer = Lexer::with_code("{}<");

        let token = next_token(&mut lexer);
        assert_eq!(token.id, TokenId::Token(None));

        assert_next_char(&mut lexer, '<');
    }

    #[test]
    fn lexer_token_braced_word_not_followed_by_less_or_greater() {
        let mut lexer = Lexer::with_code("{n};");

        let token = next_token(&mut lexer);
        assert_eq!(token.id, TokenId::Token(None));

        assert_next_char(&mut lexer, ';');
    }

    #[test]
    fn lexer_token_after_blank() {
        let mut lexer = Lexer::with_code(" a ");

        // First token: the word after the leading blank.
        lexer.skip_blanks().now_or_never().unwrap().unwrap();
        let word_token = next_token(&mut lexer);
        assert_word_location(&word_token, " a ", 1..2);
        assert_eq!(word_token.id, TokenId::Token(None));
        assert_eq!(word_token.index, 1);

        // Second token: the end of input after the trailing blanks.
        lexer.skip_blanks().now_or_never().unwrap().unwrap();
        let end_token = next_token(&mut lexer);
        assert_word_location(&end_token, " a ", 4..4);
        assert_eq!(end_token.id, TokenId::EndOfInput);
        assert_eq!(end_token.index, 4);
    }
}