// yash_syntax/parser/lex/token.rs

use super::core::Lexer;
use super::core::Token;
use super::core::TokenId;
use super::core::WordContext;
use super::core::WordLexer;
use super::core::is_blank;
use super::op::is_operator_char;
use crate::parser::core::Result;
use crate::syntax::MaybeLiteral;
use crate::syntax::Word;

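/// Tests whether the given character delimits a token.
///
/// A token is delimited by a blank or an operator character.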
pub fn is_token_delimiter_char(c: char) -> bool {
    is_operator_char(c) || is_blank(c)
}

impl Lexer<'_> {
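    /// Determines the token ID for a word that has just been parsed.
    ///
    /// Returns `EndOfInput` for an empty word. If the word is a literal that
    /// parses as a keyword, the result is `Token(Some(keyword))`. If the word
    /// consists only of ASCII digits and the next (unconsumed) character is
    /// `<` or `>`, the result is `IoNumber`. Any other word yields
    /// `Token(None)`.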
    async fn token_id(&mut self, word: &Word) -> Result<TokenId> {
        if word.units.is_empty() {
            return Ok(TokenId::EndOfInput);
        }

        if let Some(literal) = word.to_string_if_literal() {
            if let Ok(keyword) = literal.parse() {
                return Ok(TokenId::Token(Some(keyword)));
            }

            if literal.chars().all(|c| c.is_ascii_digit()) {
                if let Some(next) = self.peek_char().await? {
                    if next == '<' || next == '>' {
                        return Ok(TokenId::IoNumber);
                    }
                }
            }
        }

        Ok(TokenId::Token(None))
    }

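    /// Parses a token.
    ///
    /// An operator token is tried first. Otherwise, a word is parsed up to
    /// the next token delimiter, a tilde at the start of the word is
    /// recognized, and the token ID is determined by `token_id`. If there is
    /// no next character, the result is a token with an empty word and
    /// `TokenId::EndOfInput`. Leading blanks are not skipped by this
    /// function; the tests below call `skip_blanks` before `token`.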
    pub async fn token(&mut self) -> Result<Token> {
        if let Some(op) = self.operator().await? {
            return Ok(op);
        }

        let index = self.index();

        let mut word_lexer = WordLexer {
            lexer: self,
            context: WordContext::Word,
        };
        let mut word = word_lexer.word(is_token_delimiter_char).await?;
        word.parse_tilde_front();

        let id = self.token_id(&word).await?;

        Ok(Token { word, id, index })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::Source;
    use crate::syntax::TextUnit;
    use crate::syntax::WordUnit;
    use futures_util::FutureExt;

    #[test]
    fn lexer_token_empty() {
        let mut lexer = Lexer::with_code("");

        let t = lexer.token().now_or_never().unwrap().unwrap();
        assert_eq!(*t.word.location.code.value.borrow(), "");
        assert_eq!(t.word.location.code.start_line_number.get(), 1);
        assert_eq!(*t.word.location.code.source, Source::Unknown);
        assert_eq!(t.word.location.range, 0..0);
        assert_eq!(t.id, TokenId::EndOfInput);
        assert_eq!(t.index, 0);
    }

    #[test]
    fn lexer_token_non_empty() {
        let mut lexer = Lexer::with_code("abc ");

        let t = lexer.token().now_or_never().unwrap().unwrap();
        assert_eq!(t.word.units.len(), 3);
        assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('a')));
        assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('b')));
        assert_eq!(t.word.units[2], WordUnit::Unquoted(TextUnit::Literal('c')));
        assert_eq!(*t.word.location.code.value.borrow(), "abc ");
        assert_eq!(t.word.location.code.start_line_number.get(), 1);
        assert_eq!(*t.word.location.code.source, Source::Unknown);
        assert_eq!(t.word.location.range, 0..3);
        assert_eq!(t.id, TokenId::Token(None));
        assert_eq!(t.index, 0);

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some(' ')));
    }

    #[test]
    fn lexer_token_tilde() {
        let mut lexer = Lexer::with_code("~a:~");

        let t = lexer.token().now_or_never().unwrap().unwrap();
        assert_eq!(t.word.units, [WordUnit::Tilde("a:~".to_string())]);
    }

    #[test]
    fn lexer_token_io_number_delimited_by_less() {
        let mut lexer = Lexer::with_code("12<");

        let t = lexer.token().now_or_never().unwrap().unwrap();
        assert_eq!(t.word.units.len(), 2);
        assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('1')));
        assert_eq!(t.word.units[1], WordUnit::Unquoted(TextUnit::Literal('2')));
        assert_eq!(*t.word.location.code.value.borrow(), "12<");
        assert_eq!(t.word.location.code.start_line_number.get(), 1);
        assert_eq!(*t.word.location.code.source, Source::Unknown);
        assert_eq!(t.word.location.range, 0..2);
        assert_eq!(t.id, TokenId::IoNumber);
        assert_eq!(t.index, 0);

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('<')));
    }

    #[test]
    fn lexer_token_io_number_delimited_by_greater() {
        let mut lexer = Lexer::with_code("0>>");

        let t = lexer.token().now_or_never().unwrap().unwrap();
        assert_eq!(t.word.units.len(), 1);
        assert_eq!(t.word.units[0], WordUnit::Unquoted(TextUnit::Literal('0')));
        assert_eq!(*t.word.location.code.value.borrow(), "0>>");
        assert_eq!(t.word.location.code.start_line_number.get(), 1);
        assert_eq!(*t.word.location.code.source, Source::Unknown);
        assert_eq!(t.word.location.range, 0..1);
        assert_eq!(t.id, TokenId::IoNumber);
        assert_eq!(t.index, 0);

        assert_eq!(
            lexer.location().now_or_never().unwrap().unwrap().range,
            1..2
        );
    }

    #[test]
    fn lexer_token_after_blank() {
        let mut lexer = Lexer::with_code(" a  ");

        lexer.skip_blanks().now_or_never().unwrap().unwrap();
        let t = lexer.token().now_or_never().unwrap().unwrap();
        assert_eq!(*t.word.location.code.value.borrow(), " a  ");
        assert_eq!(t.word.location.code.start_line_number.get(), 1);
        assert_eq!(*t.word.location.code.source, Source::Unknown);
        assert_eq!(t.word.location.range, 1..2);
        assert_eq!(t.id, TokenId::Token(None));
        assert_eq!(t.index, 1);

        lexer.skip_blanks().now_or_never().unwrap().unwrap();
        let t = lexer.token().now_or_never().unwrap().unwrap();
        assert_eq!(*t.word.location.code.value.borrow(), " a  ");
        assert_eq!(t.word.location.code.start_line_number.get(), 1);
        assert_eq!(*t.word.location.code.source, Source::Unknown);
        assert_eq!(t.word.location.range, 4..4);
        assert_eq!(t.id, TokenId::EndOfInput);
        assert_eq!(t.index, 4);
    }
}
197}