// shuck_parser/parser/lexer/tokens.rs
use super::*;
2
3impl<'a> Lexer<'a> {
4 pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
10 self.skip_whitespace();
11 let start = self.current_position();
12 let token = self.next_lexed_token_inner(false)?;
13 let end = self.current_position();
14 Some(token.with_span(Span::from_positions(start, end)))
15 }
16
17 pub(in crate::parser) fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
19 self.skip_whitespace();
20 let start = self.current_position();
21 let token = self.next_lexed_token_inner(true)?;
22 let end = self.current_position();
23 Some(token.with_span(Span::from_positions(start, end)))
24 }
25
26 pub(in crate::parser) fn next_lexed_token_inner(
28 &mut self,
29 preserve_comments: bool,
30 ) -> Option<LexedToken<'a>> {
31 let ch = self.peek_char()?;
32
33 match ch {
34 '\n' => {
35 self.consume_ascii_chars(1);
36 Some(LexedToken::punctuation(TokenKind::Newline))
37 }
38 ';' => {
39 if self.second_char() == Some(';') {
40 if self.third_char() == Some('&') {
41 self.consume_ascii_chars(3);
42 Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) } else {
44 self.consume_ascii_chars(2);
45 Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) }
47 } else if self.second_char() == Some('|') {
48 self.consume_ascii_chars(2);
49 Some(LexedToken::punctuation(TokenKind::SemiPipe)) } else if self.second_char() == Some('&') {
51 self.consume_ascii_chars(2);
52 Some(LexedToken::punctuation(TokenKind::SemiAmp)) } else {
54 self.consume_ascii_chars(1);
55 Some(LexedToken::punctuation(TokenKind::Semicolon))
56 }
57 }
58 '|' => {
59 if self.second_char() == Some('|') {
60 self.consume_ascii_chars(2);
61 Some(LexedToken::punctuation(TokenKind::Or))
62 } else if self.second_char() == Some('&') {
63 self.consume_ascii_chars(2);
64 Some(LexedToken::punctuation(TokenKind::PipeBoth))
65 } else {
66 self.consume_ascii_chars(1);
67 Some(LexedToken::punctuation(TokenKind::Pipe))
68 }
69 }
70 '&' => {
71 if self.second_char() == Some('&') {
72 self.consume_ascii_chars(2);
73 Some(LexedToken::punctuation(TokenKind::And))
74 } else if self.second_char() == Some('>') {
75 if self.third_char() == Some('>') {
76 self.consume_ascii_chars(3);
77 Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
78 } else {
79 self.consume_ascii_chars(2);
80 Some(LexedToken::punctuation(TokenKind::RedirectBoth))
81 }
82 } else if self.second_char() == Some('|') {
83 self.consume_ascii_chars(2);
84 Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
85 } else if self.second_char() == Some('!') {
86 self.consume_ascii_chars(2);
87 Some(LexedToken::punctuation(TokenKind::BackgroundBang))
88 } else {
89 self.consume_ascii_chars(1);
90 Some(LexedToken::punctuation(TokenKind::Background))
91 }
92 }
93 '>' => {
94 if self.second_char() == Some('>') {
95 if self.third_char() == Some('|') {
96 self.consume_ascii_chars(3);
97 } else {
98 self.consume_ascii_chars(2);
99 }
100 Some(LexedToken::punctuation(TokenKind::RedirectAppend))
101 } else if self.second_char() == Some('|') {
102 self.consume_ascii_chars(2);
103 Some(LexedToken::punctuation(TokenKind::Clobber))
104 } else if self.second_char() == Some('(') {
105 self.consume_ascii_chars(2);
106 Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
107 } else if self.second_char() == Some('&') {
108 self.consume_ascii_chars(2);
109 Some(LexedToken::punctuation(TokenKind::DupOutput))
110 } else {
111 self.consume_ascii_chars(1);
112 Some(LexedToken::punctuation(TokenKind::RedirectOut))
113 }
114 }
115 '<' => {
116 if self.second_char() == Some('<') {
117 if self.third_char() == Some('<') {
118 self.consume_ascii_chars(3);
119 Some(LexedToken::punctuation(TokenKind::HereString))
120 } else if self.third_char() == Some('-') {
121 self.consume_ascii_chars(3);
122 Some(LexedToken::punctuation(TokenKind::HereDocStrip))
123 } else {
124 self.consume_ascii_chars(2);
125 Some(LexedToken::punctuation(TokenKind::HereDoc))
126 }
127 } else if self.second_char() == Some('>') {
128 self.consume_ascii_chars(2);
129 Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
130 } else if self.second_char() == Some('(') {
131 self.consume_ascii_chars(2);
132 Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
133 } else if self.second_char() == Some('&') {
134 self.consume_ascii_chars(2);
135 Some(LexedToken::punctuation(TokenKind::DupInput))
136 } else {
137 self.consume_ascii_chars(1);
138 Some(LexedToken::punctuation(TokenKind::RedirectIn))
139 }
140 }
141 '(' => {
142 if self.second_char() == Some('(') {
143 self.consume_ascii_chars(2);
144 Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
145 } else {
146 self.consume_ascii_chars(1);
147 Some(LexedToken::punctuation(TokenKind::LeftParen))
148 }
149 }
150 ')' => {
151 if self.second_char() == Some(')') {
152 self.consume_ascii_chars(2);
153 Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
154 } else {
155 self.consume_ascii_chars(1);
156 Some(LexedToken::punctuation(TokenKind::RightParen))
157 }
158 }
159 '{' => {
160 let start = self.current_position();
161 if self.ignore_braces_enabled() {
162 self.consume_ascii_chars(1);
163 match self.peek_char() {
164 Some(' ') | Some('\t') | Some('\n') | None => {
165 Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
166 }
167 _ => self.read_word_starting_with("{", start),
168 }
169 } else if self.looks_like_brace_expansion() {
170 self.read_brace_expansion_word()
174 } else if self.is_brace_group_start() {
175 self.advance();
176 Some(LexedToken::punctuation(TokenKind::LeftBrace))
177 } else if self.brace_literal_starts_case_pattern_delimiter() {
178 self.read_word_starting_with("{", start)
179 } else {
180 self.read_brace_literal_word()
181 }
182 }
183 '}' => {
184 self.consume_ascii_chars(1);
185 if self.ignore_close_braces_enabled() {
186 Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
187 } else {
188 Some(LexedToken::punctuation(TokenKind::RightBrace))
189 }
190 }
191 '[' => {
192 let start = self.current_position();
193 self.consume_ascii_chars(1);
194 if self.peek_char() == Some('[')
195 && matches!(
196 self.second_char(),
197 Some(' ') | Some('\t') | Some('\n') | None
198 )
199 {
200 self.consume_ascii_chars(1);
201 Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
202 } else {
203 match self.peek_char() {
210 Some(' ') | Some('\t') | Some('\n') | None => {
211 Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
212 }
213 _ => self.read_word_starting_with("[", start),
214 }
215 }
216 }
217 ']' => {
218 if self.second_char() == Some(']') {
219 self.consume_ascii_chars(2);
220 Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
221 } else {
222 self.consume_ascii_chars(1);
223 Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
224 }
225 }
226 '\'' => self.read_single_quoted_string(),
227 '"' => self.read_double_quoted_string(),
228 '#' => {
229 if self.should_treat_hash_as_word_char() {
230 let start = self.current_position();
231 return self.read_word_starting_with("#", start);
232 }
233 if preserve_comments {
234 self.read_comment();
235 Some(LexedToken::comment())
236 } else {
237 self.skip_comment();
238 self.next_lexed_token_inner(false)
239 }
240 }
241 '0'..='9' => self.read_word_or_fd_redirect(),
243 _ => self.read_word(),
244 }
245 }
246
247 pub(in crate::parser) fn skip_whitespace(&mut self) {
248 while let Some(ch) = self.peek_char() {
249 if self.reinject_buf.is_empty() {
250 let whitespace_len = self.source_horizontal_whitespace_len();
251 if whitespace_len > 0 {
252 self.consume_source_bytes(whitespace_len);
253 continue;
254 }
255
256 if self.cursor.rest().starts_with("\\\n") {
257 self.consume_source_bytes(2);
258 continue;
259 }
260 }
261
262 if ch == ' ' || ch == '\t' {
263 self.consume_ascii_chars(1);
264 } else if ch == '\\' {
265 if self.second_char() == Some('\n') {
267 self.consume_ascii_chars(2);
268 } else {
269 break;
270 }
271 } else {
272 break;
273 }
274 }
275 }
276
277 pub(in crate::parser) fn skip_comment(&mut self) {
278 if self.reinject_buf.is_empty() {
279 let end = self
280 .cursor
281 .find_byte(b'\n')
282 .unwrap_or(self.cursor.rest().len());
283 self.consume_source_bytes(end);
284 return;
285 }
286
287 while let Some(ch) = self.peek_char() {
288 if ch == '\n' {
289 break;
290 }
291 self.advance();
292 }
293 }
294
295 pub(in crate::parser) fn read_comment(&mut self) {
296 debug_assert_eq!(self.peek_char(), Some('#'));
297
298 if self.reinject_buf.is_empty() {
299 let rest = self.cursor.rest();
300 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
301 self.consume_source_bytes(end);
302 return;
303 }
304
305 self.advance(); while let Some(ch) = self.peek_char() {
308 if ch == '\n' {
309 break;
310 }
311 self.advance();
312 }
313 }
314
315 pub(in crate::parser) fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
316 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
317 return false;
318 }
319
320 let line_start = self.input[..self.offset]
321 .rfind('\n')
322 .map_or(0, |index| index + 1);
323 let prefix = &self.input[line_start..self.offset];
324 line_has_unclosed_double_paren(prefix)
325 }
326}