1use super::core::Lexer;
20use super::core::WordContext;
21use super::core::WordLexer;
22use crate::parser::core::Result;
23use crate::parser::error::Error;
24use crate::parser::error::SyntaxError;
25use crate::source::Location;
26use crate::source::SourceChar;
27use crate::syntax::TextUnit;
28use crate::syntax::Word;
29use crate::syntax::WordUnit::{self, DollarSingleQuote, DoubleQuote, SingleQuote, Unquoted};
30
31impl Lexer<'_> {
32 async fn single_quote(&mut self, opening_location: Location) -> Result<WordUnit> {
41 let mut content = String::new();
42 let mut lexer = self.disable_line_continuation();
43 loop {
44 match lexer.consume_char_if(|_| true).await? {
45 Some(&SourceChar { value: '\'', .. }) => break,
46 Some(&SourceChar { value, .. }) => content.push(value),
47 None => {
48 let cause = SyntaxError::UnclosedSingleQuote { opening_location }.into();
49 let location = lexer.location().await?.clone();
50 return Err(Error { cause, location });
51 }
52 }
53 }
54 Lexer::enable_line_continuation(lexer);
55 Ok(SingleQuote(content))
56 }
57
58 async fn double_quote(&mut self, opening_location: Location) -> Result<WordUnit> {
67 fn is_delimiter(c: char) -> bool {
68 c == '"'
69 }
70 fn is_escapable(c: char) -> bool {
71 matches!(c, '$' | '`' | '"' | '\\')
72 }
73
74 let content = self.text(is_delimiter, is_escapable).await?;
75
76 if self.skip_if(|c| c == '"').await? {
77 Ok(DoubleQuote(content))
78 } else {
79 let cause = SyntaxError::UnclosedDoubleQuote { opening_location }.into();
80 let location = self.location().await?.clone();
81 Err(Error { cause, location })
82 }
83 }
84}
85
86impl WordLexer<'_, '_> {
87 pub async fn word_unit<F>(&mut self, is_delimiter: F) -> Result<Option<WordUnit>>
99 where
100 F: Fn(char) -> bool,
101 {
102 self.word_unit_dyn(&is_delimiter).await
103 }
104
105 async fn word_unit_dyn(
107 &mut self,
108 is_delimiter: &dyn Fn(char) -> bool,
109 ) -> Result<Option<WordUnit>> {
110 let allow_single_quote = match self.context {
111 WordContext::Word => true,
112 WordContext::Text => false,
113 };
114 let escape_all = |_| true;
115 let escape_some = |c| matches!(c, '$' | '"' | '`' | '\\') || is_delimiter(c);
116 let is_escapable: &dyn Fn(char) -> bool = match self.context {
117 WordContext::Word => &escape_all,
118 WordContext::Text => &escape_some,
119 };
120
121 match self.peek_char().await? {
122 Some('\'') if allow_single_quote => {
123 let location = self.location().await?.clone();
124 self.consume_char();
125 self.single_quote(location).await.map(Some)
126 }
127 Some('"') => {
128 let location = self.location().await?.clone();
129 self.consume_char();
130 self.double_quote(location).await.map(Some)
131 }
132 _ => {
133 let unit = self.text_unit(is_delimiter, is_escapable).await?;
134 if allow_single_quote && unit == Some(TextUnit::Literal('$')) {
135 if let Some(result) = self.single_quoted_escaped_string().await? {
136 return Ok(Some(DollarSingleQuote(result)));
137 }
138 }
140 Ok(unit.map(Unquoted))
141 }
142 }
143 }
144
145 pub async fn word<F>(&mut self, is_delimiter: F) -> Result<Word>
158 where
159 F: Fn(char) -> bool,
160 {
161 self.word_dyn(&is_delimiter).await
162 }
163
164 async fn word_dyn(&mut self, is_delimiter: &dyn Fn(char) -> bool) -> Result<Word> {
166 let start = self.index();
167 let mut units = vec![];
168 while let Some(unit) = self.word_unit_dyn(is_delimiter).await? {
169 units.push(unit)
170 }
171 let location = self.location_range(start..self.index());
172 Ok(Word { units, location })
173 }
174}
175
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::error::ErrorCause;
    use crate::parser::lex::WordContext;
    use crate::source::Source;
    use crate::syntax::EscapeUnit;
    use crate::syntax::EscapedString;
    use crate::syntax::Modifier;
    use crate::syntax::Text;
    use crate::syntax::TextUnit::{Backslashed, BracedParam, CommandSubst, Literal};
    use crate::syntax::WordUnit::{DollarSingleQuote, Tilde};
    use assert_matches::assert_matches;
    use futures_util::FutureExt;

    /// Delimiter predicate for inputs on which the lexer is expected never to
    /// consult `is_delimiter`; any call fails the test.
    fn never_delimiter(c: char) -> bool {
        unreachable!("unexpected call to is_delimiter({:?})", c)
    }

    /// Drives `word_unit` synchronously and returns its result, panicking if
    /// the future is not immediately ready.
    fn next_unit<F>(lexer: &mut WordLexer<'_, '_>, is_delimiter: F) -> Result<Option<WordUnit>>
    where
        F: Fn(char) -> bool,
    {
        lexer.word_unit(is_delimiter).now_or_never().unwrap()
    }

    #[test]
    fn lexer_word_unit_unquoted() {
        let mut base = Lexer::with_code("$()");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let unit = next_unit(&mut lexer, never_delimiter).unwrap().unwrap();
        assert_matches!(unit, Unquoted(CommandSubst { content, location }) => {
            assert_eq!(&*content, "");
            assert_eq!(location.range, 0..3);
        });

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_unquoted_escapes_in_word_context() {
        let mut base = Lexer::with_code(r#"\a\$\`\"\\\'\#\{\}"#);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        // In the word context every backslash escapes the next character.
        for escaped in ['a', '$', '`', '"', '\\', '\'', '#', '{', '}'] {
            let result = next_unit(&mut lexer, never_delimiter);
            assert_eq!(result, Ok(Some(Unquoted(Backslashed(escaped)))));
        }

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_unquoted_escapes_in_text_context() {
        let mut base = Lexer::with_code(r#"\a\$\`\"\\\'\#\{\}"#);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Text,
        };
        let is_delimiter = |c: char| c == '}';

        // In the text context only `$`, `` ` ``, `"`, `\` and the delimiter
        // are escapable; other backslashes are literal.
        let expected_units = [
            Literal('\\'),
            Literal('a'),
            Backslashed('$'),
            Backslashed('`'),
            Backslashed('"'),
            Backslashed('\\'),
            Literal('\\'),
            Literal('\''),
            Literal('\\'),
            Literal('#'),
            Literal('\\'),
            Literal('{'),
            Backslashed('}'),
        ];
        for expected in expected_units {
            let result = next_unit(&mut lexer, is_delimiter);
            assert_eq!(result, Ok(Some(Unquoted(expected))));
        }

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_orphan_dollar_is_literal() {
        let mut base = Lexer::with_code("$");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let unit = next_unit(&mut lexer, |c| {
            assert_eq!(c, '$', "unexpected call to is_delimiter({c:?})");
            false
        })
        .unwrap()
        .unwrap();
        assert_eq!(unit, Unquoted(Literal('$')));

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_single_quote_empty() {
        let mut base = Lexer::with_code("''");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let unit = next_unit(&mut lexer, never_delimiter).unwrap().unwrap();
        assert_matches!(unit, SingleQuote(content) => assert_eq!(content, ""));

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_single_quote_nonempty() {
        let mut base = Lexer::with_code("'abc\\\n$def\\'");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let unit = next_unit(&mut lexer, never_delimiter).unwrap().unwrap();
        // The backslash-newline pair must be kept verbatim inside the quotes.
        assert_matches!(unit, SingleQuote(content) => assert_eq!(content, "abc\\\n$def\\"));

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_single_quote_unclosed() {
        let code = "'abc\ndef\\";
        let mut base = Lexer::with_code(code);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let error = next_unit(&mut lexer, never_delimiter).unwrap_err();
        assert_matches!(error.cause,
            ErrorCause::Syntax(SyntaxError::UnclosedSingleQuote { opening_location }) => {
            assert_eq!(*opening_location.code.value.borrow(), code);
            assert_eq!(opening_location.code.start_line_number.get(), 1);
            assert_eq!(*opening_location.code.source, Source::Unknown);
            assert_eq!(opening_location.range, 0..1);
        });
        assert_eq!(*error.location.code.value.borrow(), code);
        assert_eq!(error.location.code.start_line_number.get(), 1);
        assert_eq!(*error.location.code.source, Source::Unknown);
        assert_eq!(error.location.range, 9..9);
    }

    #[test]
    fn lexer_word_unit_not_single_quote_in_text_context() {
        let mut base = Lexer::with_code("'");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Text,
        };

        let unit = next_unit(&mut lexer, |c| {
            assert_eq!(c, '\'', "unexpected call to is_delimiter({c:?})");
            false
        })
        .unwrap()
        .unwrap();
        assert_eq!(unit, Unquoted(Literal('\'')));

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_dollar_single_quote_empty() {
        let mut base = Lexer::with_code("$''");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let unit = next_unit(&mut lexer, |c| {
            assert_matches!(c, '$', "unexpected call to is_delimiter({c:?})");
            false
        })
        .unwrap()
        .unwrap();
        assert_matches!(unit, DollarSingleQuote(EscapedString(content)) => {
            assert_eq!(content, []);
        });
    }

    #[test]
    fn lexer_word_unit_dollar_single_quote_nonempty() {
        let mut base = Lexer::with_code(r"$'foo'");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let unit = next_unit(&mut lexer, |c| {
            assert_matches!(c, '$', "unexpected call to is_delimiter({c:?})");
            false
        })
        .unwrap()
        .unwrap();
        assert_matches!(unit, DollarSingleQuote(EscapedString(content)) => {
            assert_eq!(
                content,
                [
                    EscapeUnit::Literal('f'),
                    EscapeUnit::Literal('o'),
                    EscapeUnit::Literal('o'),
                ]
            );
        });

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_not_dollar_single_quote_in_text_context() {
        let mut base = Lexer::with_code("$''");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Text,
        };

        let unit = next_unit(&mut lexer, |c| {
            assert_matches!(c, '$', "unexpected call to is_delimiter({c:?})");
            false
        })
        .unwrap()
        .unwrap();
        assert_matches!(unit, Unquoted(Literal('$')));
    }

    #[test]
    fn lexer_word_unit_double_quote_empty() {
        let mut base = Lexer::with_code("\"\"");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let unit = next_unit(&mut lexer, never_delimiter).unwrap().unwrap();
        assert_matches!(unit, DoubleQuote(Text(content)) => assert_eq!(content, []));

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_double_quote_non_empty() {
        let mut base = Lexer::with_code("\"abc\"");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let unit = next_unit(&mut lexer, never_delimiter).unwrap().unwrap();
        assert_matches!(unit, DoubleQuote(Text(content)) => {
            assert_eq!(content, [Literal('a'), Literal('b'), Literal('c')]);
        });

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_double_quote_escapes() {
        let mut base = Lexer::with_code(r#""\a\$\`\"\\\'\#""#);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let unit = next_unit(&mut lexer, |c| match c {
            'a' | '\'' | '#' => true,
            _ => unreachable!("unexpected call to is_delimiter({:?})", c),
        })
        .unwrap()
        .unwrap();
        assert_matches!(unit, DoubleQuote(Text(ref units)) => {
            assert_eq!(
                units,
                &[
                    Literal('\\'),
                    Literal('a'),
                    Backslashed('$'),
                    Backslashed('`'),
                    Backslashed('"'),
                    Backslashed('\\'),
                    Literal('\\'),
                    Literal('\''),
                    Literal('\\'),
                    Literal('#'),
                ]
            );
        });

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_double_quote_unclosed() {
        let code = "\"abc\ndef";
        let mut base = Lexer::with_code(code);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let error = next_unit(&mut lexer, never_delimiter).unwrap_err();
        assert_matches!(error.cause,
            ErrorCause::Syntax(SyntaxError::UnclosedDoubleQuote { opening_location }) => {
            assert_eq!(*opening_location.code.value.borrow(), code);
            assert_eq!(opening_location.code.start_line_number.get(), 1);
            assert_eq!(*opening_location.code.source, Source::Unknown);
            assert_eq!(opening_location.range, 0..1);
        });
        assert_eq!(*error.location.code.value.borrow(), code);
        assert_eq!(error.location.code.start_line_number.get(), 1);
        assert_eq!(*error.location.code.source, Source::Unknown);
        assert_eq!(error.location.range, 8..8);
    }

    #[test]
    fn lexer_word_nonempty() {
        let code = r"0$(:)X\#";
        let mut base = Lexer::with_code(code);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let word = lexer.word(|_| false).now_or_never().unwrap().unwrap();
        assert_eq!(word.units.len(), 4);
        assert_eq!(word.units[0], WordUnit::Unquoted(Literal('0')));
        assert_matches!(&word.units[1], WordUnit::Unquoted(CommandSubst { content, location }) => {
            assert_eq!(&**content, ":");
            assert_eq!(*location.code.value.borrow(), code);
            assert_eq!(location.code.start_line_number.get(), 1);
            assert_eq!(*location.code.source, Source::Unknown);
            assert_eq!(location.range, 1..5);
        });
        assert_eq!(word.units[2], WordUnit::Unquoted(Literal('X')));
        assert_eq!(word.units[3], WordUnit::Unquoted(Backslashed('#')));
        assert_eq!(*word.location.code.value.borrow(), code);
        assert_eq!(word.location.code.start_line_number.get(), 1);
        assert_eq!(*word.location.code.source, Source::Unknown);
        assert_eq!(word.location.range, 0..8);

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_empty() {
        let mut base = Lexer::with_code("");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let word = lexer
            .word(|_| unreachable!("unexpected call to is_delimiter"))
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_eq!(word.units, []);
        assert_eq!(*word.location.code.value.borrow(), "");
        assert_eq!(word.location.code.start_line_number.get(), 1);
        assert_eq!(*word.location.code.source, Source::Unknown);
        assert_eq!(word.location.range, 0..0);
    }

    #[test]
    fn lexer_word_with_switch_in_word_context() {
        let mut base = Lexer::with_code(r"${x-~}");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let word = lexer
            .word(|c| {
                assert_eq!(c, '\'', "unexpected call to is_delimiter({c:?})");
                false
            })
            .now_or_never()
            .unwrap()
            .unwrap();
        // Outside quotes, the switch word is parsed with tilde expansion.
        assert_matches!(word.units[..], [Unquoted(BracedParam(ref param))] => {
            assert_matches!(param.modifier, Modifier::Switch(ref switch) => {
                assert_eq!(
                    switch.word.units,
                    [Tilde {
                        name: "".to_string(),
                        followed_by_slash: false,
                    }]
                );
            });
        });
    }

    #[test]
    fn lexer_word_with_switch_in_text_context() {
        let mut base = Lexer::with_code(r#""${x-~}""#);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let word = lexer
            .word(|c| {
                assert_eq!(c, '\'', "unexpected call to is_delimiter({c:?})");
                false
            })
            .now_or_never()
            .unwrap()
            .unwrap();
        // Inside double quotes, the tilde in the switch word stays literal.
        assert_matches!(word.units[..], [DoubleQuote(Text(ref units))] => {
            assert_matches!(units[..], [BracedParam(ref param)] => {
                assert_matches!(param.modifier, Modifier::Switch(ref switch) => {
                    assert_eq!(switch.word.units, [Unquoted(Literal('~'))]);
                });
            });
        });

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }
}