1use super::core::Lexer;
20use super::core::WordContext;
21use super::core::WordLexer;
22use crate::parser::core::Result;
23use crate::parser::error::Error;
24use crate::parser::error::SyntaxError;
25use crate::source::Location;
26use crate::source::SourceChar;
27use crate::syntax::TextUnit;
28use crate::syntax::Word;
29use crate::syntax::WordUnit::{self, DollarSingleQuote, DoubleQuote, SingleQuote, Unquoted};
30
31impl Lexer<'_> {
32 async fn single_quote(&mut self, opening_location: Location) -> Result<WordUnit> {
41 let mut content = String::new();
42 let mut lexer = self.disable_line_continuation();
43 loop {
44 match lexer.consume_char_if(|_| true).await? {
45 Some(&SourceChar { value: '\'', .. }) => break,
46 Some(&SourceChar { value, .. }) => content.push(value),
47 None => {
48 let cause = SyntaxError::UnclosedSingleQuote { opening_location }.into();
49 let location = lexer.location().await?.clone();
50 return Err(Error { cause, location });
51 }
52 }
53 }
54 Lexer::enable_line_continuation(lexer);
55 Ok(SingleQuote(content))
56 }
57
58 async fn double_quote(&mut self, opening_location: Location) -> Result<WordUnit> {
67 fn is_delimiter(c: char) -> bool {
68 c == '"'
69 }
70 fn is_escapable(c: char) -> bool {
71 matches!(c, '$' | '`' | '"' | '\\')
72 }
73
74 let content = self.text(is_delimiter, is_escapable).await?;
75
76 if self.skip_if(|c| c == '"').await? {
77 Ok(DoubleQuote(content))
78 } else {
79 let cause = SyntaxError::UnclosedDoubleQuote { opening_location }.into();
80 let location = self.location().await?.clone();
81 Err(Error { cause, location })
82 }
83 }
84}
85
86impl WordLexer<'_, '_> {
87 pub async fn word_unit<F>(&mut self, is_delimiter: F) -> Result<Option<WordUnit>>
99 where
100 F: Fn(char) -> bool,
101 {
102 self.word_unit_dyn(&is_delimiter).await
103 }
104
105 async fn word_unit_dyn(
107 &mut self,
108 is_delimiter: &dyn Fn(char) -> bool,
109 ) -> Result<Option<WordUnit>> {
110 let allow_single_quote = match self.context {
111 WordContext::Word => true,
112 WordContext::Text => false,
113 };
114 let escape_all = |_| true;
115 let escape_some = |c| matches!(c, '$' | '"' | '`' | '\\') || is_delimiter(c);
116 let is_escapable: &dyn Fn(char) -> bool = match self.context {
117 WordContext::Word => &escape_all,
118 WordContext::Text => &escape_some,
119 };
120
121 match self.peek_char().await? {
122 Some('\'') if allow_single_quote => {
123 let location = self.location().await?.clone();
124 self.consume_char();
125 self.single_quote(location).await.map(Some)
126 }
127 Some('"') => {
128 let location = self.location().await?.clone();
129 self.consume_char();
130 self.double_quote(location).await.map(Some)
131 }
132 _ => {
133 let unit = self.text_unit(is_delimiter, is_escapable).await?;
134 if allow_single_quote
135 && unit == Some(TextUnit::Literal('$'))
136 && let Some(result) = self.single_quoted_escaped_string().await?
137 {
138 return Ok(Some(DollarSingleQuote(result)));
139 }
140 Ok(unit.map(Unquoted))
142 }
143 }
144 }
145
146 pub async fn word<F>(&mut self, is_delimiter: F) -> Result<Word>
159 where
160 F: Fn(char) -> bool,
161 {
162 self.word_dyn(&is_delimiter).await
163 }
164
165 async fn word_dyn(&mut self, is_delimiter: &dyn Fn(char) -> bool) -> Result<Word> {
167 let start = self.index();
168 let mut units = vec![];
169 while let Some(unit) = self.word_unit_dyn(is_delimiter).await? {
170 units.push(unit)
171 }
172 let location = self.location_range(start..self.index());
173 Ok(Word { units, location })
174 }
175}
176
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::error::ErrorCause;
    use crate::parser::lex::WordContext;
    use crate::source::Source;
    use crate::syntax::EscapeUnit;
    use crate::syntax::EscapedString;
    use crate::syntax::Modifier;
    use crate::syntax::Text;
    use crate::syntax::TextUnit::{Backslashed, BracedParam, CommandSubst, Literal};
    use crate::syntax::WordUnit::{DollarSingleQuote, Tilde};
    use assert_matches::assert_matches;
    use futures_util::FutureExt as _;

    /// Delimiter predicate for tests in which the lexer must never consult it.
    fn never_called(c: char) -> bool {
        unreachable!("unexpected call to is_delimiter({:?})", c)
    }

    // A command substitution is returned as a single unquoted unit.
    #[test]
    fn lexer_word_unit_unquoted() {
        let mut base = Lexer::with_code("$()");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = lexer
            .word_unit(never_called)
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_matches!(unit, Unquoted(CommandSubst { content, location }) => {
            assert_eq!(&*content, "");
            assert_eq!(location.range, 0..3);
        });

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    // In the word context, a backslash escapes any following character.
    #[test]
    fn lexer_word_unit_unquoted_escapes_in_word_context() {
        let mut base = Lexer::with_code(r#"\a\$\`\"\\\'\#\{\}"#);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        for escaped in ['a', '$', '`', '"', '\\', '\'', '#', '{', '}'] {
            let result = lexer.word_unit(never_called).now_or_never().unwrap();
            assert_eq!(result, Ok(Some(Unquoted(Backslashed(escaped)))));
        }

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    // In the text context, a backslash escapes only `$`, `` ` ``, `"`, `\`
    // and delimiters; elsewhere it is a literal.
    #[test]
    fn lexer_word_unit_unquoted_escapes_in_text_context() {
        let mut base = Lexer::with_code(r#"\a\$\`\"\\\'\#\{\}"#);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Text,
        };
        let is_delimiter = |c: char| c == '}';

        let expected_units = [
            Literal('\\'),
            Literal('a'),
            Backslashed('$'),
            Backslashed('`'),
            Backslashed('"'),
            Backslashed('\\'),
            Literal('\\'),
            Literal('\''),
            Literal('\\'),
            Literal('#'),
            Literal('\\'),
            Literal('{'),
            Backslashed('}'),
        ];
        for expected in expected_units {
            let result = lexer.word_unit(is_delimiter).now_or_never().unwrap();
            assert_eq!(result, Ok(Some(Unquoted(expected))));
        }

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    // A `$` that starts no expansion is an ordinary literal.
    #[test]
    fn lexer_word_unit_orphan_dollar_is_literal() {
        let mut base = Lexer::with_code("$");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = lexer
            .word_unit(|c| {
                assert_eq!(c, '$', "unexpected call to is_delimiter({c:?})");
                false
            })
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_eq!(unit, Unquoted(Literal('$')));

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_single_quote_empty() {
        let mut base = Lexer::with_code("''");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = lexer
            .word_unit(never_called)
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_eq!(unit, SingleQuote(String::new()));

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    // Backslash-newline and `$` are kept verbatim inside single quotes.
    #[test]
    fn lexer_word_unit_single_quote_nonempty() {
        let mut base = Lexer::with_code("'abc\\\n$def\\'");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = lexer
            .word_unit(never_called)
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_eq!(unit, SingleQuote("abc\\\n$def\\".to_string()));

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_single_quote_unclosed() {
        let source_text = "'abc\ndef\\";
        let mut base = Lexer::with_code(source_text);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let e = lexer
            .word_unit(never_called)
            .now_or_never()
            .unwrap()
            .unwrap_err();
        assert_matches!(e.cause,
            ErrorCause::Syntax(SyntaxError::UnclosedSingleQuote { opening_location }) => {
            let code = &opening_location.code;
            assert_eq!(*code.value.borrow(), source_text);
            assert_eq!(code.start_line_number.get(), 1);
            assert_eq!(*code.source, Source::Unknown);
            assert_eq!(opening_location.range, 0..1);
        });
        let code = &e.location.code;
        assert_eq!(*code.value.borrow(), source_text);
        assert_eq!(code.start_line_number.get(), 1);
        assert_eq!(*code.source, Source::Unknown);
        assert_eq!(e.location.range, 9..9);
    }

    // In the text context, `'` is not a quote but a literal character.
    #[test]
    fn lexer_word_unit_not_single_quote_in_text_context() {
        let mut base = Lexer::with_code("'");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Text,
        };

        let unit = lexer
            .word_unit(|c| {
                assert_eq!(c, '\'', "unexpected call to is_delimiter({c:?})");
                false
            })
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_eq!(unit, Unquoted(Literal('\'')));

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_dollar_single_quote_empty() {
        let mut base = Lexer::with_code("$''");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = lexer
            .word_unit(|c| {
                assert_matches!(c, '$', "unexpected call to is_delimiter({c:?})");
                false
            })
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_matches!(unit, DollarSingleQuote(EscapedString(content)) => {
            assert_eq!(content, []);
        });
    }

    #[test]
    fn lexer_word_unit_dollar_single_quote_nonempty() {
        let mut base = Lexer::with_code(r"$'foo'");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = lexer
            .word_unit(|c| {
                assert_matches!(c, '$', "unexpected call to is_delimiter({c:?})");
                false
            })
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_matches!(unit, DollarSingleQuote(EscapedString(content)) => {
            assert_eq!(
                content,
                [
                    EscapeUnit::Literal('f'),
                    EscapeUnit::Literal('o'),
                    EscapeUnit::Literal('o'),
                ]
            );
        });

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    // In the text context, `$'` is not a dollar-single-quote; the `$` is a literal.
    #[test]
    fn lexer_word_unit_not_dollar_single_quote_in_text_context() {
        let mut base = Lexer::with_code("$''");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Text,
        };
        let unit = lexer
            .word_unit(|c| {
                assert_matches!(c, '$', "unexpected call to is_delimiter({c:?})");
                false
            })
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_matches!(unit, Unquoted(Literal('$')));
    }

    #[test]
    fn lexer_word_unit_double_quote_empty() {
        let mut base = Lexer::with_code("\"\"");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = lexer
            .word_unit(never_called)
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_matches!(unit, DoubleQuote(Text(content)) => assert_eq!(content, []));

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_double_quote_non_empty() {
        let mut base = Lexer::with_code("\"abc\"");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = lexer
            .word_unit(never_called)
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_matches!(unit, DoubleQuote(Text(content)) => {
            assert_eq!(content, [Literal('a'), Literal('b'), Literal('c')]);
        });

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    // Inside double quotes, a backslash escapes only `$`, `` ` ``, `"` and `\`.
    #[test]
    fn lexer_word_unit_double_quote_escapes() {
        let mut base = Lexer::with_code(r#""\a\$\`\"\\\'\#""#);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = lexer
            .word_unit(|c| match c {
                'a' | '\'' | '#' => true,
                _ => unreachable!("unexpected call to is_delimiter({:?})", c),
            })
            .now_or_never()
            .unwrap()
            .unwrap()
            .unwrap();
        assert_matches!(unit, DoubleQuote(Text(ref units)) => {
            assert_eq!(
                units,
                &[
                    Literal('\\'),
                    Literal('a'),
                    Backslashed('$'),
                    Backslashed('`'),
                    Backslashed('"'),
                    Backslashed('\\'),
                    Literal('\\'),
                    Literal('\''),
                    Literal('\\'),
                    Literal('#'),
                ]
            );
        });

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_unit_double_quote_unclosed() {
        let source_text = "\"abc\ndef";
        let mut base = Lexer::with_code(source_text);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let e = lexer
            .word_unit(never_called)
            .now_or_never()
            .unwrap()
            .unwrap_err();
        assert_matches!(e.cause,
            ErrorCause::Syntax(SyntaxError::UnclosedDoubleQuote { opening_location }) => {
            let code = &opening_location.code;
            assert_eq!(*code.value.borrow(), source_text);
            assert_eq!(code.start_line_number.get(), 1);
            assert_eq!(*code.source, Source::Unknown);
            assert_eq!(opening_location.range, 0..1);
        });
        let code = &e.location.code;
        assert_eq!(*code.value.borrow(), source_text);
        assert_eq!(code.start_line_number.get(), 1);
        assert_eq!(*code.source, Source::Unknown);
        assert_eq!(e.location.range, 8..8);
    }

    #[test]
    fn lexer_word_nonempty() {
        let source_text = r"0$(:)X\#";
        let mut base = Lexer::with_code(source_text);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let word = lexer.word(|_| false).now_or_never().unwrap().unwrap();
        assert_eq!(word.units.len(), 4);
        assert_eq!(word.units[0], WordUnit::Unquoted(Literal('0')));
        assert_matches!(&word.units[1], WordUnit::Unquoted(CommandSubst { content, location }) => {
            assert_eq!(&**content, ":");
            assert_eq!(*location.code.value.borrow(), source_text);
            assert_eq!(location.code.start_line_number.get(), 1);
            assert_eq!(*location.code.source, Source::Unknown);
            assert_eq!(location.range, 1..5);
        });
        assert_eq!(word.units[2], WordUnit::Unquoted(Literal('X')));
        assert_eq!(word.units[3], WordUnit::Unquoted(Backslashed('#')));

        let code = &word.location.code;
        assert_eq!(*code.value.borrow(), source_text);
        assert_eq!(code.start_line_number.get(), 1);
        assert_eq!(*code.source, Source::Unknown);
        assert_eq!(word.location.range, 0..8);

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }

    #[test]
    fn lexer_word_empty() {
        let mut base = Lexer::with_code("");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let word = lexer
            .word(|_| unreachable!("unexpected call to is_delimiter"))
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_eq!(word.units, []);

        let code = &word.location.code;
        assert_eq!(*code.value.borrow(), "");
        assert_eq!(code.start_line_number.get(), 1);
        assert_eq!(*code.source, Source::Unknown);
        assert_eq!(word.location.range, 0..0);
    }

    // Outside double quotes, the word in a switch has its tilde recognized.
    #[test]
    fn lexer_word_with_switch_in_word_context() {
        let mut base = Lexer::with_code(r"${x-~}");
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let word = lexer
            .word(|c| {
                assert_eq!(c, '\'', "unexpected call to is_delimiter({c:?})");
                false
            })
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(word.units[..], [Unquoted(BracedParam(ref param))] => {
            assert_matches!(param.modifier, Modifier::Switch(ref switch) => {
                assert_eq!(
                    switch.word.units,
                    [Tilde {
                        name: "".to_string(),
                        followed_by_slash: false,
                    }]
                );
            });
        });
    }

    // Inside double quotes, the tilde in a switch word stays a literal.
    #[test]
    fn lexer_word_with_switch_in_text_context() {
        let mut base = Lexer::with_code(r#""${x-~}""#);
        let mut lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };

        let word = lexer
            .word(|c| {
                assert_eq!(c, '\'', "unexpected call to is_delimiter({c:?})");
                false
            })
            .now_or_never()
            .unwrap()
            .unwrap();
        assert_matches!(word.units[..], [DoubleQuote(Text(ref units))] => {
            assert_matches!(units[..], [BracedParam(ref param)] => {
                assert_matches!(param.modifier, Modifier::Switch(ref switch) => {
                    assert_eq!(switch.word.units, [Unquoted(Literal('~'))]);
                });
            });
        });

        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
    }
}