1use super::core::Lexer;
20use super::core::WordContext;
21use super::core::WordLexer;
22use crate::parser::core::Result;
23use crate::parser::error::Error;
24use crate::parser::error::SyntaxError;
25use crate::syntax::Backslashed;
26use crate::syntax::Literal;
27use crate::syntax::Text;
28use crate::syntax::TextUnit;
29
30impl WordLexer<'_, '_> {
31 pub async fn text_unit<F, G>(
49 &mut self,
50 mut is_delimiter: F,
51 mut is_escapable: G,
52 ) -> Result<Option<TextUnit>>
53 where
54 F: FnMut(char) -> bool,
55 G: FnMut(char) -> bool,
56 {
57 self.text_unit_dyn(&mut is_delimiter, &mut is_escapable)
58 .await
59 }
60
61 async fn text_unit_dyn(
63 &mut self,
64 is_delimiter: &mut dyn FnMut(char) -> bool,
65 is_escapable: &mut dyn FnMut(char) -> bool,
66 ) -> Result<Option<TextUnit>> {
67 if self.skip_if(|c| c == '\\').await? {
68 if let Some(c) = self.consume_raw_char_if_dyn(is_escapable).await? {
69 return Ok(Some(Backslashed(c)));
70 } else {
71 return Ok(Some(Literal('\\')));
72 }
73 }
74
75 if let Some(u) = self.dollar_unit().await? {
76 return Ok(Some(u));
77 }
78
79 if let Some(u) = self.backquote().await? {
80 return Ok(Some(u));
81 }
82
83 if let Some(sc) = self.consume_char_if(|c| !is_delimiter(c)).await? {
84 return Ok(Some(Literal(sc.value)));
85 }
86
87 Ok(None)
88 }
89
90 async fn consume_raw_char_if_dyn(
92 &mut self,
93 is_escapable: &mut dyn FnMut(char) -> bool,
94 ) -> Result<Option<char>> {
95 Ok(self
96 .disable_line_continuation()
97 .consume_char_if_dyn(is_escapable)
98 .await?
99 .map(|c| c.value))
100 }
101}
102
103impl Lexer<'_> {
104 pub async fn text<F, G>(&mut self, mut is_delimiter: F, mut is_escapable: G) -> Result<Text>
120 where
121 F: FnMut(char) -> bool,
122 G: FnMut(char) -> bool,
123 {
124 self.text_dyn(&mut is_delimiter, &mut is_escapable).await
125 }
126
127 async fn text_dyn(
129 &mut self,
130 is_delimiter: &mut dyn FnMut(char) -> bool,
131 is_escapable: &mut dyn FnMut(char) -> bool,
132 ) -> Result<Text> {
133 let mut units = vec![];
134
135 let mut word_lexer = WordLexer {
136 lexer: self,
137 context: WordContext::Text,
138 };
139 while let Some(unit) = word_lexer.text_unit_dyn(is_delimiter, is_escapable).await? {
140 units.push(unit);
141 }
142
143 Ok(Text(units))
144 }
145
146 pub async fn text_with_parentheses<F, G>(
159 &mut self,
160 mut is_delimiter: F,
161 mut is_escapable: G,
162 ) -> Result<Text>
163 where
164 F: FnMut(char) -> bool,
165 G: FnMut(char) -> bool,
166 {
167 self.text_with_parentheses_dyn(&mut is_delimiter, &mut is_escapable)
168 .await
169 }
170
171 async fn text_with_parentheses_dyn(
173 &mut self,
174 is_delimiter: &mut dyn FnMut(char) -> bool,
175 is_escapable: &mut dyn FnMut(char) -> bool,
176 ) -> Result<Text> {
177 let mut units = Vec::new();
178 let mut open_paren_locations = Vec::new();
179 loop {
180 let mut is_delimiter_or_paren = |c| {
181 if c == '(' {
182 return true;
183 }
184 if open_paren_locations.is_empty() {
185 is_delimiter(c)
186 } else {
187 c == ')'
188 }
189 };
190 let next_units = self
191 .text_dyn(&mut is_delimiter_or_paren, is_escapable)
192 .await?
193 .0;
194
195 units.extend(next_units);
196
197 if let Some(sc) = self.consume_char_if(|c| c == '(').await? {
198 units.push(Literal('('));
199 open_paren_locations.push(sc.location.clone());
200 } else if let Some(opening_location) = open_paren_locations.pop() {
201 if self.skip_if(|c| c == ')').await? {
202 units.push(Literal(')'));
203 } else {
204 let cause = SyntaxError::UnclosedParen { opening_location }.into();
205 let location = self.location().await?.clone();
206 return Err(Error { cause, location });
207 }
208 } else {
209 break;
210 }
211 }
212 Ok(Text(units))
213 }
214}
215
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::error::ErrorCause;
    use crate::source::Source;
    use crate::syntax::Backquote;
    use crate::syntax::BackquoteUnit;
    use crate::syntax::CommandSubst;
    use assert_matches::assert_matches;
    use futures_util::FutureExt;

    /// Polls `future` once and returns its output, panicking if the future
    /// is not ready immediately.
    fn now<T>(future: impl std::future::Future<Output = T>) -> T {
        future.now_or_never().unwrap()
    }

    /// Stand-in for `is_delimiter` in tests where it must never be called.
    fn unexpected_is_delimiter(c: char) -> bool {
        unreachable!("unexpected call to is_delimiter({:?})", c)
    }

    /// Stand-in for `is_escapable` in tests where it must never be called.
    fn unexpected_is_escapable(c: char) -> bool {
        unreachable!("unexpected call to is_escapable({:?})", c)
    }

    /// Builds the units expected for input made only of literal characters.
    fn literals(chars: &str) -> Vec<TextUnit> {
        chars.chars().map(Literal).collect()
    }

    #[test]
    fn lexer_text_unit_literal_accepted() {
        let mut base = Lexer::with_code("X");
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let mut called = false;
        let unit = now(word_lexer.text_unit(
            |c| {
                called = true;
                assert_eq!(c, 'X');
                false
            },
            unexpected_is_escapable,
        ))
        .unwrap()
        .unwrap();
        assert!(called);
        assert_eq!(unit, Literal('X'));

        assert_eq!(now(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_literal_rejected() {
        let mut base = Lexer::with_code(";");
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let mut called = false;
        let unit = now(word_lexer.text_unit(
            |c| {
                called = true;
                assert_eq!(c, ';');
                true
            },
            unexpected_is_escapable,
        ))
        .unwrap();
        assert!(called);
        assert_eq!(unit, None);

        // The rejected delimiter must still be available to the caller.
        assert_eq!(now(word_lexer.peek_char()), Ok(Some(';')));
    }

    #[test]
    fn lexer_text_unit_backslash_accepted() {
        let mut base = Lexer::with_code(r"\#");
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let mut called = false;
        let unit = now(word_lexer.text_unit(unexpected_is_delimiter, |c| {
            called = true;
            assert_eq!(c, '#');
            true
        }))
        .unwrap()
        .unwrap();
        assert!(called);
        assert_eq!(unit, Backslashed('#'));

        assert_eq!(now(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_backslash_eof() {
        let mut base = Lexer::with_code(r"\");
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = now(word_lexer.text_unit(unexpected_is_delimiter, unexpected_is_escapable))
            .unwrap()
            .unwrap();
        assert_eq!(unit, Literal('\\'));

        assert_eq!(now(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_backslash_line_continuation_not_recognized() {
        let mut base = Lexer::with_code("\\\\\n");
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let mut called = false;
        let unit = now(word_lexer.text_unit(unexpected_is_delimiter, |c| {
            called = true;
            assert_eq!(c, '\\');
            true
        }))
        .unwrap()
        .unwrap();
        assert!(called);
        assert_eq!(unit, Backslashed('\\'));

        // The newline after the escaped backslash is left unconsumed.
        assert_eq!(now(word_lexer.peek_char()), Ok(Some('\n')));
    }

    #[test]
    fn lexer_text_unit_dollar() {
        let mut base = Lexer::with_code("$()");
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = now(word_lexer.text_unit(unexpected_is_delimiter, unexpected_is_escapable))
            .unwrap()
            .unwrap();
        assert_matches!(unit, CommandSubst { content, location } => {
            assert_eq!(&*content, "");
            assert_eq!(location.range, 0..3);
        });

        assert_eq!(now(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_backquote_double_quote_escapable() {
        let mut base = Lexer::with_code(r#"`\"`"#);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Text,
        };
        let unit = now(word_lexer.text_unit(unexpected_is_delimiter, unexpected_is_escapable))
            .unwrap()
            .unwrap();
        assert_matches!(unit, Backquote { content, location } => {
            assert_eq!(content, [BackquoteUnit::Backslashed('"')]);
            assert_eq!(location.range, 0..4);
        });

        assert_eq!(now(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_backquote_double_quote_not_escapable() {
        let mut base = Lexer::with_code(r#"`\"`"#);
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = now(word_lexer.text_unit(unexpected_is_delimiter, unexpected_is_escapable))
            .unwrap()
            .unwrap();
        assert_matches!(unit, Backquote { content, location } => {
            assert_eq!(
                content,
                [BackquoteUnit::Literal('\\'), BackquoteUnit::Literal('"')]
            );
            assert_eq!(location.range, 0..4);
        });

        assert_eq!(now(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_unit_line_continuations() {
        let mut base = Lexer::with_code("\\\n\\\nX");
        let mut word_lexer = WordLexer {
            lexer: &mut base,
            context: WordContext::Word,
        };
        let unit = now(word_lexer.text_unit(|_| false, unexpected_is_escapable))
            .unwrap()
            .unwrap();
        assert_eq!(unit, Literal('X'));

        assert_eq!(now(word_lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_empty() {
        let mut lexer = Lexer::with_code("");
        let Text(units) =
            now(lexer.text(unexpected_is_delimiter, unexpected_is_escapable)).unwrap();
        assert_eq!(units, &[]);
    }

    #[test]
    fn lexer_text_nonempty() {
        let mut lexer = Lexer::with_code("abc");
        let mut called = 0;
        let Text(units) = now(lexer.text(
            |c| {
                assert!(
                    matches!(c, 'a' | 'b' | 'c'),
                    "unexpected call to is_delimiter({c:?}), called={called}"
                );
                called += 1;
                false
            },
            unexpected_is_escapable,
        ))
        .unwrap();
        assert_eq!(units, literals("abc"));
        assert_eq!(called, 3);

        assert_eq!(now(lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_delimiter() {
        let mut lexer = Lexer::with_code("abc");
        let mut called = 0;
        let Text(units) = now(lexer.text(
            |c| {
                assert!(
                    matches!(c, 'a' | 'b' | 'c'),
                    "unexpected call to is_delimiter({c:?}), called={called}"
                );
                called += 1;
                c == 'c'
            },
            unexpected_is_escapable,
        ))
        .unwrap();
        assert_eq!(units, literals("ab"));
        assert_eq!(called, 3);

        assert_eq!(now(lexer.peek_char()), Ok(Some('c')));
    }

    #[test]
    fn lexer_text_escaping() {
        let mut lexer = Lexer::with_code(r"a\b\c");
        let mut tested_chars = String::new();
        let Text(units) = now(lexer.text(
            |_| false,
            |c| {
                tested_chars.push(c);
                c == 'b'
            },
        ))
        .unwrap();
        assert_eq!(
            units,
            &[Literal('a'), Backslashed('b'), Literal('\\'), Literal('c')]
        );
        assert_eq!(tested_chars, "bc");

        assert_eq!(now(lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_with_parentheses_no_parentheses() {
        let mut lexer = Lexer::with_code("abc");
        let Text(units) = now(lexer.text_with_parentheses(|_| false, |_| false)).unwrap();
        assert_eq!(units, literals("abc"));

        assert_eq!(now(lexer.peek_char()), Ok(None));
    }

    #[test]
    fn lexer_text_with_parentheses_nest_1() {
        let mut lexer = Lexer::with_code("a(b)c)");
        let Text(units) =
            now(lexer.text_with_parentheses(|c| c == 'b' || c == ')', |_| false)).unwrap();
        assert_eq!(units, literals("a(b)c"));

        assert_eq!(now(lexer.peek_char()), Ok(Some(')')));
    }

    #[test]
    fn lexer_text_with_parentheses_nest_1_1() {
        let mut lexer = Lexer::with_code("ab(CD)ef(GH)ij;");
        let Text(units) = now(
            lexer.text_with_parentheses(|c| c.is_ascii_uppercase() || c == ';', |_| false),
        )
        .unwrap();
        assert_eq!(units, literals("ab(CD)ef(GH)ij"));

        assert_eq!(now(lexer.peek_char()), Ok(Some(';')));
    }

    #[test]
    fn lexer_text_with_parentheses_nest_3() {
        let mut lexer = Lexer::with_code("a(B((C)D))e;");
        let Text(units) = now(
            lexer.text_with_parentheses(|c| c.is_ascii_uppercase() || c == ';', |_| false),
        )
        .unwrap();
        assert_eq!(units, literals("a(B((C)D))e"));

        assert_eq!(now(lexer.peek_char()), Ok(Some(';')));
    }

    #[test]
    fn lexer_text_with_parentheses_unclosed() {
        let mut lexer = Lexer::with_code("x(()");
        let error = now(lexer.text_with_parentheses(|_| false, |_| false)).unwrap_err();
        assert_matches!(error.cause,
            ErrorCause::Syntax(SyntaxError::UnclosedParen { opening_location }) => {
            assert_eq!(*opening_location.code.value.borrow(), "x(()");
            assert_eq!(opening_location.code.start_line_number.get(), 1);
            assert_eq!(*opening_location.code.source, Source::Unknown);
            assert_eq!(opening_location.range, 1..2);
        });
        assert_eq!(*error.location.code.value.borrow(), "x(()");
        assert_eq!(error.location.code.start_line_number.get(), 1);
        assert_eq!(*error.location.code.source, Source::Unknown);
        assert_eq!(error.location.range, 4..4);
    }
}