yash_syntax/parser/lex/
core.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Fundamental building blocks for the lexical analyzer.
18
19use super::keyword::Keyword;
20use super::op::Operator;
21use crate::alias::Alias;
22use crate::alias::EmptyGlossary;
23use crate::input::Context;
24use crate::input::InputObject;
25use crate::input::Memory;
26use crate::parser::core::Result;
27use crate::parser::error::Error;
28use crate::source::source_chars;
29use crate::source::Code;
30use crate::source::Location;
31use crate::source::Source;
32use crate::source::SourceChar;
33use crate::syntax::Word;
34use std::cell::RefCell;
35use std::fmt;
36use std::future::Future;
37use std::num::NonZeroU64;
38use std::ops::Deref;
39use std::ops::DerefMut;
40use std::ops::Range;
41use std::pin::Pin;
42use std::rc::Rc;
43
/// Tests whether `c` is a blank character.
///
/// Every character accepted by [`char::is_whitespace`] counts as a blank,
/// with the sole exception of the newline character.
pub fn is_blank(c: char) -> bool {
    // TODO locale
    c.is_whitespace() && c != '\n'
}
49
/// Result of [`LexerCore::peek_char`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PeekChar<'a> {
    /// A character is available at the current position.
    Char(&'a SourceChar),
    /// There is no more character; the location is where the input ended.
    EndOfInput(&'a Location),
}
56
57impl<'a> PeekChar<'a> {
58    /// Returns the location that was peeked.
59    #[must_use]
60    fn location<'b>(self: &'b PeekChar<'a>) -> &'a Location {
61        match self {
62            PeekChar::Char(c) => &c.location,
63            PeekChar::EndOfInput(l) => l,
64        }
65    }
66}
67
/// Token identifier, or classification of tokens.
///
/// This enum classifies a token as defined in POSIX XCU 2.10.1 Shell Grammar Lexical
/// Conventions, but does not exactly reflect further distinction defined in
/// POSIX XCU 2.10.2 Shell Grammar Rules.
///
/// For convenience, the special token identifier `EndOfInput` is included.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenId {
    /// `TOKEN`
    ///
    /// If this token _looks like_ a reserved word, this variant has some
    /// associated `Keyword` that describes the word. However, it depends on
    /// context whether a token is actually regarded as a reserved word or
    /// just as an ordinary word. You must ensure that you're in an
    /// applicable context when examining the `Keyword` value.
    Token(Option<Keyword>),
    /// An operator; the associated value tells which operator it is.
    Operator(Operator),
    /// `IO_NUMBER`
    IoNumber,
    /// Imaginary token identifier for the end of input.
    EndOfInput,
}
92
93impl TokenId {
94    /// Determines if this token can be a delimiter of a clause.
95    ///
96    /// This function delegates to [`Keyword::is_clause_delimiter`] if the token
97    /// ID is a (possible) keyword, or to [`Operator::is_clause_delimiter`] if
98    /// it is an operator. For `EndOfInput` the function returns true.
99    /// Otherwise, the result is false.
100    pub fn is_clause_delimiter(self) -> bool {
101        use TokenId::*;
102        match self {
103            Token(Some(keyword)) => keyword.is_clause_delimiter(),
104            Token(None) => false,
105            Operator(operator) => operator.is_clause_delimiter(),
106            IoNumber => false,
107            EndOfInput => true,
108        }
109    }
110}
111
/// Result of lexical analysis produced by the [`Lexer`].
///
/// The [`Display`](fmt::Display) implementation prints the `word` field.
#[derive(Debug)]
pub struct Token {
    /// Content of the token.
    ///
    /// The word value contains at least one [unit](crate::syntax::WordUnit),
    /// regardless of whether the token is an operator. The only exception is
    /// when `id` is `EndOfInput`, in which case the word is empty.
    pub word: Word,
    /// Token identifier.
    pub id: TokenId,
    /// Position of the first character of the word.
    ///
    /// The value is an index into the lexer's buffer and can be passed to
    /// [`Lexer::rewind`] to move back to the beginning of the token.
    pub index: usize,
}
126
127impl fmt::Display for Token {
128    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129        write!(f, "{}", self.word)
130    }
131}
132
/// State of the input function in a lexer.
#[derive(Clone, Debug)]
enum InputState {
    /// The input function may still yield more lines.
    Alive,
    /// The input function has returned an empty line; the location is where
    /// the input ended.
    EndOfInput(Location),
    /// The input function has failed; the error is returned again whenever
    /// the lexer runs out of buffered characters.
    Error(Error),
}
140
/// Source character with additional attribute
#[derive(Clone, Debug, Eq, PartialEq)]
struct SourceCharEx {
    /// The character and its location.
    value: SourceChar,
    /// Whether the character has been marked as part of a line continuation
    /// (see `LexerCore::mark_line_continuation`).
    is_line_continuation: bool,
}
147
148fn ex<I: IntoIterator<Item = SourceChar>>(i: I) -> impl Iterator<Item = SourceCharEx> {
149    i.into_iter().map(|sc| SourceCharEx {
150        value: sc,
151        is_line_continuation: false,
152    })
153}
154
/// Core part of the lexical analyzer.
///
/// The core owns the input function, buffers the characters read from it, and
/// keeps track of the position of the next character to be parsed.
struct LexerCore<'a> {
    /// Input function that provides source code line by line.
    input: Box<dyn InputObject + 'a>,
    /// Whether the input function is alive, exhausted, or failed.
    state: InputState,
    /// Code object that accumulates the lines read so far and is shared by the
    /// locations of the characters read from the input function.
    raw_code: Rc<Code>,
    /// Buffer of characters read so far (possibly modified by alias
    /// substitution).
    source: Vec<SourceCharEx>,
    /// Position in `source` of the next character to be parsed.
    index: usize,
}
163
164impl<'a> LexerCore<'a> {
165    /// Creates a new lexer core that reads using the given input function.
166    #[must_use]
167    fn new(
168        input: Box<dyn InputObject + 'a>,
169        start_line_number: NonZeroU64,
170        source: Rc<Source>,
171    ) -> LexerCore<'a> {
172        LexerCore {
173            input,
174            raw_code: Rc::new(Code {
175                value: RefCell::new(String::new()),
176                start_line_number,
177                source,
178            }),
179            state: InputState::Alive,
180            source: Vec::new(),
181            index: 0,
182        }
183    }
184
185    /// Computes the start index of the location at the current position.
186    #[must_use]
187    fn next_index(&self) -> usize {
188        let Some(last) = self.source.last() else {
189            return 0;
190        };
191
192        let mut location = &last.value.location;
193        while let Source::Alias { original, .. } = &*location.code.source {
194            location = original;
195        }
196        location.range.end
197    }
198
    /// Peeks the next character, reading the next line if necessary.
    ///
    /// Returns [`PeekChar::Char`] if a character is available at the current
    /// index and [`PeekChar::EndOfInput`] if the input function has returned an
    /// empty line. If the input function fails, the error is remembered and
    /// returned again by later calls that run out of buffered characters.
    async fn peek_char(&mut self) -> Result<PeekChar<'_>> {
        loop {
            // NOTE(review): This check uses explicit indexing rather than
            // `if let Some(sc) = self.source.get(self.index)`, presumably
            // because returning a borrow obtained in the `if let` would
            // conflict with the mutations later in the loop under the current
            // borrow checker.
            if self.index < self.source.len() {
                return Ok(PeekChar::Char(&self.source[self.index].value));
            }

            // No buffered character is left: report a previously reached end
            // of input or error instead of calling the input function again.
            match self.state {
                InputState::Alive => (),
                InputState::EndOfInput(ref location) => return Ok(PeekChar::EndOfInput(location)),
                InputState::Error(ref error) => return Err(error.clone()),
            }

            // Read more input
            let index = self.next_index();
            match self.input.next_line(&self.input_context()).await {
                Ok(line) => {
                    if line.is_empty() {
                        // End of input
                        self.state = InputState::EndOfInput(Location {
                            code: Rc::clone(&self.raw_code),
                            range: index..index,
                        });
                    } else {
                        // Successful read
                        self.raw_code.value.borrow_mut().push_str(&line);
                        self.source
                            .extend(ex(source_chars(&line, &self.raw_code, index)));
                    }
                }
                Err(io_error) => {
                    // Remember the failure; the next iteration returns it.
                    self.state = InputState::Error(Error {
                        cause: io_error.into(),
                        location: Location {
                            code: Rc::clone(&self.raw_code),
                            range: index..index,
                        },
                    });
                }
            }
        }
    }
243
244    /// Returns the input context for the next character.
245    fn input_context(&self) -> Context {
246        let mut context = Context::default();
247        context.set_is_first_line(self.raw_code.value.borrow().is_empty());
248        context
249    }
250
    /// Consumes the next character.
    ///
    /// This function must be called after [`peek_char`](Self::peek_char) has
    /// successfully returned the character.
    ///
    /// # Panics
    ///
    /// If the character at the current index has not been read into the
    /// buffer, that is, it has not yet been peeked.
    fn consume_char(&mut self) {
        assert!(
            self.index < self.source.len(),
            "A character must have been peeked before being consumed: index={}",
            self.index
        );
        self.index += 1;
    }
264
    /// Returns a reference to the character at the given index.
    ///
    /// # Panics
    ///
    /// If `index` is larger than the current index, or if no character has
    /// been read into the buffer at `index`.
    #[must_use]
    fn peek_char_at(&self, index: usize) -> &SourceChar {
        assert!(
            index <= self.index,
            "The index {} must not be larger than the current index {}",
            index,
            self.index
        );
        &self.source[index].value
    }
276
    /// Returns the current index.
    ///
    /// The index is the position in the internal buffer of the character that
    /// will be peeked next.
    #[must_use]
    fn index(&self) -> usize {
        self.index
    }
282
    /// Rewinds the index to the given value.
    ///
    /// The buffer is left untouched, so the characters from `index` onward can
    /// be peeked and consumed again.
    ///
    /// # Panics
    ///
    /// If `index` is larger than the current index.
    fn rewind(&mut self, index: usize) {
        assert!(
            index <= self.index,
            "The new index {} must not be larger than the current index {}",
            index,
            self.index
        );
        self.index = index;
    }
293
    /// Checks if there is any character that has been read from the input
    /// source but not yet consumed.
    ///
    /// Returns true if the current index is less than the number of buffered
    /// characters.
    #[must_use]
    fn pending(&self) -> bool {
        self.index < self.source.len()
    }
300
301    /// Clears the internal buffer.
302    fn flush(&mut self) {
303        let start_line_number = self.raw_code.line_number(usize::MAX);
304        self.raw_code = Rc::new(Code {
305            value: RefCell::new(String::new()),
306            start_line_number,
307            source: self.raw_code.source.clone(),
308        });
309        self.source.clear();
310        self.index = 0;
311    }
312
    /// Clears an end-of-input or error status so that the lexer can resume
    /// parsing.
    ///
    /// The state is set back to alive and the internal buffer is
    /// [flushed](Self::flush).
    fn reset(&mut self) {
        self.state = InputState::Alive;
        self.flush();
    }
319
    /// Extracts a string from the source code range.
    ///
    /// The result is made of the buffered characters at the indices in
    /// `range`.
    ///
    /// # Panics
    ///
    /// If `range` is out of bounds of the buffer.
    fn source_string(&self, range: Range<usize>) -> String {
        self.source[range].iter().map(|c| c.value.value).collect()
    }
324
325    /// Returns a location for a given range of the source code.
326    #[must_use]
327    fn location_range(&self, range: Range<usize>) -> Location {
328        if range.start == self.source.len() {
329            if let InputState::EndOfInput(ref location) = self.state {
330                return location.clone();
331            }
332        }
333        let start = &self.peek_char_at(range.start).location;
334        let code = start.code.clone();
335        let end = range
336            .map(|index| &self.peek_char_at(index).location)
337            .take_while(|location| location.code == code)
338            .last()
339            .map(|location| location.range.end)
340            .unwrap_or(start.range.start);
341        let range = start.range.start..end;
342        Location { code, range }
343    }
344
345    /// Marks the characters in the given range as line continuation.
346    ///
347    /// This function sets the `is_line_continuation` flag of the characters in
348    /// the range to true. The characters must have been read before calling
349    /// this function.
350    fn mark_line_continuation(&mut self, range: Range<usize>) {
351        assert!(
352            range.end <= self.index,
353            "characters must have been read (range = {:?}, current index = {})",
354            range,
355            self.index
356        );
357        for sc in &mut self.source[range] {
358            sc.is_line_continuation = true;
359        }
360    }
361
362    /// Performs alias substitution.
363    ///
364    /// This function replaces the characters starting from the `begin` index up
365    /// to the current position with the alias value. The resulting part of code
366    /// will be characters with a [`Source::Alias`] origin.
367    fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
368        let end = self.index;
369        assert!(
370            begin < end,
371            "begin index {begin} should be less than end index {end}"
372        );
373
374        let source = Rc::new(Source::Alias {
375            original: self.location_range(begin..end),
376            alias: alias.clone(),
377        });
378        let code = Rc::new(Code {
379            value: RefCell::new(alias.replacement.clone()),
380            start_line_number: NonZeroU64::new(1).unwrap(),
381            source,
382        });
383        let repl = ex(source_chars(&alias.replacement, &code, 0));
384
385        self.source.splice(begin..end, repl);
386        self.index = begin;
387    }
388
389    /// Tests if the given index is after the replacement string of alias
390    /// substitution that ends with a blank.
391    ///
392    /// # Panics
393    ///
394    /// If `index` is larger than the currently read index.
395    fn is_after_blank_ending_alias(&self, index: usize) -> bool {
396        fn ends_with_blank(s: &str) -> bool {
397            s.chars().next_back().map_or(false, is_blank)
398        }
399        fn is_same_alias(alias: &Alias, sc: Option<&SourceCharEx>) -> bool {
400            sc.is_some_and(|sc| sc.value.location.code.source.is_alias_for(&alias.name))
401        }
402
403        for index in (0..index).rev() {
404            let sc = &self.source[index];
405
406            if !sc.is_line_continuation && !is_blank(sc.value.value) {
407                return false;
408            }
409
410            if let Source::Alias { ref alias, .. } = *sc.value.location.code.source {
411                #[allow(clippy::collapsible_if)]
412                if ends_with_blank(&alias.replacement) {
413                    if !is_same_alias(alias, self.source.get(index + 1)) {
414                        return true;
415                    }
416                }
417            }
418        }
419
420        false
421    }
422}
423
424impl fmt::Debug for LexerCore<'_> {
425    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
426        f.debug_struct("LexerCore")
427            .field("state", &self.state)
428            .field("source", &self.source)
429            .field("index", &self.index)
430            .finish_non_exhaustive()
431    }
432}
433
/// Lexical analyzer.
///
/// A lexer reads lines using an input function and parses the characters into tokens. It has an
/// internal buffer containing the characters that have been read and the position (or the
/// index) of the character that is to be parsed next.
///
/// `Lexer` has primitive functions such as [`peek_char`](Lexer::peek_char) that provide access
/// to the character at the current position. Derived functions such as
/// [`skip_blanks_and_comment`](Lexer::skip_blanks_and_comment) depend on those primitives to
/// parse more complex structures in the source code.
#[derive(Debug)]
pub struct Lexer<'a> {
    // `Lexer` is a thin wrapper around `LexerCore`. `Lexer` delegates most
    // functions to `LexerCore`. `Lexer` adds automatic line-continuation
    // skipping to `LexerCore`.
    /// Underlying buffer and input handling.
    core: LexerCore<'a>,
    /// Whether line continuations are skipped when peeking a character.
    line_continuation_enabled: bool,
}
452
453impl<'a> Lexer<'a> {
454    /// Creates a new lexer that reads using the given input function.
455    #[must_use]
456    pub fn new(
457        input: Box<dyn InputObject + 'a>,
458        start_line_number: NonZeroU64,
459        source: Rc<Source>,
460    ) -> Lexer<'a> {
461        Lexer {
462            core: LexerCore::new(input, start_line_number, source),
463            line_continuation_enabled: true,
464        }
465    }
466
467    /// Creates a new lexer with a fixed source code.
468    ///
469    /// This is a convenience function that creates a lexer that reads from a
470    /// string using a [`Memory`] input function. The line number starts from 1.
471    #[must_use]
472    pub fn from_memory<S: Into<Rc<Source>>>(code: &'a str, source: S) -> Lexer<'a> {
473        fn inner(code: &str, source: Rc<Source>) -> Lexer {
474            let line = NonZeroU64::new(1).unwrap();
475            Lexer::new(Box::new(Memory::new(code)), line, source)
476        }
477        inner(code, source.into())
478    }
479
    /// Disables line continuation recognition onward.
    ///
    /// By default, [`peek_char`](Self::peek_char) silently skips line
    /// continuation sequences. When line continuation is disabled, however,
    /// `peek_char` returns characters literally.
    ///
    /// Line continuation stays disabled as long as the returned
    /// [`PlainLexer`] is alive. Drop it or pass it to
    /// [`enable_line_continuation`](Self::enable_line_continuation) to
    /// switch line continuation recognition on again.
    ///
    /// # Panics
    ///
    /// If line continuation has already been disabled.
    pub fn disable_line_continuation<'b>(&'b mut self) -> PlainLexer<'b, 'a> {
        assert!(
            self.line_continuation_enabled,
            "line continuation already disabled"
        );
        self.line_continuation_enabled = false;
        PlainLexer { lexer: self }
    }
498
    /// Re-enables line continuation.
    ///
    /// You can pass the `PlainLexer` returned from
    /// [`disable_line_continuation`](Self::disable_line_continuation) to this
    /// function to re-enable line continuation. That is equivalent to dropping
    /// the `PlainLexer` instance, but the code will be more descriptive.
    ///
    /// The function body is empty: taking the `PlainLexer` by value drops it,
    /// and dropping it is what turns line continuation back on.
    pub fn enable_line_continuation<'b>(_: PlainLexer<'a, 'b>) {}
506
507    /// Skips line continuation, i.e., a backslash followed by a newline.
508    ///
509    /// If there is a line continuation at the current position, this function
510    /// consumes the backslash and the newline and returns `Ok(true)`. The
511    /// characters are marked as line continuation.
512    ///
513    /// If there is no line continuation, this function does nothing and returns
514    /// `Ok(false)`.
515    ///
516    /// This function does nothing if line continuation has been
517    /// [disabled](Self::disable_line_continuation).
518    async fn line_continuation(&mut self) -> Result<bool> {
519        if !self.line_continuation_enabled {
520            return Ok(false);
521        }
522
523        let index = self.core.index();
524        match self.core.peek_char().await? {
525            PeekChar::Char(c) if c.value == '\\' => self.core.consume_char(),
526            _ => return Ok(false),
527        }
528
529        match self.core.peek_char().await? {
530            PeekChar::Char(c) if c.value == '\n' => self.core.consume_char(),
531            _ => {
532                self.core.rewind(index);
533                return Ok(false);
534            }
535        }
536
537        self.core.mark_line_continuation(index..index + 2);
538
539        Ok(true)
540    }
541
542    /// Peeks the next character.
543    ///
544    /// If the end of input is reached, `Ok(None)` is returned. On error,
545    /// `Err(_)` is returned.
546    ///
547    /// If line continuation recognition is enabled, combinations of a backslash
548    /// and a newline are silently skipped before returning the next character.
549    /// Call [`disable_line_continuation`](Self::disable_line_continuation) to
550    /// switch off line continuation recognition.
551    ///
552    /// This function requires a mutable reference to `self` since it may need
553    /// to read the next line if needed.
554    pub async fn peek_char(&mut self) -> Result<Option<char>> {
555        while self.line_continuation().await? {}
556
557        match self.core.peek_char().await? {
558            PeekChar::Char(source_char) => Ok(Some(source_char.value)),
559            PeekChar::EndOfInput(_) => Ok(None),
560        }
561    }
562
563    /// Returns the location of the next character.
564    ///
565    /// If there is no more character (that is, it is the end of input), an imaginary location
566    /// is returned that would be returned if a character existed.
567    ///
568    /// This function requires a mutable reference to `self` since it needs to
569    /// [peek](Self::peek_char) the next character.
570    pub async fn location(&mut self) -> Result<&Location> {
571        self.core.peek_char().await.map(|p| p.location())
572    }
573
    /// Consumes the next character.
    ///
    /// This function must be called after [`peek_char`](Lexer::peek_char) has successfully
    /// returned the character.
    ///
    /// # Panics
    ///
    /// If the character at the current position has not yet been peeked.
    pub fn consume_char(&mut self) {
        self.core.consume_char()
    }
582
    /// Returns the position of the next character, counted from zero.
    ///
    /// Peeking a character does not change the index; consuming one
    /// increments it.
    ///
    /// ```
    /// # use yash_syntax::parser::lex::Lexer;
    /// # use yash_syntax::source::Source;
    /// futures_executor::block_on(async {
    ///     let mut lexer = Lexer::from_memory("abc", Source::Unknown);
    ///     assert_eq!(lexer.index(), 0);
    ///     let _ = lexer.peek_char().await;
    ///     assert_eq!(lexer.index(), 0);
    ///     lexer.consume_char();
    ///     assert_eq!(lexer.index(), 1);
    /// })
    /// ```
    #[must_use]
    pub fn index(&self) -> usize {
        self.core.index()
    }
601
    /// Moves the current position back to the given index so that characters that have been
    /// consumed can be read again.
    ///
    /// ```
    /// # use yash_syntax::parser::lex::Lexer;
    /// # use yash_syntax::source::Source;
    /// futures_executor::block_on(async {
    ///     let mut lexer = Lexer::from_memory("abc", Source::Unknown);
    ///     let saved_index = lexer.index();
    ///     assert_eq!(lexer.peek_char().await, Ok(Some('a')));
    ///     lexer.consume_char();
    ///     assert_eq!(lexer.peek_char().await, Ok(Some('b')));
    ///     lexer.rewind(saved_index);
    ///     assert_eq!(lexer.peek_char().await, Ok(Some('a')));
    /// })
    /// ```
    ///
    /// # Panics
    ///
    /// If the given index is larger than the [current index](Lexer::index).
    pub fn rewind(&mut self, index: usize) {
        self.core.rewind(index)
    }
624
    /// Checks if there is any character that has been read from the input
    /// source but not yet consumed.
    ///
    /// Returns `true` if the internal buffer holds at least one character at
    /// or after the [current index](Self::index).
    #[must_use]
    pub fn pending(&self) -> bool {
        self.core.pending()
    }
631
    /// Clears the internal buffer of the lexer.
    ///
    /// Locations returned from [`location`](Self::location) share a single code
    /// instance that is also retained by the lexer. The code grows long as the
    /// lexer reads more input. To prevent the code from getting too large, you
    /// can call this function that replaces the retained code with a new empty
    /// one. The new code's `start_line_number` will be incremented by the
    /// number of lines in the previous.
    ///
    /// All buffered characters are discarded, including any that have been
    /// read but not yet consumed, and the [index](Self::index) is reset to
    /// zero.
    pub fn flush(&mut self) {
        self.core.flush()
    }
643
    /// Clears an end-of-input or error status so that the lexer can resume
    /// parsing.
    ///
    /// This function will be useful only in an interactive shell where the user
    /// can continue entering commands even after they send an end-of-input or
    /// are interrupted by a syntax error.
    ///
    /// The internal buffer is also cleared as if [`flush`](Self::flush) were
    /// called.
    pub fn reset(&mut self) {
        self.core.reset()
    }
653
    /// Peeks the next character and, if the given decider function returns true for it,
    /// advances the position.
    ///
    /// Returns the consumed character if the function returned true. Returns `Ok(None)` if it
    /// returned false or there is no more character. In the latter case, the
    /// decider function is not called.
    ///
    /// This generic function merely forwards to
    /// [`consume_char_if_dyn`](Self::consume_char_if_dyn), passing the decider
    /// as a trait object.
    pub async fn consume_char_if<F>(&mut self, mut f: F) -> Result<Option<&SourceChar>>
    where
        F: FnMut(char) -> bool,
    {
        self.consume_char_if_dyn(&mut f).await
    }
665
666    /// Dynamic version of [`Self::consume_char_if`].
667    pub(crate) async fn consume_char_if_dyn(
668        &mut self,
669        f: &mut dyn FnMut(char) -> bool,
670    ) -> Result<Option<&SourceChar>> {
671        match self.peek_char().await? {
672            Some(c) if f(c) => {
673                let index = self.index();
674                self.consume_char();
675                Ok(Some(self.core.peek_char_at(index)))
676            }
677            _ => Ok(None),
678        }
679    }
680
    /// Extracts a string from the source code range.
    ///
    /// This function returns the source code string for the range specified by
    /// the argument. The range must refer to characters that have already been
    /// read.
    ///
    /// # Panics
    ///
    /// If the argument index is out of bounds, i.e., pointing to an unread
    /// character.
    #[inline]
    pub fn source_string(&self, range: Range<usize>) -> String {
        self.core.source_string(range)
    }
695
    /// Returns a location for a given range of the source code.
    ///
    /// All the characters in the range must have been
    /// [consume](Self::consume_char)d.
    ///
    /// If the characters are from more than one [`Code`] fragment, the location
    /// will only cover the initial portion of the range sharing the same
    /// `Code`.
    ///
    /// If the range starts at the end of input that has already been peeked,
    /// the location of the end of input is returned.
    ///
    /// # Panics
    ///
    /// This function will panic if the range refers to an unconsumed character.
    ///
    /// If the start index of the range is the end of input, it must have been
    /// peeked and the range must be empty, or the function will panic.
    #[must_use]
    pub fn location_range(&self, range: Range<usize>) -> Location {
        self.core.location_range(range)
    }
716
    /// Performs alias substitution right before the current position.
    ///
    /// This function must be called just after a [word](WordLexer::word) has been parsed that
    /// matches the name of the argument alias. No check is done in this function that there is
    /// a matching word before the current position. The characters starting from the `begin`
    /// index up to the current position are silently replaced with the alias value.
    ///
    /// The resulting part of code will be characters with a [`Source::Alias`] origin.
    ///
    /// After the substitution, the position will be set before the replaced string,
    /// that is, [`index`](Self::index) will return `begin`.
    ///
    /// # Panics
    ///
    /// If the replaced part is empty, i.e., `begin >= self.index()`.
    pub fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
        self.core.substitute_alias(begin, alias)
    }
734
    /// Tests if the given index is after the replacement string of alias
    /// substitution that ends with a blank.
    ///
    /// Blanks and line continuations between the replacement string and the
    /// index are ignored; any other character in between makes the result
    /// `false`.
    ///
    /// # Panics
    ///
    /// If `index` is larger than the currently read index.
    pub fn is_after_blank_ending_alias(&self, index: usize) -> bool {
        self.core.is_after_blank_ending_alias(index)
    }
744
    /// Parses an optional compound list that is the content of a command
    /// substitution.
    ///
    /// This function consumes characters until a token that cannot be the
    /// beginning of an and-or list is found and returns the string that was
    /// consumed.
    ///
    /// The parse result itself is discarded; only the source text between the
    /// starting position and the beginning of the first unparsed token is
    /// returned. Errors from the parser are returned as is.
    pub async fn inner_program(&mut self) -> Result<String> {
        let begin = self.index();

        // A temporary parser borrowing this lexer is created with an
        // `EmptyGlossary` as its alias glossary.
        let mut parser = super::super::Parser::new(self, &EmptyGlossary);
        parser.maybe_compound_list().await?;

        // The parser may have read ahead to examine the next token, so move
        // back to the position where that token starts.
        let end = parser.peek_token().await?.index;
        self.rewind(end);

        Ok(self.core.source_string(begin..end))
    }
762
    /// Like [`Lexer::inner_program`], but returns the future in a pinning box.
    ///
    /// The returned future borrows the lexer mutably until it is dropped.
    pub fn inner_program_boxed(&mut self) -> Pin<Box<dyn Future<Output = Result<String>> + '_>> {
        Box::pin(self.inner_program())
    }
767}
768
/// Reference to [`Lexer`] with line continuation disabled.
///
/// This struct implements the RAII pattern for temporarily disabling line
/// continuation. When you disable the line continuation of a lexer, you get an
/// instance of `PlainLexer`. You can access the original lexer via the
/// `PlainLexer` until you drop it, when the line continuation is automatically
/// re-enabled.
///
/// Instances are obtained from [`Lexer::disable_line_continuation`].
#[derive(Debug)]
#[must_use = "You must retain the PlainLexer to keep line continuation disabled"]
pub struct PlainLexer<'a, 'b> {
    /// The lexer whose line continuation recognition is disabled.
    lexer: &'a mut Lexer<'b>,
}
781
782impl<'b> Deref for PlainLexer<'_, 'b> {
783    type Target = Lexer<'b>;
784    fn deref(&self) -> &Lexer<'b> {
785        self.lexer
786    }
787}
788
789impl<'b> DerefMut for PlainLexer<'_, 'b> {
790    fn deref_mut(&mut self) -> &mut Lexer<'b> {
791        self.lexer
792    }
793}
794
impl Drop for PlainLexer<'_, '_> {
    /// Re-enables line continuation recognition of the wrapped lexer.
    fn drop(&mut self) {
        self.lexer.line_continuation_enabled = true;
    }
}
800
/// Context in which a [word](crate::syntax::Word) is parsed.
///
/// How the word of a [switch](crate::syntax::Switch) is parsed depends on
/// whether the parameter expansion containing the switch is part of a text or a
/// word. A `WordContext` value is used to decide the behavior of the lexer.
///
/// Parser functions that depend on the context are implemented in
/// [`WordLexer`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WordContext {
    /// The text unit being parsed is part of a [text](crate::syntax::Text).
    Text,
    /// The text unit being parsed is part of a [word](crate::syntax::Word).
    Word,
}
816
/// Lexer with additional information for parsing [texts](crate::syntax::Text)
/// and [words](crate::syntax::Word).
///
/// A `WordLexer` dereferences to the wrapped [`Lexer`], so all functions of
/// the lexer are available through it.
#[derive(Debug)]
pub struct WordLexer<'a, 'b> {
    /// The underlying lexer.
    pub lexer: &'a mut Lexer<'b>,
    /// The context in which the lexer is currently parsing.
    pub context: WordContext,
}
824
825impl<'b> Deref for WordLexer<'_, 'b> {
826    type Target = Lexer<'b>;
827    fn deref(&self) -> &Lexer<'b> {
828        self.lexer
829    }
830}
831
832impl<'b> DerefMut for WordLexer<'_, 'b> {
833    fn deref_mut(&mut self) -> &mut Lexer<'b> {
834        self.lexer
835    }
836}
837
838#[cfg(test)]
839mod tests {
840    use super::*;
841    use crate::input::Input;
842    use crate::parser::error::ErrorCause;
843    use crate::parser::error::SyntaxError;
844    use assert_matches::assert_matches;
845    use futures_util::FutureExt;
846
    // Peeking an empty input must yield the end of input right away, located
    // at 0..0 in an empty code that keeps the given start line number and
    // source.
    #[test]
    fn lexer_core_peek_char_empty_source() {
        let input = Memory::new("");
        let line = NonZeroU64::new(32).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        let result = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
            assert_eq!(*location.code.value.borrow(), "");
            assert_eq!(location.code.start_line_number, line);
            assert_eq!(*location.code.source, Source::Unknown);
            assert_eq!(location.range, 0..0);
        });
    }
860
861    #[test]
862    fn lexer_core_peek_char_io_error() {
863        #[derive(Debug)]
864        struct Failing;
865        impl fmt::Display for Failing {
866            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
867                write!(f, "Failing")
868            }
869        }
870        impl std::error::Error for Failing {}
871        impl Input for Failing {
872            async fn next_line(&mut self, _: &Context) -> crate::input::Result {
873                Err(std::io::Error::new(std::io::ErrorKind::Other, Failing))
874            }
875        }
876        let line = NonZeroU64::new(42).unwrap();
877        let mut lexer = LexerCore::new(Box::new(Failing), line, Rc::new(Source::Unknown));
878
879        let e = lexer.peek_char().now_or_never().unwrap().unwrap_err();
880        assert_matches!(e.cause, ErrorCause::Io(io_error) => {
881            assert_eq!(io_error.kind(), std::io::ErrorKind::Other);
882        });
883        assert_eq!(*e.location.code.value.borrow(), "");
884        assert_eq!(e.location.code.start_line_number, line);
885        assert_eq!(*e.location.code.source, Source::Unknown);
886        assert_eq!(e.location.range, 0..0);
887    }
888
889    #[test]
890    fn lexer_core_peek_char_context_is_first_line() {
891        // In this test case, this mock input function will be called twice.
892        struct InputMock {
893            first: bool,
894        }
895        impl Input for InputMock {
896            async fn next_line(&mut self, context: &Context) -> crate::input::Result {
897                assert_eq!(context.is_first_line(), self.first);
898                self.first = false;
899                Ok("\n".to_owned())
900            }
901        }
902
903        let input = InputMock { first: true };
904        let line = NonZeroU64::new(42).unwrap();
905        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
906
907        let peek = lexer.peek_char().now_or_never().unwrap();
908        assert_matches!(peek, Ok(PeekChar::Char(_)));
909        lexer.consume_char();
910
911        let peek = lexer.peek_char().now_or_never().unwrap();
912        assert_matches!(peek, Ok(PeekChar::Char(_)));
913        lexer.consume_char();
914    }
915
916    #[test]
917    fn lexer_core_consume_char_success() {
918        let input = Memory::new("a\nb");
919        let line = NonZeroU64::new(1).unwrap();
920        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
921
922        let result = lexer.peek_char().now_or_never().unwrap();
923        assert_matches!(result, Ok(PeekChar::Char(c)) => {
924            assert_eq!(c.value, 'a');
925            assert_eq!(*c.location.code.value.borrow(), "a\n");
926            assert_eq!(c.location.code.start_line_number, line);
927            assert_eq!(*c.location.code.source, Source::Unknown);
928            assert_eq!(c.location.range, 0..1);
929        });
930        assert_matches!(result, Ok(PeekChar::Char(c)) => {
931            assert_eq!(c.value, 'a');
932            assert_eq!(*c.location.code.value.borrow(), "a\n");
933            assert_eq!(c.location.code.start_line_number, line);
934            assert_eq!(*c.location.code.source, Source::Unknown);
935            assert_eq!(c.location.range, 0..1);
936        });
937        lexer.consume_char();
938
939        let result = lexer.peek_char().now_or_never().unwrap();
940        assert_matches!(result, Ok(PeekChar::Char(c)) => {
941            assert_eq!(c.value, '\n');
942            assert_eq!(*c.location.code.value.borrow(), "a\n");
943            assert_eq!(c.location.code.start_line_number, line);
944            assert_eq!(*c.location.code.source, Source::Unknown);
945            assert_eq!(c.location.range, 1..2);
946        });
947        lexer.consume_char();
948
949        let result = lexer.peek_char().now_or_never().unwrap();
950        assert_matches!(result, Ok(PeekChar::Char(c)) => {
951            assert_eq!(c.value, 'b');
952            assert_eq!(*c.location.code.value.borrow(), "a\nb");
953            assert_eq!(c.location.code.start_line_number.get(), 1);
954            assert_eq!(*c.location.code.source, Source::Unknown);
955            assert_eq!(c.location.range, 2..3);
956        });
957        lexer.consume_char();
958
959        let result = lexer.peek_char().now_or_never().unwrap();
960        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
961            assert_eq!(*location.code.value.borrow(), "a\nb");
962            assert_eq!(location.code.start_line_number.get(), 1);
963            assert_eq!(*location.code.source, Source::Unknown);
964            assert_eq!(location.range, 3..3);
965        });
966    }
967
968    #[test]
969    #[should_panic(expected = "A character must have been peeked before being consumed: index=0")]
970    fn lexer_core_consume_char_panic() {
971        let input = Memory::new("a");
972        let line = NonZeroU64::new(1).unwrap();
973        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
974        lexer.consume_char();
975    }
976
977    #[test]
978    fn lexer_core_peek_char_at() {
979        let input = Memory::new("a\nb");
980        let line = NonZeroU64::new(1).unwrap();
981        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
982
983        let c0 = assert_matches!(
984            lexer.peek_char().now_or_never().unwrap(),
985            Ok(PeekChar::Char(c)) => c.clone()
986        );
987        lexer.consume_char();
988
989        let c1 = assert_matches!(
990            lexer.peek_char().now_or_never().unwrap(),
991            Ok(PeekChar::Char(c)) => c.clone()
992        );
993        lexer.consume_char();
994
995        let c2 = assert_matches!(
996            lexer.peek_char().now_or_never().unwrap(),
997            Ok(PeekChar::Char(c)) => c.clone()
998        );
999
1000        assert_eq!(lexer.peek_char_at(0), &c0);
1001        assert_eq!(lexer.peek_char_at(1), &c1);
1002        assert_eq!(lexer.peek_char_at(2), &c2);
1003    }
1004
1005    #[test]
1006    fn lexer_core_index() {
1007        let input = Memory::new("a\nb");
1008        let line = NonZeroU64::new(1).unwrap();
1009        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1010
1011        assert_eq!(lexer.index(), 0);
1012        lexer.peek_char().now_or_never().unwrap().unwrap();
1013        assert_eq!(lexer.index(), 0);
1014        lexer.consume_char();
1015
1016        assert_eq!(lexer.index(), 1);
1017        lexer.peek_char().now_or_never().unwrap().unwrap();
1018        lexer.consume_char();
1019
1020        assert_eq!(lexer.index(), 2);
1021        lexer.peek_char().now_or_never().unwrap().unwrap();
1022        lexer.consume_char();
1023
1024        assert_eq!(lexer.index(), 3);
1025    }
1026
1027    #[test]
1028    fn lexer_core_rewind_success() {
1029        let input = Memory::new("abc");
1030        let line = NonZeroU64::new(1).unwrap();
1031        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1032        lexer.rewind(0);
1033        assert_eq!(lexer.index(), 0);
1034
1035        let _ = lexer.peek_char().now_or_never().unwrap();
1036        lexer.consume_char();
1037        let _ = lexer.peek_char().now_or_never().unwrap();
1038        lexer.consume_char();
1039        lexer.rewind(0);
1040
1041        let result = lexer.peek_char().now_or_never().unwrap();
1042        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1043            assert_eq!(c.value, 'a');
1044            assert_eq!(*c.location.code.value.borrow(), "abc");
1045            assert_eq!(c.location.code.start_line_number, line);
1046            assert_eq!(*c.location.code.source, Source::Unknown);
1047            assert_eq!(c.location.range, 0..1);
1048        });
1049    }
1050
1051    #[test]
1052    #[should_panic(expected = "The new index 1 must not be larger than the current index 0")]
1053    fn lexer_core_rewind_invalid_index() {
1054        let input = Memory::new("abc");
1055        let line = NonZeroU64::new(1).unwrap();
1056        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1057        lexer.rewind(1);
1058    }
1059
1060    #[test]
1061    fn lexer_core_source_string() {
1062        let input = Memory::new("ab\ncd");
1063        let line = NonZeroU64::new(1).unwrap();
1064        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1065        for _ in 0..4 {
1066            let _ = lexer.peek_char().now_or_never().unwrap();
1067            lexer.consume_char();
1068        }
1069
1070        let result = lexer.source_string(1..4);
1071        assert_eq!(result, "b\nc");
1072    }
1073
1074    #[test]
1075    #[should_panic(expected = "begin index 0 should be less than end index 0")]
1076    fn lexer_core_substitute_alias_with_invalid_index() {
1077        let input = Memory::new("a b");
1078        let line = NonZeroU64::new(1).unwrap();
1079        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1080        let alias = Rc::new(Alias {
1081            name: "a".to_string(),
1082            replacement: "".to_string(),
1083            global: false,
1084            origin: Location::dummy("dummy"),
1085        });
1086        lexer.substitute_alias(0, &alias);
1087    }
1088
1089    #[test]
1090    fn lexer_core_substitute_alias_single_line_replacement() {
1091        let input = Memory::new("a b");
1092        let line = NonZeroU64::new(1).unwrap();
1093        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1094        let alias = Rc::new(Alias {
1095            name: "a".to_string(),
1096            replacement: "lex".to_string(),
1097            global: false,
1098            origin: Location::dummy("dummy"),
1099        });
1100
1101        let _ = lexer.peek_char().now_or_never().unwrap();
1102        lexer.consume_char();
1103
1104        lexer.substitute_alias(0, &alias);
1105
1106        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1107            assert_eq!(c.value, 'l');
1108            assert_eq!(*c.location.code.value.borrow(), "lex");
1109            assert_eq!(c.location.code.start_line_number.get(), 1);
1110            assert_matches!(&*c.location.code.source,
1111                Source::Alias { original, alias: alias2 } => {
1112                assert_eq!(*original.code.value.borrow(), "a b");
1113                assert_eq!(original.code.start_line_number, line);
1114                assert_eq!(*original.code.source, Source::Unknown);
1115                assert_eq!(original.range, 0..1);
1116                assert_eq!(alias2, &alias);
1117            });
1118            assert_eq!(c.location.range, 0..1);
1119        });
1120        lexer.consume_char();
1121
1122        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1123            assert_eq!(c.value, 'e');
1124            assert_eq!(*c.location.code.value.borrow(), "lex");
1125            assert_eq!(c.location.code.start_line_number, line);
1126            assert_matches!(&*c.location.code.source,
1127                Source::Alias { original, alias: alias2 } => {
1128                assert_eq!(*original.code.value.borrow(), "a b");
1129                assert_eq!(original.code.start_line_number, line);
1130                assert_eq!(*original.code.source, Source::Unknown);
1131                assert_eq!(original.range, 0..1);
1132                assert_eq!(alias2, &alias);
1133            });
1134            assert_eq!(c.location.range, 1..2);
1135        });
1136        lexer.consume_char();
1137
1138        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1139            assert_eq!(c.value, 'x');
1140            assert_eq!(*c.location.code.value.borrow(), "lex");
1141            assert_eq!(c.location.code.start_line_number, line);
1142            assert_matches!(&*c.location.code.source,
1143                Source::Alias { original, alias: alias2 } => {
1144                assert_eq!(*original.code.value.borrow(), "a b");
1145                assert_eq!(original.code.start_line_number, line);
1146                assert_eq!(*original.code.source, Source::Unknown);
1147                assert_eq!(original.range, 0..1);
1148                assert_eq!(alias2, &alias);
1149            });
1150            assert_eq!(c.location.range, 2..3);
1151        });
1152        lexer.consume_char();
1153
1154        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1155            assert_eq!(c.value, ' ');
1156            assert_eq!(*c.location.code.value.borrow(), "a b");
1157            assert_eq!(c.location.code.start_line_number, line);
1158            assert_eq!(*c.location.code.source, Source::Unknown);
1159            assert_eq!(c.location.range, 1..2);
1160        });
1161        lexer.consume_char();
1162    }
1163
1164    #[test]
1165    fn lexer_core_substitute_alias_multi_line_replacement() {
1166        let input = Memory::new(" foo b");
1167        let line = NonZeroU64::new(1).unwrap();
1168        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1169        let alias = Rc::new(Alias {
1170            name: "foo".to_string(),
1171            replacement: "x\ny".to_string(),
1172            global: true,
1173            origin: Location::dummy("loc"),
1174        });
1175
1176        for _ in 0..4 {
1177            let _ = lexer.peek_char().now_or_never().unwrap();
1178            lexer.consume_char();
1179        }
1180
1181        lexer.substitute_alias(1, &alias);
1182
1183        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1184            assert_eq!(c.value, 'x');
1185            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1186            assert_eq!(c.location.code.start_line_number, line);
1187            assert_matches!(&*c.location.code.source,
1188                Source::Alias { original, alias: alias2 } => {
1189                assert_eq!(*original.code.value.borrow(), " foo b");
1190                assert_eq!(original.code.start_line_number, line);
1191                assert_eq!(*original.code.source, Source::Unknown);
1192                assert_eq!(original.range, 1..4);
1193                assert_eq!(alias2, &alias);
1194            });
1195            assert_eq!(c.location.range, 0..1);
1196        });
1197        lexer.consume_char();
1198
1199        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1200            assert_eq!(c.value, '\n');
1201            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1202            assert_eq!(c.location.code.start_line_number, line);
1203            assert_matches!(&*c.location.code.source,
1204                Source::Alias { original, alias: alias2 } => {
1205                assert_eq!(*original.code.value.borrow(), " foo b");
1206                assert_eq!(original.code.start_line_number, line);
1207                assert_eq!(*original.code.source, Source::Unknown);
1208                assert_eq!(original.range, 1..4);
1209                assert_eq!(alias2, &alias);
1210            });
1211            assert_eq!(c.location.range, 1..2);
1212        });
1213        lexer.consume_char();
1214
1215        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1216            assert_eq!(c.value, 'y');
1217            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1218            assert_eq!(c.location.code.start_line_number, line);
1219            assert_matches!(&*c.location.code.source, Source::Alias { original, alias: alias2 } => {
1220                assert_eq!(*original.code.value.borrow(), " foo b");
1221                assert_eq!(original.code.start_line_number, line);
1222                assert_eq!(*original.code.source, Source::Unknown);
1223                assert_eq!(original.range, 1..4);
1224                assert_eq!(alias2, &alias);
1225            });
1226            assert_eq!(c.location.range, 2..3);
1227        });
1228        lexer.consume_char();
1229
1230        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1231            assert_eq!(c.value, ' ');
1232            assert_eq!(*c.location.code.value.borrow(), " foo b");
1233            assert_eq!(c.location.code.start_line_number, line);
1234            assert_eq!(*c.location.code.source, Source::Unknown);
1235            assert_eq!(c.location.range, 4..5);
1236        });
1237        lexer.consume_char();
1238    }
1239
1240    #[test]
1241    fn lexer_core_substitute_alias_empty_replacement() {
1242        let input = Memory::new("x ");
1243        let line = NonZeroU64::new(1).unwrap();
1244        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1245        let alias = Rc::new(Alias {
1246            name: "x".to_string(),
1247            replacement: "".to_string(),
1248            global: false,
1249            origin: Location::dummy("dummy"),
1250        });
1251
1252        let _ = lexer.peek_char().now_or_never().unwrap();
1253        lexer.consume_char();
1254
1255        lexer.substitute_alias(0, &alias);
1256
1257        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1258            assert_eq!(c.value, ' ');
1259            assert_eq!(*c.location.code.value.borrow(), "x ");
1260            assert_eq!(c.location.code.start_line_number, line);
1261            assert_eq!(*c.location.code.source, Source::Unknown);
1262            assert_eq!(c.location.range, 1..2);
1263        });
1264    }
1265
1266    #[test]
1267    fn lexer_core_peek_char_after_alias_substitution() {
1268        let input = Memory::new("a\nb");
1269        let line = NonZeroU64::new(1).unwrap();
1270        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1271
1272        lexer.peek_char().now_or_never().unwrap().unwrap();
1273        lexer.consume_char();
1274
1275        let alias = Rc::new(Alias {
1276            name: "a".to_string(),
1277            replacement: "".to_string(),
1278            global: false,
1279            origin: Location::dummy("dummy"),
1280        });
1281        lexer.substitute_alias(0, &alias);
1282
1283        let result = lexer.peek_char().now_or_never().unwrap();
1284        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1285            assert_eq!(c.value, '\n');
1286            assert_eq!(*c.location.code.value.borrow(), "a\n");
1287            assert_eq!(c.location.code.start_line_number, line);
1288            assert_eq!(*c.location.code.source, Source::Unknown);
1289            assert_eq!(c.location.range, 1..2);
1290        });
1291        lexer.consume_char();
1292
1293        let result = lexer.peek_char().now_or_never().unwrap();
1294        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1295            assert_eq!(c.value, 'b');
1296            assert_eq!(*c.location.code.value.borrow(), "a\nb");
1297            assert_eq!(c.location.code.start_line_number.get(), 1);
1298            assert_eq!(*c.location.code.source, Source::Unknown);
1299            assert_eq!(c.location.range, 2..3);
1300        });
1301        lexer.consume_char();
1302
1303        let result = lexer.peek_char().now_or_never().unwrap();
1304        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
1305            assert_eq!(*location.code.value.borrow(), "a\nb");
1306            assert_eq!(location.code.start_line_number.get(), 1);
1307            assert_eq!(*location.code.source, Source::Unknown);
1308            assert_eq!(location.range, 3..3);
1309        });
1310    }
1311
1312    #[test]
1313    fn lexer_core_is_after_blank_ending_alias_index_0() {
1314        let original = Location::dummy("original");
1315        let alias = Rc::new(Alias {
1316            name: "a".to_string(),
1317            replacement: " ".to_string(),
1318            global: false,
1319            origin: Location::dummy("origin"),
1320        });
1321        let source = Source::Alias { original, alias };
1322        let input = Memory::new("a");
1323        let line = NonZeroU64::new(1).unwrap();
1324        let lexer = LexerCore::new(Box::new(input), line, Rc::new(source));
1325        assert!(!lexer.is_after_blank_ending_alias(0));
1326    }
1327
1328    #[test]
1329    fn lexer_core_is_after_blank_ending_alias_not_blank_ending() {
1330        let input = Memory::new("a x");
1331        let line = NonZeroU64::new(1).unwrap();
1332        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1333        let alias = Rc::new(Alias {
1334            name: "a".to_string(),
1335            replacement: " b".to_string(),
1336            global: false,
1337            origin: Location::dummy("dummy"),
1338        });
1339
1340        lexer.peek_char().now_or_never().unwrap().unwrap();
1341        lexer.consume_char();
1342
1343        lexer.substitute_alias(0, &alias);
1344
1345        assert!(!lexer.is_after_blank_ending_alias(0));
1346        assert!(!lexer.is_after_blank_ending_alias(1));
1347        assert!(!lexer.is_after_blank_ending_alias(2));
1348        assert!(!lexer.is_after_blank_ending_alias(3));
1349    }
1350
1351    #[test]
1352    fn lexer_core_is_after_blank_ending_alias_blank_ending() {
1353        let input = Memory::new("a x");
1354        let line = NonZeroU64::new(1).unwrap();
1355        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1356        let alias = Rc::new(Alias {
1357            name: "a".to_string(),
1358            replacement: " b ".to_string(),
1359            global: false,
1360            origin: Location::dummy("dummy"),
1361        });
1362
1363        lexer.peek_char().now_or_never().unwrap().unwrap();
1364        lexer.consume_char();
1365
1366        lexer.substitute_alias(0, &alias);
1367
1368        assert!(!lexer.is_after_blank_ending_alias(0));
1369        assert!(!lexer.is_after_blank_ending_alias(1));
1370        assert!(!lexer.is_after_blank_ending_alias(2));
1371        assert!(lexer.is_after_blank_ending_alias(3));
1372        assert!(lexer.is_after_blank_ending_alias(4));
1373    }
1374
1375    #[test]
1376    fn lexer_core_is_after_blank_ending_alias_after_line_continuation() {
1377        let input = Memory::new("a\\\n x");
1378        let line = NonZeroU64::new(1).unwrap();
1379        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1380        let alias = Rc::new(Alias {
1381            name: "a".to_string(),
1382            replacement: " b ".to_string(),
1383            global: false,
1384            origin: Location::dummy("dummy"),
1385        });
1386
1387        lexer.peek_char().now_or_never().unwrap().unwrap();
1388        lexer.consume_char();
1389        lexer.substitute_alias(0, &alias);
1390
1391        while let Ok(PeekChar::Char(_)) = lexer.peek_char().now_or_never().unwrap() {
1392            lexer.consume_char();
1393        }
1394        lexer.mark_line_continuation(3..5);
1395
1396        assert!(!lexer.is_after_blank_ending_alias(0));
1397        assert!(!lexer.is_after_blank_ending_alias(1));
1398        assert!(!lexer.is_after_blank_ending_alias(2));
1399        assert!(lexer.is_after_blank_ending_alias(5));
1400        assert!(lexer.is_after_blank_ending_alias(6));
1401    }
1402
1403    #[test]
1404    fn lexer_with_empty_source() {
1405        let mut lexer = Lexer::from_memory("", Source::Unknown);
1406        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
1407    }
1408
1409    #[test]
1410    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_backslash() {
1411        let mut lexer = Lexer::from_memory("\\\n\n\\", Source::Unknown);
1412        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\n')));
1413        assert_eq!(lexer.index(), 2);
1414    }
1415
1416    #[test]
1417    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_newline() {
1418        let mut lexer = Lexer::from_memory("\\\n\\\n\\\n\\\\", Source::Unknown);
1419        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1420        assert_eq!(lexer.index(), 6);
1421    }
1422
1423    #[test]
1424    fn lexer_peek_char_with_line_continuation_disabled() {
1425        let mut lexer = Lexer::from_memory("\\\n\\\n\\\\", Source::Unknown);
1426        let mut lexer = lexer.disable_line_continuation();
1427        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1428        assert_eq!(lexer.index(), 0);
1429    }
1430
1431    #[test]
1432    fn lexer_flush() {
1433        let mut lexer = Lexer::from_memory(" \n\n\t\n", Source::Unknown);
1434        let location_1 = lexer.location().now_or_never().unwrap().unwrap().clone();
1435        assert_eq!(*location_1.code.value.borrow(), " \n");
1436
1437        lexer.consume_char();
1438        lexer.peek_char().now_or_never().unwrap().unwrap();
1439        lexer.consume_char();
1440        lexer.peek_char().now_or_never().unwrap().unwrap();
1441        lexer.consume_char();
1442        lexer.flush();
1443        lexer.peek_char().now_or_never().unwrap().unwrap();
1444        lexer.consume_char();
1445
1446        let location_2 = lexer.location().now_or_never().unwrap().unwrap().clone();
1447
1448        assert_eq!(*location_1.code.value.borrow(), " \n\n");
1449        assert_eq!(location_1.code.start_line_number.get(), 1);
1450        assert_eq!(*location_1.code.source, Source::Unknown);
1451        assert_eq!(location_1.range, 0..1);
1452        assert_eq!(*location_2.code.value.borrow(), "\t\n");
1453        assert_eq!(location_2.code.start_line_number.get(), 3);
1454        assert_eq!(*location_2.code.source, Source::Unknown);
1455        assert_eq!(location_2.range, 1..2);
1456    }
1457
1458    #[test]
1459    fn lexer_consume_char_if() {
1460        let mut lexer = Lexer::from_memory("word\n", Source::Unknown);
1461
1462        let mut called = 0;
1463        let c = lexer
1464            .consume_char_if(|c| {
1465                assert_eq!(c, 'w');
1466                called += 1;
1467                true
1468            })
1469            .now_or_never()
1470            .unwrap()
1471            .unwrap()
1472            .unwrap();
1473        assert_eq!(called, 1);
1474        assert_eq!(c.value, 'w');
1475        assert_eq!(*c.location.code.value.borrow(), "word\n");
1476        assert_eq!(c.location.code.start_line_number.get(), 1);
1477        assert_eq!(*c.location.code.source, Source::Unknown);
1478        assert_eq!(c.location.range, 0..1);
1479
1480        let mut called = 0;
1481        let r = lexer
1482            .consume_char_if(|c| {
1483                assert_eq!(c, 'o');
1484                called += 1;
1485                false
1486            })
1487            .now_or_never()
1488            .unwrap();
1489        assert_eq!(called, 1);
1490        assert_eq!(r, Ok(None));
1491
1492        let mut called = 0;
1493        let r = lexer
1494            .consume_char_if(|c| {
1495                assert_eq!(c, 'o');
1496                called += 1;
1497                false
1498            })
1499            .now_or_never()
1500            .unwrap();
1501        assert_eq!(called, 1);
1502        assert_eq!(r, Ok(None));
1503
1504        let mut called = 0;
1505        let c = lexer
1506            .consume_char_if(|c| {
1507                assert_eq!(c, 'o');
1508                called += 1;
1509                true
1510            })
1511            .now_or_never()
1512            .unwrap()
1513            .unwrap()
1514            .unwrap();
1515        assert_eq!(called, 1);
1516        assert_eq!(c.value, 'o');
1517        assert_eq!(*c.location.code.value.borrow(), "word\n");
1518        assert_eq!(c.location.code.start_line_number.get(), 1);
1519        assert_eq!(*c.location.code.source, Source::Unknown);
1520        assert_eq!(c.location.range, 1..2);
1521
1522        lexer
1523            .consume_char_if(|c| {
1524                assert_eq!(c, 'r');
1525                true
1526            })
1527            .now_or_never()
1528            .unwrap()
1529            .unwrap()
1530            .unwrap();
1531        lexer
1532            .consume_char_if(|c| {
1533                assert_eq!(c, 'd');
1534                true
1535            })
1536            .now_or_never()
1537            .unwrap()
1538            .unwrap()
1539            .unwrap();
1540        lexer
1541            .consume_char_if(|c| {
1542                assert_eq!(c, '\n');
1543                true
1544            })
1545            .now_or_never()
1546            .unwrap()
1547            .unwrap()
1548            .unwrap();
1549
1550        // end of input
1551        let r = lexer
1552            .consume_char_if(|c| {
1553                unreachable!("unexpected call to the decider function: argument={}", c)
1554            })
1555            .now_or_never()
1556            .unwrap();
1557        assert_eq!(r, Ok(None));
1558    }
1559
1560    #[test]
1561    fn lexer_location_range_with_empty_range() {
1562        let mut lexer = Lexer::from_memory("", Source::Unknown);
1563        lexer.peek_char().now_or_never().unwrap().unwrap();
1564        let location = lexer.location_range(0..0);
1565        assert_eq!(*location.code.value.borrow(), "");
1566        assert_eq!(location.code.start_line_number.get(), 1);
1567        assert_eq!(*location.code.source, Source::Unknown);
1568        assert_eq!(location.range, 0..0);
1569    }
1570
1571    #[test]
1572    fn lexer_location_range_with_nonempty_range() {
1573        let mut lexer = Lexer::from_memory("cat foo", Source::Stdin);
1574        for _ in 0..4 {
1575            lexer.peek_char().now_or_never().unwrap().unwrap();
1576            lexer.consume_char();
1577        }
1578        lexer.peek_char().now_or_never().unwrap().unwrap();
1579
1580        let location = lexer.location_range(1..4);
1581        assert_eq!(*location.code.value.borrow(), "cat foo");
1582        assert_eq!(location.code.start_line_number.get(), 1);
1583        assert_eq!(*location.code.source, Source::Stdin);
1584        assert_eq!(location.range, 1..4);
1585    }
1586
1587    #[test]
1588    fn lexer_location_range_with_range_starting_at_end() {
1589        let mut lexer = Lexer::from_memory("cat", Source::Stdin);
1590        for _ in 0..3 {
1591            lexer.peek_char().now_or_never().unwrap().unwrap();
1592            lexer.consume_char();
1593        }
1594        lexer.peek_char().now_or_never().unwrap().unwrap();
1595
1596        let location = lexer.location_range(3..3);
1597        assert_eq!(*location.code.value.borrow(), "cat");
1598        assert_eq!(location.code.start_line_number.get(), 1);
1599        assert_eq!(*location.code.source, Source::Stdin);
1600        assert_eq!(location.range, 3..3);
1601    }
1602
1603    #[test]
1604    #[should_panic]
1605    fn lexer_location_range_with_unconsumed_code() {
1606        let lexer = Lexer::from_memory("echo ok", Source::Unknown);
1607        let _ = lexer.location_range(0..0);
1608    }
1609
1610    #[test]
1611    #[should_panic(expected = "The index 1 must not be larger than the current index 0")]
1612    fn lexer_location_range_with_range_out_of_bounds() {
1613        let lexer = Lexer::from_memory("", Source::Unknown);
1614        let _ = lexer.location_range(1..2);
1615    }
1616
1617    #[test]
1618    fn lexer_location_range_with_alias_substitution() {
1619        let mut lexer = Lexer::from_memory(" a;", Source::Unknown);
1620        let alias_def = Rc::new(Alias {
1621            name: "a".to_string(),
1622            replacement: "abc".to_string(),
1623            global: false,
1624            origin: Location::dummy("dummy"),
1625        });
1626        for _ in 0..2 {
1627            lexer.peek_char().now_or_never().unwrap().unwrap();
1628            lexer.consume_char();
1629        }
1630        lexer.substitute_alias(1, &alias_def);
1631        for _ in 1..5 {
1632            lexer.peek_char().now_or_never().unwrap().unwrap();
1633            lexer.consume_char();
1634        }
1635
1636        let location = lexer.location_range(2..5);
1637        assert_eq!(*location.code.value.borrow(), "abc");
1638        assert_eq!(location.code.start_line_number.get(), 1);
1639        assert_matches!(&*location.code.source, Source::Alias { original, alias } => {
1640            assert_eq!(*original.code.value.borrow(), " a;");
1641            assert_eq!(original.code.start_line_number.get(), 1);
1642            assert_eq!(*original.code.source, Source::Unknown);
1643            assert_eq!(original.range, 1..2);
1644            assert_eq!(alias, &alias_def);
1645        });
1646        assert_eq!(location.range, 1..3);
1647    }
1648
1649    #[test]
1650    fn lexer_inner_program_success() {
1651        let mut lexer = Lexer::from_memory("x y )", Source::Unknown);
1652        let source = lexer.inner_program().now_or_never().unwrap().unwrap();
1653        assert_eq!(source, "x y ");
1654    }
1655
1656    #[test]
1657    fn lexer_inner_program_failure() {
1658        let mut lexer = Lexer::from_memory("<< )", Source::Unknown);
1659        let e = lexer.inner_program().now_or_never().unwrap().unwrap_err();
1660        assert_eq!(
1661            e.cause,
1662            ErrorCause::Syntax(SyntaxError::MissingHereDocDelimiter)
1663        );
1664        assert_eq!(*e.location.code.value.borrow(), "<< )");
1665        assert_eq!(e.location.code.start_line_number.get(), 1);
1666        assert_eq!(*e.location.code.source, Source::Unknown);
1667        assert_eq!(e.location.range, 3..4);
1668    }
1669}