yash_syntax/parser/lex/
core.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Fundamental building blocks for the lexical analyzer
18
19use super::keyword::Keyword;
20use super::op::Operator;
21use crate::alias::Alias;
22use crate::input::Context;
23use crate::input::InputObject;
24use crate::input::Memory;
25use crate::parser::core::Result;
26use crate::parser::error::Error;
27use crate::source::Code;
28use crate::source::Location;
29use crate::source::Source;
30use crate::source::SourceChar;
31use crate::source::source_chars;
32use crate::syntax::Word;
33use std::cell::RefCell;
34use std::fmt;
35use std::num::NonZeroU64;
36use std::ops::Deref;
37use std::ops::DerefMut;
38use std::ops::Range;
39use std::pin::Pin;
40use std::rc::Rc;
41
/// Tests whether `c` is a blank, that is, a whitespace character other than
/// a newline.
pub fn is_blank(c: char) -> bool {
    // TODO locale
    match c {
        '\n' => false,
        other => other.is_whitespace(),
    }
}
47
/// Result of [`LexerCore::peek_char`]
///
/// Both variants borrow data owned by the lexer core.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PeekChar<'a> {
    /// A character is available at the current index.
    Char(&'a SourceChar),
    /// The input has ended; the location is the one recorded when the end of
    /// input was detected.
    EndOfInput(&'a Location),
}
54
55impl<'a> PeekChar<'a> {
56    /// Returns the location that was peeked.
57    #[must_use]
58    fn location<'b>(self: &'b PeekChar<'a>) -> &'a Location {
59        match self {
60            PeekChar::Char(c) => &c.location,
61            PeekChar::EndOfInput(l) => l,
62        }
63    }
64}
65
/// Token identifier, or classification of tokens
///
/// This enum classifies a token as defined in POSIX XCU 2.10.1 Shell Grammar Lexical
/// Conventions, but does not exactly reflect further distinction defined in
/// POSIX XCU 2.10.2 Shell Grammar Rules.
///
/// For convenience, the special token identifier `EndOfInput` is included.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenId {
    /// `TOKEN`
    ///
    /// If this token _looks like_ a reserved word, this variant has some
    /// associated `Keyword` that describes the word. However, it depends on
    /// context whether a token is actually regarded as a reserved word or
    /// just as an ordinary word. You must ensure that you're in an
    /// applicable context when examining the `Keyword` value.
    ///
    /// The value is `None` if the token does not look like a reserved word.
    Token(Option<Keyword>),
    /// Operator token, carrying the [`Operator`] that was recognized
    Operator(Operator),
    /// `IO_NUMBER`
    IoNumber,
    /// Imaginary token identifier for the end of input
    EndOfInput,
}
90
91impl TokenId {
92    /// Determines if this token can be a delimiter of a clause.
93    ///
94    /// This function delegates to [`Keyword::is_clause_delimiter`] if the token
95    /// ID is a (possible) keyword, or to [`Operator::is_clause_delimiter`] if
96    /// it is an operator. For `EndOfInput` the function returns true.
97    /// Otherwise, the result is false.
98    pub fn is_clause_delimiter(self) -> bool {
99        use TokenId::*;
100        match self {
101            Token(Some(keyword)) => keyword.is_clause_delimiter(),
102            Token(None) => false,
103            Operator(operator) => operator.is_clause_delimiter(),
104            IoNumber => false,
105            EndOfInput => true,
106        }
107    }
108}
109
/// Result of lexical analysis produced by the [`Lexer`]
///
/// A token is displayed (via [`fmt::Display`]) as its `word`.
#[derive(Debug)]
pub struct Token {
    /// Content of the token
    ///
    /// The word value contains at least one [unit](crate::syntax::WordUnit),
    /// regardless of whether the token is an operator. The only exception is
    /// when `id` is `EndOfInput`, in which case the word is empty.
    pub word: Word,
    /// Token identifier
    pub id: TokenId,
    /// Position of the first character of the word
    ///
    /// NOTE(review): presumably an index into the lexer's character buffer,
    /// comparable to [`Lexer::index`] -- confirm against the code that
    /// constructs tokens.
    pub index: usize,
}
124
125impl fmt::Display for Token {
126    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
127        write!(f, "{}", self.word)
128    }
129}
130
/// State of the input function in a lexer
#[derive(Clone, Debug)]
enum InputState {
    /// More lines may be read from the input.
    Alive,
    /// The input returned an empty line; the location is where the input
    /// ended.
    EndOfInput(Location),
    /// Reading from the input failed; the error is returned from every
    /// subsequent peek that needs more input.
    Error(Error),
}
138
/// Source character with additional attribute
#[derive(Clone, Debug, Eq, PartialEq)]
struct SourceCharEx {
    /// The character and its location
    value: SourceChar,
    /// Whether this character has been marked as part of a line continuation
    /// by `LexerCore::mark_line_continuation`
    is_line_continuation: bool,
}
145
146fn ex<I: IntoIterator<Item = SourceChar>>(i: I) -> impl Iterator<Item = SourceCharEx> {
147    i.into_iter().map(|sc| SourceCharEx {
148        value: sc,
149        is_line_continuation: false,
150    })
151}
152
/// Core part of the lexical analyzer
///
/// The core owns the input object and the buffer of characters read so far.
struct LexerCore<'a> {
    // The `input` field could be a `&'a mut dyn InputObject + 'a`, but it is
    // `Box<dyn InputObject + 'a>` to allow the lexer to take ownership of the
    // input object. This is necessary for `Lexer::with_code` and similarly
    // constructed lexers.
    input: Box<dyn InputObject + 'a>,
    // Whether more input can be read, or the end of input or an error has
    // been encountered.
    state: InputState,
    // Code to which every line read from the input is appended. It is
    // replaced with a fresh empty one by `flush`.
    raw_code: Rc<Code>,
    // Characters read so far. Part of the buffer may have been replaced by
    // alias substitution.
    source: Vec<SourceCharEx>,
    // Index into `source` of the next character to be consumed.
    index: usize,
}
165
166impl<'a> LexerCore<'a> {
167    /// Creates a new lexer core that reads using the given input function.
168    #[must_use]
169    fn new(
170        input: Box<dyn InputObject + 'a>,
171        start_line_number: NonZeroU64,
172        source: Rc<Source>,
173    ) -> LexerCore<'a> {
174        LexerCore {
175            input,
176            raw_code: Rc::new(Code {
177                value: RefCell::new(String::new()),
178                start_line_number,
179                source,
180            }),
181            state: InputState::Alive,
182            source: Vec::new(),
183            index: 0,
184        }
185    }
186
187    /// Computes the start index of the location at the current position.
188    #[must_use]
189    fn next_index(&self) -> usize {
190        let Some(last) = self.source.last() else {
191            return 0;
192        };
193
194        let mut location = &last.value.location;
195        while let Source::Alias { original, .. } = &*location.code.source {
196            location = original;
197        }
198        location.range.end
199    }
200
    /// Peeks the next character, reading the next line if necessary.
    ///
    /// Returns [`PeekChar::Char`] if a character is available at the current
    /// index. Otherwise, lines are read from the input until a character
    /// becomes available, the input returns an empty line (which yields
    /// [`PeekChar::EndOfInput`]), or reading fails (which yields an error).
    ///
    /// An end of input or error is remembered in `self.state`, so later calls
    /// return the same result without reading from the input again as long as
    /// no buffered character is pending.
    async fn peek_char(&mut self) -> Result<PeekChar<'_>> {
        loop {
            // NOTE(review): the indexing form below is used in place of
            //     if let Some(sc) = self.source.get(self.index) {
            //         return Ok(PeekChar::Char(&sc.value));
            //     }
            // presumably because returning a borrow obtained that way conflicts
            // with the mutation of `self` later in the loop under the current
            // borrow checker -- confirm before changing.
            if self.index < self.source.len() {
                return Ok(PeekChar::Char(&self.source[self.index].value));
            }

            // No buffered character is left. Report a remembered end of input
            // or error before trying to read more.
            match self.state {
                InputState::Alive => (),
                InputState::EndOfInput(ref location) => return Ok(PeekChar::EndOfInput(location)),
                InputState::Error(ref error) => return Err(error.clone()),
            }

            // Read more input
            let index = self.next_index();
            match self.input.next_line(&self.input_context()).await {
                Ok(line) => {
                    if line.is_empty() {
                        // End of input
                        self.state = InputState::EndOfInput(Location {
                            code: Rc::clone(&self.raw_code),
                            range: index..index,
                        });
                    } else {
                        // Successful read
                        self.raw_code.value.borrow_mut().push_str(&line);
                        self.source
                            .extend(ex(source_chars(&line, &self.raw_code, index)));
                    }
                }
                Err(io_error) => {
                    // The error is reported by the `match self.state` above in
                    // the next iteration.
                    self.state = InputState::Error(Error {
                        cause: io_error.into(),
                        location: Location {
                            code: Rc::clone(&self.raw_code),
                            range: index..index,
                        },
                    });
                }
            }
        }
    }
245
246    /// Returns the input context for the next character.
247    fn input_context(&self) -> Context {
248        let mut context = Context::default();
249        context.set_is_first_line(self.raw_code.value.borrow().is_empty());
250        context
251    }
252
253    /// Consumes the next character.
254    ///
255    /// This function must be called after [`peek_char`](Lexer::peek_char) has successfully
256    /// returned the character. Consuming a character that has not yet been peeked would result
257    /// in a panic!
258    fn consume_char(&mut self) {
259        assert!(
260            self.index < self.source.len(),
261            "A character must have been peeked before being consumed: index={}",
262            self.index
263        );
264        self.index += 1;
265    }
266
267    /// Returns a reference to the character at the given index.
268    #[must_use]
269    fn peek_char_at(&self, index: usize) -> &SourceChar {
270        assert!(
271            index <= self.index,
272            "The index {} must not be larger than the current index {}",
273            index,
274            self.index
275        );
276        &self.source[index].value
277    }
278
    /// Returns the current index, i.e., the position in the buffer of the
    /// next character to be consumed.
    #[must_use]
    fn index(&self) -> usize {
        self.index
    }
284
285    /// Rewinds the index to the given value.
286    fn rewind(&mut self, index: usize) {
287        assert!(
288            index <= self.index,
289            "The new index {} must not be larger than the current index {}",
290            index,
291            self.index
292        );
293        self.index = index;
294    }
295
296    /// Checks if there is any character that has been read from the input
297    /// source but not yet consumed.
298    #[must_use]
299    fn pending(&self) -> bool {
300        self.index < self.source.len()
301    }
302
303    /// Clears the internal buffer.
304    fn flush(&mut self) {
305        let start_line_number = self.raw_code.line_number(usize::MAX);
306        self.raw_code = Rc::new(Code {
307            value: RefCell::new(String::new()),
308            start_line_number,
309            source: self.raw_code.source.clone(),
310        });
311        self.source.clear();
312        self.index = 0;
313    }
314
315    /// Clears an end-of-input or error status so that the lexer can resume
316    /// parsing.
317    fn reset(&mut self) {
318        self.state = InputState::Alive;
319        self.flush();
320    }
321
322    /// Extracts a string from the source code range.
323    fn source_string(&self, range: Range<usize>) -> String {
324        self.source[range].iter().map(|c| c.value.value).collect()
325    }
326
327    /// Returns a location for a given range of the source code.
328    #[must_use]
329    fn location_range(&self, range: Range<usize>) -> Location {
330        if range.start == self.source.len() {
331            if let InputState::EndOfInput(ref location) = self.state {
332                return location.clone();
333            }
334        }
335        let start = &self.peek_char_at(range.start).location;
336        let code = start.code.clone();
337        let end = range
338            .map(|index| &self.peek_char_at(index).location)
339            .take_while(|location| location.code == code)
340            .last()
341            .map(|location| location.range.end)
342            .unwrap_or(start.range.start);
343        let range = start.range.start..end;
344        Location { code, range }
345    }
346
347    /// Marks the characters in the given range as line continuation.
348    ///
349    /// This function sets the `is_line_continuation` flag of the characters in
350    /// the range to true. The characters must have been read before calling
351    /// this function.
352    fn mark_line_continuation(&mut self, range: Range<usize>) {
353        assert!(
354            range.end <= self.index,
355            "characters must have been read (range = {:?}, current index = {})",
356            range,
357            self.index
358        );
359        for sc in &mut self.source[range] {
360            sc.is_line_continuation = true;
361        }
362    }
363
364    /// Performs alias substitution.
365    ///
366    /// This function replaces the characters starting from the `begin` index up
367    /// to the current position with the alias value. The resulting part of code
368    /// will be characters with a [`Source::Alias`] origin.
369    fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
370        let end = self.index;
371        assert!(
372            begin < end,
373            "begin index {begin} should be less than end index {end}"
374        );
375
376        let source = Rc::new(Source::Alias {
377            original: self.location_range(begin..end),
378            alias: alias.clone(),
379        });
380        let code = Rc::new(Code {
381            value: RefCell::new(alias.replacement.clone()),
382            start_line_number: NonZeroU64::new(1).unwrap(),
383            source,
384        });
385        let repl = ex(source_chars(&alias.replacement, &code, 0));
386
387        self.source.splice(begin..end, repl);
388        self.index = begin;
389    }
390
    /// Tests if the given index is after the replacement string of alias
    /// substitution that ends with a blank.
    ///
    /// The buffer is scanned backward from the character just before `index`.
    /// Only blanks and characters marked as line continuation may appear
    /// between the end of the alias replacement and `index`; any other
    /// character makes the result false.
    ///
    /// # Panics
    ///
    /// If `index` is larger than the currently read index.
    fn is_after_blank_ending_alias(&self, index: usize) -> bool {
        // Tests if the last character of the string is a blank.
        fn ends_with_blank(s: &str) -> bool {
            s.chars().next_back().is_some_and(is_blank)
        }
        // Tests if the character exists and its source is an alias
        // substitution for the name of `alias`.
        // NOTE(review): the exact semantics depend on `Source::is_alias_for`,
        // which is defined elsewhere -- confirm whether nested substitutions
        // are considered.
        fn is_same_alias(alias: &Alias, sc: Option<&SourceCharEx>) -> bool {
            sc.is_some_and(|sc| sc.value.location.code.source.is_alias_for(&alias.name))
        }

        for index in (0..index).rev() {
            let sc = &self.source[index];

            // A character that is neither a blank nor part of a line
            // continuation ends the scan with a negative result.
            if !sc.is_line_continuation && !is_blank(sc.value.value) {
                return false;
            }

            if let Source::Alias { ref alias, .. } = *sc.value.location.code.source {
                // The character comes from an alias whose replacement ends
                // with a blank. If the following character does not come from
                // the same alias, this character is where the replacement
                // ends.
                #[allow(clippy::collapsible_if)]
                if ends_with_blank(&alias.replacement) {
                    if !is_same_alias(alias, self.source.get(index + 1)) {
                        return true;
                    }
                }
            }
        }

        // The beginning of the buffer was reached without finding such an
        // alias.
        false
    }
424}
425
426impl fmt::Debug for LexerCore<'_> {
427    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
428        f.debug_struct("LexerCore")
429            .field("state", &self.state)
430            .field("source", &self.source)
431            .field("index", &self.index)
432            .finish_non_exhaustive()
433    }
434}
435
/// Configuration for the [lexer](Lexer)
///
/// `Config` is a builder for the lexer. A [new](Self::new) instance is created
/// with default settings. You can then customize the settings by modifying the
/// corresponding fields. Finally, you can pass an input object to the
/// [`input`](Self::input) method to create a lexer.
///
/// This struct is `#[non_exhaustive]`, so code outside this crate cannot
/// construct it with a struct literal; start from [`new`](Self::new) instead.
#[derive(Debug)]
#[must_use = "you must call `input` to create a lexer"]
#[non_exhaustive]
pub struct Config {
    /// Line number for the first line of the input
    ///
    /// The lexer counts the line number from this value to annotate the
    /// location of the tokens. The line number is saved in the
    /// `start_line_number` field of the [`Code`] instance that is contained in
    /// the [`Location`] instance of the token.
    ///
    /// The default value is 1.
    pub start_line_number: NonZeroU64,

    /// Source of the input
    ///
    /// The source is used to annotate the location of the tokens. This value
    /// is saved in the `source` field of the [`Code`] instance that is
    /// contained in the [`Location`] instance of the token.
    ///
    /// The default value is `None`, in which case the source is set to
    /// [`Source::Unknown`]. It is recommended to set this to a more informative
    /// value, so that the locations in the parsed syntax tree can be traced
    /// back to the source code. In particular, the correct source is necessary
    /// to indicate the location of possible errors that occur during parsing
    /// and execution.
    pub source: Option<Rc<Source>>,
}
470
471impl Config {
472    /// Creates a new configuration with default settings.
473    ///
474    /// You can also call [`Lexer::config`] to create a new configuration.
475    pub fn new() -> Self {
476        Config {
477            start_line_number: NonZeroU64::MIN,
478            source: None,
479        }
480    }
481
482    /// Creates a lexer with the given input object.
483    pub fn input<'a>(self, input: Box<dyn InputObject + 'a>) -> Lexer<'a> {
484        let start_line_number = self.start_line_number;
485        let source = self.source.unwrap_or_else(|| Rc::new(Source::Unknown));
486        Lexer {
487            core: LexerCore::new(input, start_line_number, source),
488            line_continuation_enabled: true,
489        }
490    }
491}
492
493impl Default for Config {
494    fn default() -> Self {
495        Self::new()
496    }
497}
498
/// Lexical analyzer
///
/// A lexer reads lines using an input function and parses the characters into tokens. It has an
/// internal buffer containing the characters that have been read and the position (or the
/// index) of the character that is to be parsed next.
///
/// `Lexer` has primitive functions such as [`peek_char`](Lexer::peek_char) that provide access
/// to the character at the current position. Derived functions such as
/// [`skip_blanks_and_comment`](Lexer::skip_blanks_and_comment) depend on those primitives to
/// parse more complex structures in the source code. Usually, the lexer is used by a
/// [parser](super::super::Parser) to read the source code and produce a syntax
/// tree, so you don't need to call these functions directly.
///
/// To construct a lexer, you can use the [`Lexer::new`] function with an input object.
/// You can also use the [`Lexer::config`] function to create a configuration that allows you to
/// customize the settings before creating a lexer.
///
/// ```
/// # use yash_syntax::input::Memory;
/// # use yash_syntax::parser::{lex::Lexer, Parser};
/// # use yash_syntax::source::Source;
/// let mut config = Lexer::config();
/// config.start_line_number = 10.try_into().unwrap();
/// config.source = Some(Source::CommandString.into());
/// let mut lexer = config.input(Box::new(Memory::new("echo hello\n")));
/// let mut parser = Parser::new(&mut lexer);
/// _ = parser.command_line();
/// ```
#[derive(Debug)]
#[must_use]
pub struct Lexer<'a> {
    // `Lexer` is a thin wrapper around `LexerCore`. `Lexer` delegates most
    // functions to `LexerCore`. `Lexer` adds automatic line-continuation
    // skipping to `LexerCore`.
    core: LexerCore<'a>,
    // Whether `peek_char` skips line continuations. This flag is cleared by
    // `disable_line_continuation`.
    line_continuation_enabled: bool,
}
536
537impl<'a> Lexer<'a> {
    /// Creates a new configuration with default settings.
    ///
    /// This is a synonym for [`Config::new`]. You can modify the settings and
    /// then create a lexer with the [`input`](Config::input) method. The
    /// lexer is not created until `input` is called.
    #[inline(always)]
    pub fn config() -> Config {
        Config::new()
    }
546
547    /// Creates a new lexer that reads using the given input function.
548    ///
549    /// This is a convenience function that creates a lexer with the given input
550    /// object and the default configuration. To customize the configuration,
551    /// use the [`config`](Self::config) function.
552    ///
553    /// This function is best used for testing or for simple cases where you
554    /// don't need to customize the lexer. For practical use, it is recommended
555    /// to use the [`config`](Self::config) function to create a configuration
556    /// and provide it with supplementary information, especially
557    /// [`source`](Config::source), before creating a lexer.
558    pub fn new(input: Box<dyn InputObject + 'a>) -> Lexer<'a> {
559        Self::config().input(input)
560    }
561
562    /// Creates a new lexer with a fixed source code.
563    ///
564    /// This is a convenience function that creates a lexer that reads from a
565    /// string using [`Memory`] with the default configuration.
566    ///
567    /// This function is best used for testing or for simple cases where you
568    /// don't need to customize the lexer. For practical use, it is recommended
569    /// to use the [`config`](Self::config) function to create a configuration
570    /// and provide it with supplementary information, especially
571    /// [`source`](Config::source), before creating a lexer.
572    pub fn with_code(code: &'a str) -> Lexer<'a> {
573        Self::new(Box::new(Memory::new(code)))
574    }
575
576    /// Creates a new lexer with a fixed source code.
577    ///
578    /// This is a convenience function that creates a lexer that reads from a
579    /// string using [`Memory`] with the specified source starting from line
580    /// number 1.
581    ///
582    /// This function is soft-deprecated. Use [`with_code`](Self::with_code)
583    /// instead if the source is `Unknown`. Otherwise, use
584    /// [`config`](Self::config) to set the source and [`input`](Config::input)
585    /// to create a lexer, which is more descriptive.
586    pub fn from_memory<S: Into<Rc<Source>>>(code: &'a str, source: S) -> Lexer<'a> {
587        fn inner(code: &str, source: Rc<Source>) -> Lexer {
588            let mut config = Lexer::config();
589            config.source = Some(source);
590            config.input(Box::new(Memory::new(code)))
591        }
592        inner(code, source.into())
593    }
594
595    /// Disables line continuation recognition onward.
596    ///
597    /// By default, [`peek_char`](Self::peek_char) silently skips line
598    /// continuation sequences. When line continuation is disabled, however,
599    /// `peek_char` returns characters literally.
600    ///
601    /// Call [`enable_line_continuation`](Self::enable_line_continuation) to
602    /// switch line continuation recognition on.
603    ///
604    /// This function will panic if line continuation has already been disabled.
605    pub fn disable_line_continuation<'b>(&'b mut self) -> PlainLexer<'b, 'a> {
606        assert!(
607            self.line_continuation_enabled,
608            "line continuation already disabled"
609        );
610        self.line_continuation_enabled = false;
611        PlainLexer { lexer: self }
612    }
613
    /// Re-enables line continuation.
    ///
    /// You can pass the `PlainLexer` returned from
    /// [`disable_line_continuation`](Self::disable_line_continuation) to this
    /// function to re-enable line continuation. That is equivalent to dropping
    /// the `PlainLexer` instance, but the code will be more descriptive.
    ///
    /// This function has an empty body: it only takes ownership of the
    /// argument, which is dropped on return.
    // NOTE(review): the lifetime parameters are written as `PlainLexer<'a, 'b>`
    // here but as `PlainLexer<'b, 'a>` in `disable_line_continuation`. This
    // looks like a naming inconsistency only, since the lifetimes are
    // inferred at call sites -- confirm against the definition of
    // `PlainLexer`.
    pub fn enable_line_continuation<'b>(_: PlainLexer<'a, 'b>) {}
621
622    /// Skips line continuation, i.e., a backslash followed by a newline.
623    ///
624    /// If there is a line continuation at the current position, this function
625    /// consumes the backslash and the newline and returns `Ok(true)`. The
626    /// characters are marked as line continuation.
627    ///
628    /// If there is no line continuation, this function does nothing and returns
629    /// `Ok(false)`.
630    ///
631    /// This function does nothing if line continuation has been
632    /// [disabled](Self::disable_line_continuation).
633    async fn line_continuation(&mut self) -> Result<bool> {
634        if !self.line_continuation_enabled {
635            return Ok(false);
636        }
637
638        let index = self.core.index();
639        match self.core.peek_char().await? {
640            PeekChar::Char(c) if c.value == '\\' => self.core.consume_char(),
641            _ => return Ok(false),
642        }
643
644        match self.core.peek_char().await? {
645            PeekChar::Char(c) if c.value == '\n' => self.core.consume_char(),
646            _ => {
647                self.core.rewind(index);
648                return Ok(false);
649            }
650        }
651
652        self.core.mark_line_continuation(index..index + 2);
653
654        Ok(true)
655    }
656
657    /// Peeks the next character.
658    ///
659    /// If the end of input is reached, `Ok(None)` is returned. On error,
660    /// `Err(_)` is returned.
661    ///
662    /// If line continuation recognition is enabled, combinations of a backslash
663    /// and a newline are silently skipped before returning the next character.
664    /// Call [`disable_line_continuation`](Self::disable_line_continuation) to
665    /// switch off line continuation recognition.
666    ///
667    /// This function requires a mutable reference to `self` since it may need
668    /// to read the next line if needed.
669    pub async fn peek_char(&mut self) -> Result<Option<char>> {
670        while self.line_continuation().await? {}
671
672        match self.core.peek_char().await? {
673            PeekChar::Char(source_char) => Ok(Some(source_char.value)),
674            PeekChar::EndOfInput(_) => Ok(None),
675        }
676    }
677
678    /// Returns the location of the next character.
679    ///
680    /// If there is no more character (that is, it is the end of input), an imaginary location
681    /// is returned that would be returned if a character existed.
682    ///
683    /// This function requires a mutable reference to `self` since it needs to
684    /// [peek](Self::peek_char) the next character.
685    pub async fn location(&mut self) -> Result<&Location> {
686        self.core.peek_char().await.map(|p| p.location())
687    }
688
    /// Consumes the next character.
    ///
    /// This function must be called after [`peek_char`](Lexer::peek_char) has successfully
    /// returned the character.
    ///
    /// # Panics
    ///
    /// If the character to consume has not yet been peeked.
    pub fn consume_char(&mut self) {
        self.core.consume_char()
    }
697
    /// Returns the position of the next character, counted from zero.
    ///
    /// Peeking a character does not change the index; consuming one
    /// increments it.
    ///
    /// ```
    /// # use yash_syntax::parser::lex::Lexer;
    /// # futures_executor::block_on(async {
    /// let mut lexer = Lexer::with_code("abc");
    /// assert_eq!(lexer.index(), 0);
    /// let _ = lexer.peek_char().await;
    /// assert_eq!(lexer.index(), 0);
    /// lexer.consume_char();
    /// assert_eq!(lexer.index(), 1);
    /// # })
    /// ```
    #[must_use]
    pub fn index(&self) -> usize {
        self.core.index()
    }
715
    /// Moves the current position back to the given index so that characters that have been
    /// consumed can be read again.
    ///
    /// ```
    /// # use yash_syntax::parser::lex::Lexer;
    /// # futures_executor::block_on(async {
    /// let mut lexer = Lexer::with_code("abc");
    /// let saved_index = lexer.index();
    /// assert_eq!(lexer.peek_char().await, Ok(Some('a')));
    /// lexer.consume_char();
    /// assert_eq!(lexer.peek_char().await, Ok(Some('b')));
    /// lexer.rewind(saved_index);
    /// assert_eq!(lexer.peek_char().await, Ok(Some('a')));
    /// # })
    /// ```
    ///
    /// # Panics
    ///
    /// If the given index is larger than the [current index](Lexer::index).
    pub fn rewind(&mut self, index: usize) {
        self.core.rewind(index)
    }
737
    /// Checks if there is any character that has been read from the input
    /// source but not yet consumed.
    ///
    /// This function does not read from the input.
    #[must_use]
    pub fn pending(&self) -> bool {
        self.core.pending()
    }
744
    /// Clears the internal buffer of the lexer.
    ///
    /// Locations returned from [`location`](Self::location) share a single code
    /// instance that is also retained by the lexer. The code grows long as the
    /// lexer reads more input. To prevent the code from getting too large, you
    /// can call this function that replaces the retained code with a new empty
    /// one. The new code's `start_line_number` will be incremented by the
    /// number of lines in the previous.
    ///
    /// All buffered characters are discarded and the [index](Self::index) is
    /// reset to zero, so indices obtained before the flush no longer refer to
    /// the same characters.
    pub fn flush(&mut self) {
        self.core.flush()
    }
756
    /// Clears an end-of-input or error status so that the lexer can resume
    /// parsing.
    ///
    /// The internal buffer is also cleared as if [`flush`](Self::flush) were
    /// called.
    ///
    /// This function will be useful only in an interactive shell where the user
    /// can continue entering commands even after they send an end-of-input or
    /// are interrupted by a syntax error.
    pub fn reset(&mut self) {
        self.core.reset()
    }
766
    /// Peeks the next character and, if the given decider function returns true for it,
    /// advances the position.
    ///
    /// Returns the consumed character if the function returned true. Returns `Ok(None)` if it
    /// returned false or there is no more character. Returns `Err(_)` if peeking the
    /// character fails.
    pub async fn consume_char_if<F>(&mut self, mut f: F) -> Result<Option<&SourceChar>>
    where
        F: FnMut(char) -> bool,
    {
        // Delegate to the non-generic version.
        self.consume_char_if_dyn(&mut f).await
    }
778
779    /// Dynamic version of [`Self::consume_char_if`].
780    pub(crate) async fn consume_char_if_dyn(
781        &mut self,
782        f: &mut dyn FnMut(char) -> bool,
783    ) -> Result<Option<&SourceChar>> {
784        match self.peek_char().await? {
785            Some(c) if f(c) => {
786                let index = self.index();
787                self.consume_char();
788                Ok(Some(self.core.peek_char_at(index)))
789            }
790            _ => Ok(None),
791        }
792    }
793
    /// Extracts a string from the source code range.
    ///
    /// This function returns the source code string for the range specified by
    /// the argument. The range must specify valid indices. If the range
    /// includes a character that has not yet been read, this function will
    /// panic.
    ///
    /// # Panics
    ///
    /// If the argument index is out of bounds, i.e., pointing to an unread
    /// character.
    #[inline]
    pub fn source_string(&self, range: Range<usize>) -> String {
        self.core.source_string(range)
    }
808
    /// Returns a location for a given range of the source code.
    ///
    /// All the characters in the range must have been
    /// [consumed](Self::consume_char). If the range refers to an unconsumed
    /// character, this function will panic.
    ///
    /// If the characters are from more than one [`Code`] fragment, the location
    /// will only cover the initial portion of the range sharing the same
    /// `Code`.
    ///
    /// # Panics
    ///
    /// This function will panic if the range refers to an unconsumed character.
    ///
    /// If the start index of the range is the end of input, it must have been
    /// peeked and the range must be empty, or the function will panic.
    #[must_use]
    pub fn location_range(&self, range: Range<usize>) -> Location {
        self.core.location_range(range)
    }
829
830    /// Performs alias substitution right before the current position.
831    ///
832    /// This function must be called just after a [word](WordLexer::word) has been parsed that
833    /// matches the name of the argument alias. No check is done in this function that there is
834    /// a matching word before the current position. The characters starting from the `begin`
835    /// index up to the current position are silently replaced with the alias value.
836    ///
837    /// The resulting part of code will be characters with a [`Source::Alias`] origin.
838    ///
839    /// After the substitution, the position will be set before the replaced string.
840    ///
841    /// # Panics
842    ///
843    /// If the replaced part is empty, i.e., `begin >= self.index()`.
844    pub fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
845        self.core.substitute_alias(begin, alias)
846    }
847
848    /// Tests if the given index is after the replacement string of alias
849    /// substitution that ends with a blank.
850    ///
851    /// # Panics
852    ///
853    /// If `index` is larger than the currently read index.
854    pub fn is_after_blank_ending_alias(&self, index: usize) -> bool {
855        self.core.is_after_blank_ending_alias(index)
856    }
857
858    /// Parses an optional compound list that is the content of a command
859    /// substitution.
860    ///
861    /// This function consumes characters until a token that cannot be the
862    /// beginning of an and-or list is found and returns the string that was
863    /// consumed.
864    pub async fn inner_program(&mut self) -> Result<String> {
865        let begin = self.index();
866
867        let mut parser = super::super::Parser::new(self);
868        parser.maybe_compound_list().await?;
869
870        let end = parser.peek_token().await?.index;
871        self.rewind(end);
872
873        Ok(self.core.source_string(begin..end))
874    }
875
876    /// Like [`Lexer::inner_program`], but returns the future in a pinning box.
877    pub fn inner_program_boxed(&mut self) -> Pin<Box<dyn Future<Output = Result<String>> + '_>> {
878        Box::pin(self.inner_program())
879    }
880}
881
/// Reference to [`Lexer`] with line continuation disabled
///
/// This struct implements the RAII pattern for temporarily disabling line
/// continuation. When you disable the line continuation of a lexer, you get an
/// instance of `PlainLexer`. You can access the original lexer via the
/// `PlainLexer`, which dereferences to [`Lexer`], until you drop it. Dropping
/// the `PlainLexer` automatically re-enables line continuation.
#[derive(Debug)]
#[must_use = "You must retain the PlainLexer to keep line continuation disabled"]
pub struct PlainLexer<'a, 'b> {
    /// Exclusive borrow of the lexer this guard gives access to
    lexer: &'a mut Lexer<'b>,
}
894
895impl<'b> Deref for PlainLexer<'_, 'b> {
896    type Target = Lexer<'b>;
897    fn deref(&self) -> &Lexer<'b> {
898        self.lexer
899    }
900}
901
902impl<'b> DerefMut for PlainLexer<'_, 'b> {
903    fn deref_mut(&mut self) -> &mut Lexer<'b> {
904        self.lexer
905    }
906}
907
impl Drop for PlainLexer<'_, '_> {
    /// Re-enables line continuation on the wrapped lexer.
    fn drop(&mut self) {
        // The flag is set to true unconditionally; its previous value is not
        // remembered or restored.
        self.lexer.line_continuation_enabled = true;
    }
}
913
/// Context in which a [word](crate::syntax::Word) is parsed
///
/// How the word of a [switch](crate::syntax::Switch) is parsed depends on
/// whether the parameter expansion containing the switch is part of a text or
/// a word. A `WordContext` value tells the lexer which of the two applies.
///
/// Parser functions that depend on the context are implemented in
/// [`WordLexer`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WordContext {
    /// The text unit being parsed is part of a [text](crate::syntax::Text).
    Text,
    /// The text unit being parsed is part of a [word](crate::syntax::Word).
    Word,
}
929
/// Lexer with additional information for parsing [texts](crate::syntax::Text)
/// and [words](crate::syntax::Word)
///
/// A `WordLexer` dereferences to the [`Lexer`] it borrows.
#[derive(Debug)]
pub struct WordLexer<'a, 'b> {
    /// Underlying lexer that the `WordLexer` dereferences to
    pub lexer: &'a mut Lexer<'b>,
    /// Context in which the text or word is being parsed
    pub context: WordContext,
}
937
938impl<'b> Deref for WordLexer<'_, 'b> {
939    type Target = Lexer<'b>;
940    fn deref(&self) -> &Lexer<'b> {
941        self.lexer
942    }
943}
944
945impl<'b> DerefMut for WordLexer<'_, 'b> {
946    fn deref_mut(&mut self) -> &mut Lexer<'b> {
947        self.lexer
948    }
949}
950
951#[cfg(test)]
952mod tests {
953    use super::*;
954    use crate::input::Input;
955    use crate::parser::error::ErrorCause;
956    use crate::parser::error::SyntaxError;
957    use assert_matches::assert_matches;
958    use futures_util::FutureExt;
959
960    #[test]
961    fn lexer_core_peek_char_empty_source() {
962        let input = Memory::new("");
963        let line = NonZeroU64::new(32).unwrap();
964        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
965        let result = lexer.peek_char().now_or_never().unwrap();
966        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
967            assert_eq!(*location.code.value.borrow(), "");
968            assert_eq!(location.code.start_line_number, line);
969            assert_eq!(*location.code.source, Source::Unknown);
970            assert_eq!(location.range, 0..0);
971        });
972    }
973
974    #[test]
975    fn lexer_core_peek_char_io_error() {
976        #[derive(Debug)]
977        struct Failing;
978        impl fmt::Display for Failing {
979            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
980                write!(f, "Failing")
981            }
982        }
983        impl std::error::Error for Failing {}
984        impl Input for Failing {
985            async fn next_line(&mut self, _: &Context) -> crate::input::Result {
986                Err(std::io::Error::new(std::io::ErrorKind::Other, Failing))
987            }
988        }
989        let line = NonZeroU64::new(42).unwrap();
990        let mut lexer = LexerCore::new(Box::new(Failing), line, Rc::new(Source::Unknown));
991
992        let e = lexer.peek_char().now_or_never().unwrap().unwrap_err();
993        assert_matches!(e.cause, ErrorCause::Io(io_error) => {
994            assert_eq!(io_error.kind(), std::io::ErrorKind::Other);
995        });
996        assert_eq!(*e.location.code.value.borrow(), "");
997        assert_eq!(e.location.code.start_line_number, line);
998        assert_eq!(*e.location.code.source, Source::Unknown);
999        assert_eq!(e.location.range, 0..0);
1000    }
1001
1002    #[test]
1003    fn lexer_core_peek_char_context_is_first_line() {
1004        // In this test case, this mock input function will be called twice.
1005        struct InputMock {
1006            first: bool,
1007        }
1008        impl Input for InputMock {
1009            async fn next_line(&mut self, context: &Context) -> crate::input::Result {
1010                assert_eq!(context.is_first_line(), self.first);
1011                self.first = false;
1012                Ok("\n".to_owned())
1013            }
1014        }
1015
1016        let input = InputMock { first: true };
1017        let line = NonZeroU64::new(42).unwrap();
1018        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1019
1020        let peek = lexer.peek_char().now_or_never().unwrap();
1021        assert_matches!(peek, Ok(PeekChar::Char(_)));
1022        lexer.consume_char();
1023
1024        let peek = lexer.peek_char().now_or_never().unwrap();
1025        assert_matches!(peek, Ok(PeekChar::Char(_)));
1026        lexer.consume_char();
1027    }
1028
1029    #[test]
1030    fn lexer_core_consume_char_success() {
1031        let input = Memory::new("a\nb");
1032        let line = NonZeroU64::new(1).unwrap();
1033        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1034
1035        let result = lexer.peek_char().now_or_never().unwrap();
1036        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1037            assert_eq!(c.value, 'a');
1038            assert_eq!(*c.location.code.value.borrow(), "a\n");
1039            assert_eq!(c.location.code.start_line_number, line);
1040            assert_eq!(*c.location.code.source, Source::Unknown);
1041            assert_eq!(c.location.range, 0..1);
1042        });
1043        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1044            assert_eq!(c.value, 'a');
1045            assert_eq!(*c.location.code.value.borrow(), "a\n");
1046            assert_eq!(c.location.code.start_line_number, line);
1047            assert_eq!(*c.location.code.source, Source::Unknown);
1048            assert_eq!(c.location.range, 0..1);
1049        });
1050        lexer.consume_char();
1051
1052        let result = lexer.peek_char().now_or_never().unwrap();
1053        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1054            assert_eq!(c.value, '\n');
1055            assert_eq!(*c.location.code.value.borrow(), "a\n");
1056            assert_eq!(c.location.code.start_line_number, line);
1057            assert_eq!(*c.location.code.source, Source::Unknown);
1058            assert_eq!(c.location.range, 1..2);
1059        });
1060        lexer.consume_char();
1061
1062        let result = lexer.peek_char().now_or_never().unwrap();
1063        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1064            assert_eq!(c.value, 'b');
1065            assert_eq!(*c.location.code.value.borrow(), "a\nb");
1066            assert_eq!(c.location.code.start_line_number.get(), 1);
1067            assert_eq!(*c.location.code.source, Source::Unknown);
1068            assert_eq!(c.location.range, 2..3);
1069        });
1070        lexer.consume_char();
1071
1072        let result = lexer.peek_char().now_or_never().unwrap();
1073        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
1074            assert_eq!(*location.code.value.borrow(), "a\nb");
1075            assert_eq!(location.code.start_line_number.get(), 1);
1076            assert_eq!(*location.code.source, Source::Unknown);
1077            assert_eq!(location.range, 3..3);
1078        });
1079    }
1080
1081    #[test]
1082    #[should_panic(expected = "A character must have been peeked before being consumed: index=0")]
1083    fn lexer_core_consume_char_panic() {
1084        let input = Memory::new("a");
1085        let line = NonZeroU64::new(1).unwrap();
1086        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1087        lexer.consume_char();
1088    }
1089
    #[test]
    fn lexer_core_peek_char_at() {
        let input = Memory::new("a\nb");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));

        // Keep a clone of each character as it is peeked so that it can be
        // compared with what `peek_char_at` returns afterwards.
        let c0 = assert_matches!(
            lexer.peek_char().now_or_never().unwrap(),
            Ok(PeekChar::Char(c)) => c.clone()
        );
        lexer.consume_char();

        let c1 = assert_matches!(
            lexer.peek_char().now_or_never().unwrap(),
            Ok(PeekChar::Char(c)) => c.clone()
        );
        lexer.consume_char();

        // The third character is peeked but deliberately left unconsumed.
        let c2 = assert_matches!(
            lexer.peek_char().now_or_never().unwrap(),
            Ok(PeekChar::Char(c)) => c.clone()
        );

        // Consumed characters and the peeked one are all accessible by index.
        assert_eq!(lexer.peek_char_at(0), &c0);
        assert_eq!(lexer.peek_char_at(1), &c1);
        assert_eq!(lexer.peek_char_at(2), &c2);
    }
1117
    #[test]
    fn lexer_core_index() {
        let input = Memory::new("a\nb");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));

        // Peeking leaves the index untouched; only consuming advances it.
        assert_eq!(lexer.index(), 0);
        lexer.peek_char().now_or_never().unwrap().unwrap();
        assert_eq!(lexer.index(), 0);
        lexer.consume_char();

        assert_eq!(lexer.index(), 1);
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();

        assert_eq!(lexer.index(), 2);
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();

        // All three characters have been consumed.
        assert_eq!(lexer.index(), 3);
    }
1139
    #[test]
    fn lexer_core_rewind_success() {
        let input = Memory::new("abc");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        // Rewinding to the current index is accepted and changes nothing.
        lexer.rewind(0);
        assert_eq!(lexer.index(), 0);

        // Consume two characters, then go back to the beginning.
        let _ = lexer.peek_char().now_or_never().unwrap();
        lexer.consume_char();
        let _ = lexer.peek_char().now_or_never().unwrap();
        lexer.consume_char();
        lexer.rewind(0);

        // The first character is available again after rewinding.
        let result = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(result, Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'a');
            assert_eq!(*c.location.code.value.borrow(), "abc");
            assert_eq!(c.location.code.start_line_number, line);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 0..1);
        });
    }
1163
1164    #[test]
1165    #[should_panic(expected = "The new index 1 must not be larger than the current index 0")]
1166    fn lexer_core_rewind_invalid_index() {
1167        let input = Memory::new("abc");
1168        let line = NonZeroU64::new(1).unwrap();
1169        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1170        lexer.rewind(1);
1171    }
1172
1173    #[test]
1174    fn lexer_core_source_string() {
1175        let input = Memory::new("ab\ncd");
1176        let line = NonZeroU64::new(1).unwrap();
1177        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1178        for _ in 0..4 {
1179            let _ = lexer.peek_char().now_or_never().unwrap();
1180            lexer.consume_char();
1181        }
1182
1183        let result = lexer.source_string(1..4);
1184        assert_eq!(result, "b\nc");
1185    }
1186
1187    #[test]
1188    #[should_panic(expected = "begin index 0 should be less than end index 0")]
1189    fn lexer_core_substitute_alias_with_invalid_index() {
1190        let input = Memory::new("a b");
1191        let line = NonZeroU64::new(1).unwrap();
1192        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1193        let alias = Rc::new(Alias {
1194            name: "a".to_string(),
1195            replacement: "".to_string(),
1196            global: false,
1197            origin: Location::dummy("dummy"),
1198        });
1199        lexer.substitute_alias(0, &alias);
1200    }
1201
    #[test]
    fn lexer_core_substitute_alias_single_line_replacement() {
        let input = Memory::new("a b");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        let alias = Rc::new(Alias {
            name: "a".to_string(),
            replacement: "lex".to_string(),
            global: false,
            origin: Location::dummy("dummy"),
        });

        // Consume the alias name "a" before substituting it.
        let _ = lexer.peek_char().now_or_never().unwrap();
        lexer.consume_char();

        lexer.substitute_alias(0, &alias);

        // The replacement characters come first. Each of them belongs to a
        // code fragment "lex" whose source is the alias substitution, with the
        // original location covering the replaced "a" (range 0..1 of "a b").
        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'l');
            assert_eq!(*c.location.code.value.borrow(), "lex");
            assert_eq!(c.location.code.start_line_number.get(), 1);
            assert_matches!(&*c.location.code.source,
                Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), "a b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 0..1);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 0..1);
        });
        lexer.consume_char();

        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'e');
            assert_eq!(*c.location.code.value.borrow(), "lex");
            assert_eq!(c.location.code.start_line_number, line);
            assert_matches!(&*c.location.code.source,
                Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), "a b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 0..1);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 1..2);
        });
        lexer.consume_char();

        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'x');
            assert_eq!(*c.location.code.value.borrow(), "lex");
            assert_eq!(c.location.code.start_line_number, line);
            assert_matches!(&*c.location.code.source,
                Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), "a b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 0..1);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 2..3);
        });
        lexer.consume_char();

        // After the replacement, the rest of the original input follows with
        // its original location.
        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, ' ');
            assert_eq!(*c.location.code.value.borrow(), "a b");
            assert_eq!(c.location.code.start_line_number, line);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 1..2);
        });
        lexer.consume_char();
    }
1276
    #[test]
    fn lexer_core_substitute_alias_multi_line_replacement() {
        let input = Memory::new(" foo b");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        let alias = Rc::new(Alias {
            name: "foo".to_string(),
            replacement: "x\ny".to_string(),
            global: true,
            origin: Location::dummy("loc"),
        });

        // Consume the leading blank and the alias name "foo".
        for _ in 0..4 {
            let _ = lexer.peek_char().now_or_never().unwrap();
            lexer.consume_char();
        }

        // Replace only "foo" (indices 1..4); the leading blank is kept.
        lexer.substitute_alias(1, &alias);

        // All replacement characters, including the newline, share one code
        // fragment "x\ny" whose original location is range 1..4 of " foo b".
        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'x');
            assert_eq!(*c.location.code.value.borrow(), "x\ny");
            assert_eq!(c.location.code.start_line_number, line);
            assert_matches!(&*c.location.code.source,
                Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), " foo b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 1..4);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 0..1);
        });
        lexer.consume_char();

        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, '\n');
            assert_eq!(*c.location.code.value.borrow(), "x\ny");
            assert_eq!(c.location.code.start_line_number, line);
            assert_matches!(&*c.location.code.source,
                Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), " foo b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 1..4);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 1..2);
        });
        lexer.consume_char();

        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'y');
            assert_eq!(*c.location.code.value.borrow(), "x\ny");
            assert_eq!(c.location.code.start_line_number, line);
            assert_matches!(&*c.location.code.source, Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), " foo b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 1..4);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 2..3);
        });
        lexer.consume_char();

        // The blank that followed "foo" in the original input comes next.
        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, ' ');
            assert_eq!(*c.location.code.value.borrow(), " foo b");
            assert_eq!(c.location.code.start_line_number, line);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 4..5);
        });
        lexer.consume_char();
    }
1352
    #[test]
    fn lexer_core_substitute_alias_empty_replacement() {
        let input = Memory::new("x ");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        let alias = Rc::new(Alias {
            name: "x".to_string(),
            replacement: "".to_string(),
            global: false,
            origin: Location::dummy("dummy"),
        });

        // Consume the alias name "x" before substituting it.
        let _ = lexer.peek_char().now_or_never().unwrap();
        lexer.consume_char();

        lexer.substitute_alias(0, &alias);

        // With an empty replacement, the next character is the one that
        // followed the alias name in the original input.
        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, ' ');
            assert_eq!(*c.location.code.value.borrow(), "x ");
            assert_eq!(c.location.code.start_line_number, line);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 1..2);
        });
    }
1378
    #[test]
    fn lexer_core_peek_char_after_alias_substitution() {
        let input = Memory::new("a\nb");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));

        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();

        // Substitute the consumed "a" with an empty string.
        let alias = Rc::new(Alias {
            name: "a".to_string(),
            replacement: "".to_string(),
            global: false,
            origin: Location::dummy("dummy"),
        });
        lexer.substitute_alias(0, &alias);

        // The characters after the alias keep their original locations.
        let result = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(result, Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, '\n');
            assert_eq!(*c.location.code.value.borrow(), "a\n");
            assert_eq!(c.location.code.start_line_number, line);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 1..2);
        });
        lexer.consume_char();

        // The second line is appended to the same code value after the
        // substitution.
        let result = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(result, Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'b');
            assert_eq!(*c.location.code.value.borrow(), "a\nb");
            assert_eq!(c.location.code.start_line_number.get(), 1);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 2..3);
        });
        lexer.consume_char();

        let result = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
            assert_eq!(*location.code.value.borrow(), "a\nb");
            assert_eq!(location.code.start_line_number.get(), 1);
            assert_eq!(*location.code.source, Source::Unknown);
            assert_eq!(location.range, 3..3);
        });
    }
1424
1425    #[test]
1426    fn lexer_core_is_after_blank_ending_alias_index_0() {
1427        let original = Location::dummy("original");
1428        let alias = Rc::new(Alias {
1429            name: "a".to_string(),
1430            replacement: " ".to_string(),
1431            global: false,
1432            origin: Location::dummy("origin"),
1433        });
1434        let source = Source::Alias { original, alias };
1435        let input = Memory::new("a");
1436        let line = NonZeroU64::new(1).unwrap();
1437        let lexer = LexerCore::new(Box::new(input), line, Rc::new(source));
1438        assert!(!lexer.is_after_blank_ending_alias(0));
1439    }
1440
1441    #[test]
1442    fn lexer_core_is_after_blank_ending_alias_not_blank_ending() {
1443        let input = Memory::new("a x");
1444        let line = NonZeroU64::new(1).unwrap();
1445        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1446        let alias = Rc::new(Alias {
1447            name: "a".to_string(),
1448            replacement: " b".to_string(),
1449            global: false,
1450            origin: Location::dummy("dummy"),
1451        });
1452
1453        lexer.peek_char().now_or_never().unwrap().unwrap();
1454        lexer.consume_char();
1455
1456        lexer.substitute_alias(0, &alias);
1457
1458        assert!(!lexer.is_after_blank_ending_alias(0));
1459        assert!(!lexer.is_after_blank_ending_alias(1));
1460        assert!(!lexer.is_after_blank_ending_alias(2));
1461        assert!(!lexer.is_after_blank_ending_alias(3));
1462    }
1463
1464    #[test]
1465    fn lexer_core_is_after_blank_ending_alias_blank_ending() {
1466        let input = Memory::new("a x");
1467        let line = NonZeroU64::new(1).unwrap();
1468        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1469        let alias = Rc::new(Alias {
1470            name: "a".to_string(),
1471            replacement: " b ".to_string(),
1472            global: false,
1473            origin: Location::dummy("dummy"),
1474        });
1475
1476        lexer.peek_char().now_or_never().unwrap().unwrap();
1477        lexer.consume_char();
1478
1479        lexer.substitute_alias(0, &alias);
1480
1481        assert!(!lexer.is_after_blank_ending_alias(0));
1482        assert!(!lexer.is_after_blank_ending_alias(1));
1483        assert!(!lexer.is_after_blank_ending_alias(2));
1484        assert!(lexer.is_after_blank_ending_alias(3));
1485        assert!(lexer.is_after_blank_ending_alias(4));
1486    }
1487
    #[test]
    fn lexer_core_is_after_blank_ending_alias_after_line_continuation() {
        let input = Memory::new("a\\\n x");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        let alias = Rc::new(Alias {
            name: "a".to_string(),
            replacement: " b ".to_string(),
            global: false,
            origin: Location::dummy("dummy"),
        });

        // Consume the alias name and replace it with " b ".
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();
        lexer.substitute_alias(0, &alias);

        // Consume everything that is left, then mark the range 3..5 as a line
        // continuation. Presumably that range is the backslash-newline pair
        // that directly follows the three-character replacement.
        while let Ok(PeekChar::Char(_)) = lexer.peek_char().now_or_never().unwrap() {
            lexer.consume_char();
        }
        lexer.mark_line_continuation(3..5);

        // Indices inside the replacement do not qualify, while the indices
        // after the marked line continuation do.
        assert!(!lexer.is_after_blank_ending_alias(0));
        assert!(!lexer.is_after_blank_ending_alias(1));
        assert!(!lexer.is_after_blank_ending_alias(2));
        assert!(lexer.is_after_blank_ending_alias(5));
        assert!(lexer.is_after_blank_ending_alias(6));
    }
1515
1516    #[test]
1517    fn lexer_with_empty_source() {
1518        let mut lexer = Lexer::with_code("");
1519        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
1520    }
1521
1522    #[test]
1523    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_backslash() {
1524        let mut lexer = Lexer::with_code("\\\n\n\\");
1525        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\n')));
1526        assert_eq!(lexer.index(), 2);
1527    }
1528
1529    #[test]
1530    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_newline() {
1531        let mut lexer = Lexer::with_code("\\\n\\\n\\\n\\\\");
1532        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1533        assert_eq!(lexer.index(), 6);
1534    }
1535
1536    #[test]
1537    fn lexer_peek_char_with_line_continuation_disabled() {
1538        let mut lexer = Lexer::with_code("\\\n\\\n\\\\");
1539        let mut lexer = lexer.disable_line_continuation();
1540        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1541        assert_eq!(lexer.index(), 0);
1542    }
1543
    #[test]
    fn lexer_flush() {
        let mut lexer = Lexer::with_code(" \n\n\t\n");
        // Location of the first character; only the first line has been read.
        let location_1 = lexer.location().now_or_never().unwrap().unwrap().clone();
        assert_eq!(*location_1.code.value.borrow(), " \n");

        // Consume the first three characters, flush, then consume one more.
        lexer.consume_char();
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();
        lexer.flush();
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();

        let location_2 = lexer.location().now_or_never().unwrap().unwrap().clone();

        // The code referred to by the first location has grown to hold the
        // second line, while the rest of that location is unchanged.
        assert_eq!(*location_1.code.value.borrow(), " \n\n");
        assert_eq!(location_1.code.start_line_number.get(), 1);
        assert_eq!(*location_1.code.source, Source::Unknown);
        assert_eq!(location_1.range, 0..1);
        // The location taken after the flush refers to a different code value
        // that starts at line 3 and only contains the third line.
        assert_eq!(*location_2.code.value.borrow(), "\t\n");
        assert_eq!(location_2.code.start_line_number.get(), 3);
        assert_eq!(*location_2.code.source, Source::Unknown);
        assert_eq!(location_2.range, 1..2);
    }
1570
1571    #[test]
1572    fn lexer_consume_char_if() {
1573        let mut lexer = Lexer::with_code("word\n");
1574
1575        let mut called = 0;
1576        let c = lexer
1577            .consume_char_if(|c| {
1578                assert_eq!(c, 'w');
1579                called += 1;
1580                true
1581            })
1582            .now_or_never()
1583            .unwrap()
1584            .unwrap()
1585            .unwrap();
1586        assert_eq!(called, 1);
1587        assert_eq!(c.value, 'w');
1588        assert_eq!(*c.location.code.value.borrow(), "word\n");
1589        assert_eq!(c.location.code.start_line_number.get(), 1);
1590        assert_eq!(*c.location.code.source, Source::Unknown);
1591        assert_eq!(c.location.range, 0..1);
1592
1593        let mut called = 0;
1594        let r = lexer
1595            .consume_char_if(|c| {
1596                assert_eq!(c, 'o');
1597                called += 1;
1598                false
1599            })
1600            .now_or_never()
1601            .unwrap();
1602        assert_eq!(called, 1);
1603        assert_eq!(r, Ok(None));
1604
1605        let mut called = 0;
1606        let r = lexer
1607            .consume_char_if(|c| {
1608                assert_eq!(c, 'o');
1609                called += 1;
1610                false
1611            })
1612            .now_or_never()
1613            .unwrap();
1614        assert_eq!(called, 1);
1615        assert_eq!(r, Ok(None));
1616
1617        let mut called = 0;
1618        let c = lexer
1619            .consume_char_if(|c| {
1620                assert_eq!(c, 'o');
1621                called += 1;
1622                true
1623            })
1624            .now_or_never()
1625            .unwrap()
1626            .unwrap()
1627            .unwrap();
1628        assert_eq!(called, 1);
1629        assert_eq!(c.value, 'o');
1630        assert_eq!(*c.location.code.value.borrow(), "word\n");
1631        assert_eq!(c.location.code.start_line_number.get(), 1);
1632        assert_eq!(*c.location.code.source, Source::Unknown);
1633        assert_eq!(c.location.range, 1..2);
1634
1635        lexer
1636            .consume_char_if(|c| {
1637                assert_eq!(c, 'r');
1638                true
1639            })
1640            .now_or_never()
1641            .unwrap()
1642            .unwrap()
1643            .unwrap();
1644        lexer
1645            .consume_char_if(|c| {
1646                assert_eq!(c, 'd');
1647                true
1648            })
1649            .now_or_never()
1650            .unwrap()
1651            .unwrap()
1652            .unwrap();
1653        lexer
1654            .consume_char_if(|c| {
1655                assert_eq!(c, '\n');
1656                true
1657            })
1658            .now_or_never()
1659            .unwrap()
1660            .unwrap()
1661            .unwrap();
1662
1663        // end of input
1664        let r = lexer
1665            .consume_char_if(|c| {
1666                unreachable!("unexpected call to the decider function: argument={}", c)
1667            })
1668            .now_or_never()
1669            .unwrap();
1670        assert_eq!(r, Ok(None));
1671    }
1672
1673    #[test]
1674    fn lexer_location_range_with_empty_range() {
1675        let mut lexer = Lexer::with_code("");
1676        lexer.peek_char().now_or_never().unwrap().unwrap();
1677        let location = lexer.location_range(0..0);
1678        assert_eq!(*location.code.value.borrow(), "");
1679        assert_eq!(location.code.start_line_number.get(), 1);
1680        assert_eq!(*location.code.source, Source::Unknown);
1681        assert_eq!(location.range, 0..0);
1682    }
1683
1684    #[test]
1685    fn lexer_location_range_with_nonempty_range() {
1686        let mut lexer = Lexer::from_memory("cat foo", Source::Stdin);
1687        for _ in 0..4 {
1688            lexer.peek_char().now_or_never().unwrap().unwrap();
1689            lexer.consume_char();
1690        }
1691        lexer.peek_char().now_or_never().unwrap().unwrap();
1692
1693        let location = lexer.location_range(1..4);
1694        assert_eq!(*location.code.value.borrow(), "cat foo");
1695        assert_eq!(location.code.start_line_number.get(), 1);
1696        assert_eq!(*location.code.source, Source::Stdin);
1697        assert_eq!(location.range, 1..4);
1698    }
1699
1700    #[test]
1701    fn lexer_location_range_with_range_starting_at_end() {
1702        let mut lexer = Lexer::from_memory("cat", Source::Stdin);
1703        for _ in 0..3 {
1704            lexer.peek_char().now_or_never().unwrap().unwrap();
1705            lexer.consume_char();
1706        }
1707        lexer.peek_char().now_or_never().unwrap().unwrap();
1708
1709        let location = lexer.location_range(3..3);
1710        assert_eq!(*location.code.value.borrow(), "cat");
1711        assert_eq!(location.code.start_line_number.get(), 1);
1712        assert_eq!(*location.code.source, Source::Stdin);
1713        assert_eq!(location.range, 3..3);
1714    }
1715
1716    #[test]
1717    #[should_panic]
1718    fn lexer_location_range_with_unconsumed_code() {
1719        let lexer = Lexer::with_code("echo ok");
1720        let _ = lexer.location_range(0..0);
1721    }
1722
1723    #[test]
1724    #[should_panic(expected = "The index 1 must not be larger than the current index 0")]
1725    fn lexer_location_range_with_range_out_of_bounds() {
1726        let lexer = Lexer::with_code("");
1727        let _ = lexer.location_range(1..2);
1728    }
1729
1730    #[test]
1731    fn lexer_location_range_with_alias_substitution() {
1732        let mut lexer = Lexer::with_code(" a;");
1733        let alias_def = Rc::new(Alias {
1734            name: "a".to_string(),
1735            replacement: "abc".to_string(),
1736            global: false,
1737            origin: Location::dummy("dummy"),
1738        });
1739        for _ in 0..2 {
1740            lexer.peek_char().now_or_never().unwrap().unwrap();
1741            lexer.consume_char();
1742        }
1743        lexer.substitute_alias(1, &alias_def);
1744        for _ in 1..5 {
1745            lexer.peek_char().now_or_never().unwrap().unwrap();
1746            lexer.consume_char();
1747        }
1748
1749        let location = lexer.location_range(2..5);
1750        assert_eq!(*location.code.value.borrow(), "abc");
1751        assert_eq!(location.code.start_line_number.get(), 1);
1752        assert_matches!(&*location.code.source, Source::Alias { original, alias } => {
1753            assert_eq!(*original.code.value.borrow(), " a;");
1754            assert_eq!(original.code.start_line_number.get(), 1);
1755            assert_eq!(*original.code.source, Source::Unknown);
1756            assert_eq!(original.range, 1..2);
1757            assert_eq!(alias, &alias_def);
1758        });
1759        assert_eq!(location.range, 1..3);
1760    }
1761
1762    #[test]
1763    fn lexer_inner_program_success() {
1764        let mut lexer = Lexer::with_code("x y )");
1765        let source = lexer.inner_program().now_or_never().unwrap().unwrap();
1766        assert_eq!(source, "x y ");
1767    }
1768
1769    #[test]
1770    fn lexer_inner_program_failure() {
1771        let mut lexer = Lexer::with_code("<< )");
1772        let e = lexer.inner_program().now_or_never().unwrap().unwrap_err();
1773        assert_eq!(
1774            e.cause,
1775            ErrorCause::Syntax(SyntaxError::MissingHereDocDelimiter)
1776        );
1777        assert_eq!(*e.location.code.value.borrow(), "<< )");
1778        assert_eq!(e.location.code.start_line_number.get(), 1);
1779        assert_eq!(*e.location.code.source, Source::Unknown);
1780        assert_eq!(e.location.range, 3..4);
1781    }
1782}