yash_syntax/parser/lex/
core.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Fundamental building blocks for the lexical analyzer
18
19use super::keyword::Keyword;
20use super::op::Operator;
21use crate::alias::Alias;
22use crate::input::Context;
23use crate::input::InputObject;
24use crate::input::Memory;
25use crate::parser::core::Result;
26use crate::parser::error::Error;
27use crate::source::Code;
28use crate::source::Location;
29use crate::source::Source;
30use crate::source::SourceChar;
31use crate::source::source_chars;
32use crate::syntax::Word;
33use std::cell::RefCell;
34use std::fmt;
35use std::num::NonZeroU64;
36use std::ops::Deref;
37use std::ops::DerefMut;
38use std::ops::Range;
39use std::pin::Pin;
40use std::rc::Rc;
41
/// Tests whether the character is a blank.
///
/// A blank is any whitespace character other than a newline.
pub fn is_blank(c: char) -> bool {
    // TODO locale
    if c == '\n' {
        return false;
    }
    c.is_whitespace()
}
47
/// Result of [`LexerCore::peek_char`]
///
/// Both variants hold references rather than owned values.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PeekChar<'a> {
    /// A character is available at the current position.
    Char(&'a SourceChar),
    /// No more characters are available; the payload is the location recorded
    /// for the end of input.
    EndOfInput(&'a Location),
}
54
55impl<'a> PeekChar<'a> {
56    /// Returns the location that was peeked.
57    #[must_use]
58    fn location<'b>(self: &'b PeekChar<'a>) -> &'a Location {
59        match self {
60            PeekChar::Char(c) => &c.location,
61            PeekChar::EndOfInput(l) => l,
62        }
63    }
64}
65
/// Token identifier, or classification of tokens
///
/// This enum classifies a token as defined in POSIX XCU 2.10.1 Shell Grammar Lexical
/// Conventions, but does not exactly reflect further distinction defined in
/// POSIX XCU 2.10.2 Shell Grammar Rules.
///
/// For convenience, the special token identifier `EndOfInput` is included.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenId {
    /// `TOKEN`
    ///
    /// If this token _looks like_ a reserved word, this variant has some
    /// associated `Keyword` that describes the word. However, it depends on
    /// context whether a token is actually regarded as a reserved word or
    /// just as an ordinary word. You must ensure that you're in an
    /// applicable context when examining the `Keyword` value.
    Token(Option<Keyword>),
    /// Operator token, carrying the specific [`Operator`] that was recognized
    Operator(Operator),
    /// `IO_NUMBER`
    IoNumber,
    /// `IO_LOCATION`
    IoLocation,
    /// Imaginary token identifier for the end of input
    ///
    /// This is the only identifier for which
    /// [`is_clause_delimiter`](Self::is_clause_delimiter) is unconditionally
    /// true.
    EndOfInput,
}
92
93impl TokenId {
94    /// Determines if this token can be a delimiter of a clause.
95    ///
96    /// This function delegates to [`Keyword::is_clause_delimiter`] if the token
97    /// ID is a (possible) keyword, or to [`Operator::is_clause_delimiter`] if
98    /// it is an operator. For `EndOfInput` the function returns true.
99    /// Otherwise, the result is false.
100    pub fn is_clause_delimiter(self) -> bool {
101        use TokenId::*;
102        match self {
103            Token(Some(keyword)) => keyword.is_clause_delimiter(),
104            Token(None) => false,
105            Operator(operator) => operator.is_clause_delimiter(),
106            IoNumber => false,
107            IoLocation => false,
108            EndOfInput => true,
109        }
110    }
111}
112
/// Result of lexical analysis produced by the [`Lexer`]
///
/// A token pairs the parsed [`Word`] with its classification ([`TokenId`])
/// and the position where the word starts.
#[derive(Debug)]
pub struct Token {
    /// Content of the token
    ///
    /// The word value contains at least one [unit](crate::syntax::WordUnit),
    /// regardless of whether the token is an operator. The only exception is
    /// when `id` is `EndOfInput`, in which case the word is empty.
    pub word: Word,
    /// Token identifier
    pub id: TokenId,
    /// Position of the first character of the word
    // NOTE(review): presumably a lexer index as returned by `Lexer::index`;
    // confirm against the code that constructs tokens.
    pub index: usize,
}
127
128impl fmt::Display for Token {
129    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130        write!(f, "{}", self.word)
131    }
132}
133
/// State of the input function in a lexer
///
/// The lexer core consults this state only after all buffered characters have
/// been consumed.
#[derive(Clone, Debug)]
enum InputState {
    /// More lines may still be read from the input.
    Alive,
    /// The input returned an empty line; the location marks where the input
    /// ended.
    EndOfInput(Location),
    /// Reading from the input failed; the stored error is cloned and returned
    /// whenever a character is peeked in this state.
    Error(Error),
}
141
/// Source character with additional attribute
#[derive(Clone, Debug, Eq, PartialEq)]
struct SourceCharEx {
    /// The underlying source character
    value: SourceChar,
    /// Whether this character is part of a line continuation
    ///
    /// The flag starts out false and is set by
    /// [`LexerCore::mark_line_continuation`].
    is_line_continuation: bool,
}
148
149fn ex<I: IntoIterator<Item = SourceChar>>(i: I) -> impl Iterator<Item = SourceCharEx> {
150    i.into_iter().map(|sc| SourceCharEx {
151        value: sc,
152        is_line_continuation: false,
153    })
154}
155
/// Core part of the lexical analyzer
///
/// The core owns the input object, buffers the characters read so far, and
/// tracks the index of the next character to be consumed.
struct LexerCore<'a> {
    // The `input` field could be a `&'a mut dyn InputObject + 'a`, but it is
    // `Box<dyn InputObject + 'a>` to allow the lexer to take ownership of the
    // input object. This is necessary for `Lexer::with_code` and similarly
    // constructed lexers.
    input: Box<dyn InputObject + 'a>,
    // Whether the input is still readable, has ended, or has failed.
    state: InputState,
    // Code object that accumulates every line read since the last flush and is
    // shared by the locations of the characters read from the input.
    raw_code: Rc<Code>,
    // Buffered characters; alias substitution may replace parts of this buffer.
    source: Vec<SourceCharEx>,
    // Index into `source` of the next character to be consumed.
    index: usize,
}
168
169impl<'a> LexerCore<'a> {
170    /// Creates a new lexer core that reads using the given input function.
171    #[must_use]
172    fn new(
173        input: Box<dyn InputObject + 'a>,
174        start_line_number: NonZeroU64,
175        source: Rc<Source>,
176    ) -> LexerCore<'a> {
177        LexerCore {
178            input,
179            raw_code: Rc::new(Code {
180                value: RefCell::new(String::new()),
181                start_line_number,
182                source,
183            }),
184            state: InputState::Alive,
185            source: Vec::new(),
186            index: 0,
187        }
188    }
189
190    /// Computes the start index of the location at the current position.
191    #[must_use]
192    fn next_index(&self) -> usize {
193        let Some(last) = self.source.last() else {
194            return 0;
195        };
196
197        let mut location = &last.value.location;
198        while let Source::Alias { original, .. } = &*location.code.source {
199            location = original;
200        }
201        location.range.end
202    }
203
    /// Peeks the next character, reading the next line if necessary.
    ///
    /// Returns [`PeekChar::Char`] if a buffered character exists at the current
    /// index. Otherwise, lines are read from the input until one of these
    /// happens:
    ///
    /// - a non-empty line is read, in which case its characters are appended
    ///   to the buffer and the first of them is returned;
    /// - an empty line is read, which is taken as the end of input and yields
    ///   [`PeekChar::EndOfInput`];
    /// - the input fails, in which case the error is returned.
    ///
    /// The end-of-input and error results are stored in `self.state`, so later
    /// calls return them again without calling the input.
    async fn peek_char(&mut self) -> Result<PeekChar<'_>> {
        loop {
            // NOTE(review): an `if let Some(sc) = self.source.get(self.index)`
            // form was sketched here previously. Presumably the index-based
            // check is used to keep the borrow checker happy; confirm before
            // simplifying.
            if self.index < self.source.len() {
                return Ok(PeekChar::Char(&self.source[self.index].value));
            }

            // No buffered character is left: report a remembered terminal
            // state, if any, before touching the input again.
            match self.state {
                InputState::Alive => (),
                InputState::EndOfInput(ref location) => return Ok(PeekChar::EndOfInput(location)),
                InputState::Error(ref error) => return Err(error.clone()),
            }

            // Read more input
            let index = self.next_index();
            match self.input.next_line(&self.input_context()).await {
                Ok(line) => {
                    if line.is_empty() {
                        // End of input
                        self.state = InputState::EndOfInput(Location {
                            code: Rc::clone(&self.raw_code),
                            range: index..index,
                        });
                    } else {
                        // Successful read
                        self.raw_code.value.borrow_mut().push_str(&line);
                        self.source
                            .extend(ex(source_chars(&line, &self.raw_code, index)));
                    }
                }
                Err(io_error) => {
                    // Remember the failure; the next loop iteration returns it.
                    self.state = InputState::Error(Error {
                        cause: io_error.into(),
                        location: Location {
                            code: Rc::clone(&self.raw_code),
                            range: index..index,
                        },
                    });
                }
            }
        }
    }
248
249    /// Returns the input context for the next character.
250    fn input_context(&self) -> Context {
251        let mut context = Context::default();
252        context.set_is_first_line(self.raw_code.value.borrow().is_empty());
253        context
254    }
255
256    /// Consumes the next character.
257    ///
258    /// This function must be called after [`peek_char`](Lexer::peek_char) has successfully
259    /// returned the character. Consuming a character that has not yet been peeked would result
260    /// in a panic!
261    fn consume_char(&mut self) {
262        assert!(
263            self.index < self.source.len(),
264            "A character must have been peeked before being consumed: index={}",
265            self.index
266        );
267        self.index += 1;
268    }
269
270    /// Returns a reference to the character at the given index.
271    #[must_use]
272    fn peek_char_at(&self, index: usize) -> &SourceChar {
273        assert!(
274            index <= self.index,
275            "The index {} must not be larger than the current index {}",
276            index,
277            self.index
278        );
279        &self.source[index].value
280    }
281
    /// Returns the current index, i.e., the position in the buffer of the
    /// next character to be consumed.
    #[must_use]
    fn index(&self) -> usize {
        self.index
    }
287
288    /// Rewinds the index to the given value.
289    fn rewind(&mut self, index: usize) {
290        assert!(
291            index <= self.index,
292            "The new index {} must not be larger than the current index {}",
293            index,
294            self.index
295        );
296        self.index = index;
297    }
298
299    /// Checks if there is any character that has been read from the input
300    /// source but not yet consumed.
301    #[must_use]
302    fn pending(&self) -> bool {
303        self.index < self.source.len()
304    }
305
306    /// Clears the internal buffer.
307    fn flush(&mut self) {
308        let start_line_number = self.raw_code.line_number(usize::MAX);
309        self.raw_code = Rc::new(Code {
310            value: RefCell::new(String::new()),
311            start_line_number,
312            source: self.raw_code.source.clone(),
313        });
314        self.source.clear();
315        self.index = 0;
316    }
317
318    /// Clears an end-of-input or error status so that the lexer can resume
319    /// parsing.
320    fn reset(&mut self) {
321        self.state = InputState::Alive;
322        self.flush();
323    }
324
325    /// Extracts a string from the source code range.
326    fn source_string(&self, range: Range<usize>) -> String {
327        self.source[range].iter().map(|c| c.value.value).collect()
328    }
329
330    /// Returns a location for a given range of the source code.
331    #[must_use]
332    fn location_range(&self, range: Range<usize>) -> Location {
333        if range.start == self.source.len() {
334            if let InputState::EndOfInput(ref location) = self.state {
335                return location.clone();
336            }
337        }
338        let start = &self.peek_char_at(range.start).location;
339        let code = start.code.clone();
340        let end = range
341            .map(|index| &self.peek_char_at(index).location)
342            .take_while(|location| location.code == code)
343            .last()
344            .map(|location| location.range.end)
345            .unwrap_or(start.range.start);
346        let range = start.range.start..end;
347        Location { code, range }
348    }
349
350    /// Marks the characters in the given range as line continuation.
351    ///
352    /// This function sets the `is_line_continuation` flag of the characters in
353    /// the range to true. The characters must have been read before calling
354    /// this function.
355    fn mark_line_continuation(&mut self, range: Range<usize>) {
356        assert!(
357            range.end <= self.index,
358            "characters must have been read (range = {:?}, current index = {})",
359            range,
360            self.index
361        );
362        for sc in &mut self.source[range] {
363            sc.is_line_continuation = true;
364        }
365    }
366
367    /// Performs alias substitution.
368    ///
369    /// This function replaces the characters starting from the `begin` index up
370    /// to the current position with the alias value. The resulting part of code
371    /// will be characters with a [`Source::Alias`] origin.
372    fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
373        let end = self.index;
374        assert!(
375            begin < end,
376            "begin index {begin} should be less than end index {end}"
377        );
378
379        let source = Rc::new(Source::Alias {
380            original: self.location_range(begin..end),
381            alias: alias.clone(),
382        });
383        let code = Rc::new(Code {
384            value: RefCell::new(alias.replacement.clone()),
385            start_line_number: NonZeroU64::new(1).unwrap(),
386            source,
387        });
388        let repl = ex(source_chars(&alias.replacement, &code, 0));
389
390        self.source.splice(begin..end, repl);
391        self.index = begin;
392    }
393
394    /// Tests if the given index is after the replacement string of alias
395    /// substitution that ends with a blank.
396    ///
397    /// # Panics
398    ///
399    /// If `index` is larger than the currently read index.
400    fn is_after_blank_ending_alias(&self, index: usize) -> bool {
401        fn ends_with_blank(s: &str) -> bool {
402            s.chars().next_back().is_some_and(is_blank)
403        }
404        fn is_same_alias(alias: &Alias, sc: Option<&SourceCharEx>) -> bool {
405            sc.is_some_and(|sc| sc.value.location.code.source.is_alias_for(&alias.name))
406        }
407
408        for index in (0..index).rev() {
409            let sc = &self.source[index];
410
411            if !sc.is_line_continuation && !is_blank(sc.value.value) {
412                return false;
413            }
414
415            if let Source::Alias { ref alias, .. } = *sc.value.location.code.source {
416                #[allow(clippy::collapsible_if)]
417                if ends_with_blank(&alias.replacement) {
418                    if !is_same_alias(alias, self.source.get(index + 1)) {
419                        return true;
420                    }
421                }
422            }
423        }
424
425        false
426    }
427}
428
429impl fmt::Debug for LexerCore<'_> {
430    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
431        f.debug_struct("LexerCore")
432            .field("state", &self.state)
433            .field("source", &self.source)
434            .field("index", &self.index)
435            .finish_non_exhaustive()
436    }
437}
438
/// Configuration for the [lexer](Lexer)
///
/// `Config` is a builder for the lexer. A [new](Self::new) instance is created
/// with default settings. You can then customize the settings by modifying the
/// corresponding fields. Finally, you can pass an input object to the
/// [`input`](Self::input) method to create a lexer.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot build
/// it with a struct literal and has to start from [`new`](Self::new) or
/// [`Default`].
#[derive(Debug)]
#[must_use = "you must call `input` to create a lexer"]
#[non_exhaustive]
pub struct Config {
    /// Line number for the first line of the input
    ///
    /// The lexer counts the line number from this value to annotate the
    /// location of the tokens. The line number is saved in the
    /// `start_line_number` field of the [`Code`] instance that is contained in
    /// the [`Location`] instance of the token.
    ///
    /// The default value is 1.
    pub start_line_number: NonZeroU64,

    /// Source of the input
    ///
    /// The source is used to annotate the location of the tokens. This value
    /// is saved in the `source` field of the [`Code`] instance that is
    /// contained in the [`Location`] instance of the token.
    ///
    /// The default value is `None`, in which case the source is set to
    /// [`Source::Unknown`]. It is recommended to set this to a more informative
    /// value, so that the locations in the parsed syntax tree can be traced
    /// back to the source code. In particular, the correct source is necessary
    /// to indicate the location of possible errors that occur during parsing
    /// and execution.
    pub source: Option<Rc<Source>>,
}
473
474impl Config {
475    /// Creates a new configuration with default settings.
476    ///
477    /// You can also call [`Lexer::config`] to create a new configuration.
478    pub fn new() -> Self {
479        Config {
480            start_line_number: NonZeroU64::MIN,
481            source: None,
482        }
483    }
484
485    /// Creates a lexer with the given input object.
486    pub fn input<'a>(self, input: Box<dyn InputObject + 'a>) -> Lexer<'a> {
487        let start_line_number = self.start_line_number;
488        let source = self.source.unwrap_or_else(|| Rc::new(Source::Unknown));
489        Lexer {
490            core: LexerCore::new(input, start_line_number, source),
491            line_continuation_enabled: true,
492        }
493    }
494}
495
496impl Default for Config {
497    fn default() -> Self {
498        Self::new()
499    }
500}
501
/// Lexical analyzer
///
/// A lexer reads lines using an input function and parses the characters into tokens. It has an
/// internal buffer containing the characters that have been read and the position (or the
/// index) of the character that is to be parsed next.
///
/// `Lexer` has primitive functions such as [`peek_char`](Lexer::peek_char) that provide access
/// to the character at the current position. Derived functions such as
/// [`skip_blanks_and_comment`](Lexer::skip_blanks_and_comment) depend on those primitives to
/// parse more complex structures in the source code. Usually, the lexer is used by a
/// [parser](super::super::Parser) to read the source code and produce a syntax
/// tree, so you don't need to call these functions directly.
///
/// To construct a lexer, you can use the [`Lexer::new`] function with an input object.
/// You can also use the [`Lexer::config`] function to create a configuration that allows you to
/// customize the settings before creating a lexer.
///
/// ```
/// # use yash_syntax::input::Memory;
/// # use yash_syntax::parser::{lex::Lexer, Parser};
/// # use yash_syntax::source::Source;
/// let mut config = Lexer::config();
/// config.start_line_number = 10.try_into().unwrap();
/// config.source = Some(Source::CommandString.into());
/// let mut lexer = config.input(Box::new(Memory::new("echo hello\n")));
/// let mut parser = Parser::new(&mut lexer);
/// _ = parser.command_line();
/// ```
#[derive(Debug)]
#[must_use]
pub struct Lexer<'a> {
    // `Lexer` is a thin wrapper around `LexerCore`. `Lexer` delegates most
    // functions to `LexerCore`. `Lexer` adds automatic line-continuation
    // skipping to `LexerCore`.
    core: LexerCore<'a>,
    // Whether `peek_char` skips line continuations. This is true for a newly
    // created lexer and is set to false by `disable_line_continuation`.
    line_continuation_enabled: bool,
}
539
540impl<'a> Lexer<'a> {
    /// Creates a new configuration with default settings.
    ///
    /// This is a synonym for [`Config::new`]. You can modify the settings and
    /// then create a lexer with the [`input`](Config::input) method.
    ///
    /// The function takes no lexer; it is an associated function provided for
    /// convenience.
    #[inline(always)]
    pub fn config() -> Config {
        Config::new()
    }
549
550    /// Creates a new lexer that reads using the given input function.
551    ///
552    /// This is a convenience function that creates a lexer with the given input
553    /// object and the default configuration. To customize the configuration,
554    /// use the [`config`](Self::config) function.
555    ///
556    /// This function is best used for testing or for simple cases where you
557    /// don't need to customize the lexer. For practical use, it is recommended
558    /// to use the [`config`](Self::config) function to create a configuration
559    /// and provide it with supplementary information, especially
560    /// [`source`](Config::source), before creating a lexer.
561    pub fn new(input: Box<dyn InputObject + 'a>) -> Lexer<'a> {
562        Self::config().input(input)
563    }
564
565    /// Creates a new lexer with a fixed source code.
566    ///
567    /// This is a convenience function that creates a lexer that reads from a
568    /// string using [`Memory`] with the default configuration.
569    ///
570    /// This function is best used for testing or for simple cases where you
571    /// don't need to customize the lexer. For practical use, it is recommended
572    /// to use the [`config`](Self::config) function to create a configuration
573    /// and provide it with supplementary information, especially
574    /// [`source`](Config::source), before creating a lexer.
575    pub fn with_code(code: &'a str) -> Lexer<'a> {
576        Self::new(Box::new(Memory::new(code)))
577    }
578
579    /// Creates a new lexer with a fixed source code.
580    ///
581    /// This is a convenience function that creates a lexer that reads from a
582    /// string using [`Memory`] with the specified source starting from line
583    /// number 1.
584    ///
585    /// This function is soft-deprecated. Use [`with_code`](Self::with_code)
586    /// instead if the source is `Unknown`. Otherwise, use
587    /// [`config`](Self::config) to set the source and [`input`](Config::input)
588    /// to create a lexer, which is more descriptive.
589    pub fn from_memory<S: Into<Rc<Source>>>(code: &'a str, source: S) -> Lexer<'a> {
590        fn inner(code: &str, source: Rc<Source>) -> Lexer<'_> {
591            let mut config = Lexer::config();
592            config.source = Some(source);
593            config.input(Box::new(Memory::new(code)))
594        }
595        inner(code, source.into())
596    }
597
598    /// Disables line continuation recognition onward.
599    ///
600    /// By default, [`peek_char`](Self::peek_char) silently skips line
601    /// continuation sequences. When line continuation is disabled, however,
602    /// `peek_char` returns characters literally.
603    ///
604    /// Call [`enable_line_continuation`](Self::enable_line_continuation) to
605    /// switch line continuation recognition on.
606    ///
607    /// This function will panic if line continuation has already been disabled.
608    pub fn disable_line_continuation<'b>(&'b mut self) -> PlainLexer<'b, 'a> {
609        assert!(
610            self.line_continuation_enabled,
611            "line continuation already disabled"
612        );
613        self.line_continuation_enabled = false;
614        PlainLexer { lexer: self }
615    }
616
    /// Re-enables line continuation.
    ///
    /// You can pass the `PlainLexer` returned from
    /// [`disable_line_continuation`](Self::disable_line_continuation) to this
    /// function to re-enable line continuation. That is equivalent to dropping
    /// the `PlainLexer` instance, but the code will be more descriptive.
    ///
    /// The body is intentionally empty: the function only takes ownership of
    /// the argument so that it is dropped.
    // NOTE(review): presumably `PlainLexer`'s `Drop` implementation restores
    // the flag; that implementation is not visible here, so confirm.
    pub fn enable_line_continuation<'b>(_: PlainLexer<'a, 'b>) {}
624
625    /// Skips line continuation, i.e., a backslash followed by a newline.
626    ///
627    /// If there is a line continuation at the current position, this function
628    /// consumes the backslash and the newline and returns `Ok(true)`. The
629    /// characters are marked as line continuation.
630    ///
631    /// If there is no line continuation, this function does nothing and returns
632    /// `Ok(false)`.
633    ///
634    /// This function does nothing if line continuation has been
635    /// [disabled](Self::disable_line_continuation).
636    async fn line_continuation(&mut self) -> Result<bool> {
637        if !self.line_continuation_enabled {
638            return Ok(false);
639        }
640
641        let index = self.core.index();
642        match self.core.peek_char().await? {
643            PeekChar::Char(c) if c.value == '\\' => self.core.consume_char(),
644            _ => return Ok(false),
645        }
646
647        match self.core.peek_char().await? {
648            PeekChar::Char(c) if c.value == '\n' => self.core.consume_char(),
649            _ => {
650                self.core.rewind(index);
651                return Ok(false);
652            }
653        }
654
655        self.core.mark_line_continuation(index..index + 2);
656
657        Ok(true)
658    }
659
660    /// Peeks the next character.
661    ///
662    /// If the end of input is reached, `Ok(None)` is returned. On error,
663    /// `Err(_)` is returned.
664    ///
665    /// If line continuation recognition is enabled, combinations of a backslash
666    /// and a newline are silently skipped before returning the next character.
667    /// Call [`disable_line_continuation`](Self::disable_line_continuation) to
668    /// switch off line continuation recognition.
669    ///
670    /// This function requires a mutable reference to `self` since it may need
671    /// to read the next line if needed.
672    pub async fn peek_char(&mut self) -> Result<Option<char>> {
673        while self.line_continuation().await? {}
674
675        match self.core.peek_char().await? {
676            PeekChar::Char(source_char) => Ok(Some(source_char.value)),
677            PeekChar::EndOfInput(_) => Ok(None),
678        }
679    }
680
681    /// Returns the location of the next character.
682    ///
683    /// If there is no more character (that is, it is the end of input), an imaginary location
684    /// is returned that would be returned if a character existed.
685    ///
686    /// This function requires a mutable reference to `self` since it needs to
687    /// [peek](Self::peek_char) the next character.
688    pub async fn location(&mut self) -> Result<&Location> {
689        self.core.peek_char().await.map(|p| p.location())
690    }
691
    /// Consumes the next character.
    ///
    /// This function must be called after [`peek_char`](Lexer::peek_char) has
    /// successfully returned the character.
    ///
    /// # Panics
    ///
    /// If the character has not been peeked, i.e., there is no buffered
    /// character at the current position.
    pub fn consume_char(&mut self) {
        self.core.consume_char()
    }
700
    /// Returns the position of the next character, counted from zero.
    ///
    /// Peeking does not change the index; consuming a character increments it.
    ///
    /// ```
    /// # use yash_syntax::parser::lex::Lexer;
    /// # futures_executor::block_on(async {
    /// let mut lexer = Lexer::with_code("abc");
    /// assert_eq!(lexer.index(), 0);
    /// let _ = lexer.peek_char().await;
    /// assert_eq!(lexer.index(), 0);
    /// lexer.consume_char();
    /// assert_eq!(lexer.index(), 1);
    /// # })
    /// ```
    #[must_use]
    pub fn index(&self) -> usize {
        self.core.index()
    }
718
    /// Moves the current position back to the given index so that characters that have been
    /// consumed can be read again.
    ///
    /// ```
    /// # use yash_syntax::parser::lex::Lexer;
    /// # futures_executor::block_on(async {
    /// let mut lexer = Lexer::with_code("abc");
    /// let saved_index = lexer.index();
    /// assert_eq!(lexer.peek_char().await, Ok(Some('a')));
    /// lexer.consume_char();
    /// assert_eq!(lexer.peek_char().await, Ok(Some('b')));
    /// lexer.rewind(saved_index);
    /// assert_eq!(lexer.peek_char().await, Ok(Some('a')));
    /// # })
    /// ```
    ///
    /// # Panics
    ///
    /// If the given index is larger than the [current index](Lexer::index).
    pub fn rewind(&mut self, index: usize) {
        self.core.rewind(index)
    }
740
    /// Checks if there is any character that has been read from the input
    /// source but not yet consumed.
    ///
    /// Returns true if the internal buffer holds a character at the
    /// [current index](Self::index).
    #[must_use]
    pub fn pending(&self) -> bool {
        self.core.pending()
    }
747
    /// Clears the internal buffer of the lexer.
    ///
    /// Locations returned from [`location`](Self::location) share a single code
    /// instance that is also retained by the lexer. The code grows long as the
    /// lexer reads more input. To prevent the code from getting too large, you
    /// can call this function, which replaces the retained code with a new
    /// empty one. The new code's `start_line_number` will be incremented by
    /// the number of lines in the previous code.
    ///
    /// Flushing also discards the buffered characters and sets the
    /// [index](Self::index) back to 0.
    pub fn flush(&mut self) {
        self.core.flush()
    }
759
    /// Clears an end-of-input or error status so that the lexer can resume
    /// parsing.
    ///
    /// This function also [flushes](Self::flush) the internal buffer.
    ///
    /// This function will be useful only in an interactive shell where the
    /// user can continue entering commands even after they send an
    /// end-of-input or are interrupted by a syntax error.
    pub fn reset(&mut self) {
        self.core.reset()
    }
769
    /// Peeks the next character and, if the given decider function returns true for it,
    /// advances the position.
    ///
    /// Returns the consumed character if the function returned true. Returns `Ok(None)` if it
    /// returned false or there is no more character. Returns `Err(_)` if peeking fails.
    ///
    /// This is a generic front end that forwards to the implementation taking
    /// a `dyn FnMut`.
    pub async fn consume_char_if<F>(&mut self, mut f: F) -> Result<Option<&SourceChar>>
    where
        F: FnMut(char) -> bool,
    {
        self.consume_char_if_dyn(&mut f).await
    }
781
782    /// Dynamic version of [`Self::consume_char_if`].
783    pub(crate) async fn consume_char_if_dyn(
784        &mut self,
785        f: &mut dyn FnMut(char) -> bool,
786    ) -> Result<Option<&SourceChar>> {
787        match self.peek_char().await? {
788            Some(c) if f(c) => {
789                let index = self.index();
790                self.consume_char();
791                Ok(Some(self.core.peek_char_at(index)))
792            }
793            _ => Ok(None),
794        }
795    }
796
    /// Extracts a string from the source code range.
    ///
    /// This function returns the source code string for the range specified by
    /// the argument. The range must only cover characters that have already
    /// been read into the internal buffer.
    ///
    /// # Panics
    ///
    /// If the argument range is out of bounds, i.e., it covers a character
    /// that has not yet been read.
    #[inline]
    pub fn source_string(&self, range: Range<usize>) -> String {
        self.core.source_string(range)
    }
811
    /// Returns a location for a given range of the source code.
    ///
    /// All the characters in the range must have been
    /// [consume](Self::consume_char)d. If the range refers to an unconsumed
    /// character, this function will panic!
    ///
    /// If the characters are from more than one [`Code`] fragment, the location
    /// will only cover the initial portion of the range sharing the same
    /// `Code`.
    ///
    /// If the range starts at the end of the buffered characters and the end of
    /// input has been reached, the end-of-input location is returned.
    ///
    /// # Panics
    ///
    /// This function will panic if the range refers to an unconsumed character.
    ///
    /// If the start index of the range is the end of input, it must have been
    /// peeked and the range must be empty, or the function will panic.
    #[must_use]
    pub fn location_range(&self, range: Range<usize>) -> Location {
        self.core.location_range(range)
    }
832
833    /// Performs alias substitution right before the current position.
834    ///
835    /// This function must be called just after a [word](WordLexer::word) has been parsed that
836    /// matches the name of the argument alias. No check is done in this function that there is
837    /// a matching word before the current position. The characters starting from the `begin`
838    /// index up to the current position are silently replaced with the alias value.
839    ///
840    /// The resulting part of code will be characters with a [`Source::Alias`] origin.
841    ///
842    /// After the substitution, the position will be set before the replaced string.
843    ///
844    /// # Panics
845    ///
846    /// If the replaced part is empty, i.e., `begin >= self.index()`.
847    pub fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
848        self.core.substitute_alias(begin, alias)
849    }
850
851    /// Tests if the given index is after the replacement string of alias
852    /// substitution that ends with a blank.
853    ///
854    /// # Panics
855    ///
856    /// If `index` is larger than the currently read index.
857    pub fn is_after_blank_ending_alias(&self, index: usize) -> bool {
858        self.core.is_after_blank_ending_alias(index)
859    }
860
861    /// Parses an optional compound list that is the content of a command
862    /// substitution.
863    ///
864    /// This function consumes characters until a token that cannot be the
865    /// beginning of an and-or list is found and returns the string that was
866    /// consumed.
867    pub async fn inner_program(&mut self) -> Result<String> {
868        let begin = self.index();
869
870        let mut parser = super::super::Parser::new(self);
871        parser.maybe_compound_list().await?;
872
873        let end = parser.peek_token().await?.index;
874        self.rewind(end);
875
876        Ok(self.core.source_string(begin..end))
877    }
878
879    /// Like [`Lexer::inner_program`], but returns the future in a pinning box.
880    pub fn inner_program_boxed(&mut self) -> Pin<Box<dyn Future<Output = Result<String>> + '_>> {
881        Box::pin(self.inner_program())
882    }
883}
884
/// Reference to [`Lexer`] with line continuation disabled
///
/// This struct implements the RAII pattern for temporarily disabling line
/// continuation. When you disable the line continuation of a lexer, you get an
/// instance of `PlainLexer`. You can access the original lexer via the
/// `PlainLexer` until you drop it, at which point line continuation is
/// automatically re-enabled.
#[derive(Debug)]
#[must_use = "You must retain the PlainLexer to keep line continuation disabled"]
pub struct PlainLexer<'a, 'b> {
    /// The borrowed lexer; its `line_continuation_enabled` flag is set back to
    /// `true` when this `PlainLexer` is dropped.
    lexer: &'a mut Lexer<'b>,
}
897
898impl<'b> Deref for PlainLexer<'_, 'b> {
899    type Target = Lexer<'b>;
900    fn deref(&self) -> &Lexer<'b> {
901        self.lexer
902    }
903}
904
905impl<'b> DerefMut for PlainLexer<'_, 'b> {
906    fn deref_mut(&mut self) -> &mut Lexer<'b> {
907        self.lexer
908    }
909}
910
911impl Drop for PlainLexer<'_, '_> {
912    fn drop(&mut self) {
913        self.lexer.line_continuation_enabled = true;
914    }
915}
916
/// Context in which a [word](crate::syntax::Word) is parsed
///
/// How the word of a [switch](crate::syntax::Switch) is parsed depends on
/// whether the parameter expansion containing the switch is part of a text or
/// a word. A `WordContext` value is used to decide the behavior of the lexer.
///
/// Parser functions that depend on the context are implemented in
/// [`WordLexer`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WordContext {
    /// The text unit being parsed is part of a [text](crate::syntax::Text).
    Text,
    /// The text unit being parsed is part of a [word](crate::syntax::Word).
    Word,
}
932
/// Lexer with additional information for parsing [texts](crate::syntax::Text)
/// and [words](crate::syntax::Word)
///
/// A `WordLexer` dereferences to the [`Lexer`] it borrows, so the lexer's
/// methods can be called on it directly.
#[derive(Debug)]
pub struct WordLexer<'a, 'b> {
    /// The borrowed lexer that this `WordLexer` dereferences to
    pub lexer: &'a mut Lexer<'b>,
    /// Whether the unit being parsed is part of a text or a word
    pub context: WordContext,
}
940
941impl<'b> Deref for WordLexer<'_, 'b> {
942    type Target = Lexer<'b>;
943    fn deref(&self) -> &Lexer<'b> {
944        self.lexer
945    }
946}
947
948impl<'b> DerefMut for WordLexer<'_, 'b> {
949    fn deref_mut(&mut self) -> &mut Lexer<'b> {
950        self.lexer
951    }
952}
953
954#[cfg(test)]
955mod tests {
956    use super::*;
957    use crate::input::Input;
958    use crate::parser::error::ErrorCause;
959    use crate::parser::error::SyntaxError;
960    use assert_matches::assert_matches;
961    use futures_util::FutureExt;
962
963    #[test]
964    fn lexer_core_peek_char_empty_source() {
965        let input = Memory::new("");
966        let line = NonZeroU64::new(32).unwrap();
967        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
968        let result = lexer.peek_char().now_or_never().unwrap();
969        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
970            assert_eq!(*location.code.value.borrow(), "");
971            assert_eq!(location.code.start_line_number, line);
972            assert_eq!(*location.code.source, Source::Unknown);
973            assert_eq!(location.range, 0..0);
974        });
975    }
976
977    #[test]
978    fn lexer_core_peek_char_io_error() {
979        #[derive(Debug)]
980        struct Failing;
981        impl fmt::Display for Failing {
982            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
983                write!(f, "Failing")
984            }
985        }
986        impl std::error::Error for Failing {}
987        impl Input for Failing {
988            async fn next_line(&mut self, _: &Context) -> crate::input::Result {
989                Err(std::io::Error::other(Failing))
990            }
991        }
992        let line = NonZeroU64::new(42).unwrap();
993        let mut lexer = LexerCore::new(Box::new(Failing), line, Rc::new(Source::Unknown));
994
995        let e = lexer.peek_char().now_or_never().unwrap().unwrap_err();
996        assert_matches!(e.cause, ErrorCause::Io(io_error) => {
997            assert_eq!(io_error.kind(), std::io::ErrorKind::Other);
998        });
999        assert_eq!(*e.location.code.value.borrow(), "");
1000        assert_eq!(e.location.code.start_line_number, line);
1001        assert_eq!(*e.location.code.source, Source::Unknown);
1002        assert_eq!(e.location.range, 0..0);
1003    }
1004
1005    #[test]
1006    fn lexer_core_peek_char_context_is_first_line() {
1007        // In this test case, this mock input function will be called twice.
1008        struct InputMock {
1009            first: bool,
1010        }
1011        impl Input for InputMock {
1012            async fn next_line(&mut self, context: &Context) -> crate::input::Result {
1013                assert_eq!(context.is_first_line(), self.first);
1014                self.first = false;
1015                Ok("\n".to_owned())
1016            }
1017        }
1018
1019        let input = InputMock { first: true };
1020        let line = NonZeroU64::new(42).unwrap();
1021        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1022
1023        let peek = lexer.peek_char().now_or_never().unwrap();
1024        assert_matches!(peek, Ok(PeekChar::Char(_)));
1025        lexer.consume_char();
1026
1027        let peek = lexer.peek_char().now_or_never().unwrap();
1028        assert_matches!(peek, Ok(PeekChar::Char(_)));
1029        lexer.consume_char();
1030    }
1031
1032    #[test]
1033    fn lexer_core_consume_char_success() {
1034        let input = Memory::new("a\nb");
1035        let line = NonZeroU64::new(1).unwrap();
1036        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1037
1038        let result = lexer.peek_char().now_or_never().unwrap();
1039        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1040            assert_eq!(c.value, 'a');
1041            assert_eq!(*c.location.code.value.borrow(), "a\n");
1042            assert_eq!(c.location.code.start_line_number, line);
1043            assert_eq!(*c.location.code.source, Source::Unknown);
1044            assert_eq!(c.location.range, 0..1);
1045        });
1046        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1047            assert_eq!(c.value, 'a');
1048            assert_eq!(*c.location.code.value.borrow(), "a\n");
1049            assert_eq!(c.location.code.start_line_number, line);
1050            assert_eq!(*c.location.code.source, Source::Unknown);
1051            assert_eq!(c.location.range, 0..1);
1052        });
1053        lexer.consume_char();
1054
1055        let result = lexer.peek_char().now_or_never().unwrap();
1056        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1057            assert_eq!(c.value, '\n');
1058            assert_eq!(*c.location.code.value.borrow(), "a\n");
1059            assert_eq!(c.location.code.start_line_number, line);
1060            assert_eq!(*c.location.code.source, Source::Unknown);
1061            assert_eq!(c.location.range, 1..2);
1062        });
1063        lexer.consume_char();
1064
1065        let result = lexer.peek_char().now_or_never().unwrap();
1066        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1067            assert_eq!(c.value, 'b');
1068            assert_eq!(*c.location.code.value.borrow(), "a\nb");
1069            assert_eq!(c.location.code.start_line_number.get(), 1);
1070            assert_eq!(*c.location.code.source, Source::Unknown);
1071            assert_eq!(c.location.range, 2..3);
1072        });
1073        lexer.consume_char();
1074
1075        let result = lexer.peek_char().now_or_never().unwrap();
1076        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
1077            assert_eq!(*location.code.value.borrow(), "a\nb");
1078            assert_eq!(location.code.start_line_number.get(), 1);
1079            assert_eq!(*location.code.source, Source::Unknown);
1080            assert_eq!(location.range, 3..3);
1081        });
1082    }
1083
1084    #[test]
1085    #[should_panic(expected = "A character must have been peeked before being consumed: index=0")]
1086    fn lexer_core_consume_char_panic() {
1087        let input = Memory::new("a");
1088        let line = NonZeroU64::new(1).unwrap();
1089        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1090        lexer.consume_char();
1091    }
1092
1093    #[test]
1094    fn lexer_core_peek_char_at() {
1095        let input = Memory::new("a\nb");
1096        let line = NonZeroU64::new(1).unwrap();
1097        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1098
1099        let c0 = assert_matches!(
1100            lexer.peek_char().now_or_never().unwrap(),
1101            Ok(PeekChar::Char(c)) => c.clone()
1102        );
1103        lexer.consume_char();
1104
1105        let c1 = assert_matches!(
1106            lexer.peek_char().now_or_never().unwrap(),
1107            Ok(PeekChar::Char(c)) => c.clone()
1108        );
1109        lexer.consume_char();
1110
1111        let c2 = assert_matches!(
1112            lexer.peek_char().now_or_never().unwrap(),
1113            Ok(PeekChar::Char(c)) => c.clone()
1114        );
1115
1116        assert_eq!(lexer.peek_char_at(0), &c0);
1117        assert_eq!(lexer.peek_char_at(1), &c1);
1118        assert_eq!(lexer.peek_char_at(2), &c2);
1119    }
1120
1121    #[test]
1122    fn lexer_core_index() {
1123        let input = Memory::new("a\nb");
1124        let line = NonZeroU64::new(1).unwrap();
1125        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1126
1127        assert_eq!(lexer.index(), 0);
1128        lexer.peek_char().now_or_never().unwrap().unwrap();
1129        assert_eq!(lexer.index(), 0);
1130        lexer.consume_char();
1131
1132        assert_eq!(lexer.index(), 1);
1133        lexer.peek_char().now_or_never().unwrap().unwrap();
1134        lexer.consume_char();
1135
1136        assert_eq!(lexer.index(), 2);
1137        lexer.peek_char().now_or_never().unwrap().unwrap();
1138        lexer.consume_char();
1139
1140        assert_eq!(lexer.index(), 3);
1141    }
1142
1143    #[test]
1144    fn lexer_core_rewind_success() {
1145        let input = Memory::new("abc");
1146        let line = NonZeroU64::new(1).unwrap();
1147        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1148        lexer.rewind(0);
1149        assert_eq!(lexer.index(), 0);
1150
1151        let _ = lexer.peek_char().now_or_never().unwrap();
1152        lexer.consume_char();
1153        let _ = lexer.peek_char().now_or_never().unwrap();
1154        lexer.consume_char();
1155        lexer.rewind(0);
1156
1157        let result = lexer.peek_char().now_or_never().unwrap();
1158        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1159            assert_eq!(c.value, 'a');
1160            assert_eq!(*c.location.code.value.borrow(), "abc");
1161            assert_eq!(c.location.code.start_line_number, line);
1162            assert_eq!(*c.location.code.source, Source::Unknown);
1163            assert_eq!(c.location.range, 0..1);
1164        });
1165    }
1166
1167    #[test]
1168    #[should_panic(expected = "The new index 1 must not be larger than the current index 0")]
1169    fn lexer_core_rewind_invalid_index() {
1170        let input = Memory::new("abc");
1171        let line = NonZeroU64::new(1).unwrap();
1172        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1173        lexer.rewind(1);
1174    }
1175
1176    #[test]
1177    fn lexer_core_source_string() {
1178        let input = Memory::new("ab\ncd");
1179        let line = NonZeroU64::new(1).unwrap();
1180        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1181        for _ in 0..4 {
1182            let _ = lexer.peek_char().now_or_never().unwrap();
1183            lexer.consume_char();
1184        }
1185
1186        let result = lexer.source_string(1..4);
1187        assert_eq!(result, "b\nc");
1188    }
1189
1190    #[test]
1191    #[should_panic(expected = "begin index 0 should be less than end index 0")]
1192    fn lexer_core_substitute_alias_with_invalid_index() {
1193        let input = Memory::new("a b");
1194        let line = NonZeroU64::new(1).unwrap();
1195        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1196        let alias = Rc::new(Alias {
1197            name: "a".to_string(),
1198            replacement: "".to_string(),
1199            global: false,
1200            origin: Location::dummy("dummy"),
1201        });
1202        lexer.substitute_alias(0, &alias);
1203    }
1204
1205    #[test]
1206    fn lexer_core_substitute_alias_single_line_replacement() {
1207        let input = Memory::new("a b");
1208        let line = NonZeroU64::new(1).unwrap();
1209        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1210        let alias = Rc::new(Alias {
1211            name: "a".to_string(),
1212            replacement: "lex".to_string(),
1213            global: false,
1214            origin: Location::dummy("dummy"),
1215        });
1216
1217        let _ = lexer.peek_char().now_or_never().unwrap();
1218        lexer.consume_char();
1219
1220        lexer.substitute_alias(0, &alias);
1221
1222        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1223            assert_eq!(c.value, 'l');
1224            assert_eq!(*c.location.code.value.borrow(), "lex");
1225            assert_eq!(c.location.code.start_line_number.get(), 1);
1226            assert_matches!(&*c.location.code.source,
1227                Source::Alias { original, alias: alias2 } => {
1228                assert_eq!(*original.code.value.borrow(), "a b");
1229                assert_eq!(original.code.start_line_number, line);
1230                assert_eq!(*original.code.source, Source::Unknown);
1231                assert_eq!(original.range, 0..1);
1232                assert_eq!(alias2, &alias);
1233            });
1234            assert_eq!(c.location.range, 0..1);
1235        });
1236        lexer.consume_char();
1237
1238        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1239            assert_eq!(c.value, 'e');
1240            assert_eq!(*c.location.code.value.borrow(), "lex");
1241            assert_eq!(c.location.code.start_line_number, line);
1242            assert_matches!(&*c.location.code.source,
1243                Source::Alias { original, alias: alias2 } => {
1244                assert_eq!(*original.code.value.borrow(), "a b");
1245                assert_eq!(original.code.start_line_number, line);
1246                assert_eq!(*original.code.source, Source::Unknown);
1247                assert_eq!(original.range, 0..1);
1248                assert_eq!(alias2, &alias);
1249            });
1250            assert_eq!(c.location.range, 1..2);
1251        });
1252        lexer.consume_char();
1253
1254        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1255            assert_eq!(c.value, 'x');
1256            assert_eq!(*c.location.code.value.borrow(), "lex");
1257            assert_eq!(c.location.code.start_line_number, line);
1258            assert_matches!(&*c.location.code.source,
1259                Source::Alias { original, alias: alias2 } => {
1260                assert_eq!(*original.code.value.borrow(), "a b");
1261                assert_eq!(original.code.start_line_number, line);
1262                assert_eq!(*original.code.source, Source::Unknown);
1263                assert_eq!(original.range, 0..1);
1264                assert_eq!(alias2, &alias);
1265            });
1266            assert_eq!(c.location.range, 2..3);
1267        });
1268        lexer.consume_char();
1269
1270        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1271            assert_eq!(c.value, ' ');
1272            assert_eq!(*c.location.code.value.borrow(), "a b");
1273            assert_eq!(c.location.code.start_line_number, line);
1274            assert_eq!(*c.location.code.source, Source::Unknown);
1275            assert_eq!(c.location.range, 1..2);
1276        });
1277        lexer.consume_char();
1278    }
1279
1280    #[test]
1281    fn lexer_core_substitute_alias_multi_line_replacement() {
1282        let input = Memory::new(" foo b");
1283        let line = NonZeroU64::new(1).unwrap();
1284        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1285        let alias = Rc::new(Alias {
1286            name: "foo".to_string(),
1287            replacement: "x\ny".to_string(),
1288            global: true,
1289            origin: Location::dummy("loc"),
1290        });
1291
1292        for _ in 0..4 {
1293            let _ = lexer.peek_char().now_or_never().unwrap();
1294            lexer.consume_char();
1295        }
1296
1297        lexer.substitute_alias(1, &alias);
1298
1299        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1300            assert_eq!(c.value, 'x');
1301            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1302            assert_eq!(c.location.code.start_line_number, line);
1303            assert_matches!(&*c.location.code.source,
1304                Source::Alias { original, alias: alias2 } => {
1305                assert_eq!(*original.code.value.borrow(), " foo b");
1306                assert_eq!(original.code.start_line_number, line);
1307                assert_eq!(*original.code.source, Source::Unknown);
1308                assert_eq!(original.range, 1..4);
1309                assert_eq!(alias2, &alias);
1310            });
1311            assert_eq!(c.location.range, 0..1);
1312        });
1313        lexer.consume_char();
1314
1315        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1316            assert_eq!(c.value, '\n');
1317            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1318            assert_eq!(c.location.code.start_line_number, line);
1319            assert_matches!(&*c.location.code.source,
1320                Source::Alias { original, alias: alias2 } => {
1321                assert_eq!(*original.code.value.borrow(), " foo b");
1322                assert_eq!(original.code.start_line_number, line);
1323                assert_eq!(*original.code.source, Source::Unknown);
1324                assert_eq!(original.range, 1..4);
1325                assert_eq!(alias2, &alias);
1326            });
1327            assert_eq!(c.location.range, 1..2);
1328        });
1329        lexer.consume_char();
1330
1331        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1332            assert_eq!(c.value, 'y');
1333            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1334            assert_eq!(c.location.code.start_line_number, line);
1335            assert_matches!(&*c.location.code.source, Source::Alias { original, alias: alias2 } => {
1336                assert_eq!(*original.code.value.borrow(), " foo b");
1337                assert_eq!(original.code.start_line_number, line);
1338                assert_eq!(*original.code.source, Source::Unknown);
1339                assert_eq!(original.range, 1..4);
1340                assert_eq!(alias2, &alias);
1341            });
1342            assert_eq!(c.location.range, 2..3);
1343        });
1344        lexer.consume_char();
1345
1346        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1347            assert_eq!(c.value, ' ');
1348            assert_eq!(*c.location.code.value.borrow(), " foo b");
1349            assert_eq!(c.location.code.start_line_number, line);
1350            assert_eq!(*c.location.code.source, Source::Unknown);
1351            assert_eq!(c.location.range, 4..5);
1352        });
1353        lexer.consume_char();
1354    }
1355
1356    #[test]
1357    fn lexer_core_substitute_alias_empty_replacement() {
1358        let input = Memory::new("x ");
1359        let line = NonZeroU64::new(1).unwrap();
1360        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1361        let alias = Rc::new(Alias {
1362            name: "x".to_string(),
1363            replacement: "".to_string(),
1364            global: false,
1365            origin: Location::dummy("dummy"),
1366        });
1367
1368        let _ = lexer.peek_char().now_or_never().unwrap();
1369        lexer.consume_char();
1370
1371        lexer.substitute_alias(0, &alias);
1372
1373        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1374            assert_eq!(c.value, ' ');
1375            assert_eq!(*c.location.code.value.borrow(), "x ");
1376            assert_eq!(c.location.code.start_line_number, line);
1377            assert_eq!(*c.location.code.source, Source::Unknown);
1378            assert_eq!(c.location.range, 1..2);
1379        });
1380    }
1381
1382    #[test]
1383    fn lexer_core_peek_char_after_alias_substitution() {
1384        let input = Memory::new("a\nb");
1385        let line = NonZeroU64::new(1).unwrap();
1386        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1387
1388        lexer.peek_char().now_or_never().unwrap().unwrap();
1389        lexer.consume_char();
1390
1391        let alias = Rc::new(Alias {
1392            name: "a".to_string(),
1393            replacement: "".to_string(),
1394            global: false,
1395            origin: Location::dummy("dummy"),
1396        });
1397        lexer.substitute_alias(0, &alias);
1398
1399        let result = lexer.peek_char().now_or_never().unwrap();
1400        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1401            assert_eq!(c.value, '\n');
1402            assert_eq!(*c.location.code.value.borrow(), "a\n");
1403            assert_eq!(c.location.code.start_line_number, line);
1404            assert_eq!(*c.location.code.source, Source::Unknown);
1405            assert_eq!(c.location.range, 1..2);
1406        });
1407        lexer.consume_char();
1408
1409        let result = lexer.peek_char().now_or_never().unwrap();
1410        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1411            assert_eq!(c.value, 'b');
1412            assert_eq!(*c.location.code.value.borrow(), "a\nb");
1413            assert_eq!(c.location.code.start_line_number.get(), 1);
1414            assert_eq!(*c.location.code.source, Source::Unknown);
1415            assert_eq!(c.location.range, 2..3);
1416        });
1417        lexer.consume_char();
1418
1419        let result = lexer.peek_char().now_or_never().unwrap();
1420        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
1421            assert_eq!(*location.code.value.borrow(), "a\nb");
1422            assert_eq!(location.code.start_line_number.get(), 1);
1423            assert_eq!(*location.code.source, Source::Unknown);
1424            assert_eq!(location.range, 3..3);
1425        });
1426    }
1427
1428    #[test]
1429    fn lexer_core_is_after_blank_ending_alias_index_0() {
1430        let original = Location::dummy("original");
1431        let alias = Rc::new(Alias {
1432            name: "a".to_string(),
1433            replacement: " ".to_string(),
1434            global: false,
1435            origin: Location::dummy("origin"),
1436        });
1437        let source = Source::Alias { original, alias };
1438        let input = Memory::new("a");
1439        let line = NonZeroU64::new(1).unwrap();
1440        let lexer = LexerCore::new(Box::new(input), line, Rc::new(source));
1441        assert!(!lexer.is_after_blank_ending_alias(0));
1442    }
1443
1444    #[test]
1445    fn lexer_core_is_after_blank_ending_alias_not_blank_ending() {
1446        let input = Memory::new("a x");
1447        let line = NonZeroU64::new(1).unwrap();
1448        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1449        let alias = Rc::new(Alias {
1450            name: "a".to_string(),
1451            replacement: " b".to_string(),
1452            global: false,
1453            origin: Location::dummy("dummy"),
1454        });
1455
1456        lexer.peek_char().now_or_never().unwrap().unwrap();
1457        lexer.consume_char();
1458
1459        lexer.substitute_alias(0, &alias);
1460
1461        assert!(!lexer.is_after_blank_ending_alias(0));
1462        assert!(!lexer.is_after_blank_ending_alias(1));
1463        assert!(!lexer.is_after_blank_ending_alias(2));
1464        assert!(!lexer.is_after_blank_ending_alias(3));
1465    }
1466
1467    #[test]
1468    fn lexer_core_is_after_blank_ending_alias_blank_ending() {
1469        let input = Memory::new("a x");
1470        let line = NonZeroU64::new(1).unwrap();
1471        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1472        let alias = Rc::new(Alias {
1473            name: "a".to_string(),
1474            replacement: " b ".to_string(),
1475            global: false,
1476            origin: Location::dummy("dummy"),
1477        });
1478
1479        lexer.peek_char().now_or_never().unwrap().unwrap();
1480        lexer.consume_char();
1481
1482        lexer.substitute_alias(0, &alias);
1483
1484        assert!(!lexer.is_after_blank_ending_alias(0));
1485        assert!(!lexer.is_after_blank_ending_alias(1));
1486        assert!(!lexer.is_after_blank_ending_alias(2));
1487        assert!(lexer.is_after_blank_ending_alias(3));
1488        assert!(lexer.is_after_blank_ending_alias(4));
1489    }
1490
1491    #[test]
1492    fn lexer_core_is_after_blank_ending_alias_after_line_continuation() {
1493        let input = Memory::new("a\\\n x");
1494        let line = NonZeroU64::new(1).unwrap();
1495        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1496        let alias = Rc::new(Alias {
1497            name: "a".to_string(),
1498            replacement: " b ".to_string(),
1499            global: false,
1500            origin: Location::dummy("dummy"),
1501        });
1502
1503        lexer.peek_char().now_or_never().unwrap().unwrap();
1504        lexer.consume_char();
1505        lexer.substitute_alias(0, &alias);
1506
1507        while let Ok(PeekChar::Char(_)) = lexer.peek_char().now_or_never().unwrap() {
1508            lexer.consume_char();
1509        }
1510        lexer.mark_line_continuation(3..5);
1511
1512        assert!(!lexer.is_after_blank_ending_alias(0));
1513        assert!(!lexer.is_after_blank_ending_alias(1));
1514        assert!(!lexer.is_after_blank_ending_alias(2));
1515        assert!(lexer.is_after_blank_ending_alias(5));
1516        assert!(lexer.is_after_blank_ending_alias(6));
1517    }
1518
1519    #[test]
1520    fn lexer_with_empty_source() {
1521        let mut lexer = Lexer::with_code("");
1522        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
1523    }
1524
1525    #[test]
1526    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_backslash() {
1527        let mut lexer = Lexer::with_code("\\\n\n\\");
1528        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\n')));
1529        assert_eq!(lexer.index(), 2);
1530    }
1531
1532    #[test]
1533    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_newline() {
1534        let mut lexer = Lexer::with_code("\\\n\\\n\\\n\\\\");
1535        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1536        assert_eq!(lexer.index(), 6);
1537    }
1538
1539    #[test]
1540    fn lexer_peek_char_with_line_continuation_disabled() {
1541        let mut lexer = Lexer::with_code("\\\n\\\n\\\\");
1542        let mut lexer = lexer.disable_line_continuation();
1543        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1544        assert_eq!(lexer.index(), 0);
1545    }
1546
1547    #[test]
1548    fn lexer_flush() {
1549        let mut lexer = Lexer::with_code(" \n\n\t\n");
1550        let location_1 = lexer.location().now_or_never().unwrap().unwrap().clone();
1551        assert_eq!(*location_1.code.value.borrow(), " \n");
1552
1553        lexer.consume_char();
1554        lexer.peek_char().now_or_never().unwrap().unwrap();
1555        lexer.consume_char();
1556        lexer.peek_char().now_or_never().unwrap().unwrap();
1557        lexer.consume_char();
1558        lexer.flush();
1559        lexer.peek_char().now_or_never().unwrap().unwrap();
1560        lexer.consume_char();
1561
1562        let location_2 = lexer.location().now_or_never().unwrap().unwrap().clone();
1563
1564        assert_eq!(*location_1.code.value.borrow(), " \n\n");
1565        assert_eq!(location_1.code.start_line_number.get(), 1);
1566        assert_eq!(*location_1.code.source, Source::Unknown);
1567        assert_eq!(location_1.range, 0..1);
1568        assert_eq!(*location_2.code.value.borrow(), "\t\n");
1569        assert_eq!(location_2.code.start_line_number.get(), 3);
1570        assert_eq!(*location_2.code.source, Source::Unknown);
1571        assert_eq!(location_2.range, 1..2);
1572    }
1573
1574    #[test]
1575    fn lexer_consume_char_if() {
1576        let mut lexer = Lexer::with_code("word\n");
1577
1578        let mut called = 0;
1579        let c = lexer
1580            .consume_char_if(|c| {
1581                assert_eq!(c, 'w');
1582                called += 1;
1583                true
1584            })
1585            .now_or_never()
1586            .unwrap()
1587            .unwrap()
1588            .unwrap();
1589        assert_eq!(called, 1);
1590        assert_eq!(c.value, 'w');
1591        assert_eq!(*c.location.code.value.borrow(), "word\n");
1592        assert_eq!(c.location.code.start_line_number.get(), 1);
1593        assert_eq!(*c.location.code.source, Source::Unknown);
1594        assert_eq!(c.location.range, 0..1);
1595
1596        let mut called = 0;
1597        let r = lexer
1598            .consume_char_if(|c| {
1599                assert_eq!(c, 'o');
1600                called += 1;
1601                false
1602            })
1603            .now_or_never()
1604            .unwrap();
1605        assert_eq!(called, 1);
1606        assert_eq!(r, Ok(None));
1607
1608        let mut called = 0;
1609        let r = lexer
1610            .consume_char_if(|c| {
1611                assert_eq!(c, 'o');
1612                called += 1;
1613                false
1614            })
1615            .now_or_never()
1616            .unwrap();
1617        assert_eq!(called, 1);
1618        assert_eq!(r, Ok(None));
1619
1620        let mut called = 0;
1621        let c = lexer
1622            .consume_char_if(|c| {
1623                assert_eq!(c, 'o');
1624                called += 1;
1625                true
1626            })
1627            .now_or_never()
1628            .unwrap()
1629            .unwrap()
1630            .unwrap();
1631        assert_eq!(called, 1);
1632        assert_eq!(c.value, 'o');
1633        assert_eq!(*c.location.code.value.borrow(), "word\n");
1634        assert_eq!(c.location.code.start_line_number.get(), 1);
1635        assert_eq!(*c.location.code.source, Source::Unknown);
1636        assert_eq!(c.location.range, 1..2);
1637
1638        lexer
1639            .consume_char_if(|c| {
1640                assert_eq!(c, 'r');
1641                true
1642            })
1643            .now_or_never()
1644            .unwrap()
1645            .unwrap()
1646            .unwrap();
1647        lexer
1648            .consume_char_if(|c| {
1649                assert_eq!(c, 'd');
1650                true
1651            })
1652            .now_or_never()
1653            .unwrap()
1654            .unwrap()
1655            .unwrap();
1656        lexer
1657            .consume_char_if(|c| {
1658                assert_eq!(c, '\n');
1659                true
1660            })
1661            .now_or_never()
1662            .unwrap()
1663            .unwrap()
1664            .unwrap();
1665
1666        // end of input
1667        let r = lexer
1668            .consume_char_if(|c| {
1669                unreachable!("unexpected call to the decider function: argument={}", c)
1670            })
1671            .now_or_never()
1672            .unwrap();
1673        assert_eq!(r, Ok(None));
1674    }
1675
1676    #[test]
1677    fn lexer_location_range_with_empty_range() {
1678        let mut lexer = Lexer::with_code("");
1679        lexer.peek_char().now_or_never().unwrap().unwrap();
1680        let location = lexer.location_range(0..0);
1681        assert_eq!(*location.code.value.borrow(), "");
1682        assert_eq!(location.code.start_line_number.get(), 1);
1683        assert_eq!(*location.code.source, Source::Unknown);
1684        assert_eq!(location.range, 0..0);
1685    }
1686
1687    #[test]
1688    fn lexer_location_range_with_nonempty_range() {
1689        let mut lexer = Lexer::from_memory("cat foo", Source::Stdin);
1690        for _ in 0..4 {
1691            lexer.peek_char().now_or_never().unwrap().unwrap();
1692            lexer.consume_char();
1693        }
1694        lexer.peek_char().now_or_never().unwrap().unwrap();
1695
1696        let location = lexer.location_range(1..4);
1697        assert_eq!(*location.code.value.borrow(), "cat foo");
1698        assert_eq!(location.code.start_line_number.get(), 1);
1699        assert_eq!(*location.code.source, Source::Stdin);
1700        assert_eq!(location.range, 1..4);
1701    }
1702
1703    #[test]
1704    fn lexer_location_range_with_range_starting_at_end() {
1705        let mut lexer = Lexer::from_memory("cat", Source::Stdin);
1706        for _ in 0..3 {
1707            lexer.peek_char().now_or_never().unwrap().unwrap();
1708            lexer.consume_char();
1709        }
1710        lexer.peek_char().now_or_never().unwrap().unwrap();
1711
1712        let location = lexer.location_range(3..3);
1713        assert_eq!(*location.code.value.borrow(), "cat");
1714        assert_eq!(location.code.start_line_number.get(), 1);
1715        assert_eq!(*location.code.source, Source::Stdin);
1716        assert_eq!(location.range, 3..3);
1717    }
1718
1719    #[test]
1720    #[should_panic]
1721    fn lexer_location_range_with_unconsumed_code() {
1722        let lexer = Lexer::with_code("echo ok");
1723        let _ = lexer.location_range(0..0);
1724    }
1725
1726    #[test]
1727    #[should_panic(expected = "The index 1 must not be larger than the current index 0")]
1728    fn lexer_location_range_with_range_out_of_bounds() {
1729        let lexer = Lexer::with_code("");
1730        let _ = lexer.location_range(1..2);
1731    }
1732
1733    #[test]
1734    fn lexer_location_range_with_alias_substitution() {
1735        let mut lexer = Lexer::with_code(" a;");
1736        let alias_def = Rc::new(Alias {
1737            name: "a".to_string(),
1738            replacement: "abc".to_string(),
1739            global: false,
1740            origin: Location::dummy("dummy"),
1741        });
1742        for _ in 0..2 {
1743            lexer.peek_char().now_or_never().unwrap().unwrap();
1744            lexer.consume_char();
1745        }
1746        lexer.substitute_alias(1, &alias_def);
1747        for _ in 1..5 {
1748            lexer.peek_char().now_or_never().unwrap().unwrap();
1749            lexer.consume_char();
1750        }
1751
1752        let location = lexer.location_range(2..5);
1753        assert_eq!(*location.code.value.borrow(), "abc");
1754        assert_eq!(location.code.start_line_number.get(), 1);
1755        assert_matches!(&*location.code.source, Source::Alias { original, alias } => {
1756            assert_eq!(*original.code.value.borrow(), " a;");
1757            assert_eq!(original.code.start_line_number.get(), 1);
1758            assert_eq!(*original.code.source, Source::Unknown);
1759            assert_eq!(original.range, 1..2);
1760            assert_eq!(alias, &alias_def);
1761        });
1762        assert_eq!(location.range, 1..3);
1763    }
1764
1765    #[test]
1766    fn lexer_inner_program_success() {
1767        let mut lexer = Lexer::with_code("x y )");
1768        let source = lexer.inner_program().now_or_never().unwrap().unwrap();
1769        assert_eq!(source, "x y ");
1770    }
1771
1772    #[test]
1773    fn lexer_inner_program_failure() {
1774        let mut lexer = Lexer::with_code("<< )");
1775        let e = lexer.inner_program().now_or_never().unwrap().unwrap_err();
1776        assert_eq!(
1777            e.cause,
1778            ErrorCause::Syntax(SyntaxError::MissingHereDocDelimiter)
1779        );
1780        assert_eq!(*e.location.code.value.borrow(), "<< )");
1781        assert_eq!(e.location.code.start_line_number.get(), 1);
1782        assert_eq!(*e.location.code.source, Source::Unknown);
1783        assert_eq!(e.location.range, 3..4);
1784    }
1785}