yash_syntax/parser/lex/
core.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Fundamental building blocks for the lexical analyzer
18
19use super::keyword::Keyword;
20use super::op::Operator;
21use crate::alias::Alias;
22use crate::input::Context;
23use crate::input::InputObject;
24use crate::input::Memory;
25use crate::parser::core::Result;
26use crate::parser::error::Error;
27use crate::source::Code;
28use crate::source::Location;
29use crate::source::Source;
30use crate::source::SourceChar;
31use crate::source::source_chars;
32use crate::syntax::Word;
33use std::cell::RefCell;
34use std::fmt;
35use std::num::NonZeroU64;
36use std::ops::Deref;
37use std::ops::DerefMut;
38use std::ops::Range;
39use std::pin::Pin;
40use std::rc::Rc;
41
/// Tests whether a character is a blank.
///
/// Every whitespace character other than the newline counts as a blank.
pub fn is_blank(c: char) -> bool {
    // TODO locale
    match c {
        '\n' => false,
        other => other.is_whitespace(),
    }
}
47
/// Result of [`LexerCore::peek_char`]
///
/// Both variants borrow data held by the lexer core that was peeked.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PeekChar<'a> {
    /// A character is available at the current position.
    Char(&'a SourceChar),
    /// There is no more character; the payload is the location recorded for
    /// the end of input.
    EndOfInput(&'a Location),
}
54
55impl<'a> PeekChar<'a> {
56    /// Returns the location that was peeked.
57    #[must_use]
58    fn location<'b>(self: &'b PeekChar<'a>) -> &'a Location {
59        match self {
60            PeekChar::Char(c) => &c.location,
61            PeekChar::EndOfInput(l) => l,
62        }
63    }
64}
65
/// Token identifier, or classification of tokens
///
/// This enum classifies a token as defined in POSIX XCU 2.10.1 Shell Grammar Lexical
/// Conventions, but does not exactly reflect the further distinctions defined in
/// POSIX XCU 2.10.2 Shell Grammar Rules.
///
/// For convenience, the special token identifier `EndOfInput` is included.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenId {
    /// `TOKEN`
    ///
    /// If this token _looks like_ a reserved word, this variant has some
    /// associated `Keyword` that describes the word. However, it depends on
    /// context whether a token is actually regarded as a reserved word or
    /// just as an ordinary word. You must ensure that you're in an
    /// applicable context when examining the `Keyword` value.
    Token(Option<Keyword>),
    /// Operator
    Operator(Operator),
    /// `IO_NUMBER`
    IoNumber,
    /// `IO_LOCATION`
    IoLocation,
    /// Imaginary token identifier for the end of input
    ///
    /// This identifier is always treated as a clause delimiter by
    /// [`is_clause_delimiter`](Self::is_clause_delimiter).
    EndOfInput,
}
92
93impl TokenId {
94    /// Determines if this token can be a delimiter of a clause.
95    ///
96    /// This function delegates to [`Keyword::is_clause_delimiter`] if the token
97    /// ID is a (possible) keyword, or to [`Operator::is_clause_delimiter`] if
98    /// it is an operator. For `EndOfInput` the function returns true.
99    /// Otherwise, the result is false.
100    pub fn is_clause_delimiter(self) -> bool {
101        use TokenId::*;
102        match self {
103            Token(Some(keyword)) => keyword.is_clause_delimiter(),
104            Token(None) => false,
105            Operator(operator) => operator.is_clause_delimiter(),
106            IoNumber => false,
107            IoLocation => false,
108            EndOfInput => true,
109        }
110    }
111}
112
/// Result of lexical analysis produced by the [`Lexer`]
///
/// The [`Display`](fmt::Display) implementation prints the `word` only.
#[derive(Debug)]
pub struct Token {
    /// Content of the token
    ///
    /// The word value contains at least one [unit](crate::syntax::WordUnit),
    /// regardless of whether the token is an operator. The only exception is
    /// when `id` is `EndOfInput`, in which case the word is empty.
    pub word: Word,
    /// Token identifier
    pub id: TokenId,
    /// Position of the first character of the word
    // NOTE(review): presumably an index in the sense of `Lexer::index`;
    // confirm against the code that constructs tokens.
    pub index: usize,
}
127
128impl fmt::Display for Token {
129    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130        write!(f, "{}", self.word)
131    }
132}
133
/// State of the input function in a lexer
///
/// Once the state leaves `Alive`, `LexerCore::peek_char` keeps reporting the
/// stored end-of-input location or error without calling the input function
/// again, until the state is reset.
#[derive(Clone, Debug)]
enum InputState {
    /// More lines may still be read from the input function.
    Alive,
    /// The input function returned an empty line; the location marks where
    /// the input ended.
    EndOfInput(Location),
    /// The input function failed; the error is returned again on every
    /// subsequent peek.
    Error(Error),
}
141
/// Source character with additional attribute
#[derive(Clone, Debug, Eq, PartialEq)]
struct SourceCharEx {
    /// The character and its location
    value: SourceChar,
    /// Whether this character has been marked as part of a line continuation
    /// (see `LexerCore::mark_line_continuation`)
    is_line_continuation: bool,
}
148
149fn ex<I: IntoIterator<Item = SourceChar>>(i: I) -> impl Iterator<Item = SourceCharEx> {
150    i.into_iter().map(|sc| SourceCharEx {
151        value: sc,
152        is_line_continuation: false,
153    })
154}
155
/// Core part of the lexical analyzer
///
/// The core buffers the characters read from the input function and keeps
/// track of the position of the next character to be parsed.
struct LexerCore<'a> {
    // The `input` field could be a `&'a mut dyn InputObject + 'a`, but it is
    // `Box<dyn InputObject + 'a>` to allow the lexer to take ownership of the
    // input object. This is necessary for `Lexer::with_code` and similarly
    // constructed lexers.
    input: Box<dyn InputObject + 'a>,
    // Whether the input function can still be called, has reached the end of
    // input, or has failed.
    state: InputState,
    // Code object that accumulates the text of the lines read so far. It is
    // shared with the locations of the characters read from the input.
    raw_code: Rc<Code>,
    // Characters that have been read (and possibly replaced by alias
    // substitution).
    source: Vec<SourceCharEx>,
    // Position in `source` of the next character to be parsed.
    index: usize,
}
168
169impl<'a> LexerCore<'a> {
170    /// Creates a new lexer core that reads using the given input function.
171    #[must_use]
172    fn new(
173        input: Box<dyn InputObject + 'a>,
174        start_line_number: NonZeroU64,
175        source: Rc<Source>,
176    ) -> LexerCore<'a> {
177        LexerCore {
178            input,
179            raw_code: Rc::new(Code {
180                value: RefCell::new(String::new()),
181                start_line_number,
182                source,
183            }),
184            state: InputState::Alive,
185            source: Vec::new(),
186            index: 0,
187        }
188    }
189
190    /// Computes the start index of the location at the current position.
191    #[must_use]
192    fn next_index(&self) -> usize {
193        let Some(last) = self.source.last() else {
194            return 0;
195        };
196
197        let mut location = &last.value.location;
198        while let Source::Alias { original, .. } = &*location.code.source {
199            location = original;
200        }
201        location.range.end
202    }
203
    /// Peeks the next character, reading the next line if necessary.
    ///
    /// Returns the character at the current index if it is already buffered.
    /// Otherwise, the input function is called to read another line. An empty
    /// line returned from the input function is regarded as the end of input.
    /// The end-of-input and error states are remembered, so subsequent calls
    /// return the same result without calling the input function again.
    async fn peek_char(&mut self) -> Result<PeekChar<'_>> {
        loop {
            // NOTE(review): an `if let Some(sc) = self.source.get(self.index)`
            // form was left commented out here, presumably because returning
            // that borrow from inside the loop is rejected by the borrow
            // checker — confirm before simplifying. Hence the index check
            // followed by a separate indexing expression.
            if self.index < self.source.len() {
                return Ok(PeekChar::Char(&self.source[self.index].value));
            }

            match self.state {
                InputState::Alive => (),
                InputState::EndOfInput(ref location) => return Ok(PeekChar::EndOfInput(location)),
                InputState::Error(ref error) => return Err(error.clone()),
            }

            // Read more input
            let index = self.next_index();
            match self.input.next_line(&self.input_context()).await {
                Ok(line) => {
                    if line.is_empty() {
                        // End of input
                        self.state = InputState::EndOfInput(Location {
                            code: Rc::clone(&self.raw_code),
                            range: index..index,
                        });
                    } else {
                        // Successful read
                        self.raw_code.value.borrow_mut().push_str(&line);
                        self.source
                            .extend(ex(source_chars(&line, &self.raw_code, index)));
                    }
                }
                Err(io_error) => {
                    // Remember the error; the next iteration returns it.
                    self.state = InputState::Error(Error {
                        cause: io_error.into(),
                        location: Location {
                            code: Rc::clone(&self.raw_code),
                            range: index..index,
                        },
                    });
                }
            }
        }
    }
248
249    /// Returns the input context for the next character.
250    fn input_context(&self) -> Context {
251        let mut context = Context::default();
252        context.set_is_first_line(self.raw_code.value.borrow().is_empty());
253        context
254    }
255
256    /// Consumes the next character.
257    ///
258    /// This function must be called after [`peek_char`](Lexer::peek_char) has successfully
259    /// returned the character. Consuming a character that has not yet been peeked would result
260    /// in a panic!
261    fn consume_char(&mut self) {
262        assert!(
263            self.index < self.source.len(),
264            "A character must have been peeked before being consumed: index={}",
265            self.index
266        );
267        self.index += 1;
268    }
269
    /// Returns a reference to the character at the given index.
    ///
    /// # Panics
    ///
    /// If `index` is larger than the current index. Note that the assertion
    /// allows `index` to be equal to the current index; if the character at
    /// that position has not been read into the buffer, the buffer access
    /// panics instead.
    #[must_use]
    fn peek_char_at(&self, index: usize) -> &SourceChar {
        assert!(
            index <= self.index,
            "The index {} must not be larger than the current index {}",
            index,
            self.index
        );
        &self.source[index].value
    }
281
    /// Returns the current index, i.e., the position in the buffer of the
    /// character to be parsed next.
    #[must_use]
    fn index(&self) -> usize {
        self.index
    }
287
    /// Rewinds the index to the given value.
    ///
    /// # Panics
    ///
    /// If `index` is larger than the current index.
    fn rewind(&mut self, index: usize) {
        assert!(
            index <= self.index,
            "The new index {} must not be larger than the current index {}",
            index,
            self.index
        );
        self.index = index;
    }
298
    /// Checks if there is any character that has been read from the input
    /// source but not yet consumed.
    ///
    /// Returns true if the current index is before the end of the buffer.
    #[must_use]
    fn pending(&self) -> bool {
        self.index < self.source.len()
    }
305
    /// Clears the internal buffer.
    ///
    /// The retained code is replaced with a new empty one that shares the same
    /// source, the character buffer is emptied, and the index is reset to 0.
    /// The input state is left untouched.
    fn flush(&mut self) {
        // NOTE(review): `line_number(usize::MAX)` presumably yields the line
        // number just past the text accumulated so far, so that line numbering
        // continues across the flush — confirm against `Code::line_number`.
        let start_line_number = self.raw_code.line_number(usize::MAX);
        self.raw_code = Rc::new(Code {
            value: RefCell::new(String::new()),
            start_line_number,
            source: self.raw_code.source.clone(),
        });
        self.source.clear();
        self.index = 0;
    }
317
    /// Clears an end-of-input or error status so that the lexer can resume
    /// parsing.
    ///
    /// The input state is set back to alive and the internal buffer is
    /// [flushed](Self::flush).
    fn reset(&mut self) {
        self.state = InputState::Alive;
        self.flush();
    }
324
325    /// Extracts a string from the source code range.
326    fn source_string(&self, range: Range<usize>) -> String {
327        self.source[range].iter().map(|c| c.value.value).collect()
328    }
329
    /// Returns a location for a given range of the source code.
    ///
    /// If the range starts at the end of the buffer and the end of input has
    /// been reached, the end-of-input location is returned. Otherwise, the
    /// result starts at the location of the first character in the range and
    /// extends over the following characters for as long as they belong to
    /// the same code as the first one.
    ///
    /// # Panics
    ///
    /// If the range refers to a character that cannot be accessed with
    /// [`peek_char_at`](Self::peek_char_at).
    #[must_use]
    fn location_range(&self, range: Range<usize>) -> Location {
        if range.start == self.source.len() {
            if let InputState::EndOfInput(ref location) = self.state {
                return location.clone();
            }
        }
        let start = &self.peek_char_at(range.start).location;
        let code = start.code.clone();
        // Find the end of the last character that shares the code with the
        // first one. An empty range yields an empty location at the start.
        let end = range
            .map(|index| &self.peek_char_at(index).location)
            .take_while(|location| location.code == code)
            .last()
            .map(|location| location.range.end)
            .unwrap_or(start.range.start);
        let range = start.range.start..end;
        Location { code, range }
    }
349
350    /// Marks the characters in the given range as line continuation.
351    ///
352    /// This function sets the `is_line_continuation` flag of the characters in
353    /// the range to true. The characters must have been read before calling
354    /// this function.
355    fn mark_line_continuation(&mut self, range: Range<usize>) {
356        assert!(
357            range.end <= self.index,
358            "characters must have been read (range = {:?}, current index = {})",
359            range,
360            self.index
361        );
362        for sc in &mut self.source[range] {
363            sc.is_line_continuation = true;
364        }
365    }
366
367    /// Performs alias substitution.
368    ///
369    /// This function replaces the characters starting from the `begin` index up
370    /// to the current position with the alias value. The resulting part of code
371    /// will be characters with a [`Source::Alias`] origin.
372    fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
373        let end = self.index;
374        assert!(
375            begin < end,
376            "begin index {begin} should be less than end index {end}"
377        );
378
379        let source = Rc::new(Source::Alias {
380            original: self.location_range(begin..end),
381            alias: alias.clone(),
382        });
383        let code = Rc::new(Code {
384            value: RefCell::new(alias.replacement.clone()),
385            start_line_number: NonZeroU64::new(1).unwrap(),
386            source,
387        });
388        let repl = ex(source_chars(&alias.replacement, &code, 0));
389
390        self.source.splice(begin..end, repl);
391        self.index = begin;
392    }
393
    /// Tests if the given index is after the replacement string of alias
    /// substitution that ends with a blank.
    ///
    /// The buffer is scanned backward from just before `index`. Only blanks
    /// and characters marked as line continuation may be skipped; any other
    /// character ends the scan with a negative result.
    ///
    /// # Panics
    ///
    /// If `index` is larger than the currently read index.
    fn is_after_blank_ending_alias(&self, index: usize) -> bool {
        /// Tests if the last character of the string is a blank.
        fn ends_with_blank(s: &str) -> bool {
            s.chars().next_back().is_some_and(is_blank)
        }
        /// Tests if the character exists and its source reports being an
        /// alias for the name of the given alias.
        fn is_same_alias(alias: &Alias, sc: Option<&SourceCharEx>) -> bool {
            sc.is_some_and(|sc| sc.value.location.code.source.is_alias_for(&alias.name))
        }

        for index in (0..index).rev() {
            let sc = &self.source[index];

            // A character that is neither a blank nor part of a line
            // continuation separates the index from any preceding alias.
            if !sc.is_line_continuation && !is_blank(sc.value.value) {
                return false;
            }

            if let Source::Alias { ref alias, .. } = *sc.value.location.code.source {
                #[allow(clippy::collapsible_if)]
                if ends_with_blank(&alias.replacement) {
                    // The character must be the last one that comes from this
                    // alias: the following character must not be from it.
                    if !is_same_alias(alias, self.source.get(index + 1)) {
                        return true;
                    }
                }
            }
        }

        false
    }
427}
428
429impl fmt::Debug for LexerCore<'_> {
430    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
431        f.debug_struct("LexerCore")
432            .field("state", &self.state)
433            .field("source", &self.source)
434            .field("index", &self.index)
435            .finish_non_exhaustive()
436    }
437}
438
/// Configuration for the [lexer](Lexer)
///
/// `Config` is a builder for the lexer. A [new](Self::new) instance is created
/// with default settings. You can then customize the settings by modifying the
/// corresponding fields. Finally, you can pass an input object to the
/// [`input`](Self::input) method to create a lexer.
///
/// # Deprecation
///
/// This struct is deprecated. Use [`yash_env::parser::Config`] instead.
#[deprecated(since = "0.17.0", note = "use `yash_env::parser::Config` instead")]
#[derive(Debug)]
#[must_use = "you must call `input` to create a lexer"]
#[non_exhaustive]
pub struct Config {
    /// Line number for the first line of the input
    ///
    /// The lexer counts the line number from this value to annotate the
    /// location of the tokens. The line number is saved in the
    /// `start_line_number` field of the [`Code`] instance that is contained in
    /// the [`Location`] instance of the token.
    ///
    /// The default value is 1.
    pub start_line_number: NonZeroU64,

    /// Source of the input
    ///
    /// The source is used to annotate the location of the tokens. This value
    /// is saved in the `source` field of the [`Code`] instance that is
    /// contained in the [`Location`] instance of the token.
    ///
    /// The default value is `None`, in which case the source is set to
    /// [`Source::Unknown`]. It is recommended to set this to a more informative
    /// value, so that the locations in the parsed syntax tree can be traced
    /// back to the source code. In particular, the correct source is necessary
    /// to indicate the location of possible errors that occur during parsing
    /// and execution.
    pub source: Option<Rc<Source>>,
}
478
479#[allow(deprecated)]
480impl Config {
481    /// Creates a new configuration with default settings.
482    ///
483    /// # Deprecation
484    ///
485    /// This struct is deprecated. Use [`yash_env::parser::Config`] instead.
486    #[deprecated(since = "0.17.0", note = "use `yash_env::parser::Config` instead")]
487    pub fn new() -> Self {
488        Config {
489            start_line_number: NonZeroU64::MIN,
490            source: None,
491        }
492    }
493
494    /// Creates a lexer with the given input object.
495    pub fn input<'a>(self, input: Box<dyn InputObject + 'a>) -> Lexer<'a> {
496        let mut config = yash_env::parser::Config::with_input(input);
497        config.start_line_number = self.start_line_number;
498        config.source = self.source;
499        config.into()
500    }
501}
502
503#[allow(deprecated)]
504impl Default for Config {
505    fn default() -> Self {
506        Self::new()
507    }
508}
509
/// Lexical analyzer
///
/// A lexer reads lines using an input function and parses the characters into tokens. It has an
/// internal buffer containing the characters that have been read and the position (or the
/// index) of the character that is to be parsed next.
///
/// `Lexer` has primitive functions such as [`peek_char`](Lexer::peek_char) that provide access
/// to the character at the current position. Derived functions such as
/// [`skip_blanks_and_comment`](Lexer::skip_blanks_and_comment) depend on those primitives to
/// parse more complex structures in the source code. Usually, the lexer is used by a
/// [parser](super::super::Parser) to read the source code and produce a syntax
/// tree, so you don't need to call these functions directly.
///
/// To construct a lexer, create a configuration object
/// ([`yash_env::parser::Config`]), set the desired fields, and then call
/// `into()` or [`Lexer::from`].
/// `Lexer` also has several convenience functions such as [`new`](Self::new)
/// and [`with_code`](Self::with_code) for creating a lexer with minimal
/// configuration.
///
/// ```
/// # use yash_env::parser::Config;
/// # use yash_syntax::input::Memory;
/// # use yash_syntax::parser::{lex::Lexer, Parser};
/// # use yash_syntax::source::Source;
/// let mut config = Config::with_input(Box::new(Memory::new("echo hello\n")));
/// config.start_line_number = 10.try_into().unwrap();
/// config.source = Some(Source::CommandString.into());
/// let mut lexer = Lexer::from(config);
/// let mut parser = Parser::new(&mut lexer);
/// _ = parser.command_line();
/// ```
#[derive(Debug)]
#[must_use]
pub struct Lexer<'a> {
    // `Lexer` is a thin wrapper around `LexerCore`. `Lexer` delegates most
    // functions to `LexerCore`. `Lexer` adds automatic line-continuation
    // skipping to `LexerCore`.
    core: LexerCore<'a>,
    // Whether `peek_char` skips line continuations. This is true for a newly
    // created lexer and false while line continuation is disabled with
    // `disable_line_continuation`.
    line_continuation_enabled: bool,
}
551
552/// Creates a lexer from a configuration defined in the [`yash-env`](yash_env)
553/// crate.
554impl<'a> From<yash_env::parser::Config<'a>> for Lexer<'a> {
555    fn from(config: yash_env::parser::Config<'a>) -> Self {
556        let input = config.input;
557        let start_line_number = config.start_line_number;
558        let source = config.source.unwrap_or_else(|| Rc::new(Source::Unknown));
559        Lexer {
560            core: LexerCore::new(input, start_line_number, source),
561            line_continuation_enabled: true,
562        }
563    }
564}
565
566impl<'a> Lexer<'a> {
    /// Creates a new configuration with default settings.
    ///
    /// This is a synonym for [`Config::new`]. You can modify the settings and
    /// then create a lexer with the [`input`](Config::input) method.
    ///
    /// # Deprecation
    ///
    /// The `Config` struct defined in this module is deprecated. Use
    /// [`yash_env::parser::Config`] instead.
    // `allow(deprecated)` is needed because this function itself names and
    // constructs the deprecated `Config`.
    #[allow(deprecated)]
    #[deprecated(since = "0.17.0", note = "use `yash_env::parser::Config` instead")]
    #[inline(always)]
    pub fn config() -> Config {
        Config::new()
    }
582
583    /// Creates a new lexer that reads using the given input function.
584    ///
585    /// This is a convenience function that creates a lexer with the given input
586    /// object and the default configuration. To customize the configuration,
587    /// instantiate a [`yash_env::parser::Config`] object instead.
588    ///
589    /// This function is best used for testing or for simple cases where you
590    /// don't need to customize the lexer. For practical use, it is recommended
591    /// to provide supplementary information with a configuration before
592    /// creating a lexer.
593    pub fn new(input: Box<dyn InputObject + 'a>) -> Lexer<'a> {
594        yash_env::parser::Config::with_input(input).into()
595    }
596
597    /// Creates a new lexer with a fixed source code.
598    ///
599    /// This is a convenience function that creates a lexer that reads from a
600    /// string using [`Memory`] with the default configuration.
601    ///
602    /// This function is best used for testing or for simple cases where you
603    /// don't need to customize the lexer. For practical use, it is recommended
604    /// to provide supplementary information with a configuration before
605    /// creating a lexer.
606    pub fn with_code(code: &'a str) -> Lexer<'a> {
607        Self::new(Box::new(Memory::new(code)))
608    }
609
610    /// Creates a new lexer with a fixed source code.
611    ///
612    /// This is a convenience function that creates a lexer that reads from a
613    /// string using [`Memory`] with the specified source starting from line
614    /// number 1.
615    ///
616    /// This function is soft-deprecated. Use [`with_code`](Self::with_code)
617    /// instead if the source is `Unknown`. Otherwise, use
618    /// [`yash_env::parser::Config`] to create a lexer with a customized
619    /// configuration.
620    pub fn from_memory<S: Into<Rc<Source>>>(code: &'a str, source: S) -> Lexer<'a> {
621        fn inner(code: &str, source: Rc<Source>) -> Lexer<'_> {
622            let mut config = yash_env::parser::Config::with_input(Box::new(Memory::new(code)));
623            config.source = Some(source);
624            config.into()
625        }
626        inner(code, source.into())
627    }
628
    /// Disables line continuation recognition onward.
    ///
    /// By default, [`peek_char`](Self::peek_char) silently skips line
    /// continuation sequences. When line continuation is disabled, however,
    /// `peek_char` returns characters literally.
    ///
    /// The returned `PlainLexer` mutably borrows this lexer. Pass it to
    /// [`enable_line_continuation`](Self::enable_line_continuation) to
    /// switch line continuation recognition back on.
    ///
    /// # Panics
    ///
    /// This function will panic if line continuation has already been disabled.
    pub fn disable_line_continuation<'b>(&'b mut self) -> PlainLexer<'b, 'a> {
        assert!(
            self.line_continuation_enabled,
            "line continuation already disabled"
        );
        self.line_continuation_enabled = false;
        PlainLexer { lexer: self }
    }
647
    /// Re-enables line continuation.
    ///
    /// You can pass the `PlainLexer` returned from
    /// [`disable_line_continuation`](Self::disable_line_continuation) to this
    /// function to re-enable line continuation. That is equivalent to dropping
    /// the `PlainLexer` instance, but the code will be more descriptive.
    // This function only takes ownership of the argument and drops it.
    // NOTE(review): the lifetime arguments are written as `<'a, 'b>` here but
    // as `<'b, 'a>` in the return type of `disable_line_continuation` —
    // confirm the order is intended.
    pub fn enable_line_continuation<'b>(_: PlainLexer<'a, 'b>) {}
655
656    /// Skips line continuation, i.e., a backslash followed by a newline.
657    ///
658    /// If there is a line continuation at the current position, this function
659    /// consumes the backslash and the newline and returns `Ok(true)`. The
660    /// characters are marked as line continuation.
661    ///
662    /// If there is no line continuation, this function does nothing and returns
663    /// `Ok(false)`.
664    ///
665    /// This function does nothing if line continuation has been
666    /// [disabled](Self::disable_line_continuation).
667    async fn line_continuation(&mut self) -> Result<bool> {
668        if !self.line_continuation_enabled {
669            return Ok(false);
670        }
671
672        let index = self.core.index();
673        match self.core.peek_char().await? {
674            PeekChar::Char(c) if c.value == '\\' => self.core.consume_char(),
675            _ => return Ok(false),
676        }
677
678        match self.core.peek_char().await? {
679            PeekChar::Char(c) if c.value == '\n' => self.core.consume_char(),
680            _ => {
681                self.core.rewind(index);
682                return Ok(false);
683            }
684        }
685
686        self.core.mark_line_continuation(index..index + 2);
687
688        Ok(true)
689    }
690
691    /// Peeks the next character.
692    ///
693    /// If the end of input is reached, `Ok(None)` is returned. On error,
694    /// `Err(_)` is returned.
695    ///
696    /// If line continuation recognition is enabled, combinations of a backslash
697    /// and a newline are silently skipped before returning the next character.
698    /// Call [`disable_line_continuation`](Self::disable_line_continuation) to
699    /// switch off line continuation recognition.
700    ///
701    /// This function requires a mutable reference to `self` since it may need
702    /// to read the next line if needed.
703    pub async fn peek_char(&mut self) -> Result<Option<char>> {
704        while self.line_continuation().await? {}
705
706        match self.core.peek_char().await? {
707            PeekChar::Char(source_char) => Ok(Some(source_char.value)),
708            PeekChar::EndOfInput(_) => Ok(None),
709        }
710    }
711
712    /// Returns the location of the next character.
713    ///
714    /// If there is no more character (that is, it is the end of input), an imaginary location
715    /// is returned that would be returned if a character existed.
716    ///
717    /// This function requires a mutable reference to `self` since it needs to
718    /// [peek](Self::peek_char) the next character.
719    pub async fn location(&mut self) -> Result<&Location> {
720        self.core.peek_char().await.map(|p| p.location())
721    }
722
    /// Consumes the next character.
    ///
    /// This function must be called after [`peek_char`](Lexer::peek_char) has successfully
    /// returned the character.
    ///
    /// # Panics
    ///
    /// If the character at the current position has not yet been peeked.
    pub fn consume_char(&mut self) {
        self.core.consume_char()
    }
731
    /// Returns the position of the next character, counted from zero.
    ///
    /// Peeking a character does not change the position; consuming one
    /// advances it by one.
    ///
    /// ```
    /// # use yash_syntax::parser::lex::Lexer;
    /// # futures_executor::block_on(async {
    /// let mut lexer = Lexer::with_code("abc");
    /// assert_eq!(lexer.index(), 0);
    /// let _ = lexer.peek_char().await;
    /// assert_eq!(lexer.index(), 0);
    /// lexer.consume_char();
    /// assert_eq!(lexer.index(), 1);
    /// # })
    /// ```
    #[must_use]
    pub fn index(&self) -> usize {
        self.core.index()
    }
749
    /// Moves the current position back to the given index so that characters that have been
    /// consumed can be read again.
    ///
    /// # Panics
    ///
    /// The given index must not be larger than the [current index](Lexer::index), or this
    /// function will panic.
    ///
    /// # Examples
    ///
    /// ```
    /// # use yash_syntax::parser::lex::Lexer;
    /// # futures_executor::block_on(async {
    /// let mut lexer = Lexer::with_code("abc");
    /// let saved_index = lexer.index();
    /// assert_eq!(lexer.peek_char().await, Ok(Some('a')));
    /// lexer.consume_char();
    /// assert_eq!(lexer.peek_char().await, Ok(Some('b')));
    /// lexer.rewind(saved_index);
    /// assert_eq!(lexer.peek_char().await, Ok(Some('a')));
    /// # })
    /// ```
    pub fn rewind(&mut self, index: usize) {
        self.core.rewind(index)
    }
771
    /// Checks if there is any character that has been read from the input
    /// source but not yet consumed.
    ///
    /// This function does not read any more input.
    #[must_use]
    pub fn pending(&self) -> bool {
        self.core.pending()
    }
778
    /// Clears the internal buffer of the lexer.
    ///
    /// Locations returned from [`location`](Self::location) share a single code
    /// instance that is also retained by the lexer. The code grows long as the
    /// lexer reads more input. To prevent the code from getting too large, you
    /// can call this function, which replaces the retained code with a new
    /// empty one. The new code's `start_line_number` will be incremented by the
    /// number of lines in the previous one.
    ///
    /// The current [index](Self::index) is reset to zero.
    pub fn flush(&mut self) {
        self.core.flush()
    }
790
    /// Clears an end-of-input or error status so that the lexer can resume
    /// parsing.
    ///
    /// This function also clears the internal buffer as [`flush`](Self::flush)
    /// does.
    ///
    /// This function will be useful only in an interactive shell where the
    /// user can continue entering commands even after they send an
    /// end-of-input or are interrupted by a syntax error.
    pub fn reset(&mut self) {
        self.core.reset()
    }
800
    /// Peeks the next character and, if the given decider function returns true for it,
    /// advances the position.
    ///
    /// Returns the consumed character if the function returned true. Returns `Ok(None)` if it
    /// returned false or there is no more character. The decider function is not called if
    /// there is no more character.
    ///
    /// This generic function simply forwards to the dynamically dispatched
    /// `consume_char_if_dyn`.
    pub async fn consume_char_if<F>(&mut self, mut f: F) -> Result<Option<&SourceChar>>
    where
        F: FnMut(char) -> bool,
    {
        self.consume_char_if_dyn(&mut f).await
    }
812
813    /// Dynamic version of [`Self::consume_char_if`].
814    pub(crate) async fn consume_char_if_dyn(
815        &mut self,
816        f: &mut dyn FnMut(char) -> bool,
817    ) -> Result<Option<&SourceChar>> {
818        match self.peek_char().await? {
819            Some(c) if f(c) => {
820                let index = self.index();
821                self.consume_char();
822                Ok(Some(self.core.peek_char_at(index)))
823            }
824            _ => Ok(None),
825        }
826    }
827
    /// Extracts a string from the source code range.
    ///
    /// This function returns the source code string for the range specified by
    /// the argument. The range must specify valid indices. If the range
    /// includes a character that has not yet been read, this function will
    /// panic.
    ///
    /// # Panics
    ///
    /// If the argument index is out of bounds, i.e., pointing to an unread
    /// character.
    #[inline]
    pub fn source_string(&self, range: Range<usize>) -> String {
        self.core.source_string(range)
    }
842
    /// Returns a location for a given range of the source code.
    ///
    /// All the characters in the range must have been
    /// [consume](Self::consume_char)d. If the range refers to an unconsumed
    /// character, this function will panic.
    ///
    /// If the characters are from more than one [`Code`] fragment, the location
    /// will only cover the initial portion of the range sharing the same
    /// `Code`.
    ///
    /// # Panics
    ///
    /// This function will panic if the range refers to an unconsumed character.
    ///
    /// If the start index of the range is the end of input, it must have been
    /// peeked and the range must be empty, or the function will panic.
    #[must_use]
    pub fn location_range(&self, range: Range<usize>) -> Location {
        self.core.location_range(range)
    }
863
    /// Performs alias substitution right before the current position.
    ///
    /// This function must be called just after parsing a [word](WordLexer::word) that
    /// matches the name of the given alias. This function does not check that there is
    /// a matching word before the current position. The characters starting from the `begin`
    /// index up to the current position are silently replaced with the alias value.
    ///
    /// The resulting part of the code will be characters with a [`Source::Alias`] origin.
    ///
    /// After the substitution, the position will be set before the replaced string.
    ///
    /// # Panics
    ///
    /// If the replaced part is empty, i.e., `begin >= self.index()`.
    pub fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
        self.core.substitute_alias(begin, alias)
    }
881
    /// Tests if the given index is after the replacement string of an alias
    /// substitution that ends with a blank.
    ///
    /// # Panics
    ///
    /// If `index` is larger than the currently read index.
    pub fn is_after_blank_ending_alias(&self, index: usize) -> bool {
        self.core.is_after_blank_ending_alias(index)
    }
891
892    /// Parses an optional compound list that is the content of a command
893    /// substitution.
894    ///
895    /// This function consumes characters until a token that cannot be the
896    /// beginning of an and-or list is found and returns the string that was
897    /// consumed.
898    pub async fn inner_program(&mut self) -> Result<String> {
899        let begin = self.index();
900
901        let mut parser = super::super::Parser::new(self);
902        parser.maybe_compound_list().await?;
903
904        let end = parser.peek_token().await?.index;
905        self.rewind(end);
906
907        Ok(self.core.source_string(begin..end))
908    }
909
    /// Like [`Lexer::inner_program`], but returns the future in a pinning box.
    ///
    /// The returned future borrows the lexer for as long as it lives.
    // NOTE(review): boxing is presumably needed to break the recursion between
    // the lexer and the parser in async code -- confirm against the callers.
    pub fn inner_program_boxed(&mut self) -> Pin<Box<dyn Future<Output = Result<String>> + '_>> {
        Box::pin(self.inner_program())
    }
914}
915
/// Reference to [`Lexer`] with line continuation disabled
///
/// This struct implements the RAII pattern for temporarily disabling line
/// continuation. When you disable the line continuation of a lexer, you get an
/// instance of `PlainLexer`. You can access the original lexer via the
/// `PlainLexer` until you drop it, at which point line continuation is
/// automatically re-enabled.
#[derive(Debug)]
#[must_use = "You must retain the PlainLexer to keep line continuation disabled"]
pub struct PlainLexer<'a, 'b> {
    /// Lexer whose line continuation is re-enabled when this value is dropped
    lexer: &'a mut Lexer<'b>,
}
928
/// Gives shared access to the wrapped lexer.
impl<'b> Deref for PlainLexer<'_, 'b> {
    type Target = Lexer<'b>;
    fn deref(&self) -> &Lexer<'b> {
        self.lexer
    }
}
935
/// Gives exclusive access to the wrapped lexer.
impl<'b> DerefMut for PlainLexer<'_, 'b> {
    fn deref_mut(&mut self) -> &mut Lexer<'b> {
        self.lexer
    }
}
941
/// Re-enables line continuation on the wrapped lexer.
impl Drop for PlainLexer<'_, '_> {
    fn drop(&mut self) {
        // The flag is set unconditionally, whatever its value was before.
        self.lexer.line_continuation_enabled = true;
    }
}
947
/// Context in which a [word](crate::syntax::Word) is parsed
///
/// How the word of a [switch](crate::syntax::Switch) is parsed depends on
/// whether the parameter expansion containing the switch is part of a text or a
/// word. A `WordContext` value is used to decide the behavior of the lexer.
///
/// Parser functions that depend on the context are implemented in
/// [`WordLexer`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WordContext {
    /// The text unit being parsed is part of a [text](crate::syntax::Text).
    Text,
    /// The text unit being parsed is part of a [word](crate::syntax::Word).
    Word,
}
963
/// Lexer with additional information for parsing [texts](crate::syntax::Text)
/// and [words](crate::syntax::Word)
#[derive(Debug)]
pub struct WordLexer<'a, 'b> {
    /// Underlying lexer, also reachable through `Deref` and `DerefMut`
    pub lexer: &'a mut Lexer<'b>,
    /// Whether a text or a word is being parsed
    pub context: WordContext,
}
971
/// Gives shared access to the underlying lexer.
impl<'b> Deref for WordLexer<'_, 'b> {
    type Target = Lexer<'b>;
    fn deref(&self) -> &Lexer<'b> {
        self.lexer
    }
}
978
/// Gives exclusive access to the underlying lexer.
impl<'b> DerefMut for WordLexer<'_, 'b> {
    fn deref_mut(&mut self) -> &mut Lexer<'b> {
        self.lexer
    }
}
984
985#[cfg(test)]
986mod tests {
987    use super::*;
988    use crate::input::Input;
989    use crate::parser::error::ErrorCause;
990    use crate::parser::error::SyntaxError;
991    use assert_matches::assert_matches;
992    use futures_util::FutureExt;
993
    #[test]
    fn lexer_core_peek_char_empty_source() {
        // Peeking an empty input must report the end of input at the empty
        // range 0..0 of an empty code that starts at the given line number.
        let input = Memory::new("");
        let line = NonZeroU64::new(32).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        let result = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
            assert_eq!(*location.code.value.borrow(), "");
            assert_eq!(location.code.start_line_number, line);
            assert_eq!(*location.code.source, Source::Unknown);
            assert_eq!(location.range, 0..0);
        });
    }
1007
    #[test]
    fn lexer_core_peek_char_io_error() {
        // Input that fails on every read, used both as the `Input`
        // implementation and as the payload of the I/O error it returns.
        #[derive(Debug)]
        struct Failing;
        impl fmt::Display for Failing {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                write!(f, "Failing")
            }
        }
        impl std::error::Error for Failing {}
        impl Input for Failing {
            async fn next_line(&mut self, _: &Context) -> crate::input::Result {
                Err(std::io::Error::other(Failing))
            }
        }
        let line = NonZeroU64::new(42).unwrap();
        let mut lexer = LexerCore::new(Box::new(Failing), line, Rc::new(Source::Unknown));

        // The I/O error must surface as `ErrorCause::Io`, located at the empty
        // range of the still-empty code.
        let e = lexer.peek_char().now_or_never().unwrap().unwrap_err();
        assert_matches!(e.cause, ErrorCause::Io(io_error) => {
            assert_eq!(io_error.kind(), std::io::ErrorKind::Other);
        });
        assert_eq!(*e.location.code.value.borrow(), "");
        assert_eq!(e.location.code.start_line_number, line);
        assert_eq!(*e.location.code.source, Source::Unknown);
        assert_eq!(e.location.range, 0..0);
    }
1035
    #[test]
    fn lexer_core_peek_char_context_is_first_line() {
        // In this test case, this mock input function will be called twice.
        // It expects `is_first_line` to be true on the first call only.
        struct InputMock {
            first: bool,
        }
        impl Input for InputMock {
            async fn next_line(&mut self, context: &Context) -> crate::input::Result {
                assert_eq!(context.is_first_line(), self.first);
                self.first = false;
                Ok("\n".to_owned())
            }
        }

        let input = InputMock { first: true };
        let line = NonZeroU64::new(42).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));

        // Each line holds a single newline, so every peek that follows a
        // consume has to read one more line from the mock.
        let peek = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(peek, Ok(PeekChar::Char(_)));
        lexer.consume_char();

        let peek = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(peek, Ok(PeekChar::Char(_)));
        lexer.consume_char();
    }
1062
1063    #[test]
1064    fn lexer_core_consume_char_success() {
1065        let input = Memory::new("a\nb");
1066        let line = NonZeroU64::new(1).unwrap();
1067        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1068
1069        let result = lexer.peek_char().now_or_never().unwrap();
1070        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1071            assert_eq!(c.value, 'a');
1072            assert_eq!(*c.location.code.value.borrow(), "a\n");
1073            assert_eq!(c.location.code.start_line_number, line);
1074            assert_eq!(*c.location.code.source, Source::Unknown);
1075            assert_eq!(c.location.range, 0..1);
1076        });
1077        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1078            assert_eq!(c.value, 'a');
1079            assert_eq!(*c.location.code.value.borrow(), "a\n");
1080            assert_eq!(c.location.code.start_line_number, line);
1081            assert_eq!(*c.location.code.source, Source::Unknown);
1082            assert_eq!(c.location.range, 0..1);
1083        });
1084        lexer.consume_char();
1085
1086        let result = lexer.peek_char().now_or_never().unwrap();
1087        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1088            assert_eq!(c.value, '\n');
1089            assert_eq!(*c.location.code.value.borrow(), "a\n");
1090            assert_eq!(c.location.code.start_line_number, line);
1091            assert_eq!(*c.location.code.source, Source::Unknown);
1092            assert_eq!(c.location.range, 1..2);
1093        });
1094        lexer.consume_char();
1095
1096        let result = lexer.peek_char().now_or_never().unwrap();
1097        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1098            assert_eq!(c.value, 'b');
1099            assert_eq!(*c.location.code.value.borrow(), "a\nb");
1100            assert_eq!(c.location.code.start_line_number.get(), 1);
1101            assert_eq!(*c.location.code.source, Source::Unknown);
1102            assert_eq!(c.location.range, 2..3);
1103        });
1104        lexer.consume_char();
1105
1106        let result = lexer.peek_char().now_or_never().unwrap();
1107        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
1108            assert_eq!(*location.code.value.borrow(), "a\nb");
1109            assert_eq!(location.code.start_line_number.get(), 1);
1110            assert_eq!(*location.code.source, Source::Unknown);
1111            assert_eq!(location.range, 3..3);
1112        });
1113    }
1114
1115    #[test]
1116    #[should_panic(expected = "A character must have been peeked before being consumed: index=0")]
1117    fn lexer_core_consume_char_panic() {
1118        let input = Memory::new("a");
1119        let line = NonZeroU64::new(1).unwrap();
1120        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1121        lexer.consume_char();
1122    }
1123
    #[test]
    fn lexer_core_peek_char_at() {
        // `peek_char_at` must return the characters seen earlier, including
        // one that has been peeked but not yet consumed (index 2).
        let input = Memory::new("a\nb");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));

        let c0 = assert_matches!(
            lexer.peek_char().now_or_never().unwrap(),
            Ok(PeekChar::Char(c)) => c.clone()
        );
        lexer.consume_char();

        let c1 = assert_matches!(
            lexer.peek_char().now_or_never().unwrap(),
            Ok(PeekChar::Char(c)) => c.clone()
        );
        lexer.consume_char();

        let c2 = assert_matches!(
            lexer.peek_char().now_or_never().unwrap(),
            Ok(PeekChar::Char(c)) => c.clone()
        );

        assert_eq!(lexer.peek_char_at(0), &c0);
        assert_eq!(lexer.peek_char_at(1), &c1);
        assert_eq!(lexer.peek_char_at(2), &c2);
    }
1151
    #[test]
    fn lexer_core_index() {
        // The index starts at 0, is not changed by peeking, and grows by one
        // for every consumed character.
        let input = Memory::new("a\nb");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));

        assert_eq!(lexer.index(), 0);
        lexer.peek_char().now_or_never().unwrap().unwrap();
        assert_eq!(lexer.index(), 0);
        lexer.consume_char();

        assert_eq!(lexer.index(), 1);
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();

        assert_eq!(lexer.index(), 2);
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();

        assert_eq!(lexer.index(), 3);
    }
1173
    #[test]
    fn lexer_core_rewind_success() {
        let input = Memory::new("abc");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        // Rewinding to the current index is allowed and changes nothing.
        lexer.rewind(0);
        assert_eq!(lexer.index(), 0);

        // Consume two characters and go back to the beginning.
        let _ = lexer.peek_char().now_or_never().unwrap();
        lexer.consume_char();
        let _ = lexer.peek_char().now_or_never().unwrap();
        lexer.consume_char();
        lexer.rewind(0);

        // The first character must be peeked again after the rewind.
        let result = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(result, Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'a');
            assert_eq!(*c.location.code.value.borrow(), "abc");
            assert_eq!(c.location.code.start_line_number, line);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 0..1);
        });
    }
1197
1198    #[test]
1199    #[should_panic(expected = "The new index 1 must not be larger than the current index 0")]
1200    fn lexer_core_rewind_invalid_index() {
1201        let input = Memory::new("abc");
1202        let line = NonZeroU64::new(1).unwrap();
1203        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1204        lexer.rewind(1);
1205    }
1206
1207    #[test]
1208    fn lexer_core_source_string() {
1209        let input = Memory::new("ab\ncd");
1210        let line = NonZeroU64::new(1).unwrap();
1211        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1212        for _ in 0..4 {
1213            let _ = lexer.peek_char().now_or_never().unwrap();
1214            lexer.consume_char();
1215        }
1216
1217        let result = lexer.source_string(1..4);
1218        assert_eq!(result, "b\nc");
1219    }
1220
1221    #[test]
1222    #[should_panic(expected = "begin index 0 should be less than end index 0")]
1223    fn lexer_core_substitute_alias_with_invalid_index() {
1224        let input = Memory::new("a b");
1225        let line = NonZeroU64::new(1).unwrap();
1226        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1227        let alias = Rc::new(Alias {
1228            name: "a".to_string(),
1229            replacement: "".to_string(),
1230            global: false,
1231            origin: Location::dummy("dummy"),
1232        });
1233        lexer.substitute_alias(0, &alias);
1234    }
1235
    #[test]
    fn lexer_core_substitute_alias_single_line_replacement() {
        // Replaces the consumed "a" of "a b" with "lex" and checks that the
        // replacement characters come first, followed by the rest of the input.
        let input = Memory::new("a b");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        let alias = Rc::new(Alias {
            name: "a".to_string(),
            replacement: "lex".to_string(),
            global: false,
            origin: Location::dummy("dummy"),
        });

        let _ = lexer.peek_char().now_or_never().unwrap();
        lexer.consume_char();

        lexer.substitute_alias(0, &alias);

        // Each replacement character belongs to a code whose source is the
        // alias and whose original location is the replaced "a" (0..1).
        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'l');
            assert_eq!(*c.location.code.value.borrow(), "lex");
            assert_eq!(c.location.code.start_line_number.get(), 1);
            assert_matches!(&*c.location.code.source,
                Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), "a b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 0..1);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 0..1);
        });
        lexer.consume_char();

        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'e');
            assert_eq!(*c.location.code.value.borrow(), "lex");
            assert_eq!(c.location.code.start_line_number, line);
            assert_matches!(&*c.location.code.source,
                Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), "a b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 0..1);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 1..2);
        });
        lexer.consume_char();

        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'x');
            assert_eq!(*c.location.code.value.borrow(), "lex");
            assert_eq!(c.location.code.start_line_number, line);
            assert_matches!(&*c.location.code.source,
                Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), "a b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 0..1);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 2..3);
        });
        lexer.consume_char();

        // The characters after the replaced word keep their original location.
        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, ' ');
            assert_eq!(*c.location.code.value.borrow(), "a b");
            assert_eq!(c.location.code.start_line_number, line);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 1..2);
        });
        lexer.consume_char();
    }
1310
    #[test]
    fn lexer_core_substitute_alias_multi_line_replacement() {
        // Replaces "foo" (indices 1..4 of " foo b") with a replacement that
        // contains a newline.
        let input = Memory::new(" foo b");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        let alias = Rc::new(Alias {
            name: "foo".to_string(),
            replacement: "x\ny".to_string(),
            global: true,
            origin: Location::dummy("loc"),
        });

        // Consume the leading blank and the word "foo".
        for _ in 0..4 {
            let _ = lexer.peek_char().now_or_never().unwrap();
            lexer.consume_char();
        }

        lexer.substitute_alias(1, &alias);

        // All replacement characters share one code "x\ny", even though it
        // spans two lines.
        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'x');
            assert_eq!(*c.location.code.value.borrow(), "x\ny");
            assert_eq!(c.location.code.start_line_number, line);
            assert_matches!(&*c.location.code.source,
                Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), " foo b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 1..4);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 0..1);
        });
        lexer.consume_char();

        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, '\n');
            assert_eq!(*c.location.code.value.borrow(), "x\ny");
            assert_eq!(c.location.code.start_line_number, line);
            assert_matches!(&*c.location.code.source,
                Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), " foo b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 1..4);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 1..2);
        });
        lexer.consume_char();

        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'y');
            assert_eq!(*c.location.code.value.borrow(), "x\ny");
            assert_eq!(c.location.code.start_line_number, line);
            assert_matches!(&*c.location.code.source, Source::Alias { original, alias: alias2 } => {
                assert_eq!(*original.code.value.borrow(), " foo b");
                assert_eq!(original.code.start_line_number, line);
                assert_eq!(*original.code.source, Source::Unknown);
                assert_eq!(original.range, 1..4);
                assert_eq!(alias2, &alias);
            });
            assert_eq!(c.location.range, 2..3);
        });
        lexer.consume_char();

        // The blank after "foo" keeps its location in the original code.
        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, ' ');
            assert_eq!(*c.location.code.value.borrow(), " foo b");
            assert_eq!(c.location.code.start_line_number, line);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 4..5);
        });
        lexer.consume_char();
    }
1386
    #[test]
    fn lexer_core_substitute_alias_empty_replacement() {
        // An alias with an empty replacement simply removes the replaced word.
        let input = Memory::new("x ");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        let alias = Rc::new(Alias {
            name: "x".to_string(),
            replacement: "".to_string(),
            global: false,
            origin: Location::dummy("dummy"),
        });

        let _ = lexer.peek_char().now_or_never().unwrap();
        lexer.consume_char();

        lexer.substitute_alias(0, &alias);

        // The next character is the blank that followed "x" in the input.
        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, ' ');
            assert_eq!(*c.location.code.value.borrow(), "x ");
            assert_eq!(c.location.code.start_line_number, line);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 1..2);
        });
    }
1412
    #[test]
    fn lexer_core_peek_char_after_alias_substitution() {
        // After an alias substitution, the lexer must still be able to read
        // the rest of the current line and the following line from the input.
        let input = Memory::new("a\nb");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));

        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();

        let alias = Rc::new(Alias {
            name: "a".to_string(),
            replacement: "".to_string(),
            global: false,
            origin: Location::dummy("dummy"),
        });
        lexer.substitute_alias(0, &alias);

        let result = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(result, Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, '\n');
            assert_eq!(*c.location.code.value.borrow(), "a\n");
            assert_eq!(c.location.code.start_line_number, line);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 1..2);
        });
        lexer.consume_char();

        // The second line is appended to the original code when it is read.
        let result = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(result, Ok(PeekChar::Char(c)) => {
            assert_eq!(c.value, 'b');
            assert_eq!(*c.location.code.value.borrow(), "a\nb");
            assert_eq!(c.location.code.start_line_number.get(), 1);
            assert_eq!(*c.location.code.source, Source::Unknown);
            assert_eq!(c.location.range, 2..3);
        });
        lexer.consume_char();

        let result = lexer.peek_char().now_or_never().unwrap();
        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
            assert_eq!(*location.code.value.borrow(), "a\nb");
            assert_eq!(location.code.start_line_number.get(), 1);
            assert_eq!(*location.code.source, Source::Unknown);
            assert_eq!(location.range, 3..3);
        });
    }
1458
1459    #[test]
1460    fn lexer_core_is_after_blank_ending_alias_index_0() {
1461        let original = Location::dummy("original");
1462        let alias = Rc::new(Alias {
1463            name: "a".to_string(),
1464            replacement: " ".to_string(),
1465            global: false,
1466            origin: Location::dummy("origin"),
1467        });
1468        let source = Source::Alias { original, alias };
1469        let input = Memory::new("a");
1470        let line = NonZeroU64::new(1).unwrap();
1471        let lexer = LexerCore::new(Box::new(input), line, Rc::new(source));
1472        assert!(!lexer.is_after_blank_ending_alias(0));
1473    }
1474
1475    #[test]
1476    fn lexer_core_is_after_blank_ending_alias_not_blank_ending() {
1477        let input = Memory::new("a x");
1478        let line = NonZeroU64::new(1).unwrap();
1479        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1480        let alias = Rc::new(Alias {
1481            name: "a".to_string(),
1482            replacement: " b".to_string(),
1483            global: false,
1484            origin: Location::dummy("dummy"),
1485        });
1486
1487        lexer.peek_char().now_or_never().unwrap().unwrap();
1488        lexer.consume_char();
1489
1490        lexer.substitute_alias(0, &alias);
1491
1492        assert!(!lexer.is_after_blank_ending_alias(0));
1493        assert!(!lexer.is_after_blank_ending_alias(1));
1494        assert!(!lexer.is_after_blank_ending_alias(2));
1495        assert!(!lexer.is_after_blank_ending_alias(3));
1496    }
1497
1498    #[test]
1499    fn lexer_core_is_after_blank_ending_alias_blank_ending() {
1500        let input = Memory::new("a x");
1501        let line = NonZeroU64::new(1).unwrap();
1502        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1503        let alias = Rc::new(Alias {
1504            name: "a".to_string(),
1505            replacement: " b ".to_string(),
1506            global: false,
1507            origin: Location::dummy("dummy"),
1508        });
1509
1510        lexer.peek_char().now_or_never().unwrap().unwrap();
1511        lexer.consume_char();
1512
1513        lexer.substitute_alias(0, &alias);
1514
1515        assert!(!lexer.is_after_blank_ending_alias(0));
1516        assert!(!lexer.is_after_blank_ending_alias(1));
1517        assert!(!lexer.is_after_blank_ending_alias(2));
1518        assert!(lexer.is_after_blank_ending_alias(3));
1519        assert!(lexer.is_after_blank_ending_alias(4));
1520    }
1521
    #[test]
    fn lexer_core_is_after_blank_ending_alias_after_line_continuation() {
        // The aliased word "a" is directly followed by a backslash-newline
        // pair in the input.
        let input = Memory::new("a\\\n x");
        let line = NonZeroU64::new(1).unwrap();
        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
        let alias = Rc::new(Alias {
            name: "a".to_string(),
            replacement: " b ".to_string(),
            global: false,
            origin: Location::dummy("dummy"),
        });

        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();
        lexer.substitute_alias(0, &alias);

        // Read everything up to the end of input.
        while let Ok(PeekChar::Char(_)) = lexer.peek_char().now_or_never().unwrap() {
            lexer.consume_char();
        }
        // After the substitution, the three replacement characters occupy
        // indices 0..3 and the backslash-newline pair occupies 3..5.
        lexer.mark_line_continuation(3..5);

        assert!(!lexer.is_after_blank_ending_alias(0));
        assert!(!lexer.is_after_blank_ending_alias(1));
        assert!(!lexer.is_after_blank_ending_alias(2));
        // Indices after the marked line continuation still count as being
        // after the blank-ending alias.
        assert!(lexer.is_after_blank_ending_alias(5));
        assert!(lexer.is_after_blank_ending_alias(6));
    }
1549
1550    #[test]
1551    fn lexer_with_empty_source() {
1552        let mut lexer = Lexer::with_code("");
1553        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
1554    }
1555
1556    #[test]
1557    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_backslash() {
1558        let mut lexer = Lexer::with_code("\\\n\n\\");
1559        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\n')));
1560        assert_eq!(lexer.index(), 2);
1561    }
1562
1563    #[test]
1564    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_newline() {
1565        let mut lexer = Lexer::with_code("\\\n\\\n\\\n\\\\");
1566        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1567        assert_eq!(lexer.index(), 6);
1568    }
1569
1570    #[test]
1571    fn lexer_peek_char_with_line_continuation_disabled() {
1572        let mut lexer = Lexer::with_code("\\\n\\\n\\\\");
1573        let mut lexer = lexer.disable_line_continuation();
1574        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1575        assert_eq!(lexer.index(), 0);
1576    }
1577
    #[test]
    fn lexer_flush() {
        let mut lexer = Lexer::with_code(" \n\n\t\n");
        // Only the first line has been read when this location is taken.
        let location_1 = lexer.location().now_or_never().unwrap().unwrap().clone();
        assert_eq!(*location_1.code.value.borrow(), " \n");

        // Consume the first two lines, flush, and then read into the third.
        lexer.consume_char();
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();
        lexer.flush();
        lexer.peek_char().now_or_never().unwrap().unwrap();
        lexer.consume_char();

        let location_2 = lexer.location().now_or_never().unwrap().unwrap().clone();

        // The old location shares its code with the lexer, so it sees the line
        // that was read after the location was taken, but nothing read after
        // the flush.
        assert_eq!(*location_1.code.value.borrow(), " \n\n");
        assert_eq!(location_1.code.start_line_number.get(), 1);
        assert_eq!(*location_1.code.source, Source::Unknown);
        assert_eq!(location_1.range, 0..1);
        // The new code starts at line 3 because the flushed code had two lines.
        assert_eq!(*location_2.code.value.borrow(), "\t\n");
        assert_eq!(location_2.code.start_line_number.get(), 3);
        assert_eq!(*location_2.code.source, Source::Unknown);
        assert_eq!(location_2.range, 1..2);
    }
1604
1605    #[test]
1606    fn lexer_consume_char_if() {
1607        let mut lexer = Lexer::with_code("word\n");
1608
1609        let mut called = 0;
1610        let c = lexer
1611            .consume_char_if(|c| {
1612                assert_eq!(c, 'w');
1613                called += 1;
1614                true
1615            })
1616            .now_or_never()
1617            .unwrap()
1618            .unwrap()
1619            .unwrap();
1620        assert_eq!(called, 1);
1621        assert_eq!(c.value, 'w');
1622        assert_eq!(*c.location.code.value.borrow(), "word\n");
1623        assert_eq!(c.location.code.start_line_number.get(), 1);
1624        assert_eq!(*c.location.code.source, Source::Unknown);
1625        assert_eq!(c.location.range, 0..1);
1626
1627        let mut called = 0;
1628        let r = lexer
1629            .consume_char_if(|c| {
1630                assert_eq!(c, 'o');
1631                called += 1;
1632                false
1633            })
1634            .now_or_never()
1635            .unwrap();
1636        assert_eq!(called, 1);
1637        assert_eq!(r, Ok(None));
1638
1639        let mut called = 0;
1640        let r = lexer
1641            .consume_char_if(|c| {
1642                assert_eq!(c, 'o');
1643                called += 1;
1644                false
1645            })
1646            .now_or_never()
1647            .unwrap();
1648        assert_eq!(called, 1);
1649        assert_eq!(r, Ok(None));
1650
1651        let mut called = 0;
1652        let c = lexer
1653            .consume_char_if(|c| {
1654                assert_eq!(c, 'o');
1655                called += 1;
1656                true
1657            })
1658            .now_or_never()
1659            .unwrap()
1660            .unwrap()
1661            .unwrap();
1662        assert_eq!(called, 1);
1663        assert_eq!(c.value, 'o');
1664        assert_eq!(*c.location.code.value.borrow(), "word\n");
1665        assert_eq!(c.location.code.start_line_number.get(), 1);
1666        assert_eq!(*c.location.code.source, Source::Unknown);
1667        assert_eq!(c.location.range, 1..2);
1668
1669        lexer
1670            .consume_char_if(|c| {
1671                assert_eq!(c, 'r');
1672                true
1673            })
1674            .now_or_never()
1675            .unwrap()
1676            .unwrap()
1677            .unwrap();
1678        lexer
1679            .consume_char_if(|c| {
1680                assert_eq!(c, 'd');
1681                true
1682            })
1683            .now_or_never()
1684            .unwrap()
1685            .unwrap()
1686            .unwrap();
1687        lexer
1688            .consume_char_if(|c| {
1689                assert_eq!(c, '\n');
1690                true
1691            })
1692            .now_or_never()
1693            .unwrap()
1694            .unwrap()
1695            .unwrap();
1696
1697        // end of input
1698        let r = lexer
1699            .consume_char_if(|c| {
1700                unreachable!("unexpected call to the decider function: argument={}", c)
1701            })
1702            .now_or_never()
1703            .unwrap();
1704        assert_eq!(r, Ok(None));
1705    }
1706
1707    #[test]
1708    fn lexer_location_range_with_empty_range() {
1709        let mut lexer = Lexer::with_code("");
1710        lexer.peek_char().now_or_never().unwrap().unwrap();
1711        let location = lexer.location_range(0..0);
1712        assert_eq!(*location.code.value.borrow(), "");
1713        assert_eq!(location.code.start_line_number.get(), 1);
1714        assert_eq!(*location.code.source, Source::Unknown);
1715        assert_eq!(location.range, 0..0);
1716    }
1717
1718    #[test]
1719    fn lexer_location_range_with_nonempty_range() {
1720        let mut lexer = Lexer::from_memory("cat foo", Source::Stdin);
1721        for _ in 0..4 {
1722            lexer.peek_char().now_or_never().unwrap().unwrap();
1723            lexer.consume_char();
1724        }
1725        lexer.peek_char().now_or_never().unwrap().unwrap();
1726
1727        let location = lexer.location_range(1..4);
1728        assert_eq!(*location.code.value.borrow(), "cat foo");
1729        assert_eq!(location.code.start_line_number.get(), 1);
1730        assert_eq!(*location.code.source, Source::Stdin);
1731        assert_eq!(location.range, 1..4);
1732    }
1733
1734    #[test]
1735    fn lexer_location_range_with_range_starting_at_end() {
1736        let mut lexer = Lexer::from_memory("cat", Source::Stdin);
1737        for _ in 0..3 {
1738            lexer.peek_char().now_or_never().unwrap().unwrap();
1739            lexer.consume_char();
1740        }
1741        lexer.peek_char().now_or_never().unwrap().unwrap();
1742
1743        let location = lexer.location_range(3..3);
1744        assert_eq!(*location.code.value.borrow(), "cat");
1745        assert_eq!(location.code.start_line_number.get(), 1);
1746        assert_eq!(*location.code.source, Source::Stdin);
1747        assert_eq!(location.range, 3..3);
1748    }
1749
1750    #[test]
1751    #[should_panic]
1752    fn lexer_location_range_with_unconsumed_code() {
1753        let lexer = Lexer::with_code("echo ok");
1754        let _ = lexer.location_range(0..0);
1755    }
1756
1757    #[test]
1758    #[should_panic(expected = "The index 1 must not be larger than the current index 0")]
1759    fn lexer_location_range_with_range_out_of_bounds() {
1760        let lexer = Lexer::with_code("");
1761        let _ = lexer.location_range(1..2);
1762    }
1763
1764    #[test]
1765    fn lexer_location_range_with_alias_substitution() {
1766        let mut lexer = Lexer::with_code(" a;");
1767        let alias_def = Rc::new(Alias {
1768            name: "a".to_string(),
1769            replacement: "abc".to_string(),
1770            global: false,
1771            origin: Location::dummy("dummy"),
1772        });
1773        for _ in 0..2 {
1774            lexer.peek_char().now_or_never().unwrap().unwrap();
1775            lexer.consume_char();
1776        }
1777        lexer.substitute_alias(1, &alias_def);
1778        for _ in 1..5 {
1779            lexer.peek_char().now_or_never().unwrap().unwrap();
1780            lexer.consume_char();
1781        }
1782
1783        let location = lexer.location_range(2..5);
1784        assert_eq!(*location.code.value.borrow(), "abc");
1785        assert_eq!(location.code.start_line_number.get(), 1);
1786        assert_matches!(&*location.code.source, Source::Alias { original, alias } => {
1787            assert_eq!(*original.code.value.borrow(), " a;");
1788            assert_eq!(original.code.start_line_number.get(), 1);
1789            assert_eq!(*original.code.source, Source::Unknown);
1790            assert_eq!(original.range, 1..2);
1791            assert_eq!(alias, &alias_def);
1792        });
1793        assert_eq!(location.range, 1..3);
1794    }
1795
1796    #[test]
1797    fn lexer_inner_program_success() {
1798        let mut lexer = Lexer::with_code("x y )");
1799        let source = lexer.inner_program().now_or_never().unwrap().unwrap();
1800        assert_eq!(source, "x y ");
1801    }
1802
1803    #[test]
1804    fn lexer_inner_program_failure() {
1805        let mut lexer = Lexer::with_code("<< )");
1806        let e = lexer.inner_program().now_or_never().unwrap().unwrap_err();
1807        assert_eq!(
1808            e.cause,
1809            ErrorCause::Syntax(SyntaxError::MissingHereDocDelimiter)
1810        );
1811        assert_eq!(*e.location.code.value.borrow(), "<< )");
1812        assert_eq!(e.location.code.start_line_number.get(), 1);
1813        assert_eq!(*e.location.code.source, Source::Unknown);
1814        assert_eq!(e.location.range, 3..4);
1815    }
1816}