Skip to main content

yash_syntax/parser/lex/
core.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Fundamental building blocks for the lexical analyzer
18
19use super::keyword::Keyword;
20use super::op::Operator;
21use crate::alias::Alias;
22use crate::input::Context;
23use crate::input::InputObject;
24use crate::input::Memory;
25use crate::parser::core::Result;
26use crate::parser::error::Error;
27use crate::source::Code;
28use crate::source::Location;
29use crate::source::Source;
30use crate::source::SourceChar;
31use crate::source::source_chars;
32use crate::syntax::Word;
33use std::cell::RefCell;
34use std::fmt;
35use std::num::NonZeroU64;
36use std::ops::Deref;
37use std::ops::DerefMut;
38use std::ops::Range;
39use std::pin::Pin;
40use std::rc::Rc;
41
/// Tests whether `c` is regarded as a blank.
///
/// Any whitespace character other than the newline counts as a blank.
pub fn is_blank(c: char) -> bool {
    // TODO locale
    match c {
        '\n' => false,
        other => other.is_whitespace(),
    }
}
47
/// Result of [`LexerCore::peek_char`]
///
/// Both variants hold references, so a `PeekChar` is cheap to copy.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PeekChar<'a> {
    /// A character is available at the current position.
    Char(&'a SourceChar),
    /// There is no more character; the payload is the location recorded for
    /// the end of input.
    EndOfInput(&'a Location),
}
54
55impl<'a> PeekChar<'a> {
56    /// Returns the location that was peeked.
57    #[must_use]
58    fn location<'b>(self: &'b PeekChar<'a>) -> &'a Location {
59        match self {
60            PeekChar::Char(c) => &c.location,
61            PeekChar::EndOfInput(l) => l,
62        }
63    }
64}
65
/// Token identifier, or classification of tokens
///
/// This enum classifies a token as defined in POSIX XCU 2.10.1 Shell Grammar Lexical
/// Conventions, but does not exactly reflect further distinction defined in
/// POSIX XCU 2.10.2 Shell Grammar Rules.
///
/// For convenience, the special token identifier `EndOfInput` is included.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenId {
    /// `TOKEN`
    ///
    /// If this token _looks like_ a reserved word, this variant has some
    /// associated `Keyword` that describes the word. However, it depends on
    /// context whether a token is actually regarded as a reserved word or
    /// just as an ordinary word. You must ensure that you're in an
    /// applicable context when examining the `Keyword` value.
    Token(Option<Keyword>),
    /// Operator
    ///
    /// The associated value tells which operator the token is.
    Operator(Operator),
    /// `IO_NUMBER`
    IoNumber,
    /// `IO_LOCATION`
    IoLocation,
    /// Imaginary token identifier for the end of input
    EndOfInput,
}
92
93impl TokenId {
94    /// Determines if this token can be a delimiter of a clause.
95    ///
96    /// This function delegates to [`Keyword::is_clause_delimiter`] if the token
97    /// ID is a (possible) keyword, or to [`Operator::is_clause_delimiter`] if
98    /// it is an operator. For `EndOfInput` the function returns true.
99    /// Otherwise, the result is false.
100    pub fn is_clause_delimiter(self) -> bool {
101        use TokenId::*;
102        match self {
103            Token(Some(keyword)) => keyword.is_clause_delimiter(),
104            Token(None) => false,
105            Operator(operator) => operator.is_clause_delimiter(),
106            IoNumber => false,
107            IoLocation => false,
108            EndOfInput => true,
109        }
110    }
111}
112
/// Result of lexical analysis produced by the [`Lexer`]
///
/// A token is displayed as its `word`.
#[derive(Debug)]
pub struct Token {
    /// Content of the token
    ///
    /// The word value contains at least one [unit](crate::syntax::WordUnit),
    /// regardless of whether the token is an operator. The only exception is
    /// when `id` is `EndOfInput`, in which case the word is empty.
    pub word: Word,
    /// Token identifier
    pub id: TokenId,
    /// Position of the first character of the word
    // NOTE(review): presumably counted the same way as `Lexer::index` — confirm.
    pub index: usize,
}
127
128impl fmt::Display for Token {
129    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130        write!(f, "{}", self.word)
131    }
132}
133
/// State of the input function in a lexer
#[derive(Clone, Debug)]
enum InputState {
    /// More lines may still be requested from the input.
    Alive,
    /// The input returned an empty line; the location is the position where
    /// the input ended.
    EndOfInput(Location),
    /// Reading from the input failed; the error is reported on every peek
    /// until the state is reset.
    Error(Error),
}
141
/// Source character with additional attribute
#[derive(Clone, Debug, Eq, PartialEq)]
struct SourceCharEx {
    /// The character and its location
    value: SourceChar,
    /// Whether this character has been marked as part of a line continuation
    /// (see `LexerCore::mark_line_continuation`)
    is_line_continuation: bool,
}
148
149fn ex<I: IntoIterator<Item = SourceChar>>(i: I) -> impl Iterator<Item = SourceCharEx> {
150    i.into_iter().map(|sc| SourceCharEx {
151        value: sc,
152        is_line_continuation: false,
153    })
154}
155
/// Core part of the lexical analyzer
///
/// The core owns the input object, buffers the characters read from it, and
/// keeps track of the position of the next character to parse.
struct LexerCore<'a> {
    // The `input` field could be a `&'a mut dyn InputObject + 'a`, but it is
    // `Box<dyn InputObject + 'a>` to allow the lexer to take ownership of the
    // input object. This is necessary for `Lexer::with_code` and similarly
    // constructed lexers.
    input: Box<dyn InputObject + 'a>,
    /// Whether more input can be read, or the end of input or an error has
    /// been reached
    state: InputState,
    /// Code object accumulating the lines read from the input since the last
    /// flush
    raw_code: Rc<Code>,
    /// Buffered characters, with alias substitutions applied in place
    source: Vec<SourceCharEx>,
    /// Index into `source` of the next character to parse
    index: usize,
}
168
169impl<'a> LexerCore<'a> {
170    /// Creates a new lexer core that reads using the given input function.
171    #[must_use]
172    fn new(
173        input: Box<dyn InputObject + 'a>,
174        start_line_number: NonZeroU64,
175        source: Rc<Source>,
176    ) -> LexerCore<'a> {
177        LexerCore {
178            input,
179            raw_code: Rc::new(Code {
180                value: RefCell::new(String::new()),
181                start_line_number,
182                source,
183            }),
184            state: InputState::Alive,
185            source: Vec::new(),
186            index: 0,
187        }
188    }
189
190    /// Computes the start index of the location at the current position.
191    #[must_use]
192    fn next_index(&self) -> usize {
193        let Some(last) = self.source.last() else {
194            return 0;
195        };
196
197        let mut location = &last.value.location;
198        while let Source::Alias { original, .. } = &*location.code.source {
199            location = original;
200        }
201        location.range.end
202    }
203
    /// Peeks the next character, reading the next line if necessary.
    ///
    /// If a character is buffered at the current index, it is returned as
    /// [`PeekChar::Char`]. Otherwise, lines are requested from the input
    /// object until a character becomes available, the input returns an empty
    /// line (end of input), or reading fails. The end-of-input and error
    /// states are remembered in `self.state`, so later calls report them
    /// again without reading from the input.
    async fn peek_char(&mut self) -> Result<PeekChar<'_>> {
        loop {
            // The bounds check followed by indexing stands in for the more
            // natural form:
            //
            //     if let Some(sc) = self.source.get(self.index) {
            //         return Ok(PeekChar::Char(&sc.value));
            //     }
            //
            // NOTE(review): presumably the `get` form is rejected by the
            // borrow checker because the returned borrow would conflict with
            // the mutations later in this loop — confirm before switching.
            if self.index < self.source.len() {
                return Ok(PeekChar::Char(&self.source[self.index].value));
            }

            // The buffer is exhausted. Only an alive input may be read again.
            match self.state {
                InputState::Alive => (),
                InputState::EndOfInput(ref location) => return Ok(PeekChar::EndOfInput(location)),
                InputState::Error(ref error) => return Err(error.clone()),
            }

            // Read more input
            let index = self.next_index();
            match self.input.next_line(&self.input_context()).await {
                Ok(line) => {
                    if line.is_empty() {
                        // End of input
                        self.state = InputState::EndOfInput(Location {
                            code: Rc::clone(&self.raw_code),
                            range: index..index,
                        });
                    } else {
                        // Successful read
                        self.raw_code.value.borrow_mut().push_str(&line);
                        self.source
                            .extend(ex(source_chars(&line, &self.raw_code, index)));
                    }
                }
                Err(io_error) => {
                    // Remember the error; the next iteration returns it.
                    self.state = InputState::Error(Error {
                        cause: io_error.into(),
                        location: Location {
                            code: Rc::clone(&self.raw_code),
                            range: index..index,
                        },
                    });
                }
            }
        }
    }
248
249    /// Returns the input context for the next character.
250    fn input_context(&self) -> Context {
251        let mut context = Context::default();
252        context.set_is_first_line(self.raw_code.value.borrow().is_empty());
253        context
254    }
255
256    /// Consumes the next character.
257    ///
258    /// This function must be called after [`peek_char`](Lexer::peek_char) has successfully
259    /// returned the character. Consuming a character that has not yet been peeked would result
260    /// in a panic!
261    fn consume_char(&mut self) {
262        assert!(
263            self.index < self.source.len(),
264            "A character must have been peeked before being consumed: index={}",
265            self.index
266        );
267        self.index += 1;
268    }
269
270    /// Returns a reference to the character at the given index.
271    #[must_use]
272    fn peek_char_at(&self, index: usize) -> &SourceChar {
273        assert!(
274            index <= self.index,
275            "The index {} must not be larger than the current index {}",
276            index,
277            self.index
278        );
279        &self.source[index].value
280    }
281
282    /// Returns the current index.
283    #[must_use]
284    fn index(&self) -> usize {
285        self.index
286    }
287
288    /// Rewinds the index to the given value.
289    fn rewind(&mut self, index: usize) {
290        assert!(
291            index <= self.index,
292            "The new index {} must not be larger than the current index {}",
293            index,
294            self.index
295        );
296        self.index = index;
297    }
298
299    /// Checks if there is any character that has been read from the input
300    /// source but not yet consumed.
301    #[must_use]
302    fn pending(&self) -> bool {
303        self.index < self.source.len()
304    }
305
306    /// Clears the internal buffer.
307    fn flush(&mut self) {
308        let start_line_number = self.raw_code.line_number(usize::MAX);
309        self.raw_code = Rc::new(Code {
310            value: RefCell::new(String::new()),
311            start_line_number,
312            source: self.raw_code.source.clone(),
313        });
314        self.source.clear();
315        self.index = 0;
316    }
317
318    /// Clears an end-of-input or error status so that the lexer can resume
319    /// parsing.
320    fn reset(&mut self) {
321        self.state = InputState::Alive;
322        self.flush();
323    }
324
325    /// Extracts a string from the source code range.
326    fn source_string(&self, range: Range<usize>) -> String {
327        self.source[range].iter().map(|c| c.value.value).collect()
328    }
329
330    /// Returns a location for a given range of the source code.
331    #[must_use]
332    fn location_range(&self, range: Range<usize>) -> Location {
333        if range.start == self.source.len() {
334            if let InputState::EndOfInput(ref location) = self.state {
335                return location.clone();
336            }
337        }
338        let start = &self.peek_char_at(range.start).location;
339        let code = start.code.clone();
340        let end = range
341            .map(|index| &self.peek_char_at(index).location)
342            .take_while(|location| location.code == code)
343            .last()
344            .map(|location| location.range.end)
345            .unwrap_or(start.range.start);
346        let range = start.range.start..end;
347        Location { code, range }
348    }
349
350    /// Marks the characters in the given range as line continuation.
351    ///
352    /// This function sets the `is_line_continuation` flag of the characters in
353    /// the range to true. The characters must have been read before calling
354    /// this function.
355    fn mark_line_continuation(&mut self, range: Range<usize>) {
356        assert!(
357            range.end <= self.index,
358            "characters must have been read (range = {:?}, current index = {})",
359            range,
360            self.index
361        );
362        for sc in &mut self.source[range] {
363            sc.is_line_continuation = true;
364        }
365    }
366
367    /// Performs alias substitution.
368    ///
369    /// This function replaces the characters starting from the `begin` index up
370    /// to the current position with the alias value. The resulting part of code
371    /// will be characters with a [`Source::Alias`] origin.
372    fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
373        let end = self.index;
374        assert!(
375            begin < end,
376            "begin index {begin} should be less than end index {end}"
377        );
378
379        let source = Rc::new(Source::Alias {
380            original: self.location_range(begin..end),
381            alias: alias.clone(),
382        });
383        let code = Rc::new(Code {
384            value: RefCell::new(alias.replacement.clone()),
385            start_line_number: NonZeroU64::new(1).unwrap(),
386            source,
387        });
388        let repl = ex(source_chars(&alias.replacement, &code, 0));
389
390        self.source.splice(begin..end, repl);
391        self.index = begin;
392    }
393
394    /// Tests if the given index is after the replacement string of alias
395    /// substitution that ends with a blank.
396    ///
397    /// # Panics
398    ///
399    /// If `index` is larger than the currently read index.
400    fn is_after_blank_ending_alias(&self, index: usize) -> bool {
401        fn ends_with_blank(s: &str) -> bool {
402            s.chars().next_back().is_some_and(is_blank)
403        }
404        fn is_same_alias(alias: &Alias, sc: Option<&SourceCharEx>) -> bool {
405            sc.is_some_and(|sc| sc.value.location.code.source.is_alias_for(&alias.name))
406        }
407
408        for index in (0..index).rev() {
409            let sc = &self.source[index];
410
411            if !sc.is_line_continuation && !is_blank(sc.value.value) {
412                return false;
413            }
414
415            if let Source::Alias { ref alias, .. } = *sc.value.location.code.source {
416                if ends_with_blank(&alias.replacement)
417                    && !is_same_alias(alias, self.source.get(index + 1))
418                {
419                    return true;
420                }
421            }
422        }
423
424        false
425    }
426}
427
428impl fmt::Debug for LexerCore<'_> {
429    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
430        f.debug_struct("LexerCore")
431            .field("state", &self.state)
432            .field("source", &self.source)
433            .field("index", &self.index)
434            .finish_non_exhaustive()
435    }
436}
437
/// Configuration for the [lexer](Lexer)
///
/// `Config` is a builder for the lexer. A [new](Self::new) instance is created
/// with default settings. You can then customize the settings by modifying the
/// corresponding fields. Finally, you can pass an input object to the
/// [`input`](Self::input) method to create a lexer.
///
/// # Deprecation
///
/// This struct is deprecated. Use [`yash_env::parser::Config`] instead.
#[deprecated(since = "0.17.0", note = "use `yash_env::parser::Config` instead")]
#[derive(Debug)]
#[must_use = "you must call `input` to create a lexer"]
#[non_exhaustive]
pub struct Config {
    /// Line number for the first line of the input
    ///
    /// The lexer counts the line number from this value to annotate the
    /// location of the tokens. The line number is saved in the
    /// `start_line_number` field of the [`Code`] instance that is contained in
    /// the [`Location`] instance of the token.
    ///
    /// The default value is 1.
    pub start_line_number: NonZeroU64,

    /// Source of the input
    ///
    /// The source is used to annotate the location of the tokens. This value
    /// is saved in the `source` field of the [`Code`] instance that is
    /// contained in the [`Location`] instance of the token.
    ///
    /// The default value is `None`, in which case the source is set to
    /// [`Source::Unknown`]. It is recommended to set this to a more informative
    /// value, so that the locations in the parsed syntax tree can be traced
    /// back to the source code. In particular, the correct source is necessary
    /// to indicate the location of possible errors that occur during parsing
    /// and execution.
    pub source: Option<Rc<Source>>,
}
477
478#[allow(deprecated, reason = "for backward compatible API")]
479impl Config {
480    /// Creates a new configuration with default settings.
481    ///
482    /// # Deprecation
483    ///
484    /// This struct is deprecated. Use [`yash_env::parser::Config`] instead.
485    #[deprecated(since = "0.17.0", note = "use `yash_env::parser::Config` instead")]
486    pub fn new() -> Self {
487        Config {
488            start_line_number: NonZeroU64::MIN,
489            source: None,
490        }
491    }
492
493    /// Creates a lexer with the given input object.
494    pub fn input<'a>(self, input: Box<dyn InputObject + 'a>) -> Lexer<'a> {
495        let mut config = yash_env::parser::Config::with_input(input);
496        config.start_line_number = self.start_line_number;
497        config.source = self.source;
498        config.into()
499    }
500}
501
502#[allow(deprecated, reason = "for backward compatible API")]
503impl Default for Config {
504    fn default() -> Self {
505        Self::new()
506    }
507}
508
/// Lexical analyzer
///
/// A lexer reads lines using an input function and parses the characters into tokens. It has an
/// internal buffer containing the characters that have been read and the position (or the
/// index) of the character that is to be parsed next.
///
/// `Lexer` has primitive functions such as [`peek_char`](Lexer::peek_char) that provide access
/// to the character at the current position. Derived functions such as
/// [`skip_blanks_and_comment`](Lexer::skip_blanks_and_comment) depend on those primitives to
/// parse more complex structures in the source code. Usually, the lexer is used by a
/// [parser](super::super::Parser) to read the source code and produce a syntax
/// tree, so you don't need to call these functions directly.
///
/// To construct a lexer, create a configuration object
/// ([`yash_env::parser::Config`]), set the desired fields, and then call
/// `into()` or [`Lexer::from`].
/// `Lexer` also has several convenience functions such as [`new`](Self::new)
/// and [`with_code`](Self::with_code) for creating a lexer with minimal
/// configuration.
///
/// ```
/// # use yash_env::parser::Config;
/// # use yash_syntax::input::Memory;
/// # use yash_syntax::parser::{lex::Lexer, Parser};
/// # use yash_syntax::source::Source;
/// let mut config = Config::with_input(Box::new(Memory::new("echo hello\n")));
/// config.start_line_number = 10.try_into().unwrap();
/// config.source = Some(Source::CommandString.into());
/// let mut lexer = Lexer::from(config);
/// let mut parser = Parser::new(&mut lexer);
/// _ = parser.command_line();
/// ```
#[derive(Debug)]
#[must_use]
pub struct Lexer<'a> {
    // `Lexer` is a thin wrapper around `LexerCore`. `Lexer` delegates most
    // functions to `LexerCore`. `Lexer` adds automatic line-continuation
    // skipping to `LexerCore`.
    core: LexerCore<'a>,
    // Whether `peek_char` skips line continuations. The flag is initially
    // true and is cleared by `disable_line_continuation`.
    line_continuation_enabled: bool,
}
550
551/// Creates a lexer from a configuration defined in the [`yash-env`](yash_env)
552/// crate.
553impl<'a> From<yash_env::parser::Config<'a>> for Lexer<'a> {
554    fn from(config: yash_env::parser::Config<'a>) -> Self {
555        let input = config.input;
556        let start_line_number = config.start_line_number;
557        let source = config.source.unwrap_or_else(|| Rc::new(Source::Unknown));
558        Lexer {
559            core: LexerCore::new(input, start_line_number, source),
560            line_continuation_enabled: true,
561        }
562    }
563}
564
565impl<'a> Lexer<'a> {
566    /// Creates a new configuration with default settings.
567    ///
568    /// This is a synonym for [`Config::new`]. You can modify the settings and
569    /// then create a lexer with the [`input`](Config::input) method.
570    ///
571    /// # Deprecation
572    ///
573    /// The `Config` struct defined in this module is deprecated. Use
574    /// [`yash_env::parser::Config`] instead.
575    #[allow(deprecated, reason = "for backward compatible API")]
576    #[deprecated(since = "0.17.0", note = "use `yash_env::parser::Config` instead")]
577    #[inline(always)]
578    pub fn config() -> Config {
579        Config::new()
580    }
581
582    /// Creates a new lexer that reads using the given input function.
583    ///
584    /// This is a convenience function that creates a lexer with the given input
585    /// object and the default configuration. To customize the configuration,
586    /// instantiate a [`yash_env::parser::Config`] object instead.
587    ///
588    /// This function is best used for testing or for simple cases where you
589    /// don't need to customize the lexer. For practical use, it is recommended
590    /// to provide supplementary information with a configuration before
591    /// creating a lexer.
592    pub fn new(input: Box<dyn InputObject + 'a>) -> Lexer<'a> {
593        yash_env::parser::Config::with_input(input).into()
594    }
595
596    /// Creates a new lexer with a fixed source code.
597    ///
598    /// This is a convenience function that creates a lexer that reads from a
599    /// string using [`Memory`] with the default configuration.
600    ///
601    /// This function is best used for testing or for simple cases where you
602    /// don't need to customize the lexer. For practical use, it is recommended
603    /// to provide supplementary information with a configuration before
604    /// creating a lexer.
605    pub fn with_code(code: &'a str) -> Lexer<'a> {
606        Self::new(Box::new(Memory::new(code)))
607    }
608
609    /// Creates a new lexer with a fixed source code.
610    ///
611    /// This is a convenience function that creates a lexer that reads from a
612    /// string using [`Memory`] with the specified source starting from line
613    /// number 1.
614    ///
615    /// This function is soft-deprecated. Use [`with_code`](Self::with_code)
616    /// instead if the source is `Unknown`. Otherwise, use
617    /// [`yash_env::parser::Config`] to create a lexer with a customized
618    /// configuration.
619    pub fn from_memory<S: Into<Rc<Source>>>(code: &'a str, source: S) -> Lexer<'a> {
620        fn inner(code: &str, source: Rc<Source>) -> Lexer<'_> {
621            let mut config = yash_env::parser::Config::with_input(Box::new(Memory::new(code)));
622            config.source = Some(source);
623            config.into()
624        }
625        inner(code, source.into())
626    }
627
628    /// Disables line continuation recognition onward.
629    ///
630    /// By default, [`peek_char`](Self::peek_char) silently skips line
631    /// continuation sequences. When line continuation is disabled, however,
632    /// `peek_char` returns characters literally.
633    ///
634    /// Call [`enable_line_continuation`](Self::enable_line_continuation) to
635    /// switch line continuation recognition on.
636    ///
637    /// This function will panic if line continuation has already been disabled.
638    pub fn disable_line_continuation<'b>(&'b mut self) -> PlainLexer<'b, 'a> {
639        assert!(
640            self.line_continuation_enabled,
641            "line continuation already disabled"
642        );
643        self.line_continuation_enabled = false;
644        PlainLexer { lexer: self }
645    }
646
647    /// Re-enables line continuation.
648    ///
649    /// You can pass the `PlainLexer` returned from
650    /// [`disable_line_continuation`](Self::disable_line_continuation) to this
651    /// function to re-enable line continuation. That is equivalent to dropping
652    /// the `PlainLexer` instance, but the code will be more descriptive.
653    pub fn enable_line_continuation<'b>(_: PlainLexer<'a, 'b>) {}
654
655    /// Skips line continuation, i.e., a backslash followed by a newline.
656    ///
657    /// If there is a line continuation at the current position, this function
658    /// consumes the backslash and the newline and returns `Ok(true)`. The
659    /// characters are marked as line continuation.
660    ///
661    /// If there is no line continuation, this function does nothing and returns
662    /// `Ok(false)`.
663    ///
664    /// This function does nothing if line continuation has been
665    /// [disabled](Self::disable_line_continuation).
666    async fn line_continuation(&mut self) -> Result<bool> {
667        if !self.line_continuation_enabled {
668            return Ok(false);
669        }
670
671        let index = self.core.index();
672        match self.core.peek_char().await? {
673            PeekChar::Char(c) if c.value == '\\' => self.core.consume_char(),
674            _ => return Ok(false),
675        }
676
677        match self.core.peek_char().await? {
678            PeekChar::Char(c) if c.value == '\n' => self.core.consume_char(),
679            _ => {
680                self.core.rewind(index);
681                return Ok(false);
682            }
683        }
684
685        self.core.mark_line_continuation(index..index + 2);
686
687        Ok(true)
688    }
689
690    /// Peeks the next character.
691    ///
692    /// If the end of input is reached, `Ok(None)` is returned. On error,
693    /// `Err(_)` is returned.
694    ///
695    /// If line continuation recognition is enabled, combinations of a backslash
696    /// and a newline are silently skipped before returning the next character.
697    /// Call [`disable_line_continuation`](Self::disable_line_continuation) to
698    /// switch off line continuation recognition.
699    ///
700    /// This function requires a mutable reference to `self` since it may need
701    /// to read the next line if needed.
702    pub async fn peek_char(&mut self) -> Result<Option<char>> {
703        while self.line_continuation().await? {}
704
705        match self.core.peek_char().await? {
706            PeekChar::Char(source_char) => Ok(Some(source_char.value)),
707            PeekChar::EndOfInput(_) => Ok(None),
708        }
709    }
710
711    /// Returns the location of the next character.
712    ///
713    /// If there is no more character (that is, it is the end of input), an imaginary location
714    /// is returned that would be returned if a character existed.
715    ///
716    /// This function requires a mutable reference to `self` since it needs to
717    /// [peek](Self::peek_char) the next character.
718    pub async fn location(&mut self) -> Result<&Location> {
719        self.core.peek_char().await.map(|p| p.location())
720    }
721
722    /// Consumes the next character.
723    ///
724    /// This function must be called after [`peek_char`](Lexer::peek_char) has successfully
725    /// returned the character. Consuming a character that has not yet been peeked would result
726    /// in a panic!
727    pub fn consume_char(&mut self) {
728        self.core.consume_char()
729    }
730
731    /// Returns the position of the next character, counted from zero.
732    ///
733    /// ```
734    /// # use yash_syntax::parser::lex::Lexer;
735    /// # futures_executor::block_on(async {
736    /// let mut lexer = Lexer::with_code("abc");
737    /// assert_eq!(lexer.index(), 0);
738    /// let _ = lexer.peek_char().await;
739    /// assert_eq!(lexer.index(), 0);
740    /// lexer.consume_char();
741    /// assert_eq!(lexer.index(), 1);
742    /// # })
743    /// ```
744    #[must_use]
745    pub fn index(&self) -> usize {
746        self.core.index()
747    }
748
749    /// Moves the current position back to the given index so that characters that have been
750    /// consumed can be read again.
751    ///
752    /// The given index must not be larger than the [current index](Lexer::index), or this
753    /// function would panic.
754    ///
755    /// ```
756    /// # use yash_syntax::parser::lex::Lexer;
757    /// # futures_executor::block_on(async {
758    /// let mut lexer = Lexer::with_code("abc");
759    /// let saved_index = lexer.index();
760    /// assert_eq!(lexer.peek_char().await, Ok(Some('a')));
761    /// lexer.consume_char();
762    /// assert_eq!(lexer.peek_char().await, Ok(Some('b')));
763    /// lexer.rewind(saved_index);
764    /// assert_eq!(lexer.peek_char().await, Ok(Some('a')));
765    /// # })
766    /// ```
767    pub fn rewind(&mut self, index: usize) {
768        self.core.rewind(index)
769    }
770
771    /// Checks if there is any character that has been read from the input
772    /// source but not yet consumed.
773    #[must_use]
774    pub fn pending(&self) -> bool {
775        self.core.pending()
776    }
777
778    /// Clears the internal buffer of the lexer.
779    ///
780    /// Locations returned from [`location`](Self::location) share a single code
781    /// instance that is also retained by the lexer. The code grows long as the
782    /// lexer reads more input. To prevent the code from getting too large, you
783    /// can call this function that replaces the retained code with a new empty
784    /// one. The new code's `start_line_number` will be incremented by the
785    /// number of lines in the previous.
786    pub fn flush(&mut self) {
787        self.core.flush()
788    }
789
790    /// Clears an end-of-input or error status so that the lexer can resume
791    /// parsing.
792    ///
793    /// This function will be useful only in an interactive shell where the user
794    /// can continue entering commands even after (s)he sends an end-of-input or
795    /// is interrupted by a syntax error.
796    pub fn reset(&mut self) {
797        self.core.reset()
798    }
799
800    /// Peeks the next character and, if the given decider function returns true for it,
801    /// advances the position.
802    ///
803    /// Returns the consumed character if the function returned true. Returns `Ok(None)` if it
804    /// returned false or there is no more character.
805    pub async fn consume_char_if<F>(&mut self, mut f: F) -> Result<Option<&SourceChar>>
806    where
807        F: FnMut(char) -> bool,
808    {
809        self.consume_char_if_dyn(&mut f).await
810    }
811
812    /// Dynamic version of [`Self::consume_char_if`].
813    pub(crate) async fn consume_char_if_dyn(
814        &mut self,
815        f: &mut dyn FnMut(char) -> bool,
816    ) -> Result<Option<&SourceChar>> {
817        match self.peek_char().await? {
818            Some(c) if f(c) => {
819                let index = self.index();
820                self.consume_char();
821                Ok(Some(self.core.peek_char_at(index)))
822            }
823            _ => Ok(None),
824        }
825    }
826
827    /// Extracts a string from the source code range.
828    ///
829    /// This function returns the source code string for the range specified by
830    /// the argument. The range must specify a valid index. If the index points
831    /// to a character that have not yet read, this function will panic!.
832    ///
833    /// # Panics
834    ///
835    /// If the argument index is out of bounds, i.e., pointing to an unread
836    /// character.
837    #[inline]
838    pub fn source_string(&self, range: Range<usize>) -> String {
839        self.core.source_string(range)
840    }
841
842    /// Returns a location for a given range of the source code.
843    ///
844    /// All the characters in the range must have been
845    /// [consume](Self::consume_char)d. If the range refers to an unconsumed
846    /// character, this function will panic!
847    ///
848    /// If the characters are from more than one [`Code`] fragment, the location
849    /// will only cover the initial portion of the range sharing the same
850    /// `Code`.
851    ///
852    /// # Panics
853    ///
854    /// This function will panic if the range refers to an unconsumed character.
855    ///
856    /// If the start index of the range is the end of input, it must have been
857    /// peeked and the range must be empty, or the function will panic.
858    #[must_use]
859    pub fn location_range(&self, range: Range<usize>) -> Location {
860        self.core.location_range(range)
861    }
862
863    /// Performs alias substitution right before the current position.
864    ///
865    /// This function must be called just after a [word](WordLexer::word) has been parsed that
866    /// matches the name of the argument alias. No check is done in this function that there is
867    /// a matching word before the current position. The characters starting from the `begin`
868    /// index up to the current position are silently replaced with the alias value.
869    ///
870    /// The resulting part of code will be characters with a [`Source::Alias`] origin.
871    ///
872    /// After the substitution, the position will be set before the replaced string.
873    ///
874    /// # Panics
875    ///
876    /// If the replaced part is empty, i.e., `begin >= self.index()`.
877    pub fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
878        self.core.substitute_alias(begin, alias)
879    }
880
881    /// Tests if the given index is after the replacement string of alias
882    /// substitution that ends with a blank.
883    ///
884    /// # Panics
885    ///
886    /// If `index` is larger than the currently read index.
887    pub fn is_after_blank_ending_alias(&self, index: usize) -> bool {
888        self.core.is_after_blank_ending_alias(index)
889    }
890
891    /// Parses an optional compound list that is the content of a command
892    /// substitution.
893    ///
894    /// This function consumes characters until a token that cannot be the
895    /// beginning of an and-or list is found and returns the string that was
896    /// consumed.
897    pub async fn inner_program(&mut self) -> Result<String> {
898        let begin = self.index();
899
900        let mut parser = super::super::Parser::new(self);
901        parser.maybe_compound_list().await?;
902
903        let end = parser.peek_token().await?.index;
904        self.rewind(end);
905
906        Ok(self.core.source_string(begin..end))
907    }
908
909    /// Like [`Lexer::inner_program`], but returns the future in a pinning box.
910    pub fn inner_program_boxed(&mut self) -> Pin<Box<dyn Future<Output = Result<String>> + '_>> {
911        Box::pin(self.inner_program())
912    }
913}
914
915/// Reference to [`Lexer`] with line continuation disabled
916///
917/// This struct implements the RAII pattern for temporarily disabling line
918/// continuation. When you disable the line continuation of a lexer, you get an
919/// instance of `PlainLexer`. You can access the original lexer via the
920/// `PlainLexer` until you drop it, when the line continuation is automatically
921/// re-enabled.
922#[derive(Debug)]
923#[must_use = "You must retain the PlainLexer to keep line continuation disabled"]
924pub struct PlainLexer<'a, 'b> {
925    lexer: &'a mut Lexer<'b>,
926}
927
928impl<'b> Deref for PlainLexer<'_, 'b> {
929    type Target = Lexer<'b>;
930    fn deref(&self) -> &Lexer<'b> {
931        self.lexer
932    }
933}
934
935impl<'b> DerefMut for PlainLexer<'_, 'b> {
936    fn deref_mut(&mut self) -> &mut Lexer<'b> {
937        self.lexer
938    }
939}
940
941impl Drop for PlainLexer<'_, '_> {
942    fn drop(&mut self) {
943        self.lexer.line_continuation_enabled = true;
944    }
945}
946
/// Kind of construct a [word](crate::syntax::Word) is being parsed in
///
/// How the word of a [switch](crate::syntax::Switch) is parsed depends on
/// whether the parameter expansion that contains the switch belongs to a text
/// or to a word. The lexer consults a `WordContext` value to choose between
/// the two behaviors.
///
/// The context-dependent parser functions are implemented in [`WordLexer`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WordContext {
    /// The text unit under parsing belongs to a [text](crate::syntax::Text).
    Text,
    /// The text unit under parsing belongs to a [word](crate::syntax::Word).
    Word,
}
962
963/// Lexer with additional information for parsing [texts](crate::syntax::Text)
964/// and [words](crate::syntax::Word)
965#[derive(Debug)]
966pub struct WordLexer<'a, 'b> {
967    pub lexer: &'a mut Lexer<'b>,
968    pub context: WordContext,
969}
970
971impl<'b> Deref for WordLexer<'_, 'b> {
972    type Target = Lexer<'b>;
973    fn deref(&self) -> &Lexer<'b> {
974        self.lexer
975    }
976}
977
978impl<'b> DerefMut for WordLexer<'_, 'b> {
979    fn deref_mut(&mut self) -> &mut Lexer<'b> {
980        self.lexer
981    }
982}
983
984#[cfg(test)]
985mod tests {
986    use super::*;
987    use crate::input::Input;
988    use crate::parser::error::ErrorCause;
989    use crate::parser::error::SyntaxError;
990    use assert_matches::assert_matches;
991    use futures_util::FutureExt as _;
992
993    #[test]
994    fn lexer_core_peek_char_empty_source() {
995        let input = Memory::new("");
996        let line = NonZeroU64::new(32).unwrap();
997        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
998        let result = lexer.peek_char().now_or_never().unwrap();
999        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
1000            assert_eq!(*location.code.value.borrow(), "");
1001            assert_eq!(location.code.start_line_number, line);
1002            assert_eq!(*location.code.source, Source::Unknown);
1003            assert_eq!(location.range, 0..0);
1004        });
1005    }
1006
1007    #[test]
1008    fn lexer_core_peek_char_io_error() {
1009        #[derive(Debug)]
1010        struct Failing;
1011        impl fmt::Display for Failing {
1012            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1013                write!(f, "Failing")
1014            }
1015        }
1016        impl std::error::Error for Failing {}
1017        impl Input for Failing {
1018            async fn next_line(&mut self, _: &Context) -> crate::input::Result {
1019                Err(std::io::Error::other(Failing))
1020            }
1021        }
1022        let line = NonZeroU64::new(42).unwrap();
1023        let mut lexer = LexerCore::new(Box::new(Failing), line, Rc::new(Source::Unknown));
1024
1025        let e = lexer.peek_char().now_or_never().unwrap().unwrap_err();
1026        assert_matches!(e.cause, ErrorCause::Io(io_error) => {
1027            assert_eq!(io_error.kind(), std::io::ErrorKind::Other);
1028        });
1029        assert_eq!(*e.location.code.value.borrow(), "");
1030        assert_eq!(e.location.code.start_line_number, line);
1031        assert_eq!(*e.location.code.source, Source::Unknown);
1032        assert_eq!(e.location.range, 0..0);
1033    }
1034
1035    #[test]
1036    fn lexer_core_peek_char_context_is_first_line() {
1037        // In this test case, this mock input function will be called twice.
1038        struct InputMock {
1039            first: bool,
1040        }
1041        impl Input for InputMock {
1042            async fn next_line(&mut self, context: &Context) -> crate::input::Result {
1043                assert_eq!(context.is_first_line(), self.first);
1044                self.first = false;
1045                Ok("\n".to_owned())
1046            }
1047        }
1048
1049        let input = InputMock { first: true };
1050        let line = NonZeroU64::new(42).unwrap();
1051        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1052
1053        let peek = lexer.peek_char().now_or_never().unwrap();
1054        assert_matches!(peek, Ok(PeekChar::Char(_)));
1055        lexer.consume_char();
1056
1057        let peek = lexer.peek_char().now_or_never().unwrap();
1058        assert_matches!(peek, Ok(PeekChar::Char(_)));
1059        lexer.consume_char();
1060    }
1061
1062    #[test]
1063    fn lexer_core_consume_char_success() {
1064        let input = Memory::new("a\nb");
1065        let line = NonZeroU64::new(1).unwrap();
1066        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1067
1068        let result = lexer.peek_char().now_or_never().unwrap();
1069        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1070            assert_eq!(c.value, 'a');
1071            assert_eq!(*c.location.code.value.borrow(), "a\n");
1072            assert_eq!(c.location.code.start_line_number, line);
1073            assert_eq!(*c.location.code.source, Source::Unknown);
1074            assert_eq!(c.location.range, 0..1);
1075        });
1076        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1077            assert_eq!(c.value, 'a');
1078            assert_eq!(*c.location.code.value.borrow(), "a\n");
1079            assert_eq!(c.location.code.start_line_number, line);
1080            assert_eq!(*c.location.code.source, Source::Unknown);
1081            assert_eq!(c.location.range, 0..1);
1082        });
1083        lexer.consume_char();
1084
1085        let result = lexer.peek_char().now_or_never().unwrap();
1086        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1087            assert_eq!(c.value, '\n');
1088            assert_eq!(*c.location.code.value.borrow(), "a\n");
1089            assert_eq!(c.location.code.start_line_number, line);
1090            assert_eq!(*c.location.code.source, Source::Unknown);
1091            assert_eq!(c.location.range, 1..2);
1092        });
1093        lexer.consume_char();
1094
1095        let result = lexer.peek_char().now_or_never().unwrap();
1096        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1097            assert_eq!(c.value, 'b');
1098            assert_eq!(*c.location.code.value.borrow(), "a\nb");
1099            assert_eq!(c.location.code.start_line_number.get(), 1);
1100            assert_eq!(*c.location.code.source, Source::Unknown);
1101            assert_eq!(c.location.range, 2..3);
1102        });
1103        lexer.consume_char();
1104
1105        let result = lexer.peek_char().now_or_never().unwrap();
1106        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
1107            assert_eq!(*location.code.value.borrow(), "a\nb");
1108            assert_eq!(location.code.start_line_number.get(), 1);
1109            assert_eq!(*location.code.source, Source::Unknown);
1110            assert_eq!(location.range, 3..3);
1111        });
1112    }
1113
1114    #[test]
1115    #[should_panic(expected = "A character must have been peeked before being consumed: index=0")]
1116    fn lexer_core_consume_char_panic() {
1117        let input = Memory::new("a");
1118        let line = NonZeroU64::new(1).unwrap();
1119        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1120        lexer.consume_char();
1121    }
1122
1123    #[test]
1124    fn lexer_core_peek_char_at() {
1125        let input = Memory::new("a\nb");
1126        let line = NonZeroU64::new(1).unwrap();
1127        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1128
1129        let c0 = assert_matches!(
1130            lexer.peek_char().now_or_never().unwrap(),
1131            Ok(PeekChar::Char(c)) => c.clone()
1132        );
1133        lexer.consume_char();
1134
1135        let c1 = assert_matches!(
1136            lexer.peek_char().now_or_never().unwrap(),
1137            Ok(PeekChar::Char(c)) => c.clone()
1138        );
1139        lexer.consume_char();
1140
1141        let c2 = assert_matches!(
1142            lexer.peek_char().now_or_never().unwrap(),
1143            Ok(PeekChar::Char(c)) => c.clone()
1144        );
1145
1146        assert_eq!(lexer.peek_char_at(0), &c0);
1147        assert_eq!(lexer.peek_char_at(1), &c1);
1148        assert_eq!(lexer.peek_char_at(2), &c2);
1149    }
1150
1151    #[test]
1152    fn lexer_core_index() {
1153        let input = Memory::new("a\nb");
1154        let line = NonZeroU64::new(1).unwrap();
1155        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1156
1157        assert_eq!(lexer.index(), 0);
1158        lexer.peek_char().now_or_never().unwrap().unwrap();
1159        assert_eq!(lexer.index(), 0);
1160        lexer.consume_char();
1161
1162        assert_eq!(lexer.index(), 1);
1163        lexer.peek_char().now_or_never().unwrap().unwrap();
1164        lexer.consume_char();
1165
1166        assert_eq!(lexer.index(), 2);
1167        lexer.peek_char().now_or_never().unwrap().unwrap();
1168        lexer.consume_char();
1169
1170        assert_eq!(lexer.index(), 3);
1171    }
1172
1173    #[test]
1174    fn lexer_core_rewind_success() {
1175        let input = Memory::new("abc");
1176        let line = NonZeroU64::new(1).unwrap();
1177        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1178        lexer.rewind(0);
1179        assert_eq!(lexer.index(), 0);
1180
1181        let _ = lexer.peek_char().now_or_never().unwrap();
1182        lexer.consume_char();
1183        let _ = lexer.peek_char().now_or_never().unwrap();
1184        lexer.consume_char();
1185        lexer.rewind(0);
1186
1187        let result = lexer.peek_char().now_or_never().unwrap();
1188        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1189            assert_eq!(c.value, 'a');
1190            assert_eq!(*c.location.code.value.borrow(), "abc");
1191            assert_eq!(c.location.code.start_line_number, line);
1192            assert_eq!(*c.location.code.source, Source::Unknown);
1193            assert_eq!(c.location.range, 0..1);
1194        });
1195    }
1196
1197    #[test]
1198    #[should_panic(expected = "The new index 1 must not be larger than the current index 0")]
1199    fn lexer_core_rewind_invalid_index() {
1200        let input = Memory::new("abc");
1201        let line = NonZeroU64::new(1).unwrap();
1202        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1203        lexer.rewind(1);
1204    }
1205
1206    #[test]
1207    fn lexer_core_source_string() {
1208        let input = Memory::new("ab\ncd");
1209        let line = NonZeroU64::new(1).unwrap();
1210        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1211        for _ in 0..4 {
1212            let _ = lexer.peek_char().now_or_never().unwrap();
1213            lexer.consume_char();
1214        }
1215
1216        let result = lexer.source_string(1..4);
1217        assert_eq!(result, "b\nc");
1218    }
1219
1220    #[test]
1221    #[should_panic(expected = "begin index 0 should be less than end index 0")]
1222    fn lexer_core_substitute_alias_with_invalid_index() {
1223        let input = Memory::new("a b");
1224        let line = NonZeroU64::new(1).unwrap();
1225        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1226        let alias = Rc::new(Alias {
1227            name: "a".to_string(),
1228            replacement: "".to_string(),
1229            global: false,
1230            origin: Location::dummy("dummy"),
1231        });
1232        lexer.substitute_alias(0, &alias);
1233    }
1234
1235    #[test]
1236    fn lexer_core_substitute_alias_single_line_replacement() {
1237        let input = Memory::new("a b");
1238        let line = NonZeroU64::new(1).unwrap();
1239        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1240        let alias = Rc::new(Alias {
1241            name: "a".to_string(),
1242            replacement: "lex".to_string(),
1243            global: false,
1244            origin: Location::dummy("dummy"),
1245        });
1246
1247        let _ = lexer.peek_char().now_or_never().unwrap();
1248        lexer.consume_char();
1249
1250        lexer.substitute_alias(0, &alias);
1251
1252        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1253            assert_eq!(c.value, 'l');
1254            assert_eq!(*c.location.code.value.borrow(), "lex");
1255            assert_eq!(c.location.code.start_line_number.get(), 1);
1256            assert_matches!(&*c.location.code.source,
1257                Source::Alias { original, alias: alias2 } => {
1258                assert_eq!(*original.code.value.borrow(), "a b");
1259                assert_eq!(original.code.start_line_number, line);
1260                assert_eq!(*original.code.source, Source::Unknown);
1261                assert_eq!(original.range, 0..1);
1262                assert_eq!(alias2, &alias);
1263            });
1264            assert_eq!(c.location.range, 0..1);
1265        });
1266        lexer.consume_char();
1267
1268        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1269            assert_eq!(c.value, 'e');
1270            assert_eq!(*c.location.code.value.borrow(), "lex");
1271            assert_eq!(c.location.code.start_line_number, line);
1272            assert_matches!(&*c.location.code.source,
1273                Source::Alias { original, alias: alias2 } => {
1274                assert_eq!(*original.code.value.borrow(), "a b");
1275                assert_eq!(original.code.start_line_number, line);
1276                assert_eq!(*original.code.source, Source::Unknown);
1277                assert_eq!(original.range, 0..1);
1278                assert_eq!(alias2, &alias);
1279            });
1280            assert_eq!(c.location.range, 1..2);
1281        });
1282        lexer.consume_char();
1283
1284        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1285            assert_eq!(c.value, 'x');
1286            assert_eq!(*c.location.code.value.borrow(), "lex");
1287            assert_eq!(c.location.code.start_line_number, line);
1288            assert_matches!(&*c.location.code.source,
1289                Source::Alias { original, alias: alias2 } => {
1290                assert_eq!(*original.code.value.borrow(), "a b");
1291                assert_eq!(original.code.start_line_number, line);
1292                assert_eq!(*original.code.source, Source::Unknown);
1293                assert_eq!(original.range, 0..1);
1294                assert_eq!(alias2, &alias);
1295            });
1296            assert_eq!(c.location.range, 2..3);
1297        });
1298        lexer.consume_char();
1299
1300        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1301            assert_eq!(c.value, ' ');
1302            assert_eq!(*c.location.code.value.borrow(), "a b");
1303            assert_eq!(c.location.code.start_line_number, line);
1304            assert_eq!(*c.location.code.source, Source::Unknown);
1305            assert_eq!(c.location.range, 1..2);
1306        });
1307        lexer.consume_char();
1308    }
1309
1310    #[test]
1311    fn lexer_core_substitute_alias_multi_line_replacement() {
1312        let input = Memory::new(" foo b");
1313        let line = NonZeroU64::new(1).unwrap();
1314        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1315        let alias = Rc::new(Alias {
1316            name: "foo".to_string(),
1317            replacement: "x\ny".to_string(),
1318            global: true,
1319            origin: Location::dummy("loc"),
1320        });
1321
1322        for _ in 0..4 {
1323            let _ = lexer.peek_char().now_or_never().unwrap();
1324            lexer.consume_char();
1325        }
1326
1327        lexer.substitute_alias(1, &alias);
1328
1329        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1330            assert_eq!(c.value, 'x');
1331            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1332            assert_eq!(c.location.code.start_line_number, line);
1333            assert_matches!(&*c.location.code.source,
1334                Source::Alias { original, alias: alias2 } => {
1335                assert_eq!(*original.code.value.borrow(), " foo b");
1336                assert_eq!(original.code.start_line_number, line);
1337                assert_eq!(*original.code.source, Source::Unknown);
1338                assert_eq!(original.range, 1..4);
1339                assert_eq!(alias2, &alias);
1340            });
1341            assert_eq!(c.location.range, 0..1);
1342        });
1343        lexer.consume_char();
1344
1345        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1346            assert_eq!(c.value, '\n');
1347            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1348            assert_eq!(c.location.code.start_line_number, line);
1349            assert_matches!(&*c.location.code.source,
1350                Source::Alias { original, alias: alias2 } => {
1351                assert_eq!(*original.code.value.borrow(), " foo b");
1352                assert_eq!(original.code.start_line_number, line);
1353                assert_eq!(*original.code.source, Source::Unknown);
1354                assert_eq!(original.range, 1..4);
1355                assert_eq!(alias2, &alias);
1356            });
1357            assert_eq!(c.location.range, 1..2);
1358        });
1359        lexer.consume_char();
1360
1361        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1362            assert_eq!(c.value, 'y');
1363            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1364            assert_eq!(c.location.code.start_line_number, line);
1365            assert_matches!(&*c.location.code.source, Source::Alias { original, alias: alias2 } => {
1366                assert_eq!(*original.code.value.borrow(), " foo b");
1367                assert_eq!(original.code.start_line_number, line);
1368                assert_eq!(*original.code.source, Source::Unknown);
1369                assert_eq!(original.range, 1..4);
1370                assert_eq!(alias2, &alias);
1371            });
1372            assert_eq!(c.location.range, 2..3);
1373        });
1374        lexer.consume_char();
1375
1376        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1377            assert_eq!(c.value, ' ');
1378            assert_eq!(*c.location.code.value.borrow(), " foo b");
1379            assert_eq!(c.location.code.start_line_number, line);
1380            assert_eq!(*c.location.code.source, Source::Unknown);
1381            assert_eq!(c.location.range, 4..5);
1382        });
1383        lexer.consume_char();
1384    }
1385
1386    #[test]
1387    fn lexer_core_substitute_alias_empty_replacement() {
1388        let input = Memory::new("x ");
1389        let line = NonZeroU64::new(1).unwrap();
1390        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1391        let alias = Rc::new(Alias {
1392            name: "x".to_string(),
1393            replacement: "".to_string(),
1394            global: false,
1395            origin: Location::dummy("dummy"),
1396        });
1397
1398        let _ = lexer.peek_char().now_or_never().unwrap();
1399        lexer.consume_char();
1400
1401        lexer.substitute_alias(0, &alias);
1402
1403        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1404            assert_eq!(c.value, ' ');
1405            assert_eq!(*c.location.code.value.borrow(), "x ");
1406            assert_eq!(c.location.code.start_line_number, line);
1407            assert_eq!(*c.location.code.source, Source::Unknown);
1408            assert_eq!(c.location.range, 1..2);
1409        });
1410    }
1411
1412    #[test]
1413    fn lexer_core_peek_char_after_alias_substitution() {
1414        let input = Memory::new("a\nb");
1415        let line = NonZeroU64::new(1).unwrap();
1416        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1417
1418        lexer.peek_char().now_or_never().unwrap().unwrap();
1419        lexer.consume_char();
1420
1421        let alias = Rc::new(Alias {
1422            name: "a".to_string(),
1423            replacement: "".to_string(),
1424            global: false,
1425            origin: Location::dummy("dummy"),
1426        });
1427        lexer.substitute_alias(0, &alias);
1428
1429        let result = lexer.peek_char().now_or_never().unwrap();
1430        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1431            assert_eq!(c.value, '\n');
1432            assert_eq!(*c.location.code.value.borrow(), "a\n");
1433            assert_eq!(c.location.code.start_line_number, line);
1434            assert_eq!(*c.location.code.source, Source::Unknown);
1435            assert_eq!(c.location.range, 1..2);
1436        });
1437        lexer.consume_char();
1438
1439        let result = lexer.peek_char().now_or_never().unwrap();
1440        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1441            assert_eq!(c.value, 'b');
1442            assert_eq!(*c.location.code.value.borrow(), "a\nb");
1443            assert_eq!(c.location.code.start_line_number.get(), 1);
1444            assert_eq!(*c.location.code.source, Source::Unknown);
1445            assert_eq!(c.location.range, 2..3);
1446        });
1447        lexer.consume_char();
1448
1449        let result = lexer.peek_char().now_or_never().unwrap();
1450        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
1451            assert_eq!(*location.code.value.borrow(), "a\nb");
1452            assert_eq!(location.code.start_line_number.get(), 1);
1453            assert_eq!(*location.code.source, Source::Unknown);
1454            assert_eq!(location.range, 3..3);
1455        });
1456    }
1457
1458    #[test]
1459    fn lexer_core_is_after_blank_ending_alias_index_0() {
1460        let original = Location::dummy("original");
1461        let alias = Rc::new(Alias {
1462            name: "a".to_string(),
1463            replacement: " ".to_string(),
1464            global: false,
1465            origin: Location::dummy("origin"),
1466        });
1467        let source = Source::Alias { original, alias };
1468        let input = Memory::new("a");
1469        let line = NonZeroU64::new(1).unwrap();
1470        let lexer = LexerCore::new(Box::new(input), line, Rc::new(source));
1471        assert!(!lexer.is_after_blank_ending_alias(0));
1472    }
1473
1474    #[test]
1475    fn lexer_core_is_after_blank_ending_alias_not_blank_ending() {
1476        let input = Memory::new("a x");
1477        let line = NonZeroU64::new(1).unwrap();
1478        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1479        let alias = Rc::new(Alias {
1480            name: "a".to_string(),
1481            replacement: " b".to_string(),
1482            global: false,
1483            origin: Location::dummy("dummy"),
1484        });
1485
1486        lexer.peek_char().now_or_never().unwrap().unwrap();
1487        lexer.consume_char();
1488
1489        lexer.substitute_alias(0, &alias);
1490
1491        assert!(!lexer.is_after_blank_ending_alias(0));
1492        assert!(!lexer.is_after_blank_ending_alias(1));
1493        assert!(!lexer.is_after_blank_ending_alias(2));
1494        assert!(!lexer.is_after_blank_ending_alias(3));
1495    }
1496
1497    #[test]
1498    fn lexer_core_is_after_blank_ending_alias_blank_ending() {
1499        let input = Memory::new("a x");
1500        let line = NonZeroU64::new(1).unwrap();
1501        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1502        let alias = Rc::new(Alias {
1503            name: "a".to_string(),
1504            replacement: " b ".to_string(),
1505            global: false,
1506            origin: Location::dummy("dummy"),
1507        });
1508
1509        lexer.peek_char().now_or_never().unwrap().unwrap();
1510        lexer.consume_char();
1511
1512        lexer.substitute_alias(0, &alias);
1513
1514        assert!(!lexer.is_after_blank_ending_alias(0));
1515        assert!(!lexer.is_after_blank_ending_alias(1));
1516        assert!(!lexer.is_after_blank_ending_alias(2));
1517        assert!(lexer.is_after_blank_ending_alias(3));
1518        assert!(lexer.is_after_blank_ending_alias(4));
1519    }
1520
1521    #[test]
1522    fn lexer_core_is_after_blank_ending_alias_after_line_continuation() {
1523        let input = Memory::new("a\\\n x");
1524        let line = NonZeroU64::new(1).unwrap();
1525        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1526        let alias = Rc::new(Alias {
1527            name: "a".to_string(),
1528            replacement: " b ".to_string(),
1529            global: false,
1530            origin: Location::dummy("dummy"),
1531        });
1532
1533        lexer.peek_char().now_or_never().unwrap().unwrap();
1534        lexer.consume_char();
1535        lexer.substitute_alias(0, &alias);
1536
1537        while let Ok(PeekChar::Char(_)) = lexer.peek_char().now_or_never().unwrap() {
1538            lexer.consume_char();
1539        }
1540        lexer.mark_line_continuation(3..5);
1541
1542        assert!(!lexer.is_after_blank_ending_alias(0));
1543        assert!(!lexer.is_after_blank_ending_alias(1));
1544        assert!(!lexer.is_after_blank_ending_alias(2));
1545        assert!(lexer.is_after_blank_ending_alias(5));
1546        assert!(lexer.is_after_blank_ending_alias(6));
1547    }
1548
1549    #[test]
1550    fn lexer_with_empty_source() {
1551        let mut lexer = Lexer::with_code("");
1552        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
1553    }
1554
1555    #[test]
1556    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_backslash() {
1557        let mut lexer = Lexer::with_code("\\\n\n\\");
1558        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\n')));
1559        assert_eq!(lexer.index(), 2);
1560    }
1561
1562    #[test]
1563    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_newline() {
1564        let mut lexer = Lexer::with_code("\\\n\\\n\\\n\\\\");
1565        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1566        assert_eq!(lexer.index(), 6);
1567    }
1568
1569    #[test]
1570    fn lexer_peek_char_with_line_continuation_disabled() {
1571        let mut lexer = Lexer::with_code("\\\n\\\n\\\\");
1572        let mut lexer = lexer.disable_line_continuation();
1573        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1574        assert_eq!(lexer.index(), 0);
1575    }
1576
1577    #[test]
1578    fn lexer_flush() {
1579        let mut lexer = Lexer::with_code(" \n\n\t\n");
1580        let location_1 = lexer.location().now_or_never().unwrap().unwrap().clone();
1581        assert_eq!(*location_1.code.value.borrow(), " \n");
1582
1583        lexer.consume_char();
1584        lexer.peek_char().now_or_never().unwrap().unwrap();
1585        lexer.consume_char();
1586        lexer.peek_char().now_or_never().unwrap().unwrap();
1587        lexer.consume_char();
1588        lexer.flush();
1589        lexer.peek_char().now_or_never().unwrap().unwrap();
1590        lexer.consume_char();
1591
1592        let location_2 = lexer.location().now_or_never().unwrap().unwrap().clone();
1593
1594        assert_eq!(*location_1.code.value.borrow(), " \n\n");
1595        assert_eq!(location_1.code.start_line_number.get(), 1);
1596        assert_eq!(*location_1.code.source, Source::Unknown);
1597        assert_eq!(location_1.range, 0..1);
1598        assert_eq!(*location_2.code.value.borrow(), "\t\n");
1599        assert_eq!(location_2.code.start_line_number.get(), 3);
1600        assert_eq!(*location_2.code.source, Source::Unknown);
1601        assert_eq!(location_2.range, 1..2);
1602    }
1603
1604    #[test]
1605    fn lexer_consume_char_if() {
1606        let mut lexer = Lexer::with_code("word\n");
1607
1608        let mut called = 0;
1609        let c = lexer
1610            .consume_char_if(|c| {
1611                assert_eq!(c, 'w');
1612                called += 1;
1613                true
1614            })
1615            .now_or_never()
1616            .unwrap()
1617            .unwrap()
1618            .unwrap();
1619        assert_eq!(called, 1);
1620        assert_eq!(c.value, 'w');
1621        assert_eq!(*c.location.code.value.borrow(), "word\n");
1622        assert_eq!(c.location.code.start_line_number.get(), 1);
1623        assert_eq!(*c.location.code.source, Source::Unknown);
1624        assert_eq!(c.location.range, 0..1);
1625
1626        let mut called = 0;
1627        let r = lexer
1628            .consume_char_if(|c| {
1629                assert_eq!(c, 'o');
1630                called += 1;
1631                false
1632            })
1633            .now_or_never()
1634            .unwrap();
1635        assert_eq!(called, 1);
1636        assert_eq!(r, Ok(None));
1637
1638        let mut called = 0;
1639        let r = lexer
1640            .consume_char_if(|c| {
1641                assert_eq!(c, 'o');
1642                called += 1;
1643                false
1644            })
1645            .now_or_never()
1646            .unwrap();
1647        assert_eq!(called, 1);
1648        assert_eq!(r, Ok(None));
1649
1650        let mut called = 0;
1651        let c = lexer
1652            .consume_char_if(|c| {
1653                assert_eq!(c, 'o');
1654                called += 1;
1655                true
1656            })
1657            .now_or_never()
1658            .unwrap()
1659            .unwrap()
1660            .unwrap();
1661        assert_eq!(called, 1);
1662        assert_eq!(c.value, 'o');
1663        assert_eq!(*c.location.code.value.borrow(), "word\n");
1664        assert_eq!(c.location.code.start_line_number.get(), 1);
1665        assert_eq!(*c.location.code.source, Source::Unknown);
1666        assert_eq!(c.location.range, 1..2);
1667
1668        lexer
1669            .consume_char_if(|c| {
1670                assert_eq!(c, 'r');
1671                true
1672            })
1673            .now_or_never()
1674            .unwrap()
1675            .unwrap()
1676            .unwrap();
1677        lexer
1678            .consume_char_if(|c| {
1679                assert_eq!(c, 'd');
1680                true
1681            })
1682            .now_or_never()
1683            .unwrap()
1684            .unwrap()
1685            .unwrap();
1686        lexer
1687            .consume_char_if(|c| {
1688                assert_eq!(c, '\n');
1689                true
1690            })
1691            .now_or_never()
1692            .unwrap()
1693            .unwrap()
1694            .unwrap();
1695
1696        // end of input
1697        let r = lexer
1698            .consume_char_if(|c| {
1699                unreachable!("unexpected call to the decider function: argument={}", c)
1700            })
1701            .now_or_never()
1702            .unwrap();
1703        assert_eq!(r, Ok(None));
1704    }
1705
1706    #[test]
1707    fn lexer_location_range_with_empty_range() {
1708        let mut lexer = Lexer::with_code("");
1709        lexer.peek_char().now_or_never().unwrap().unwrap();
1710        let location = lexer.location_range(0..0);
1711        assert_eq!(*location.code.value.borrow(), "");
1712        assert_eq!(location.code.start_line_number.get(), 1);
1713        assert_eq!(*location.code.source, Source::Unknown);
1714        assert_eq!(location.range, 0..0);
1715    }
1716
1717    #[test]
1718    fn lexer_location_range_with_nonempty_range() {
1719        let mut lexer = Lexer::from_memory("cat foo", Source::Stdin);
1720        for _ in 0..4 {
1721            lexer.peek_char().now_or_never().unwrap().unwrap();
1722            lexer.consume_char();
1723        }
1724        lexer.peek_char().now_or_never().unwrap().unwrap();
1725
1726        let location = lexer.location_range(1..4);
1727        assert_eq!(*location.code.value.borrow(), "cat foo");
1728        assert_eq!(location.code.start_line_number.get(), 1);
1729        assert_eq!(*location.code.source, Source::Stdin);
1730        assert_eq!(location.range, 1..4);
1731    }
1732
1733    #[test]
1734    fn lexer_location_range_with_range_starting_at_end() {
1735        let mut lexer = Lexer::from_memory("cat", Source::Stdin);
1736        for _ in 0..3 {
1737            lexer.peek_char().now_or_never().unwrap().unwrap();
1738            lexer.consume_char();
1739        }
1740        lexer.peek_char().now_or_never().unwrap().unwrap();
1741
1742        let location = lexer.location_range(3..3);
1743        assert_eq!(*location.code.value.borrow(), "cat");
1744        assert_eq!(location.code.start_line_number.get(), 1);
1745        assert_eq!(*location.code.source, Source::Stdin);
1746        assert_eq!(location.range, 3..3);
1747    }
1748
1749    #[test]
1750    #[should_panic]
1751    fn lexer_location_range_with_unconsumed_code() {
1752        let lexer = Lexer::with_code("echo ok");
1753        let _ = lexer.location_range(0..0);
1754    }
1755
1756    #[test]
1757    #[should_panic(expected = "The index 1 must not be larger than the current index 0")]
1758    fn lexer_location_range_with_range_out_of_bounds() {
1759        let lexer = Lexer::with_code("");
1760        let _ = lexer.location_range(1..2);
1761    }
1762
1763    #[test]
1764    fn lexer_location_range_with_alias_substitution() {
1765        let mut lexer = Lexer::with_code(" a;");
1766        let alias_def = Rc::new(Alias {
1767            name: "a".to_string(),
1768            replacement: "abc".to_string(),
1769            global: false,
1770            origin: Location::dummy("dummy"),
1771        });
1772        for _ in 0..2 {
1773            lexer.peek_char().now_or_never().unwrap().unwrap();
1774            lexer.consume_char();
1775        }
1776        lexer.substitute_alias(1, &alias_def);
1777        for _ in 1..5 {
1778            lexer.peek_char().now_or_never().unwrap().unwrap();
1779            lexer.consume_char();
1780        }
1781
1782        let location = lexer.location_range(2..5);
1783        assert_eq!(*location.code.value.borrow(), "abc");
1784        assert_eq!(location.code.start_line_number.get(), 1);
1785        assert_matches!(&*location.code.source, Source::Alias { original, alias } => {
1786            assert_eq!(*original.code.value.borrow(), " a;");
1787            assert_eq!(original.code.start_line_number.get(), 1);
1788            assert_eq!(*original.code.source, Source::Unknown);
1789            assert_eq!(original.range, 1..2);
1790            assert_eq!(alias, &alias_def);
1791        });
1792        assert_eq!(location.range, 1..3);
1793    }
1794
1795    #[test]
1796    fn lexer_inner_program_success() {
1797        let mut lexer = Lexer::with_code("x y )");
1798        let source = lexer.inner_program().now_or_never().unwrap().unwrap();
1799        assert_eq!(source, "x y ");
1800    }
1801
1802    #[test]
1803    fn lexer_inner_program_failure() {
1804        let mut lexer = Lexer::with_code("<< )");
1805        let e = lexer.inner_program().now_or_never().unwrap().unwrap_err();
1806        assert_eq!(
1807            e.cause,
1808            ErrorCause::Syntax(SyntaxError::MissingHereDocDelimiter)
1809        );
1810        assert_eq!(*e.location.code.value.borrow(), "<< )");
1811        assert_eq!(e.location.code.start_line_number.get(), 1);
1812        assert_eq!(*e.location.code.source, Source::Unknown);
1813        assert_eq!(e.location.range, 3..4);
1814    }
1815}