Skip to main content

yash_syntax/parser/lex/
core.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Fundamental building blocks for the lexical analyzer
18
19use super::keyword::Keyword;
20use super::op::Operator;
21use crate::alias::Alias;
22use crate::input::Context;
23use crate::input::InputObject;
24use crate::input::Memory;
25use crate::parser::core::Result;
26use crate::parser::error::Error;
27use crate::source::Code;
28use crate::source::Location;
29use crate::source::Source;
30use crate::source::SourceChar;
31use crate::source::source_chars;
32use crate::syntax::Word;
33use std::cell::RefCell;
34use std::fmt;
35use std::num::NonZeroU64;
36use std::ops::Deref;
37use std::ops::DerefMut;
38use std::ops::Range;
39use std::pin::Pin;
40use std::rc::Rc;
41
/// Tests whether a character counts as a blank.
///
/// Every character that [`char::is_whitespace`] accepts is a blank, except
/// the newline (`'\n'`), which is never a blank.
pub fn is_blank(c: char) -> bool {
    // TODO locale
    match c {
        '\n' => false,
        other => other.is_whitespace(),
    }
}
47
48/// Result of [`LexerCore::peek_char`]
49#[derive(Clone, Copy, Debug, Eq, PartialEq)]
50enum PeekChar<'a> {
51    Char(&'a SourceChar),
52    EndOfInput(&'a Location),
53}
54
55impl<'a> PeekChar<'a> {
56    /// Returns the location that was peeked.
57    #[must_use]
58    fn location<'b>(self: &'b PeekChar<'a>) -> &'a Location {
59        match self {
60            PeekChar::Char(c) => &c.location,
61            PeekChar::EndOfInput(l) => l,
62        }
63    }
64}
65
66/// Token identifier, or classification of tokens
67///
68/// This enum classifies a token as defined in POSIX XCU 2.10.1 Shell Grammar Lexical
69/// Conventions, but does not exactly reflect further distinction defined in
70/// POSIX XCU 2.10.2 Shell Grammar Rules.
71///
72/// For convenience, the special token identifier `EndOfInput` is included.
73#[derive(Clone, Copy, Debug, Eq, PartialEq)]
74pub enum TokenId {
75    /// `TOKEN`
76    ///
77    /// If this token _looks like_ a reserved word, this variant has some
78    /// associated `Keyword` that describes the word. However, it depends on
79    /// context whether a token is actually regarded as a reserved word or
80    /// just as an ordinary word. You must ensure that you're in an
81    /// applicable context when examining the `Keyword` value.
82    Token(Option<Keyword>),
83    /// Operator
84    Operator(Operator),
85    /// `IO_NUMBER`
86    IoNumber,
87    /// `IO_LOCATION`
88    IoLocation,
89    /// Imaginary token identifier for the end of input
90    EndOfInput,
91}
92
93impl TokenId {
94    /// Determines if this token can be a delimiter of a clause.
95    ///
96    /// This function delegates to [`Keyword::is_clause_delimiter`] if the token
97    /// ID is a (possible) keyword, or to [`Operator::is_clause_delimiter`] if
98    /// it is an operator. For `EndOfInput` the function returns true.
99    /// Otherwise, the result is false.
100    pub fn is_clause_delimiter(self) -> bool {
101        use TokenId::*;
102        match self {
103            Token(Some(keyword)) => keyword.is_clause_delimiter(),
104            Token(None) => false,
105            Operator(operator) => operator.is_clause_delimiter(),
106            IoNumber => false,
107            IoLocation => false,
108            EndOfInput => true,
109        }
110    }
111}
112
113/// Result of lexical analysis produced by the [`Lexer`]
114#[derive(Debug)]
115pub struct Token {
116    /// Content of the token
117    ///
118    /// The word value contains at least one [unit](crate::syntax::WordUnit),
119    /// regardless of whether the token is an operator. The only exception is
120    /// when `id` is `EndOfInput`, in which case the word is empty.
121    pub word: Word,
122    /// Token identifier
123    pub id: TokenId,
124    /// Position of the first character of the word
125    pub index: usize,
126}
127
128impl fmt::Display for Token {
129    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130        write!(f, "{}", self.word)
131    }
132}
133
/// State of the input function in a lexer
///
/// `LexerCore::peek_char` consults this state whenever the buffer holds no
/// unconsumed character.
#[derive(Clone, Debug)]
enum InputState {
    /// More input may be requested from the input object.
    Alive,
    /// The input object returned an empty line. The location is the one
    /// reported for the end of input.
    EndOfInput(Location),
    /// Reading from the input object failed. The stored error is cloned and
    /// returned by `LexerCore::peek_char`.
    Error(Error),
}
141
/// Source character with additional attribute
#[derive(Clone, Debug, Eq, PartialEq)]
struct SourceCharEx {
    /// The character and its location
    value: SourceChar,
    /// Whether the character is part of a line continuation
    ///
    /// The flag starts out false and is set by
    /// `LexerCore::mark_line_continuation`.
    is_line_continuation: bool,
}
148
149fn ex<I: IntoIterator<Item = SourceChar>>(i: I) -> impl Iterator<Item = SourceCharEx> {
150    i.into_iter().map(|sc| SourceCharEx {
151        value: sc,
152        is_line_continuation: false,
153    })
154}
155
/// Core part of the lexical analyzer
///
/// The core owns the input object and a buffer of the characters read so
/// far, together with the index of the next character to be consumed.
struct LexerCore<'a> {
    // The `input` field could be a `&'a mut dyn InputObject + 'a`, but it is
    // `Box<dyn InputObject + 'a>` to allow the lexer to take ownership of the
    // input object. This is necessary for `Lexer::with_code` and similarly
    // constructed lexers.
    input: Box<dyn InputObject + 'a>,
    /// Whether more input can be read, or how reading ended
    state: InputState,
    /// Code object that accumulates the lines read from the input; locations
    /// produced by the core refer to it.
    raw_code: Rc<Code>,
    /// Characters read so far (possibly replaced by alias substitution)
    source: Vec<SourceCharEx>,
    /// Index into `source` of the next character to be consumed
    index: usize,
}
168
169impl<'a> LexerCore<'a> {
170    /// Creates a new lexer core that reads using the given input function.
171    #[must_use]
172    fn new(
173        input: Box<dyn InputObject + 'a>,
174        start_line_number: NonZeroU64,
175        source: Rc<Source>,
176    ) -> LexerCore<'a> {
177        LexerCore {
178            input,
179            raw_code: Rc::new(Code {
180                value: RefCell::new(String::new()),
181                start_line_number,
182                source,
183            }),
184            state: InputState::Alive,
185            source: Vec::new(),
186            index: 0,
187        }
188    }
189
190    /// Computes the start index of the location at the current position.
191    #[must_use]
192    fn next_index(&self) -> usize {
193        let Some(last) = self.source.last() else {
194            return 0;
195        };
196
197        let mut location = &last.value.location;
198        while let Source::Alias { original, .. } = &*location.code.source {
199            location = original;
200        }
201        location.range.end
202    }
203
/// Peeks the next character, reading the next line if necessary.
///
/// Returns [`PeekChar::Char`] for the character at the current index
/// without consuming it. When the buffer holds no unconsumed character,
/// lines are requested from the input object until a character becomes
/// available, the input yields an empty line (recorded as the end of
/// input), or reading fails.
///
/// # Errors
///
/// Returns a clone of the error recorded in the input state. A failed read
/// is recorded, so later calls return the same error without reading again
/// while the state stays unchanged.
async fn peek_char(&mut self) -> Result<PeekChar<'_>> {
    loop {
        // NOTE(review): `if let Some(sc) = self.source.get(self.index)` would
        // read more naturally here. Presumably it was abandoned because
        // returning that borrow from inside the loop is rejected by the
        // borrow checker -- confirm before changing.
        if self.index < self.source.len() {
            return Ok(PeekChar::Char(&self.source[self.index].value));
        }

        // Nothing is buffered: report a recorded end of input or error
        // instead of reading again.
        match self.state {
            InputState::Alive => (),
            InputState::EndOfInput(ref location) => return Ok(PeekChar::EndOfInput(location)),
            InputState::Error(ref error) => return Err(error.clone()),
        }

        // Read more input
        let index = self.next_index();
        match self.input.next_line(&self.input_context()).await {
            Ok(line) => {
                if line.is_empty() {
                    // End of input
                    self.state = InputState::EndOfInput(Location {
                        code: Rc::clone(&self.raw_code),
                        range: index..index,
                    });
                } else {
                    // Successful read
                    self.raw_code.value.borrow_mut().push_str(&line);
                    self.source
                        .extend(ex(source_chars(&line, &self.raw_code, index)));
                }
            }
            Err(io_error) => {
                // Remember the failure; the next loop iteration returns it.
                self.state = InputState::Error(Error {
                    cause: io_error.into(),
                    location: Location {
                        code: Rc::clone(&self.raw_code),
                        range: index..index,
                    },
                });
            }
        }
    }
}
248
249    /// Returns the input context for the next character.
250    fn input_context(&self) -> Context {
251        let mut context = Context::default();
252        context.set_is_first_line(self.raw_code.value.borrow().is_empty());
253        context
254    }
255
256    /// Consumes the next character.
257    ///
258    /// This function must be called after [`peek_char`](Lexer::peek_char) has successfully
259    /// returned the character. Consuming a character that has not yet been peeked would result
260    /// in a panic!
261    fn consume_char(&mut self) {
262        assert!(
263            self.index < self.source.len(),
264            "A character must have been peeked before being consumed: index={}",
265            self.index
266        );
267        self.index += 1;
268    }
269
270    /// Returns a reference to the character at the given index.
271    #[must_use]
272    fn peek_char_at(&self, index: usize) -> &SourceChar {
273        assert!(
274            index <= self.index,
275            "The index {} must not be larger than the current index {}",
276            index,
277            self.index
278        );
279        &self.source[index].value
280    }
281
/// Returns the current index, i.e., the position in the buffer of the next
/// character to be consumed.
#[must_use]
fn index(&self) -> usize {
    self.index
}
287
288    /// Rewinds the index to the given value.
289    fn rewind(&mut self, index: usize) {
290        assert!(
291            index <= self.index,
292            "The new index {} must not be larger than the current index {}",
293            index,
294            self.index
295        );
296        self.index = index;
297    }
298
299    /// Checks if there is any character that has been read from the input
300    /// source but not yet consumed.
301    #[must_use]
302    fn pending(&self) -> bool {
303        self.index < self.source.len()
304    }
305
306    /// Clears the internal buffer.
307    fn flush(&mut self) {
308        let start_line_number = self.raw_code.line_number(usize::MAX);
309        self.raw_code = Rc::new(Code {
310            value: RefCell::new(String::new()),
311            start_line_number,
312            source: self.raw_code.source.clone(),
313        });
314        self.source.clear();
315        self.index = 0;
316    }
317
318    /// Clears an end-of-input or error status so that the lexer can resume
319    /// parsing.
320    fn reset(&mut self) {
321        self.state = InputState::Alive;
322        self.flush();
323    }
324
325    /// Extracts a string from the source code range.
326    fn source_string(&self, range: Range<usize>) -> String {
327        self.source[range].iter().map(|c| c.value.value).collect()
328    }
329
/// Returns a location for a given range of the source code.
///
/// If the range starts right after the last buffered character and the end
/// of input has been recorded, the end-of-input location is returned.
/// Otherwise, the location starts at the first character of the range and
/// extends over the leading characters that belong to the same code as the
/// first one. An empty range yields an empty location at the start
/// position.
///
/// # Panics
///
/// If an index in the range is rejected by
/// [`peek_char_at`](Self::peek_char_at).
#[must_use]
fn location_range(&self, range: Range<usize>) -> Location {
    // The end of input has no buffered character, so it is handled first.
    if range.start == self.source.len()
        && let InputState::EndOfInput(ref location) = self.state
    {
        return location.clone();
    }
    let start = &self.peek_char_at(range.start).location;
    let code = start.code.clone();
    // Stop at the first character that comes from a different code, so that
    // the resulting range refers to a single code.
    let end = range
        .map(|index| &self.peek_char_at(index).location)
        .take_while(|location| location.code == code)
        .last()
        .map(|location| location.range.end)
        .unwrap_or(start.range.start);
    let range = start.range.start..end;
    Location { code, range }
}
349
350    /// Marks the characters in the given range as line continuation.
351    ///
352    /// This function sets the `is_line_continuation` flag of the characters in
353    /// the range to true. The characters must have been read before calling
354    /// this function.
355    fn mark_line_continuation(&mut self, range: Range<usize>) {
356        assert!(
357            range.end <= self.index,
358            "characters must have been read (range = {:?}, current index = {})",
359            range,
360            self.index
361        );
362        for sc in &mut self.source[range] {
363            sc.is_line_continuation = true;
364        }
365    }
366
367    /// Performs alias substitution.
368    ///
369    /// This function replaces the characters starting from the `begin` index up
370    /// to the current position with the alias value. The resulting part of code
371    /// will be characters with a [`Source::Alias`] origin.
372    fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
373        let end = self.index;
374        assert!(
375            begin < end,
376            "begin index {begin} should be less than end index {end}"
377        );
378
379        let source = Rc::new(Source::Alias {
380            original: self.location_range(begin..end),
381            alias: alias.clone(),
382        });
383        let code = Rc::new(Code {
384            value: RefCell::new(alias.replacement.clone()),
385            start_line_number: NonZeroU64::new(1).unwrap(),
386            source,
387        });
388        let repl = ex(source_chars(&alias.replacement, &code, 0));
389
390        self.source.splice(begin..end, repl);
391        self.index = begin;
392    }
393
/// Tests if the given index is after the replacement string of alias
/// substitution that ends with a blank.
///
/// The buffer is scanned backwards from `index`. Only blanks and characters
/// marked as line continuation may be skipped. The scan succeeds when it
/// reaches a character that comes from an alias whose replacement ends with
/// a blank, provided that the following character is not from a
/// substitution of the same alias.
///
/// # Panics
///
/// If `index` is larger than the currently read index.
fn is_after_blank_ending_alias(&self, index: usize) -> bool {
    fn ends_with_blank(s: &str) -> bool {
        s.chars().next_back().is_some_and(is_blank)
    }
    fn is_same_alias(alias: &Alias, sc: Option<&SourceCharEx>) -> bool {
        sc.is_some_and(|sc| sc.value.location.code.source.is_alias_for(&alias.name))
    }

    for index in (0..index).rev() {
        let sc = &self.source[index];

        // Anything other than a blank or line continuation ends the scan.
        if !sc.is_line_continuation && !is_blank(sc.value.value) {
            return false;
        }

        // The next character must not be from the same alias, which
        // presumably ensures that this is the last character of the
        // replacement -- TODO confirm.
        if let Source::Alias { ref alias, .. } = *sc.value.location.code.source
            && ends_with_blank(&alias.replacement)
            && !is_same_alias(alias, self.source.get(index + 1))
        {
            return true;
        }
    }

    false
}
425}
426
427impl fmt::Debug for LexerCore<'_> {
428    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
429        f.debug_struct("LexerCore")
430            .field("state", &self.state)
431            .field("source", &self.source)
432            .field("index", &self.index)
433            .finish_non_exhaustive()
434    }
435}
436
/// Configuration for the [lexer](Lexer)
///
/// `Config` is a builder for the lexer. A [new](Self::new) instance is created
/// with default settings. You can then customize the settings by modifying the
/// corresponding fields. Finally, you can pass an input object to the
/// [`input`](Self::input) method to create a lexer.
///
/// # Deprecation
///
/// This struct is deprecated. Use [`yash_env::parser::Config`] instead, which
/// has fields of the same names and is converted into a lexer with `into()`.
#[deprecated(since = "0.17.0", note = "use `yash_env::parser::Config` instead")]
#[derive(Debug)]
#[must_use = "you must call `input` to create a lexer"]
#[non_exhaustive]
pub struct Config {
    /// Line number for the first line of the input
    ///
    /// The lexer counts the line number from this value to annotate the
    /// location of the tokens. The line number is saved in the
    /// `start_line_number` field of the [`Code`] instance that is contained in
    /// the [`Location`] instance of the token.
    ///
    /// The default value is 1.
    pub start_line_number: NonZeroU64,

    /// Source of the input
    ///
    /// The source is used to annotate the location of the tokens. This value
    /// is saved in the `source` field of the [`Code`] instance that is
    /// contained in the [`Location`] instance of the token.
    ///
    /// The default value is `None`, in which case the source is set to
    /// [`Source::Unknown`]. It is recommended to set this to a more informative
    /// value, so that the locations in the parsed syntax tree can be traced
    /// back to the source code. In particular, the correct source is necessary
    /// to indicate the location of possible errors that occur during parsing
    /// and execution.
    pub source: Option<Rc<Source>>,
}
476
477#[allow(deprecated, reason = "for backward compatible API")]
478impl Config {
479    /// Creates a new configuration with default settings.
480    ///
481    /// # Deprecation
482    ///
483    /// This struct is deprecated. Use [`yash_env::parser::Config`] instead.
484    #[deprecated(since = "0.17.0", note = "use `yash_env::parser::Config` instead")]
485    pub fn new() -> Self {
486        Config {
487            start_line_number: NonZeroU64::MIN,
488            source: None,
489        }
490    }
491
492    /// Creates a lexer with the given input object.
493    pub fn input<'a>(self, input: Box<dyn InputObject + 'a>) -> Lexer<'a> {
494        let mut config = yash_env::parser::Config::with_input(input);
495        config.start_line_number = self.start_line_number;
496        config.source = self.source;
497        config.into()
498    }
499}
500
501#[allow(deprecated, reason = "for backward compatible API")]
502impl Default for Config {
503    fn default() -> Self {
504        Self::new()
505    }
506}
507
/// Lexical analyzer
///
/// A lexer reads lines using an input function and parses the characters into tokens. It has an
/// internal buffer containing the characters that have been read and the position (or the
/// index) of the character that is to be parsed next.
///
/// `Lexer` has primitive functions such as [`peek_char`](Lexer::peek_char) that provide access
/// to the character at the current position. Derived functions such as
/// [`skip_blanks_and_comment`](Lexer::skip_blanks_and_comment) depend on those primitives to
/// parse more complex structures in the source code. Usually, the lexer is used by a
/// [parser](super::super::Parser) to read the source code and produce a syntax
/// tree, so you don't need to call these functions directly.
///
/// To construct a lexer, create a configuration object
/// ([`yash_env::parser::Config`]), set the desired fields, and then call
/// `into()` or [`Lexer::from`].
///
/// `Lexer` also has several convenience functions such as [`new`](Self::new)
/// and [`with_code`](Self::with_code) for creating a lexer with minimal
/// configuration.
///
/// ```
/// # use yash_env::parser::Config;
/// # use yash_syntax::input::Memory;
/// # use yash_syntax::parser::{lex::Lexer, Parser};
/// # use yash_syntax::source::Source;
/// let mut config = Config::with_input(Box::new(Memory::new("echo hello\n")));
/// config.start_line_number = 10.try_into().unwrap();
/// config.source = Some(Source::CommandString.into());
/// let mut lexer = Lexer::from(config);
/// let mut parser = Parser::new(&mut lexer);
/// _ = parser.command_line();
/// ```
#[derive(Debug)]
#[must_use]
pub struct Lexer<'a> {
    // `Lexer` is a thin wrapper around `LexerCore`. `Lexer` delegates most
    // functions to `LexerCore`. `Lexer` adds automatic line-continuation
    // skipping to `LexerCore`.
    core: LexerCore<'a>,
    // True by default; false while a `PlainLexer` obtained from
    // `disable_line_continuation` is in use.
    line_continuation_enabled: bool,
}
549
550/// Creates a lexer from a configuration defined in the [`yash-env`](yash_env)
551/// crate.
552impl<'a> From<yash_env::parser::Config<'a>> for Lexer<'a> {
553    fn from(config: yash_env::parser::Config<'a>) -> Self {
554        let input = config.input;
555        let start_line_number = config.start_line_number;
556        let source = config.source.unwrap_or_else(|| Rc::new(Source::Unknown));
557        Lexer {
558            core: LexerCore::new(input, start_line_number, source),
559            line_continuation_enabled: true,
560        }
561    }
562}
563
564impl<'a> Lexer<'a> {
/// Creates a new configuration with default settings.
///
/// This is a synonym for [`Config::new`]. You can modify the settings and
/// then create a lexer with the [`input`](Config::input) method.
///
/// # Deprecation
///
/// The `Config` struct defined in this module is deprecated, and so is this
/// function. Use [`yash_env::parser::Config`] instead.
#[allow(deprecated, reason = "for backward compatible API")]
#[deprecated(since = "0.17.0", note = "use `yash_env::parser::Config` instead")]
#[inline(always)]
pub fn config() -> Config {
    Config::new()
}
580
581    /// Creates a new lexer that reads using the given input function.
582    ///
583    /// This is a convenience function that creates a lexer with the given input
584    /// object and the default configuration. To customize the configuration,
585    /// instantiate a [`yash_env::parser::Config`] object instead.
586    ///
587    /// This function is best used for testing or for simple cases where you
588    /// don't need to customize the lexer. For practical use, it is recommended
589    /// to provide supplementary information with a configuration before
590    /// creating a lexer.
591    pub fn new(input: Box<dyn InputObject + 'a>) -> Lexer<'a> {
592        yash_env::parser::Config::with_input(input).into()
593    }
594
595    /// Creates a new lexer with a fixed source code.
596    ///
597    /// This is a convenience function that creates a lexer that reads from a
598    /// string using [`Memory`] with the default configuration.
599    ///
600    /// This function is best used for testing or for simple cases where you
601    /// don't need to customize the lexer. For practical use, it is recommended
602    /// to provide supplementary information with a configuration before
603    /// creating a lexer.
604    pub fn with_code(code: &'a str) -> Lexer<'a> {
605        Self::new(Box::new(Memory::new(code)))
606    }
607
608    /// Creates a new lexer with a fixed source code.
609    ///
610    /// This is a convenience function that creates a lexer that reads from a
611    /// string using [`Memory`] with the specified source starting from line
612    /// number 1.
613    ///
614    /// This function is soft-deprecated. Use [`with_code`](Self::with_code)
615    /// instead if the source is `Unknown`. Otherwise, use
616    /// [`yash_env::parser::Config`] to create a lexer with a customized
617    /// configuration.
618    pub fn from_memory<S: Into<Rc<Source>>>(code: &'a str, source: S) -> Lexer<'a> {
619        fn inner(code: &str, source: Rc<Source>) -> Lexer<'_> {
620            let mut config = yash_env::parser::Config::with_input(Box::new(Memory::new(code)));
621            config.source = Some(source);
622            config.into()
623        }
624        inner(code, source.into())
625    }
626
/// Disables line continuation recognition onward.
///
/// By default, [`peek_char`](Self::peek_char) silently skips line
/// continuation sequences. When line continuation is disabled, however,
/// `peek_char` returns characters literally.
///
/// The returned [`PlainLexer`] mutably borrows this lexer. Call
/// [`enable_line_continuation`](Self::enable_line_continuation) to
/// switch line continuation recognition on.
///
/// # Panics
///
/// This function will panic if line continuation has already been disabled.
pub fn disable_line_continuation<'b>(&'b mut self) -> PlainLexer<'b, 'a> {
    assert!(
        self.line_continuation_enabled,
        "line continuation already disabled"
    );
    self.line_continuation_enabled = false;
    PlainLexer { lexer: self }
}
645
/// Re-enables line continuation.
///
/// You can pass the `PlainLexer` returned from
/// [`disable_line_continuation`](Self::disable_line_continuation) to this
/// function to re-enable line continuation. That is equivalent to dropping
/// the `PlainLexer` instance, but the code will be more descriptive.
///
/// This function itself only takes ownership of the argument and drops it.
// NOTE(review): the lifetime arguments are in the opposite order from the
// `PlainLexer<'b, 'a>` returned by `disable_line_continuation`. This looks
// harmless because the lifetimes are inferred at the call site, but confirm
// whether `PlainLexer<'b, 'a>` was intended.
pub fn enable_line_continuation<'b>(_: PlainLexer<'a, 'b>) {}
653
/// Skips line continuation, i.e., a backslash followed by a newline.
///
/// If there is a line continuation at the current position, this function
/// consumes the backslash and the newline and returns `Ok(true)`. The
/// characters are marked as line continuation.
///
/// If there is no line continuation, this function does nothing and returns
/// `Ok(false)`.
///
/// This function does nothing if line continuation has been
/// [disabled](Self::disable_line_continuation).
///
/// # Errors
///
/// Any error from peeking a character is returned as is.
async fn line_continuation(&mut self) -> Result<bool> {
    if !self.line_continuation_enabled {
        return Ok(false);
    }

    // Remember where the backslash is so that it can be un-consumed.
    let index = self.core.index();
    match self.core.peek_char().await? {
        PeekChar::Char(c) if c.value == '\\' => self.core.consume_char(),
        _ => return Ok(false),
    }

    match self.core.peek_char().await? {
        PeekChar::Char(c) if c.value == '\n' => self.core.consume_char(),
        _ => {
            // A backslash not followed by a newline is not a line
            // continuation; put the backslash back.
            self.core.rewind(index);
            return Ok(false);
        }
    }

    // Exactly two characters have been consumed: the backslash and newline.
    self.core.mark_line_continuation(index..index + 2);

    Ok(true)
}
688
689    /// Peeks the next character.
690    ///
691    /// If the end of input is reached, `Ok(None)` is returned. On error,
692    /// `Err(_)` is returned.
693    ///
694    /// If line continuation recognition is enabled, combinations of a backslash
695    /// and a newline are silently skipped before returning the next character.
696    /// Call [`disable_line_continuation`](Self::disable_line_continuation) to
697    /// switch off line continuation recognition.
698    ///
699    /// This function requires a mutable reference to `self` since it may need
700    /// to read the next line if needed.
701    pub async fn peek_char(&mut self) -> Result<Option<char>> {
702        while self.line_continuation().await? {}
703
704        match self.core.peek_char().await? {
705            PeekChar::Char(source_char) => Ok(Some(source_char.value)),
706            PeekChar::EndOfInput(_) => Ok(None),
707        }
708    }
709
710    /// Returns the location of the next character.
711    ///
712    /// If there is no more character (that is, it is the end of input), an imaginary location
713    /// is returned that would be returned if a character existed.
714    ///
715    /// This function requires a mutable reference to `self` since it needs to
716    /// [peek](Self::peek_char) the next character.
717    pub async fn location(&mut self) -> Result<&Location> {
718        self.core.peek_char().await.map(|p| p.location())
719    }
720
/// Consumes the next character.
///
/// This function must be called after [`peek_char`](Lexer::peek_char) has successfully
/// returned the character.
///
/// # Panics
///
/// Consuming a character that has not yet been peeked results in a panic.
pub fn consume_char(&mut self) {
    self.core.consume_char()
}
729
/// Returns the position of the next character, counted from zero.
///
/// Peeking does not change the index; consuming a character advances it by
/// one.
///
/// ```
/// # use yash_syntax::parser::lex::Lexer;
/// # futures_executor::block_on(async {
/// let mut lexer = Lexer::with_code("abc");
/// assert_eq!(lexer.index(), 0);
/// let _ = lexer.peek_char().await;
/// assert_eq!(lexer.index(), 0);
/// lexer.consume_char();
/// assert_eq!(lexer.index(), 1);
/// # })
/// ```
#[must_use]
pub fn index(&self) -> usize {
    self.core.index()
}
747
/// Moves the current position back to the given index so that characters that have been
/// consumed can be read again.
///
/// # Panics
///
/// The given index must not be larger than the [current index](Lexer::index), or this
/// function panics.
///
/// # Examples
///
/// ```
/// # use yash_syntax::parser::lex::Lexer;
/// # futures_executor::block_on(async {
/// let mut lexer = Lexer::with_code("abc");
/// let saved_index = lexer.index();
/// assert_eq!(lexer.peek_char().await, Ok(Some('a')));
/// lexer.consume_char();
/// assert_eq!(lexer.peek_char().await, Ok(Some('b')));
/// lexer.rewind(saved_index);
/// assert_eq!(lexer.peek_char().await, Ok(Some('a')));
/// # })
/// ```
pub fn rewind(&mut self, index: usize) {
    self.core.rewind(index)
}
769
/// Checks if there is any character that has been read from the input
/// source but not yet consumed.
///
/// This function does not read from the input.
#[must_use]
pub fn pending(&self) -> bool {
    self.core.pending()
}
776
/// Clears the internal buffer of the lexer.
///
/// Locations returned from [`location`](Self::location) share a single code
/// instance that is also retained by the lexer. The code grows long as the
/// lexer reads more input. To prevent the code from getting too large, you
/// can call this function that replaces the retained code with a new empty
/// one. The new code's `start_line_number` will be incremented by the
/// number of lines in the previous.
///
/// The [index](Self::index) goes back to zero, so indices obtained before
/// the call no longer refer to the same characters.
pub fn flush(&mut self) {
    self.core.flush()
}
788
/// Clears an end-of-input or error status so that the lexer can resume
/// parsing.
///
/// This function will be useful only in an interactive shell where the user
/// can continue entering commands even after they send an end-of-input or
/// are interrupted by a syntax error.
///
/// The internal buffer is also cleared, as with [`flush`](Self::flush).
pub fn reset(&mut self) {
    self.core.reset()
}
798
/// Peeks the next character and, if the given decider function returns true for it,
/// advances the position.
///
/// Returns the consumed character if the function returned true. Returns `Ok(None)` if it
/// returned false or there is no more character.
///
/// # Errors
///
/// Any error from peeking the character is returned as is.
pub async fn consume_char_if<F>(&mut self, mut f: F) -> Result<Option<&SourceChar>>
where
    F: FnMut(char) -> bool,
{
    // Delegate to the non-generic implementation.
    self.consume_char_if_dyn(&mut f).await
}
810
811    /// Dynamic version of [`Self::consume_char_if`].
812    pub(crate) async fn consume_char_if_dyn(
813        &mut self,
814        f: &mut dyn FnMut(char) -> bool,
815    ) -> Result<Option<&SourceChar>> {
816        match self.peek_char().await? {
817            Some(c) if f(c) => {
818                let index = self.index();
819                self.consume_char();
820                Ok(Some(self.core.peek_char_at(index)))
821            }
822            _ => Ok(None),
823        }
824    }
825
/// Extracts a string from the source code range.
///
/// This function returns the source code string for the range specified by
/// the argument. The range must refer to characters that have already been
/// read.
///
/// # Panics
///
/// If the argument index is out of bounds, i.e., pointing to an unread
/// character.
#[inline]
pub fn source_string(&self, range: Range<usize>) -> String {
    self.core.source_string(range)
}
840
841    /// Returns a location for a given range of the source code.
842    ///
843    /// All the characters in the range must have been
844    /// [consume](Self::consume_char)d. If the range refers to an unconsumed
845    /// character, this function will panic!
846    ///
847    /// If the characters are from more than one [`Code`] fragment, the location
848    /// will only cover the initial portion of the range sharing the same
849    /// `Code`.
850    ///
851    /// # Panics
852    ///
853    /// This function will panic if the range refers to an unconsumed character.
854    ///
855    /// If the start index of the range is the end of input, it must have been
856    /// peeked and the range must be empty, or the function will panic.
857    #[must_use]
858    pub fn location_range(&self, range: Range<usize>) -> Location {
859        self.core.location_range(range)
860    }
861
862    /// Performs alias substitution right before the current position.
863    ///
864    /// This function must be called just after a [word](WordLexer::word) has been parsed that
865    /// matches the name of the argument alias. No check is done in this function that there is
866    /// a matching word before the current position. The characters starting from the `begin`
867    /// index up to the current position are silently replaced with the alias value.
868    ///
869    /// The resulting part of code will be characters with a [`Source::Alias`] origin.
870    ///
871    /// After the substitution, the position will be set before the replaced string.
872    ///
873    /// # Panics
874    ///
875    /// If the replaced part is empty, i.e., `begin >= self.index()`.
876    pub fn substitute_alias(&mut self, begin: usize, alias: &Rc<Alias>) {
877        self.core.substitute_alias(begin, alias)
878    }
879
880    /// Tests if the given index is after the replacement string of alias
881    /// substitution that ends with a blank.
882    ///
883    /// # Panics
884    ///
885    /// If `index` is larger than the currently read index.
886    pub fn is_after_blank_ending_alias(&self, index: usize) -> bool {
887        self.core.is_after_blank_ending_alias(index)
888    }
889
890    /// Parses an optional compound list that is the content of a command
891    /// substitution.
892    ///
893    /// This function consumes characters until a token that cannot be the
894    /// beginning of an and-or list is found and returns the string that was
895    /// consumed.
896    pub async fn inner_program(&mut self) -> Result<String> {
897        let begin = self.index();
898
899        let mut parser = super::super::Parser::new(self);
900        parser.maybe_compound_list().await?;
901
902        let end = parser.peek_token().await?.index;
903        self.rewind(end);
904
905        Ok(self.core.source_string(begin..end))
906    }
907
908    /// Like [`Lexer::inner_program`], but returns the future in a pinning box.
909    pub fn inner_program_boxed(&mut self) -> Pin<Box<dyn Future<Output = Result<String>> + '_>> {
910        Box::pin(self.inner_program())
911    }
912}
913
914/// Reference to [`Lexer`] with line continuation disabled
915///
916/// This struct implements the RAII pattern for temporarily disabling line
917/// continuation. When you disable the line continuation of a lexer, you get an
918/// instance of `PlainLexer`. You can access the original lexer via the
919/// `PlainLexer` until you drop it, when the line continuation is automatically
920/// re-enabled.
921#[derive(Debug)]
922#[must_use = "You must retain the PlainLexer to keep line continuation disabled"]
923pub struct PlainLexer<'a, 'b> {
924    lexer: &'a mut Lexer<'b>,
925}
926
927impl<'b> Deref for PlainLexer<'_, 'b> {
928    type Target = Lexer<'b>;
929    fn deref(&self) -> &Lexer<'b> {
930        self.lexer
931    }
932}
933
934impl<'b> DerefMut for PlainLexer<'_, 'b> {
935    fn deref_mut(&mut self) -> &mut Lexer<'b> {
936        self.lexer
937    }
938}
939
940impl Drop for PlainLexer<'_, '_> {
941    fn drop(&mut self) {
942        self.lexer.line_continuation_enabled = true;
943    }
944}
945
/// Context in which a [word](crate::syntax::Word) is parsed
///
/// How the word of a [switch](crate::syntax::Switch) is parsed depends on
/// whether the parameter expansion containing the switch is part of a text or
/// a word. The lexer consults a `WordContext` value to decide its behavior.
///
/// Parser functions that depend on the context are implemented in
/// [`WordLexer`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WordContext {
    /// The text unit being parsed belongs to a [text](crate::syntax::Text).
    Text,
    /// The text unit being parsed belongs to a [word](crate::syntax::Word).
    Word,
}
961
962/// Lexer with additional information for parsing [texts](crate::syntax::Text)
963/// and [words](crate::syntax::Word)
964#[derive(Debug)]
965pub struct WordLexer<'a, 'b> {
966    pub lexer: &'a mut Lexer<'b>,
967    pub context: WordContext,
968}
969
970impl<'b> Deref for WordLexer<'_, 'b> {
971    type Target = Lexer<'b>;
972    fn deref(&self) -> &Lexer<'b> {
973        self.lexer
974    }
975}
976
977impl<'b> DerefMut for WordLexer<'_, 'b> {
978    fn deref_mut(&mut self) -> &mut Lexer<'b> {
979        self.lexer
980    }
981}
982
983#[cfg(test)]
984mod tests {
985    use super::*;
986    use crate::input::Input;
987    use crate::parser::error::ErrorCause;
988    use crate::parser::error::SyntaxError;
989    use assert_matches::assert_matches;
990    use futures_util::FutureExt as _;
991
992    #[test]
993    fn lexer_core_peek_char_empty_source() {
994        let input = Memory::new("");
995        let line = NonZeroU64::new(32).unwrap();
996        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
997        let result = lexer.peek_char().now_or_never().unwrap();
998        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
999            assert_eq!(*location.code.value.borrow(), "");
1000            assert_eq!(location.code.start_line_number, line);
1001            assert_eq!(*location.code.source, Source::Unknown);
1002            assert_eq!(location.range, 0..0);
1003        });
1004    }
1005
1006    #[test]
1007    fn lexer_core_peek_char_io_error() {
1008        #[derive(Debug)]
1009        struct Failing;
1010        impl fmt::Display for Failing {
1011            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1012                write!(f, "Failing")
1013            }
1014        }
1015        impl std::error::Error for Failing {}
1016        impl Input for Failing {
1017            async fn next_line(&mut self, _: &Context) -> crate::input::Result {
1018                Err(std::io::Error::other(Failing))
1019            }
1020        }
1021        let line = NonZeroU64::new(42).unwrap();
1022        let mut lexer = LexerCore::new(Box::new(Failing), line, Rc::new(Source::Unknown));
1023
1024        let e = lexer.peek_char().now_or_never().unwrap().unwrap_err();
1025        assert_matches!(e.cause, ErrorCause::Io(io_error) => {
1026            assert_eq!(io_error.kind(), std::io::ErrorKind::Other);
1027        });
1028        assert_eq!(*e.location.code.value.borrow(), "");
1029        assert_eq!(e.location.code.start_line_number, line);
1030        assert_eq!(*e.location.code.source, Source::Unknown);
1031        assert_eq!(e.location.range, 0..0);
1032    }
1033
1034    #[test]
1035    fn lexer_core_peek_char_context_is_first_line() {
1036        // In this test case, this mock input function will be called twice.
1037        struct InputMock {
1038            first: bool,
1039        }
1040        impl Input for InputMock {
1041            async fn next_line(&mut self, context: &Context) -> crate::input::Result {
1042                assert_eq!(context.is_first_line(), self.first);
1043                self.first = false;
1044                Ok("\n".to_owned())
1045            }
1046        }
1047
1048        let input = InputMock { first: true };
1049        let line = NonZeroU64::new(42).unwrap();
1050        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1051
1052        let peek = lexer.peek_char().now_or_never().unwrap();
1053        assert_matches!(peek, Ok(PeekChar::Char(_)));
1054        lexer.consume_char();
1055
1056        let peek = lexer.peek_char().now_or_never().unwrap();
1057        assert_matches!(peek, Ok(PeekChar::Char(_)));
1058        lexer.consume_char();
1059    }
1060
1061    #[test]
1062    fn lexer_core_consume_char_success() {
1063        let input = Memory::new("a\nb");
1064        let line = NonZeroU64::new(1).unwrap();
1065        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1066
1067        let result = lexer.peek_char().now_or_never().unwrap();
1068        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1069            assert_eq!(c.value, 'a');
1070            assert_eq!(*c.location.code.value.borrow(), "a\n");
1071            assert_eq!(c.location.code.start_line_number, line);
1072            assert_eq!(*c.location.code.source, Source::Unknown);
1073            assert_eq!(c.location.range, 0..1);
1074        });
1075        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1076            assert_eq!(c.value, 'a');
1077            assert_eq!(*c.location.code.value.borrow(), "a\n");
1078            assert_eq!(c.location.code.start_line_number, line);
1079            assert_eq!(*c.location.code.source, Source::Unknown);
1080            assert_eq!(c.location.range, 0..1);
1081        });
1082        lexer.consume_char();
1083
1084        let result = lexer.peek_char().now_or_never().unwrap();
1085        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1086            assert_eq!(c.value, '\n');
1087            assert_eq!(*c.location.code.value.borrow(), "a\n");
1088            assert_eq!(c.location.code.start_line_number, line);
1089            assert_eq!(*c.location.code.source, Source::Unknown);
1090            assert_eq!(c.location.range, 1..2);
1091        });
1092        lexer.consume_char();
1093
1094        let result = lexer.peek_char().now_or_never().unwrap();
1095        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1096            assert_eq!(c.value, 'b');
1097            assert_eq!(*c.location.code.value.borrow(), "a\nb");
1098            assert_eq!(c.location.code.start_line_number.get(), 1);
1099            assert_eq!(*c.location.code.source, Source::Unknown);
1100            assert_eq!(c.location.range, 2..3);
1101        });
1102        lexer.consume_char();
1103
1104        let result = lexer.peek_char().now_or_never().unwrap();
1105        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
1106            assert_eq!(*location.code.value.borrow(), "a\nb");
1107            assert_eq!(location.code.start_line_number.get(), 1);
1108            assert_eq!(*location.code.source, Source::Unknown);
1109            assert_eq!(location.range, 3..3);
1110        });
1111    }
1112
1113    #[test]
1114    #[should_panic(expected = "A character must have been peeked before being consumed: index=0")]
1115    fn lexer_core_consume_char_panic() {
1116        let input = Memory::new("a");
1117        let line = NonZeroU64::new(1).unwrap();
1118        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1119        lexer.consume_char();
1120    }
1121
1122    #[test]
1123    fn lexer_core_peek_char_at() {
1124        let input = Memory::new("a\nb");
1125        let line = NonZeroU64::new(1).unwrap();
1126        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1127
1128        let c0 = assert_matches!(
1129            lexer.peek_char().now_or_never().unwrap(),
1130            Ok(PeekChar::Char(c)) => c.clone()
1131        );
1132        lexer.consume_char();
1133
1134        let c1 = assert_matches!(
1135            lexer.peek_char().now_or_never().unwrap(),
1136            Ok(PeekChar::Char(c)) => c.clone()
1137        );
1138        lexer.consume_char();
1139
1140        let c2 = assert_matches!(
1141            lexer.peek_char().now_or_never().unwrap(),
1142            Ok(PeekChar::Char(c)) => c.clone()
1143        );
1144
1145        assert_eq!(lexer.peek_char_at(0), &c0);
1146        assert_eq!(lexer.peek_char_at(1), &c1);
1147        assert_eq!(lexer.peek_char_at(2), &c2);
1148    }
1149
1150    #[test]
1151    fn lexer_core_index() {
1152        let input = Memory::new("a\nb");
1153        let line = NonZeroU64::new(1).unwrap();
1154        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1155
1156        assert_eq!(lexer.index(), 0);
1157        lexer.peek_char().now_or_never().unwrap().unwrap();
1158        assert_eq!(lexer.index(), 0);
1159        lexer.consume_char();
1160
1161        assert_eq!(lexer.index(), 1);
1162        lexer.peek_char().now_or_never().unwrap().unwrap();
1163        lexer.consume_char();
1164
1165        assert_eq!(lexer.index(), 2);
1166        lexer.peek_char().now_or_never().unwrap().unwrap();
1167        lexer.consume_char();
1168
1169        assert_eq!(lexer.index(), 3);
1170    }
1171
1172    #[test]
1173    fn lexer_core_rewind_success() {
1174        let input = Memory::new("abc");
1175        let line = NonZeroU64::new(1).unwrap();
1176        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1177        lexer.rewind(0);
1178        assert_eq!(lexer.index(), 0);
1179
1180        let _ = lexer.peek_char().now_or_never().unwrap();
1181        lexer.consume_char();
1182        let _ = lexer.peek_char().now_or_never().unwrap();
1183        lexer.consume_char();
1184        lexer.rewind(0);
1185
1186        let result = lexer.peek_char().now_or_never().unwrap();
1187        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1188            assert_eq!(c.value, 'a');
1189            assert_eq!(*c.location.code.value.borrow(), "abc");
1190            assert_eq!(c.location.code.start_line_number, line);
1191            assert_eq!(*c.location.code.source, Source::Unknown);
1192            assert_eq!(c.location.range, 0..1);
1193        });
1194    }
1195
1196    #[test]
1197    #[should_panic(expected = "The new index 1 must not be larger than the current index 0")]
1198    fn lexer_core_rewind_invalid_index() {
1199        let input = Memory::new("abc");
1200        let line = NonZeroU64::new(1).unwrap();
1201        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1202        lexer.rewind(1);
1203    }
1204
1205    #[test]
1206    fn lexer_core_source_string() {
1207        let input = Memory::new("ab\ncd");
1208        let line = NonZeroU64::new(1).unwrap();
1209        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1210        for _ in 0..4 {
1211            let _ = lexer.peek_char().now_or_never().unwrap();
1212            lexer.consume_char();
1213        }
1214
1215        let result = lexer.source_string(1..4);
1216        assert_eq!(result, "b\nc");
1217    }
1218
1219    #[test]
1220    #[should_panic(expected = "begin index 0 should be less than end index 0")]
1221    fn lexer_core_substitute_alias_with_invalid_index() {
1222        let input = Memory::new("a b");
1223        let line = NonZeroU64::new(1).unwrap();
1224        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1225        let alias = Rc::new(Alias {
1226            name: "a".to_string(),
1227            replacement: "".to_string(),
1228            global: false,
1229            origin: Location::dummy("dummy"),
1230        });
1231        lexer.substitute_alias(0, &alias);
1232    }
1233
1234    #[test]
1235    fn lexer_core_substitute_alias_single_line_replacement() {
1236        let input = Memory::new("a b");
1237        let line = NonZeroU64::new(1).unwrap();
1238        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1239        let alias = Rc::new(Alias {
1240            name: "a".to_string(),
1241            replacement: "lex".to_string(),
1242            global: false,
1243            origin: Location::dummy("dummy"),
1244        });
1245
1246        let _ = lexer.peek_char().now_or_never().unwrap();
1247        lexer.consume_char();
1248
1249        lexer.substitute_alias(0, &alias);
1250
1251        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1252            assert_eq!(c.value, 'l');
1253            assert_eq!(*c.location.code.value.borrow(), "lex");
1254            assert_eq!(c.location.code.start_line_number.get(), 1);
1255            assert_matches!(&*c.location.code.source,
1256                Source::Alias { original, alias: alias2 } => {
1257                assert_eq!(*original.code.value.borrow(), "a b");
1258                assert_eq!(original.code.start_line_number, line);
1259                assert_eq!(*original.code.source, Source::Unknown);
1260                assert_eq!(original.range, 0..1);
1261                assert_eq!(alias2, &alias);
1262            });
1263            assert_eq!(c.location.range, 0..1);
1264        });
1265        lexer.consume_char();
1266
1267        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1268            assert_eq!(c.value, 'e');
1269            assert_eq!(*c.location.code.value.borrow(), "lex");
1270            assert_eq!(c.location.code.start_line_number, line);
1271            assert_matches!(&*c.location.code.source,
1272                Source::Alias { original, alias: alias2 } => {
1273                assert_eq!(*original.code.value.borrow(), "a b");
1274                assert_eq!(original.code.start_line_number, line);
1275                assert_eq!(*original.code.source, Source::Unknown);
1276                assert_eq!(original.range, 0..1);
1277                assert_eq!(alias2, &alias);
1278            });
1279            assert_eq!(c.location.range, 1..2);
1280        });
1281        lexer.consume_char();
1282
1283        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1284            assert_eq!(c.value, 'x');
1285            assert_eq!(*c.location.code.value.borrow(), "lex");
1286            assert_eq!(c.location.code.start_line_number, line);
1287            assert_matches!(&*c.location.code.source,
1288                Source::Alias { original, alias: alias2 } => {
1289                assert_eq!(*original.code.value.borrow(), "a b");
1290                assert_eq!(original.code.start_line_number, line);
1291                assert_eq!(*original.code.source, Source::Unknown);
1292                assert_eq!(original.range, 0..1);
1293                assert_eq!(alias2, &alias);
1294            });
1295            assert_eq!(c.location.range, 2..3);
1296        });
1297        lexer.consume_char();
1298
1299        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1300            assert_eq!(c.value, ' ');
1301            assert_eq!(*c.location.code.value.borrow(), "a b");
1302            assert_eq!(c.location.code.start_line_number, line);
1303            assert_eq!(*c.location.code.source, Source::Unknown);
1304            assert_eq!(c.location.range, 1..2);
1305        });
1306        lexer.consume_char();
1307    }
1308
1309    #[test]
1310    fn lexer_core_substitute_alias_multi_line_replacement() {
1311        let input = Memory::new(" foo b");
1312        let line = NonZeroU64::new(1).unwrap();
1313        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1314        let alias = Rc::new(Alias {
1315            name: "foo".to_string(),
1316            replacement: "x\ny".to_string(),
1317            global: true,
1318            origin: Location::dummy("loc"),
1319        });
1320
1321        for _ in 0..4 {
1322            let _ = lexer.peek_char().now_or_never().unwrap();
1323            lexer.consume_char();
1324        }
1325
1326        lexer.substitute_alias(1, &alias);
1327
1328        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1329            assert_eq!(c.value, 'x');
1330            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1331            assert_eq!(c.location.code.start_line_number, line);
1332            assert_matches!(&*c.location.code.source,
1333                Source::Alias { original, alias: alias2 } => {
1334                assert_eq!(*original.code.value.borrow(), " foo b");
1335                assert_eq!(original.code.start_line_number, line);
1336                assert_eq!(*original.code.source, Source::Unknown);
1337                assert_eq!(original.range, 1..4);
1338                assert_eq!(alias2, &alias);
1339            });
1340            assert_eq!(c.location.range, 0..1);
1341        });
1342        lexer.consume_char();
1343
1344        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1345            assert_eq!(c.value, '\n');
1346            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1347            assert_eq!(c.location.code.start_line_number, line);
1348            assert_matches!(&*c.location.code.source,
1349                Source::Alias { original, alias: alias2 } => {
1350                assert_eq!(*original.code.value.borrow(), " foo b");
1351                assert_eq!(original.code.start_line_number, line);
1352                assert_eq!(*original.code.source, Source::Unknown);
1353                assert_eq!(original.range, 1..4);
1354                assert_eq!(alias2, &alias);
1355            });
1356            assert_eq!(c.location.range, 1..2);
1357        });
1358        lexer.consume_char();
1359
1360        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1361            assert_eq!(c.value, 'y');
1362            assert_eq!(*c.location.code.value.borrow(), "x\ny");
1363            assert_eq!(c.location.code.start_line_number, line);
1364            assert_matches!(&*c.location.code.source, Source::Alias { original, alias: alias2 } => {
1365                assert_eq!(*original.code.value.borrow(), " foo b");
1366                assert_eq!(original.code.start_line_number, line);
1367                assert_eq!(*original.code.source, Source::Unknown);
1368                assert_eq!(original.range, 1..4);
1369                assert_eq!(alias2, &alias);
1370            });
1371            assert_eq!(c.location.range, 2..3);
1372        });
1373        lexer.consume_char();
1374
1375        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1376            assert_eq!(c.value, ' ');
1377            assert_eq!(*c.location.code.value.borrow(), " foo b");
1378            assert_eq!(c.location.code.start_line_number, line);
1379            assert_eq!(*c.location.code.source, Source::Unknown);
1380            assert_eq!(c.location.range, 4..5);
1381        });
1382        lexer.consume_char();
1383    }
1384
1385    #[test]
1386    fn lexer_core_substitute_alias_empty_replacement() {
1387        let input = Memory::new("x ");
1388        let line = NonZeroU64::new(1).unwrap();
1389        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1390        let alias = Rc::new(Alias {
1391            name: "x".to_string(),
1392            replacement: "".to_string(),
1393            global: false,
1394            origin: Location::dummy("dummy"),
1395        });
1396
1397        let _ = lexer.peek_char().now_or_never().unwrap();
1398        lexer.consume_char();
1399
1400        lexer.substitute_alias(0, &alias);
1401
1402        assert_matches!(lexer.peek_char().now_or_never().unwrap(), Ok(PeekChar::Char(c)) => {
1403            assert_eq!(c.value, ' ');
1404            assert_eq!(*c.location.code.value.borrow(), "x ");
1405            assert_eq!(c.location.code.start_line_number, line);
1406            assert_eq!(*c.location.code.source, Source::Unknown);
1407            assert_eq!(c.location.range, 1..2);
1408        });
1409    }
1410
1411    #[test]
1412    fn lexer_core_peek_char_after_alias_substitution() {
1413        let input = Memory::new("a\nb");
1414        let line = NonZeroU64::new(1).unwrap();
1415        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1416
1417        lexer.peek_char().now_or_never().unwrap().unwrap();
1418        lexer.consume_char();
1419
1420        let alias = Rc::new(Alias {
1421            name: "a".to_string(),
1422            replacement: "".to_string(),
1423            global: false,
1424            origin: Location::dummy("dummy"),
1425        });
1426        lexer.substitute_alias(0, &alias);
1427
1428        let result = lexer.peek_char().now_or_never().unwrap();
1429        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1430            assert_eq!(c.value, '\n');
1431            assert_eq!(*c.location.code.value.borrow(), "a\n");
1432            assert_eq!(c.location.code.start_line_number, line);
1433            assert_eq!(*c.location.code.source, Source::Unknown);
1434            assert_eq!(c.location.range, 1..2);
1435        });
1436        lexer.consume_char();
1437
1438        let result = lexer.peek_char().now_or_never().unwrap();
1439        assert_matches!(result, Ok(PeekChar::Char(c)) => {
1440            assert_eq!(c.value, 'b');
1441            assert_eq!(*c.location.code.value.borrow(), "a\nb");
1442            assert_eq!(c.location.code.start_line_number.get(), 1);
1443            assert_eq!(*c.location.code.source, Source::Unknown);
1444            assert_eq!(c.location.range, 2..3);
1445        });
1446        lexer.consume_char();
1447
1448        let result = lexer.peek_char().now_or_never().unwrap();
1449        assert_matches!(result, Ok(PeekChar::EndOfInput(location)) => {
1450            assert_eq!(*location.code.value.borrow(), "a\nb");
1451            assert_eq!(location.code.start_line_number.get(), 1);
1452            assert_eq!(*location.code.source, Source::Unknown);
1453            assert_eq!(location.range, 3..3);
1454        });
1455    }
1456
1457    #[test]
1458    fn lexer_core_is_after_blank_ending_alias_index_0() {
1459        let original = Location::dummy("original");
1460        let alias = Rc::new(Alias {
1461            name: "a".to_string(),
1462            replacement: " ".to_string(),
1463            global: false,
1464            origin: Location::dummy("origin"),
1465        });
1466        let source = Source::Alias { original, alias };
1467        let input = Memory::new("a");
1468        let line = NonZeroU64::new(1).unwrap();
1469        let lexer = LexerCore::new(Box::new(input), line, Rc::new(source));
1470        assert!(!lexer.is_after_blank_ending_alias(0));
1471    }
1472
1473    #[test]
1474    fn lexer_core_is_after_blank_ending_alias_not_blank_ending() {
1475        let input = Memory::new("a x");
1476        let line = NonZeroU64::new(1).unwrap();
1477        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1478        let alias = Rc::new(Alias {
1479            name: "a".to_string(),
1480            replacement: " b".to_string(),
1481            global: false,
1482            origin: Location::dummy("dummy"),
1483        });
1484
1485        lexer.peek_char().now_or_never().unwrap().unwrap();
1486        lexer.consume_char();
1487
1488        lexer.substitute_alias(0, &alias);
1489
1490        assert!(!lexer.is_after_blank_ending_alias(0));
1491        assert!(!lexer.is_after_blank_ending_alias(1));
1492        assert!(!lexer.is_after_blank_ending_alias(2));
1493        assert!(!lexer.is_after_blank_ending_alias(3));
1494    }
1495
1496    #[test]
1497    fn lexer_core_is_after_blank_ending_alias_blank_ending() {
1498        let input = Memory::new("a x");
1499        let line = NonZeroU64::new(1).unwrap();
1500        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1501        let alias = Rc::new(Alias {
1502            name: "a".to_string(),
1503            replacement: " b ".to_string(),
1504            global: false,
1505            origin: Location::dummy("dummy"),
1506        });
1507
1508        lexer.peek_char().now_or_never().unwrap().unwrap();
1509        lexer.consume_char();
1510
1511        lexer.substitute_alias(0, &alias);
1512
1513        assert!(!lexer.is_after_blank_ending_alias(0));
1514        assert!(!lexer.is_after_blank_ending_alias(1));
1515        assert!(!lexer.is_after_blank_ending_alias(2));
1516        assert!(lexer.is_after_blank_ending_alias(3));
1517        assert!(lexer.is_after_blank_ending_alias(4));
1518    }
1519
1520    #[test]
1521    fn lexer_core_is_after_blank_ending_alias_after_line_continuation() {
1522        let input = Memory::new("a\\\n x");
1523        let line = NonZeroU64::new(1).unwrap();
1524        let mut lexer = LexerCore::new(Box::new(input), line, Rc::new(Source::Unknown));
1525        let alias = Rc::new(Alias {
1526            name: "a".to_string(),
1527            replacement: " b ".to_string(),
1528            global: false,
1529            origin: Location::dummy("dummy"),
1530        });
1531
1532        lexer.peek_char().now_or_never().unwrap().unwrap();
1533        lexer.consume_char();
1534        lexer.substitute_alias(0, &alias);
1535
1536        while let Ok(PeekChar::Char(_)) = lexer.peek_char().now_or_never().unwrap() {
1537            lexer.consume_char();
1538        }
1539        lexer.mark_line_continuation(3..5);
1540
1541        assert!(!lexer.is_after_blank_ending_alias(0));
1542        assert!(!lexer.is_after_blank_ending_alias(1));
1543        assert!(!lexer.is_after_blank_ending_alias(2));
1544        assert!(lexer.is_after_blank_ending_alias(5));
1545        assert!(lexer.is_after_blank_ending_alias(6));
1546    }
1547
1548    #[test]
1549    fn lexer_with_empty_source() {
1550        let mut lexer = Lexer::with_code("");
1551        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(None));
1552    }
1553
1554    #[test]
1555    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_backslash() {
1556        let mut lexer = Lexer::with_code("\\\n\n\\");
1557        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\n')));
1558        assert_eq!(lexer.index(), 2);
1559    }
1560
1561    #[test]
1562    fn lexer_peek_char_with_line_continuation_enabled_stopping_on_non_newline() {
1563        let mut lexer = Lexer::with_code("\\\n\\\n\\\n\\\\");
1564        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1565        assert_eq!(lexer.index(), 6);
1566    }
1567
1568    #[test]
1569    fn lexer_peek_char_with_line_continuation_disabled() {
1570        let mut lexer = Lexer::with_code("\\\n\\\n\\\\");
1571        let mut lexer = lexer.disable_line_continuation();
1572        assert_eq!(lexer.peek_char().now_or_never().unwrap(), Ok(Some('\\')));
1573        assert_eq!(lexer.index(), 0);
1574    }
1575
1576    #[test]
1577    fn lexer_flush() {
1578        let mut lexer = Lexer::with_code(" \n\n\t\n");
1579        let location_1 = lexer.location().now_or_never().unwrap().unwrap().clone();
1580        assert_eq!(*location_1.code.value.borrow(), " \n");
1581
1582        lexer.consume_char();
1583        lexer.peek_char().now_or_never().unwrap().unwrap();
1584        lexer.consume_char();
1585        lexer.peek_char().now_or_never().unwrap().unwrap();
1586        lexer.consume_char();
1587        lexer.flush();
1588        lexer.peek_char().now_or_never().unwrap().unwrap();
1589        lexer.consume_char();
1590
1591        let location_2 = lexer.location().now_or_never().unwrap().unwrap().clone();
1592
1593        assert_eq!(*location_1.code.value.borrow(), " \n\n");
1594        assert_eq!(location_1.code.start_line_number.get(), 1);
1595        assert_eq!(*location_1.code.source, Source::Unknown);
1596        assert_eq!(location_1.range, 0..1);
1597        assert_eq!(*location_2.code.value.borrow(), "\t\n");
1598        assert_eq!(location_2.code.start_line_number.get(), 3);
1599        assert_eq!(*location_2.code.source, Source::Unknown);
1600        assert_eq!(location_2.range, 1..2);
1601    }
1602
1603    #[test]
1604    fn lexer_consume_char_if() {
1605        let mut lexer = Lexer::with_code("word\n");
1606
1607        let mut called = 0;
1608        let c = lexer
1609            .consume_char_if(|c| {
1610                assert_eq!(c, 'w');
1611                called += 1;
1612                true
1613            })
1614            .now_or_never()
1615            .unwrap()
1616            .unwrap()
1617            .unwrap();
1618        assert_eq!(called, 1);
1619        assert_eq!(c.value, 'w');
1620        assert_eq!(*c.location.code.value.borrow(), "word\n");
1621        assert_eq!(c.location.code.start_line_number.get(), 1);
1622        assert_eq!(*c.location.code.source, Source::Unknown);
1623        assert_eq!(c.location.range, 0..1);
1624
1625        let mut called = 0;
1626        let r = lexer
1627            .consume_char_if(|c| {
1628                assert_eq!(c, 'o');
1629                called += 1;
1630                false
1631            })
1632            .now_or_never()
1633            .unwrap();
1634        assert_eq!(called, 1);
1635        assert_eq!(r, Ok(None));
1636
1637        let mut called = 0;
1638        let r = lexer
1639            .consume_char_if(|c| {
1640                assert_eq!(c, 'o');
1641                called += 1;
1642                false
1643            })
1644            .now_or_never()
1645            .unwrap();
1646        assert_eq!(called, 1);
1647        assert_eq!(r, Ok(None));
1648
1649        let mut called = 0;
1650        let c = lexer
1651            .consume_char_if(|c| {
1652                assert_eq!(c, 'o');
1653                called += 1;
1654                true
1655            })
1656            .now_or_never()
1657            .unwrap()
1658            .unwrap()
1659            .unwrap();
1660        assert_eq!(called, 1);
1661        assert_eq!(c.value, 'o');
1662        assert_eq!(*c.location.code.value.borrow(), "word\n");
1663        assert_eq!(c.location.code.start_line_number.get(), 1);
1664        assert_eq!(*c.location.code.source, Source::Unknown);
1665        assert_eq!(c.location.range, 1..2);
1666
1667        lexer
1668            .consume_char_if(|c| {
1669                assert_eq!(c, 'r');
1670                true
1671            })
1672            .now_or_never()
1673            .unwrap()
1674            .unwrap()
1675            .unwrap();
1676        lexer
1677            .consume_char_if(|c| {
1678                assert_eq!(c, 'd');
1679                true
1680            })
1681            .now_or_never()
1682            .unwrap()
1683            .unwrap()
1684            .unwrap();
1685        lexer
1686            .consume_char_if(|c| {
1687                assert_eq!(c, '\n');
1688                true
1689            })
1690            .now_or_never()
1691            .unwrap()
1692            .unwrap()
1693            .unwrap();
1694
1695        // end of input
1696        let r = lexer
1697            .consume_char_if(|c| {
1698                unreachable!("unexpected call to the decider function: argument={}", c)
1699            })
1700            .now_or_never()
1701            .unwrap();
1702        assert_eq!(r, Ok(None));
1703    }
1704
1705    #[test]
1706    fn lexer_location_range_with_empty_range() {
1707        let mut lexer = Lexer::with_code("");
1708        lexer.peek_char().now_or_never().unwrap().unwrap();
1709        let location = lexer.location_range(0..0);
1710        assert_eq!(*location.code.value.borrow(), "");
1711        assert_eq!(location.code.start_line_number.get(), 1);
1712        assert_eq!(*location.code.source, Source::Unknown);
1713        assert_eq!(location.range, 0..0);
1714    }
1715
1716    #[test]
1717    fn lexer_location_range_with_nonempty_range() {
1718        let mut lexer = Lexer::from_memory("cat foo", Source::Stdin);
1719        for _ in 0..4 {
1720            lexer.peek_char().now_or_never().unwrap().unwrap();
1721            lexer.consume_char();
1722        }
1723        lexer.peek_char().now_or_never().unwrap().unwrap();
1724
1725        let location = lexer.location_range(1..4);
1726        assert_eq!(*location.code.value.borrow(), "cat foo");
1727        assert_eq!(location.code.start_line_number.get(), 1);
1728        assert_eq!(*location.code.source, Source::Stdin);
1729        assert_eq!(location.range, 1..4);
1730    }
1731
1732    #[test]
1733    fn lexer_location_range_with_range_starting_at_end() {
1734        let mut lexer = Lexer::from_memory("cat", Source::Stdin);
1735        for _ in 0..3 {
1736            lexer.peek_char().now_or_never().unwrap().unwrap();
1737            lexer.consume_char();
1738        }
1739        lexer.peek_char().now_or_never().unwrap().unwrap();
1740
1741        let location = lexer.location_range(3..3);
1742        assert_eq!(*location.code.value.borrow(), "cat");
1743        assert_eq!(location.code.start_line_number.get(), 1);
1744        assert_eq!(*location.code.source, Source::Stdin);
1745        assert_eq!(location.range, 3..3);
1746    }
1747
1748    #[test]
1749    #[should_panic]
1750    fn lexer_location_range_with_unconsumed_code() {
1751        let lexer = Lexer::with_code("echo ok");
1752        let _ = lexer.location_range(0..0);
1753    }
1754
1755    #[test]
1756    #[should_panic(expected = "The index 1 must not be larger than the current index 0")]
1757    fn lexer_location_range_with_range_out_of_bounds() {
1758        let lexer = Lexer::with_code("");
1759        let _ = lexer.location_range(1..2);
1760    }
1761
1762    #[test]
1763    fn lexer_location_range_with_alias_substitution() {
1764        let mut lexer = Lexer::with_code(" a;");
1765        let alias_def = Rc::new(Alias {
1766            name: "a".to_string(),
1767            replacement: "abc".to_string(),
1768            global: false,
1769            origin: Location::dummy("dummy"),
1770        });
1771        for _ in 0..2 {
1772            lexer.peek_char().now_or_never().unwrap().unwrap();
1773            lexer.consume_char();
1774        }
1775        lexer.substitute_alias(1, &alias_def);
1776        for _ in 1..5 {
1777            lexer.peek_char().now_or_never().unwrap().unwrap();
1778            lexer.consume_char();
1779        }
1780
1781        let location = lexer.location_range(2..5);
1782        assert_eq!(*location.code.value.borrow(), "abc");
1783        assert_eq!(location.code.start_line_number.get(), 1);
1784        assert_matches!(&*location.code.source, Source::Alias { original, alias } => {
1785            assert_eq!(*original.code.value.borrow(), " a;");
1786            assert_eq!(original.code.start_line_number.get(), 1);
1787            assert_eq!(*original.code.source, Source::Unknown);
1788            assert_eq!(original.range, 1..2);
1789            assert_eq!(alias, &alias_def);
1790        });
1791        assert_eq!(location.range, 1..3);
1792    }
1793
1794    #[test]
1795    fn lexer_inner_program_success() {
1796        let mut lexer = Lexer::with_code("x y )");
1797        let source = lexer.inner_program().now_or_never().unwrap().unwrap();
1798        assert_eq!(source, "x y ");
1799    }
1800
1801    #[test]
1802    fn lexer_inner_program_failure() {
1803        let mut lexer = Lexer::with_code("<< )");
1804        let e = lexer.inner_program().now_or_never().unwrap().unwrap_err();
1805        assert_eq!(
1806            e.cause,
1807            ErrorCause::Syntax(SyntaxError::MissingHereDocDelimiter)
1808        );
1809        assert_eq!(*e.location.code.value.borrow(), "<< )");
1810        assert_eq!(e.location.code.start_line_number.get(), 1);
1811        assert_eq!(*e.location.code.source, Source::Unknown);
1812        assert_eq!(e.location.range, 3..4);
1813    }
1814}