// shuck_parser/parser/parser_state.rs

1use std::{
2    collections::{HashMap, VecDeque},
3    sync::Arc,
4};
5
6use smallvec::SmallVec;
7
8use shuck_ast::{
9    AnonymousFunctionCommand, Assignment, Comment, CompoundCommand, DeclOperand, FunctionDef, Name,
10    Position, Redirect, Span, TokenKind, Word,
11};
12
13use super::{
14    Keyword, LexedToken, Lexer, ShellDialect, ShellProfile, SyntaxFacts, ZshOptionTimeline,
15};
16
/// Call tallies gathered while parsing.
///
/// Only compiled with the `benchmarking` feature and hidden from rendered
/// documentation.
#[cfg(feature = "benchmarking")]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[doc(hidden)]
pub struct ParserBenchmarkCounters {
    /// How many times the lexer was asked for its current position during parsing.
    pub lexer_current_position_calls: u64,
    /// How many parser calls replaced the current spanned token.
    pub parser_set_current_spanned_calls: u64,
    /// How many raw token-advance operations the parser performed.
    pub parser_advance_raw_calls: u64,
}
28
29#[derive(Debug, Clone)]
30pub(super) struct SimpleCommand {
31    pub(super) name: Word,
32    pub(super) args: SmallVec<[Word; 2]>,
33    pub(super) redirects: SmallVec<[Redirect; 1]>,
34    pub(super) assignments: SmallVec<[Assignment; 1]>,
35    pub(super) span: Span,
36}
37
38#[derive(Debug, Clone)]
39pub(super) struct BreakCommand {
40    pub(super) depth: Option<Word>,
41    pub(super) extra_args: SmallVec<[Word; 2]>,
42    pub(super) redirects: SmallVec<[Redirect; 1]>,
43    pub(super) assignments: SmallVec<[Assignment; 1]>,
44    pub(super) span: Span,
45}
46
47#[derive(Debug, Clone)]
48pub(super) struct ContinueCommand {
49    pub(super) depth: Option<Word>,
50    pub(super) extra_args: SmallVec<[Word; 2]>,
51    pub(super) redirects: SmallVec<[Redirect; 1]>,
52    pub(super) assignments: SmallVec<[Assignment; 1]>,
53    pub(super) span: Span,
54}
55
56#[derive(Debug, Clone)]
57pub(super) struct ReturnCommand {
58    pub(super) code: Option<Word>,
59    pub(super) extra_args: SmallVec<[Word; 2]>,
60    pub(super) redirects: SmallVec<[Redirect; 1]>,
61    pub(super) assignments: SmallVec<[Assignment; 1]>,
62    pub(super) span: Span,
63}
64
65#[derive(Debug, Clone)]
66pub(super) struct ExitCommand {
67    pub(super) code: Option<Word>,
68    pub(super) extra_args: SmallVec<[Word; 2]>,
69    pub(super) redirects: SmallVec<[Redirect; 1]>,
70    pub(super) assignments: SmallVec<[Assignment; 1]>,
71    pub(super) span: Span,
72}
73
74#[derive(Debug, Clone)]
75pub(super) enum BuiltinCommand {
76    Break(BreakCommand),
77    Continue(ContinueCommand),
78    Return(ReturnCommand),
79    Exit(ExitCommand),
80}
81
82#[derive(Debug, Clone)]
83pub(super) struct DeclClause {
84    pub(super) variant: Name,
85    pub(super) variant_span: Span,
86    pub(super) operands: SmallVec<[DeclOperand; 2]>,
87    pub(super) redirects: SmallVec<[Redirect; 1]>,
88    pub(super) assignments: SmallVec<[Assignment; 1]>,
89    pub(super) span: Span,
90}
91
92#[derive(Debug, Clone)]
93pub(super) enum Command {
94    Simple(SimpleCommand),
95    Builtin(BuiltinCommand),
96    Decl(Box<DeclClause>),
97    Compound(Box<CompoundCommand>, SmallVec<[Redirect; 1]>),
98    Function(FunctionDef),
99    AnonymousFunction(AnonymousFunctionCommand, SmallVec<[Redirect; 1]>),
100}
101
102/// Stateful parser for shell scripts.
103///
104/// Construct a parser with one of the `Parser::with_*` constructors and then
105/// call `parse` to obtain a [`super::ParseResult`]. The parser is single-use:
106/// `parse` consumes the value so internal recovery state cannot leak between
107/// parses.
108#[derive(Clone)]
109pub struct Parser<'a> {
110    pub(super) input: &'a str,
111    pub(super) lexer: Lexer<'a>,
112    pub(super) synthetic_tokens: VecDeque<SyntheticToken>,
113    pub(super) alias_replays: Vec<AliasReplay>,
114    pub(super) current_token: Option<LexedToken<'a>>,
115    pub(super) current_word_cache: Option<Word>,
116    pub(super) current_token_kind: Option<TokenKind>,
117    pub(super) current_keyword: Option<Keyword>,
118    /// Span of the current token
119    pub(super) current_span: Span,
120    /// Lookahead token for function parsing
121    pub(super) peeked_token: Option<LexedToken<'a>>,
122    /// Maximum allowed AST nesting depth
123    pub(super) max_depth: usize,
124    /// Current nesting depth
125    pub(super) current_depth: usize,
126    /// Remaining fuel for parsing operations
127    pub(super) fuel: usize,
128    /// Maximum fuel (for error reporting)
129    pub(super) max_fuel: usize,
130    /// Depth of reparsing source-text operands as patterns.
131    pub(super) source_text_pattern_depth: usize,
132    /// Comments collected during parsing.
133    pub(super) comments: Vec<Comment>,
134    /// Known aliases declared earlier in the current parse stream.
135    pub(super) aliases: HashMap<String, AliasDefinition>,
136    /// Whether alias expansion is currently enabled.
137    pub(super) expand_aliases: bool,
138    /// Whether the next fetched word is eligible for alias expansion because
139    /// the previous alias expansion ended with trailing whitespace.
140    pub(super) expand_next_word: bool,
141    /// Nesting depth of active brace-delimited statement sequences.
142    pub(super) brace_group_depth: usize,
143    /// Active brace-body parsing contexts, used to distinguish compact zsh
144    /// closers from literal `}` arguments.
145    pub(super) brace_body_stack: Vec<BraceBodyContext>,
146    pub(super) syntax_facts: SyntaxFacts,
147    pub(super) shell_profile: ShellProfile,
148    pub(super) zsh_timeline: Option<Arc<ZshOptionTimeline>>,
149    pub(super) dialect: ShellDialect,
150    #[cfg(feature = "benchmarking")]
151    pub(super) benchmark_counters: Option<ParserBenchmarkCounters>,
152}
153
154#[derive(Clone)]
155pub(super) struct ParserCheckpoint<'a> {
156    pub(super) lexer: Lexer<'a>,
157    pub(super) synthetic_tokens: VecDeque<SyntheticToken>,
158    pub(super) alias_replays: Vec<AliasReplay>,
159    pub(super) current_token: Option<LexedToken<'a>>,
160    pub(super) current_token_kind: Option<TokenKind>,
161    pub(super) current_keyword: Option<Keyword>,
162    pub(super) current_span: Span,
163    pub(super) peeked_token: Option<LexedToken<'a>>,
164    pub(super) current_depth: usize,
165    pub(super) source_text_pattern_depth: usize,
166    pub(super) fuel: usize,
167    pub(super) comments: Vec<Comment>,
168    pub(super) expand_next_word: bool,
169    pub(super) brace_group_depth: usize,
170    pub(super) brace_body_stack: Vec<BraceBodyContext>,
171    pub(super) syntax_facts: SyntaxFacts,
172    #[cfg(feature = "benchmarking")]
173    pub(super) benchmark_counters: Option<ParserBenchmarkCounters>,
174}
175
176#[derive(Debug, Clone)]
177pub(super) struct AliasDefinition {
178    pub(super) tokens: Arc<[LexedToken<'static>]>,
179    pub(super) expands_next_word: bool,
180}
181
182#[derive(Debug, Clone)]
183pub(super) struct AliasReplay {
184    pub(super) tokens: Arc<[LexedToken<'static>]>,
185    pub(super) next_index: usize,
186    pub(super) base: Position,
187}
188
189impl AliasReplay {
190    pub(super) fn new(alias: &AliasDefinition, base: Position) -> Self {
191        Self {
192            tokens: Arc::clone(&alias.tokens),
193            next_index: 0,
194            base,
195        }
196    }
197
198    pub(super) fn next_token<'b>(&mut self) -> Option<LexedToken<'b>> {
199        let token = self.tokens.get(self.next_index)?.clone();
200        self.next_index += 1;
201        Some(token.into_owned().rebased(self.base).with_synthetic_flag())
202    }
203}
204
205#[derive(Debug, Clone, Copy)]
206pub(super) struct SyntheticToken {
207    pub(super) kind: TokenKind,
208    pub(super) span: Span,
209}
210
211impl SyntheticToken {
212    pub(super) const fn punctuation(kind: TokenKind, span: Span) -> Self {
213        Self { kind, span }
214    }
215
216    pub(super) fn materialize<'b>(self) -> LexedToken<'b> {
217        LexedToken::punctuation(self.kind).with_span(self.span)
218    }
219}
220
221#[derive(Debug, Clone, Copy)]
222pub(super) enum FlowControlBuiltinKind {
223    Break,
224    Continue,
225    Return,
226    Exit,
227}
228
229#[derive(Debug, Clone, Copy, PartialEq, Eq)]
230pub(super) enum BraceBodyContext {
231    Ordinary,
232    Function,
233    IfClause,
234}