Skip to main content

shuck_parser/parser/
parser_state.rs

1use std::{
2    collections::{HashMap, VecDeque},
3    sync::Arc,
4};
5
6use smallvec::SmallVec;
7
8use shuck_ast::{
9    AnonymousFunctionCommand, Assignment, Comment, CompoundCommand, DeclOperand, FunctionDef, Name,
10    Position, Redirect, Span, TokenKind, Word,
11};
12
13use super::{
14    Keyword, LexedToken, Lexer, ShellDialect, ShellProfile, SyntaxFacts, ZshOptionTimeline,
15};
16
/// Tallies of lexer and parser operations gathered while parsing.
///
/// The type exists only when the `benchmarking` feature is enabled and is
/// hidden from generated documentation.
#[cfg(feature = "benchmarking")]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[doc(hidden)]
pub struct ParserBenchmarkCounters {
    /// How many times the lexer's current position was looked up during parsing.
    pub lexer_current_position_calls: u64,
    /// How many parser calls updated the current spanned token.
    pub parser_set_current_spanned_calls: u64,
    /// How many raw token-advance operations the parser performed.
    pub parser_advance_raw_calls: u64,
}
28
29#[derive(Debug, Clone)]
30pub(super) struct SimpleCommand {
31    pub(super) name: Word,
32    pub(super) args: SmallVec<[Word; 2]>,
33    pub(super) redirects: SmallVec<[Redirect; 1]>,
34    pub(super) assignments: SmallVec<[Assignment; 1]>,
35    pub(super) span: Span,
36}
37
38#[derive(Debug, Clone)]
39pub(super) struct BreakCommand {
40    pub(super) depth: Option<Word>,
41    pub(super) extra_args: SmallVec<[Word; 2]>,
42    pub(super) redirects: SmallVec<[Redirect; 1]>,
43    pub(super) assignments: SmallVec<[Assignment; 1]>,
44    pub(super) span: Span,
45}
46
47#[derive(Debug, Clone)]
48pub(super) struct ContinueCommand {
49    pub(super) depth: Option<Word>,
50    pub(super) extra_args: SmallVec<[Word; 2]>,
51    pub(super) redirects: SmallVec<[Redirect; 1]>,
52    pub(super) assignments: SmallVec<[Assignment; 1]>,
53    pub(super) span: Span,
54}
55
56#[derive(Debug, Clone)]
57pub(super) struct ReturnCommand {
58    pub(super) code: Option<Word>,
59    pub(super) extra_args: SmallVec<[Word; 2]>,
60    pub(super) redirects: SmallVec<[Redirect; 1]>,
61    pub(super) assignments: SmallVec<[Assignment; 1]>,
62    pub(super) span: Span,
63}
64
65#[derive(Debug, Clone)]
66pub(super) struct ExitCommand {
67    pub(super) code: Option<Word>,
68    pub(super) extra_args: SmallVec<[Word; 2]>,
69    pub(super) redirects: SmallVec<[Redirect; 1]>,
70    pub(super) assignments: SmallVec<[Assignment; 1]>,
71    pub(super) span: Span,
72}
73
74#[derive(Debug, Clone)]
75pub(super) enum BuiltinCommand {
76    Break(BreakCommand),
77    Continue(ContinueCommand),
78    Return(ReturnCommand),
79    Exit(ExitCommand),
80}
81
82#[derive(Debug, Clone)]
83pub(super) struct DeclClause {
84    pub(super) variant: Name,
85    pub(super) variant_span: Span,
86    pub(super) operands: SmallVec<[DeclOperand; 2]>,
87    pub(super) redirects: SmallVec<[Redirect; 1]>,
88    pub(super) assignments: SmallVec<[Assignment; 1]>,
89    pub(super) span: Span,
90}
91
92#[derive(Debug, Clone)]
93pub(super) enum Command {
94    Simple(SimpleCommand),
95    Builtin(BuiltinCommand),
96    Decl(Box<DeclClause>),
97    Compound(Box<CompoundCommand>, SmallVec<[Redirect; 1]>),
98    Function(FunctionDef),
99    AnonymousFunction(AnonymousFunctionCommand, SmallVec<[Redirect; 1]>),
100}
101
102/// Stateful parser for shell scripts.
103///
104/// Construct a parser with one of the `Parser::with_*` constructors and then
105/// call `parse` to obtain a [`super::ParseResult`]. The parser is single-use:
106/// `parse` consumes the value so internal recovery state cannot leak between
107/// parses.
108#[derive(Clone)]
109pub struct Parser<'a> {
110    pub(super) input: &'a str,
111    pub(super) lexer: Lexer<'a>,
112    pub(super) synthetic_tokens: VecDeque<SyntheticToken>,
113    pub(super) alias_replays: Vec<AliasReplay>,
114    pub(super) current_token: Option<LexedToken<'a>>,
115    pub(super) current_word_cache: Option<Word>,
116    pub(super) current_token_kind: Option<TokenKind>,
117    pub(super) current_keyword: Option<Keyword>,
118    /// Span of the current token
119    pub(super) current_span: Span,
120    /// Lookahead token for function parsing
121    pub(super) peeked_token: Option<LexedToken<'a>>,
122    /// Maximum allowed AST nesting depth
123    pub(super) max_depth: usize,
124    /// Current nesting depth
125    pub(super) current_depth: usize,
126    /// Remaining fuel for parsing operations
127    pub(super) fuel: usize,
128    /// Maximum fuel (for error reporting)
129    pub(super) max_fuel: usize,
130    /// Depth of reparsing source-text operands as patterns.
131    pub(super) source_text_pattern_depth: usize,
132    /// Comments collected during parsing.
133    pub(super) comments: Vec<Comment>,
134    /// Known aliases declared earlier in the current parse stream.
135    pub(super) aliases: HashMap<String, AliasDefinition>,
136    /// Whether alias expansion is currently enabled.
137    pub(super) expand_aliases: bool,
138    /// Whether the next fetched word is eligible for alias expansion because
139    /// the previous alias expansion ended with trailing whitespace.
140    pub(super) expand_next_word: bool,
141    /// Nesting depth of active brace-delimited statement sequences.
142    pub(super) brace_group_depth: usize,
143    /// Active brace-body parsing contexts, used to distinguish compact zsh
144    /// closers from literal `}` arguments.
145    pub(super) brace_body_stack: Vec<BraceBodyContext>,
146    pub(super) syntax_facts: SyntaxFacts,
147    pub(super) shell_profile: ShellProfile,
148    pub(super) zsh_timeline: Option<Arc<ZshOptionTimeline>>,
149    pub(super) dialect: ShellDialect,
150    #[cfg(feature = "benchmarking")]
151    pub(super) benchmark_counters: Option<ParserBenchmarkCounters>,
152}
153
154#[derive(Clone)]
155pub(super) struct ParserCheckpoint<'a> {
156    pub(super) lexer: Lexer<'a>,
157    pub(super) synthetic_tokens: VecDeque<SyntheticToken>,
158    pub(super) alias_replays: Vec<AliasReplay>,
159    pub(super) current_token: Option<LexedToken<'a>>,
160    pub(super) current_token_kind: Option<TokenKind>,
161    pub(super) current_keyword: Option<Keyword>,
162    pub(super) current_span: Span,
163    pub(super) peeked_token: Option<LexedToken<'a>>,
164    pub(super) current_depth: usize,
165    pub(super) source_text_pattern_depth: usize,
166    pub(super) fuel: usize,
167    // `comments`, `brace_body_stack`, and the `syntax_facts` Vecs are append-only
168    // inside any speculative parse, so we save lengths and truncate on restore
169    // instead of cloning their backing storage.
170    pub(super) comments_len: usize,
171    pub(super) expand_next_word: bool,
172    pub(super) brace_group_depth: usize,
173    pub(super) brace_body_stack_len: usize,
174    pub(super) syntax_facts_zsh_brace_if_spans_len: usize,
175    pub(super) syntax_facts_zsh_always_spans_len: usize,
176    pub(super) syntax_facts_zsh_case_group_parts_len: usize,
177    #[cfg(feature = "benchmarking")]
178    pub(super) benchmark_counters: Option<ParserBenchmarkCounters>,
179}
180
181#[derive(Debug, Clone)]
182pub(super) struct AliasDefinition {
183    pub(super) tokens: Arc<[LexedToken<'static>]>,
184    pub(super) expands_next_word: bool,
185}
186
187#[derive(Debug, Clone)]
188pub(super) struct AliasReplay {
189    pub(super) tokens: Arc<[LexedToken<'static>]>,
190    pub(super) next_index: usize,
191    pub(super) base: Position,
192}
193
194impl AliasReplay {
195    pub(super) fn new(alias: &AliasDefinition, base: Position) -> Self {
196        Self {
197            tokens: Arc::clone(&alias.tokens),
198            next_index: 0,
199            base,
200        }
201    }
202
203    pub(super) fn next_token<'b>(&mut self) -> Option<LexedToken<'b>> {
204        let token = self.tokens.get(self.next_index)?.clone();
205        self.next_index += 1;
206        Some(token.into_owned().rebased(self.base).with_synthetic_flag())
207    }
208}
209
210#[derive(Debug, Clone, Copy)]
211pub(super) struct SyntheticToken {
212    pub(super) kind: TokenKind,
213    pub(super) span: Span,
214}
215
216impl SyntheticToken {
217    pub(super) const fn punctuation(kind: TokenKind, span: Span) -> Self {
218        Self { kind, span }
219    }
220
221    pub(super) fn materialize<'b>(self) -> LexedToken<'b> {
222        LexedToken::punctuation(self.kind).with_span(self.span)
223    }
224}
225
226#[derive(Debug, Clone, Copy)]
227pub(super) enum FlowControlBuiltinKind {
228    Break,
229    Continue,
230    Return,
231    Exit,
232}
233
234#[derive(Debug, Clone, Copy, PartialEq, Eq)]
235pub(super) enum BraceBodyContext {
236    Ordinary,
237    Function,
238    IfClause,
239}