// shuck_parser/parser/parser_state.rs

1use std::{
2    collections::{HashMap, VecDeque},
3    sync::Arc,
4};
5
6use smallvec::SmallVec;
7
8use shuck_ast::{
9    AnonymousFunctionCommand, Assignment, Comment, CompoundCommand, DeclOperand, FunctionDef, Name,
10    Position, Redirect, Span, TokenKind, Word,
11};
12
13use super::{
14    Keyword, LexedToken, Lexer, ShellDialect, ShellProfile, SyntaxFacts, ZshOptionTimeline,
15};
16
/// Counters recording how often selected lexer and parser operations run.
///
/// Only compiled when the `benchmarking` feature is enabled, and hidden from
/// generated documentation. `Default` yields all-zero counters.
#[cfg(feature = "benchmarking")]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
#[doc(hidden)]
pub struct ParserBenchmarkCounters {
    /// Number of lexer current-position lookups performed while parsing.
    pub lexer_current_position_calls: u64,
    /// Number of parser calls that updated the current spanned token.
    pub parser_set_current_spanned_calls: u64,
    /// Number of raw token-advance operations performed by the parser.
    pub parser_advance_raw_calls: u64,
}
28
29#[derive(Debug, Clone)]
30pub(super) struct SimpleCommand {
31    pub(super) name: Word,
32    pub(super) args: SmallVec<[Word; 2]>,
33    pub(super) redirects: SmallVec<[Redirect; 1]>,
34    pub(super) assignments: SmallVec<[Assignment; 1]>,
35    pub(super) span: Span,
36}
37
38#[derive(Debug, Clone)]
39pub(super) struct BreakCommand {
40    pub(super) depth: Option<Word>,
41    pub(super) extra_args: SmallVec<[Word; 2]>,
42    pub(super) redirects: SmallVec<[Redirect; 1]>,
43    pub(super) assignments: SmallVec<[Assignment; 1]>,
44    pub(super) span: Span,
45}
46
47#[derive(Debug, Clone)]
48pub(super) struct ContinueCommand {
49    pub(super) depth: Option<Word>,
50    pub(super) extra_args: SmallVec<[Word; 2]>,
51    pub(super) redirects: SmallVec<[Redirect; 1]>,
52    pub(super) assignments: SmallVec<[Assignment; 1]>,
53    pub(super) span: Span,
54}
55
56#[derive(Debug, Clone)]
57pub(super) struct ReturnCommand {
58    pub(super) code: Option<Word>,
59    pub(super) extra_args: SmallVec<[Word; 2]>,
60    pub(super) redirects: SmallVec<[Redirect; 1]>,
61    pub(super) assignments: SmallVec<[Assignment; 1]>,
62    pub(super) span: Span,
63}
64
65#[derive(Debug, Clone)]
66pub(super) struct ExitCommand {
67    pub(super) code: Option<Word>,
68    pub(super) extra_args: SmallVec<[Word; 2]>,
69    pub(super) redirects: SmallVec<[Redirect; 1]>,
70    pub(super) assignments: SmallVec<[Assignment; 1]>,
71    pub(super) span: Span,
72}
73
74#[derive(Debug, Clone)]
75pub(super) enum BuiltinCommand {
76    Break(BreakCommand),
77    Continue(ContinueCommand),
78    Return(ReturnCommand),
79    Exit(ExitCommand),
80}
81
82#[derive(Debug, Clone)]
83pub(super) struct DeclClause {
84    pub(super) variant: Name,
85    pub(super) variant_span: Span,
86    pub(super) operands: SmallVec<[DeclOperand; 2]>,
87    pub(super) redirects: SmallVec<[Redirect; 1]>,
88    pub(super) assignments: SmallVec<[Assignment; 1]>,
89    pub(super) span: Span,
90}
91
92#[derive(Debug, Clone)]
93pub(super) enum Command {
94    Simple(SimpleCommand),
95    Builtin(BuiltinCommand),
96    Decl(Box<DeclClause>),
97    Compound(Box<CompoundCommand>, SmallVec<[Redirect; 1]>),
98    Function(FunctionDef),
99    AnonymousFunction(AnonymousFunctionCommand, SmallVec<[Redirect; 1]>),
100}
101
102/// Parser for bash scripts.
103#[derive(Clone)]
104pub struct Parser<'a> {
105    pub(super) input: &'a str,
106    pub(super) lexer: Lexer<'a>,
107    pub(super) synthetic_tokens: VecDeque<SyntheticToken>,
108    pub(super) alias_replays: Vec<AliasReplay>,
109    pub(super) current_token: Option<LexedToken<'a>>,
110    pub(super) current_word_cache: Option<Word>,
111    pub(super) current_token_kind: Option<TokenKind>,
112    pub(super) current_keyword: Option<Keyword>,
113    /// Span of the current token
114    pub(super) current_span: Span,
115    /// Lookahead token for function parsing
116    pub(super) peeked_token: Option<LexedToken<'a>>,
117    /// Maximum allowed AST nesting depth
118    pub(super) max_depth: usize,
119    /// Current nesting depth
120    pub(super) current_depth: usize,
121    /// Remaining fuel for parsing operations
122    pub(super) fuel: usize,
123    /// Maximum fuel (for error reporting)
124    pub(super) max_fuel: usize,
125    /// Depth of reparsing source-text operands as patterns.
126    pub(super) source_text_pattern_depth: usize,
127    /// Comments collected during parsing.
128    pub(super) comments: Vec<Comment>,
129    /// Known aliases declared earlier in the current parse stream.
130    pub(super) aliases: HashMap<String, AliasDefinition>,
131    /// Whether alias expansion is currently enabled.
132    pub(super) expand_aliases: bool,
133    /// Whether the next fetched word is eligible for alias expansion because
134    /// the previous alias expansion ended with trailing whitespace.
135    pub(super) expand_next_word: bool,
136    /// Nesting depth of active brace-delimited statement sequences.
137    pub(super) brace_group_depth: usize,
138    /// Active brace-body parsing contexts, used to distinguish compact zsh
139    /// closers from literal `}` arguments.
140    pub(super) brace_body_stack: Vec<BraceBodyContext>,
141    pub(super) syntax_facts: SyntaxFacts,
142    pub(super) shell_profile: ShellProfile,
143    pub(super) zsh_timeline: Option<Arc<ZshOptionTimeline>>,
144    pub(super) dialect: ShellDialect,
145    #[cfg(feature = "benchmarking")]
146    pub(super) benchmark_counters: Option<ParserBenchmarkCounters>,
147}
148
149#[derive(Clone)]
150pub(super) struct ParserCheckpoint<'a> {
151    pub(super) lexer: Lexer<'a>,
152    pub(super) synthetic_tokens: VecDeque<SyntheticToken>,
153    pub(super) alias_replays: Vec<AliasReplay>,
154    pub(super) current_token: Option<LexedToken<'a>>,
155    pub(super) current_token_kind: Option<TokenKind>,
156    pub(super) current_keyword: Option<Keyword>,
157    pub(super) current_span: Span,
158    pub(super) peeked_token: Option<LexedToken<'a>>,
159    pub(super) current_depth: usize,
160    pub(super) source_text_pattern_depth: usize,
161    pub(super) fuel: usize,
162    pub(super) comments: Vec<Comment>,
163    pub(super) expand_next_word: bool,
164    pub(super) brace_group_depth: usize,
165    pub(super) brace_body_stack: Vec<BraceBodyContext>,
166    pub(super) syntax_facts: SyntaxFacts,
167    #[cfg(feature = "benchmarking")]
168    pub(super) benchmark_counters: Option<ParserBenchmarkCounters>,
169}
170
171#[derive(Debug, Clone)]
172pub(super) struct AliasDefinition {
173    pub(super) tokens: Arc<[LexedToken<'static>]>,
174    pub(super) expands_next_word: bool,
175}
176
177#[derive(Debug, Clone)]
178pub(super) struct AliasReplay {
179    pub(super) tokens: Arc<[LexedToken<'static>]>,
180    pub(super) next_index: usize,
181    pub(super) base: Position,
182}
183
184impl AliasReplay {
185    pub(super) fn new(alias: &AliasDefinition, base: Position) -> Self {
186        Self {
187            tokens: Arc::clone(&alias.tokens),
188            next_index: 0,
189            base,
190        }
191    }
192
193    pub(super) fn next_token<'b>(&mut self) -> Option<LexedToken<'b>> {
194        let token = self.tokens.get(self.next_index)?.clone();
195        self.next_index += 1;
196        Some(token.into_owned().rebased(self.base).with_synthetic_flag())
197    }
198}
199
200#[derive(Debug, Clone, Copy)]
201pub(super) struct SyntheticToken {
202    pub(super) kind: TokenKind,
203    pub(super) span: Span,
204}
205
206impl SyntheticToken {
207    pub(super) const fn punctuation(kind: TokenKind, span: Span) -> Self {
208        Self { kind, span }
209    }
210
211    pub(super) fn materialize<'b>(self) -> LexedToken<'b> {
212        LexedToken::punctuation(self.kind).with_span(self.span)
213    }
214}
215
216#[derive(Debug, Clone, Copy)]
217pub(super) enum FlowControlBuiltinKind {
218    Break,
219    Continue,
220    Return,
221    Exit,
222}
223
224#[derive(Debug, Clone, Copy, PartialEq, Eq)]
225pub(super) enum BraceBodyContext {
226    Ordinary,
227    Function,
228    IfClause,
229}