Skip to main content

shuck_parser/parser/
mod.rs

1//! Parser entrypoints, lexical types, and shell-profile configuration.
2//!
3//! The parser is recursive descent and produces `shuck-ast` syntax trees while also collecting
4//! recovery diagnostics and lightweight syntax facts needed by downstream tooling.
5#![cfg_attr(not(test), warn(clippy::unwrap_used))]
6
7mod arithmetic;
8mod brace_syntax;
9mod commands;
10mod comments;
11mod cursor;
12mod diagnostics;
13mod entry;
14mod heredocs;
15mod keywords;
16mod lexer;
17mod lowering;
18mod parser_state;
19mod profile;
20mod recovery;
21mod redirects;
22mod result;
23mod source_tree;
24mod syntax_facts;
25mod token_stream;
26mod word;
27mod word_tokens;
28mod words;
29mod zsh_features;
30mod zsh_options;
31mod zsh_prescan;
32
33use std::{
34    borrow::Cow,
35    collections::{HashMap, HashSet, VecDeque},
36    sync::Arc,
37};
38
39pub use lexer::{LexedToken, Lexer};
40pub(crate) use lexer::{LexedWordSegment, LexedWordSegmentKind};
41pub use profile::{ShellDialect, ShellProfile};
42pub use result::{ParseDiagnostic, ParseResult, ParseStatus, SyntaxFacts, ZshCaseGroupPart};
43pub use zsh_options::{OptionValue, ZshEmulationMode, ZshOptionState};
44
45use keywords::*;
46use memchr::{memchr, memchr2, memchr3};
47pub use parser_state::Parser;
48#[cfg(feature = "benchmarking")]
49pub use parser_state::ParserBenchmarkCounters;
50use parser_state::*;
51use smallvec::SmallVec;
52use zsh_prescan::ZshOptionTimeline;
53
54use shuck_ast::{
55    AlwaysCommand, AnonymousFunctionCommand, AnonymousFunctionSurface, ArithmeticCommand,
56    ArithmeticExpansionSyntax, ArithmeticExpr, ArithmeticExprNode, ArithmeticForCommand,
57    ArithmeticLvalue, ArrayElem, ArrayExpr, ArrayKind, Assignment, AssignmentValue,
58    BackgroundOperator, BinaryCommand, BinaryOp, BourneParameterExpansion, BraceExpansionKind,
59    BraceQuoteContext, BraceSyntax, BraceSyntaxKind, BreakCommand as AstBreakCommand,
60    BuiltinCommand as AstBuiltinCommand, CaseCommand, CaseItem, CaseTerminator,
61    Command as AstCommand, CommandSubstitutionSyntax, Comment, CompoundCommand,
62    ConditionalBinaryExpr, ConditionalBinaryOp, ConditionalCommand, ConditionalExpr,
63    ConditionalParenExpr, ConditionalUnaryExpr, ConditionalUnaryOp,
64    ContinueCommand as AstContinueCommand, CoprocCommand, DeclClause as AstDeclClause, DeclOperand,
65    ExitCommand as AstExitCommand, File, ForCommand, ForSyntax, ForTarget, ForeachCommand,
66    ForeachSyntax, FunctionDef, FunctionHeader, FunctionHeaderEntry, Heredoc, HeredocBody,
67    HeredocBodyMode, HeredocBodyPart, HeredocBodyPartNode, HeredocDelimiter, IfCommand, IfSyntax,
68    LiteralText, Name, ParameterExpansion, ParameterExpansionSyntax, ParameterOp, Pattern,
69    PatternGroupKind, PatternPart, PatternPartNode, Position, PrefixMatchKind, Redirect,
70    RedirectKind, RedirectTarget, RepeatCommand, RepeatSyntax, ReturnCommand as AstReturnCommand,
71    SelectCommand, SimpleCommand as AstSimpleCommand, SourceText, Span, Stmt, StmtSeq,
72    StmtTerminator, Subscript, SubscriptInterpretation, SubscriptKind, SubscriptSelector, TextSize,
73    TimeCommand, TokenKind, UntilCommand, VarRef, WhileCommand, Word, WordPart, WordPartNode,
74    ZshDefaultingOp, ZshExpansionOperation, ZshExpansionTarget, ZshGlobQualifier,
75    ZshGlobQualifierGroup, ZshGlobQualifierKind, ZshGlobSegment, ZshInlineGlobControl, ZshModifier,
76    ZshParameterExpansion, ZshPatternOp, ZshQualifiedGlob, ZshReplacementOp, ZshTrimOp,
77};
78
79use crate::error::{Error, Result};
80
81type WordPartBuffer = SmallVec<[WordPartNode; 2]>;
82
/// Set of zsh glob-parsing switches consulted while parsing words.
///
/// Every flag defaults to `false`.
// NOTE(review): the per-field descriptions below are inferred from the field
// names only; confirm them against the code that populates this struct.
#[derive(Clone, Copy, Debug, Default)]
struct ZshGlobParseFeatures {
    /// Presumably enables classic zsh glob qualifiers.
    classic_qualifiers: bool,
    /// Presumably mirrors zsh's extended-glob behavior.
    extended_glob: bool,
    /// Presumably enables ksh-style pattern groups.
    ksh_groups: bool,
    /// Presumably enables bare (unprefixed) pattern groups.
    bare_groups: bool,
}

impl ZshGlobParseFeatures {
    /// Returns `true` when at least one of the feature flags is set.
    const fn zsh_word_parsing_enabled(self) -> bool {
        let Self {
            classic_qualifiers,
            extended_glob,
            ksh_groups,
            bare_groups,
        } = self;

        classic_qualifiers || extended_glob || ksh_groups || bare_groups
    }
}
96
/// Default limit on AST nesting depth (matches the `ExecutionLimits` default).
const DEFAULT_MAX_AST_DEPTH: usize = 100;

/// Absolute ceiling on AST nesting depth, applied no matter what limit the caller configures.
///
/// A caller-supplied `max_depth` that is too large would let deeply nested input drive the
/// parser's recursion far enough to overflow the native stack, so this ceiling cannot be
/// overridden by the caller.
///
/// The value is deliberately conservative so parsing stays safe on tokio's blocking threads,
/// which get a 2 MiB stack by default. In debug builds each level of parser recursion uses
/// roughly 4-8 KB of stack, so 100 levels × ~8 KB ≈ 800 KB, comfortably below 2 MiB.
/// Release builds could tolerate a higher ceiling, but one value is used for consistency.
const HARD_MAX_AST_DEPTH: usize = 100;

/// Depth limit for the synthetic parser used when reparsing words from source text.
///
/// That auxiliary reparse runs while the main parser is already on the stack, so it is kept
/// shallower than the main AST limit.
const SOURCE_TEXT_WORD_REPARSE_MAX_DEPTH: usize = 8;

/// Depth limit for reparsing pattern operands from source text.
///
/// A pattern operand may contain parameter expansions that carry pattern operands of their
/// own; reparsing is kept shallow and anything nested deeper is preserved as literal text.
const SOURCE_TEXT_PATTERN_REPARSE_MAX_DEPTH: usize = 4;

/// Default limit on parser operations (matches the `ExecutionLimits` default).
const DEFAULT_MAX_PARSER_OPERATIONS: usize = 100_000;
121
122/// Returns whether `text` parses as a nontrivial arithmetic expression.
123///
124/// Plain numbers and plain variable names are considered trivial. The helper
125/// returns `false` for empty text and for text that cannot be parsed inside a
126/// shell arithmetic command.
127pub fn text_looks_like_nontrivial_arithmetic_expression(text: &str) -> bool {
128    let text = text.trim();
129    if text.is_empty() {
130        return false;
131    }
132
133    let source = format!("(( {text} ))");
134    let file = Parser::new(&source).parse();
135    if file.is_err() {
136        return false;
137    }
138
139    let Some(statement) = file.file.body.first() else {
140        return false;
141    };
142
143    let AstCommand::Compound(CompoundCommand::Arithmetic(command)) = &statement.command else {
144        return false;
145    };
146
147    command.expr_ast.as_ref().is_some_and(|expr| {
148        !matches!(
149            expr.kind,
150            ArithmeticExpr::Number(_) | ArithmeticExpr::Variable(_)
151        )
152    })
153}
154
155/// Returns whether `text` parses as an arithmetic expression without variable
156/// references, subscripts, shell words, or assignments.
157///
158/// This is useful when a caller needs a purely self-contained arithmetic value.
159/// Invalid or empty text returns `false`.
160pub fn text_is_self_contained_arithmetic_expression(text: &str) -> bool {
161    let text = text.trim();
162    if text.is_empty() {
163        return false;
164    }
165
166    let source = format!("(( {text} ))");
167    let file = Parser::new(&source).parse();
168    if file.is_err() {
169        return false;
170    }
171
172    let Some(statement) = file.file.body.first() else {
173        return false;
174    };
175
176    let AstCommand::Compound(CompoundCommand::Arithmetic(command)) = &statement.command else {
177        return false;
178    };
179
180    command
181        .expr_ast
182        .as_ref()
183        .is_some_and(arithmetic_expr_is_self_contained)
184}
185
186fn arithmetic_expr_is_self_contained(expr: &ArithmeticExprNode) -> bool {
187    match &expr.kind {
188        ArithmeticExpr::Number(_) => true,
189        ArithmeticExpr::Variable(_)
190        | ArithmeticExpr::Indexed { .. }
191        | ArithmeticExpr::ShellWord(_)
192        | ArithmeticExpr::Assignment { .. } => false,
193        ArithmeticExpr::Parenthesized { expression } => {
194            arithmetic_expr_is_self_contained(expression)
195        }
196        ArithmeticExpr::Unary { expr, .. } | ArithmeticExpr::Postfix { expr, .. } => {
197            arithmetic_expr_is_self_contained(expr)
198        }
199        ArithmeticExpr::Binary { left, right, .. } => {
200            arithmetic_expr_is_self_contained(left) && arithmetic_expr_is_self_contained(right)
201        }
202        ArithmeticExpr::Conditional {
203            condition,
204            then_expr,
205            else_expr,
206        } => {
207            arithmetic_expr_is_self_contained(condition)
208                && arithmetic_expr_is_self_contained(then_expr)
209                && arithmetic_expr_is_self_contained(else_expr)
210        }
211    }
212}
213
#[cfg(test)]
mod arithmetic_text_helper_tests {
    use super::{
        text_is_self_contained_arithmetic_expression as is_self_contained,
        text_looks_like_nontrivial_arithmetic_expression as is_nontrivial,
    };

    /// Operators, subscripts, and increments count as nontrivial; bare numbers,
    /// bare names, and unparsable text do not.
    #[test]
    fn requires_nontrivial_expressions() {
        for text in ["1 + 2", "arr[1]", "++count"] {
            assert!(is_nontrivial(text), "expected {text:?} to be nontrivial");
        }

        for text in ["123", "name", "latest value"] {
            assert!(
                !is_nontrivial(text),
                "expected {text:?} not to be nontrivial"
            );
        }
    }

    /// Only expressions built purely from numbers are self-contained.
    #[test]
    fn distinguishes_self_contained_expressions() {
        for text in ["1 + 2", "(1 + 2)"] {
            assert!(
                is_self_contained(text),
                "expected {text:?} to be self-contained"
            );
        }

        for text in ["name", "arr[1]", "foo + 1", "latest value"] {
            assert!(
                !is_self_contained(text),
                "expected {text:?} not to be self-contained"
            );
        }
    }
}
245
246#[cfg(test)]
247mod tests;