brush_parser/
word.rs

1//! Parser for shell words, used in expansion and other contexts.
2//!
3//! Implements support for:
4//!
5//! - Text quoting (single, double, ANSI C).
6//! - Escape sequences.
7//! - Tilde prefixes.
8//! - Parameter expansion expressions.
9//! - Command substitution expressions.
10//! - Arithmetic expansion expressions.
11
12use crate::ParserOptions;
13use crate::ast;
14use crate::error;
15
16/// Encapsulates a `WordPiece` together with its position in the string it came from.
17#[derive(Clone, Debug)]
18#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
19pub struct WordPieceWithSource {
20    /// The word piece.
21    pub piece: WordPiece,
22    /// The start index of the piece in the source string.
23    pub start_index: usize,
24    /// The end index of the piece in the source string.
25    pub end_index: usize,
26}
27
28/// Represents a piece of a word.
29#[derive(Clone, Debug)]
30#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
31pub enum WordPiece {
32    /// A simple unquoted, unescaped string.
33    Text(String),
34    /// A string that is single-quoted.
35    SingleQuotedText(String),
36    /// A string that is ANSI-C quoted.
37    AnsiCQuotedText(String),
38    /// A sequence of pieces that are embedded in double quotes.
39    DoubleQuotedSequence(Vec<WordPieceWithSource>),
40    /// Gettext enabled variant of [`WordPiece::DoubleQuotedSequence`].
41    GettextDoubleQuotedSequence(Vec<WordPieceWithSource>),
42    /// A tilde prefix.
43    TildePrefix(String),
44    /// A parameter expansion.
45    ParameterExpansion(ParameterExpr),
46    /// A command substitution.
47    CommandSubstitution(String),
48    /// A backquoted command substitution.
49    BackquotedCommandSubstitution(String),
50    /// An escape sequence.
51    EscapeSequence(String),
52    /// An arithmetic expression.
53    ArithmeticExpression(ast::UnexpandedArithmeticExpr),
54}
55
/// Selects which condition a conditional parameter expansion tests for.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
pub enum ParameterTestType {
    /// Test whether the parameter is unset or null (chosen when a `:` precedes the operator).
    UnsetOrNull,
    /// Test only whether the parameter is unset.
    Unset,
}
65
66/// A parameter, used in a parameter expansion.
67#[derive(Clone, Debug)]
68#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
69pub enum Parameter {
70    /// A 0-indexed positional parameter.
71    Positional(u32),
72    /// A special parameter.
73    Special(SpecialParameter),
74    /// A named variable.
75    Named(String),
76    /// An index into a named variable.
77    NamedWithIndex {
78        /// Variable name.
79        name: String,
80        /// Index.
81        index: String,
82    },
83    /// A named array variable with all indices.
84    NamedWithAllIndices {
85        /// Variable name.
86        name: String,
87        /// Whether to concatenate the values.
88        concatenate: bool,
89    },
90}
91
/// The special parameters that may appear in a parameter expansion.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
pub enum SpecialParameter {
    /// The full set of positional parameters.
    AllPositionalParameters {
        /// Whether the values are to be concatenated.
        concatenate: bool,
    },
    /// How many positional parameters there are.
    PositionalParameterCount,
    /// Exit status of the most recent command in the shell.
    LastExitStatus,
    /// The option flags currently in effect for the shell.
    CurrentOptionFlags,
    /// Process ID of the current shell.
    ProcessId,
    /// Process ID of the last background process managed by the shell.
    LastBackgroundProcessId,
    /// Name of the shell.
    ShellName,
}
114
115/// A parameter expression, used in a parameter expansion.
116#[derive(Clone, Debug)]
117#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
118pub enum ParameterExpr {
119    /// A parameter, with optional indirection.
120    Parameter {
121        /// The parameter.
122        parameter: Parameter,
123        /// Whether to treat the expanded parameter as an indirect
124        /// reference, which should be subsequently dereferenced
125        /// for the expansion.
126        indirect: bool,
127    },
128    /// Conditionally use default values.
129    UseDefaultValues {
130        /// The parameter.
131        parameter: Parameter,
132        /// Whether to treat the expanded parameter as an indirect
133        /// reference, which should be subsequently dereferenced
134        /// for the expansion.
135        indirect: bool,
136        /// The type of test to perform.
137        test_type: ParameterTestType,
138        /// Default value to conditionally use.
139        default_value: Option<String>,
140    },
141    /// Conditionally assign default values.
142    AssignDefaultValues {
143        /// The parameter.
144        parameter: Parameter,
145        /// Whether to treat the expanded parameter as an indirect
146        /// reference, which should be subsequently dereferenced
147        /// for the expansion.
148        indirect: bool,
149        /// The type of test to perform.
150        test_type: ParameterTestType,
151        /// Default value to conditionally assign.
152        default_value: Option<String>,
153    },
154    /// Indicate error if null or unset.
155    IndicateErrorIfNullOrUnset {
156        /// The parameter.
157        parameter: Parameter,
158        /// Whether to treat the expanded parameter as an indirect
159        /// reference, which should be subsequently dereferenced
160        /// for the expansion.
161        indirect: bool,
162        /// The type of test to perform.
163        test_type: ParameterTestType,
164        /// Error message to conditionally yield.
165        error_message: Option<String>,
166    },
167    /// Conditionally use an alternative value.
168    UseAlternativeValue {
169        /// The parameter.
170        parameter: Parameter,
171        /// Whether to treat the expanded parameter as an indirect
172        /// reference, which should be subsequently dereferenced
173        /// for the expansion.
174        indirect: bool,
175        /// The type of test to perform.
176        test_type: ParameterTestType,
177        /// Alternative value to conditionally use.
178        alternative_value: Option<String>,
179    },
180    /// Compute the length of the given parameter.
181    ParameterLength {
182        /// The parameter.
183        parameter: Parameter,
184        /// Whether to treat the expanded parameter as an indirect
185        /// reference, which should be subsequently dereferenced
186        /// for the expansion.
187        indirect: bool,
188    },
189    /// Remove the smallest suffix from the given string matching the given pattern.
190    RemoveSmallestSuffixPattern {
191        /// The parameter.
192        parameter: Parameter,
193        /// Whether to treat the expanded parameter as an indirect
194        /// reference, which should be subsequently dereferenced
195        /// for the expansion.
196        indirect: bool,
197        /// Optionally provides a pattern to match.
198        pattern: Option<String>,
199    },
200    /// Remove the largest suffix from the given string matching the given pattern.
201    RemoveLargestSuffixPattern {
202        /// The parameter.
203        parameter: Parameter,
204        /// Whether to treat the expanded parameter as an indirect
205        /// reference, which should be subsequently dereferenced
206        /// for the expansion.
207        indirect: bool,
208        /// Optionally provides a pattern to match.
209        pattern: Option<String>,
210    },
211    /// Remove the smallest prefix from the given string matching the given pattern.
212    RemoveSmallestPrefixPattern {
213        /// The parameter.
214        parameter: Parameter,
215        /// Whether to treat the expanded parameter as an indirect
216        /// reference, which should be subsequently dereferenced
217        /// for the expansion.
218        indirect: bool,
219        /// Optionally provides a pattern to match.
220        pattern: Option<String>,
221    },
222    /// Remove the largest prefix from the given string matching the given pattern.
223    RemoveLargestPrefixPattern {
224        /// The parameter.
225        parameter: Parameter,
226        /// Whether to treat the expanded parameter as an indirect
227        /// reference, which should be subsequently dereferenced
228        /// for the expansion.
229        indirect: bool,
230        /// Optionally provides a pattern to match.
231        pattern: Option<String>,
232    },
233    /// Extract a substring from the given parameter.
234    Substring {
235        /// The parameter.
236        parameter: Parameter,
237        /// Whether to treat the expanded parameter as an indirect
238        /// reference, which should be subsequently dereferenced
239        /// for the expansion.
240        indirect: bool,
241        /// Arithmetic expression that will be expanded to compute the offset
242        /// at which the substring should be extracted.
243        offset: ast::UnexpandedArithmeticExpr,
244        /// Optionally provides an arithmetic expression that will be expanded
245        /// to compute the length of substring to be extracted; if left
246        /// unspecified, the remainder of the string will be extracted.
247        length: Option<ast::UnexpandedArithmeticExpr>,
248    },
249    /// Transform the given parameter.
250    Transform {
251        /// The parameter.
252        parameter: Parameter,
253        /// Whether to treat the expanded parameter as an indirect
254        /// reference, which should be subsequently dereferenced
255        /// for the expansion.
256        indirect: bool,
257        /// Type of transformation to apply.
258        op: ParameterTransformOp,
259    },
260    /// Uppercase the first character of the given parameter.
261    UppercaseFirstChar {
262        /// The parameter.
263        parameter: Parameter,
264        /// Whether to treat the expanded parameter as an indirect
265        /// reference, which should be subsequently dereferenced
266        /// for the expansion.
267        indirect: bool,
268        /// Optionally provides a pattern to match.
269        pattern: Option<String>,
270    },
271    /// Uppercase the portion of the given parameter matching the given pattern.
272    UppercasePattern {
273        /// The parameter.
274        parameter: Parameter,
275        /// Whether to treat the expanded parameter as an indirect
276        /// reference, which should be subsequently dereferenced
277        /// for the expansion.
278        indirect: bool,
279        /// Optionally provides a pattern to match.
280        pattern: Option<String>,
281    },
282    /// Lowercase the first character of the given parameter.
283    LowercaseFirstChar {
284        /// The parameter.
285        parameter: Parameter,
286        /// Whether to treat the expanded parameter as an indirect
287        /// reference, which should be subsequently dereferenced
288        /// for the expansion.
289        indirect: bool,
290        /// Optionally provides a pattern to match.
291        pattern: Option<String>,
292    },
293    /// Lowercase the portion of the given parameter matching the given pattern.
294    LowercasePattern {
295        /// The parameter.
296        parameter: Parameter,
297        /// Whether to treat the expanded parameter as an indirect
298        /// reference, which should be subsequently dereferenced
299        /// for the expansion.
300        indirect: bool,
301        /// Optionally provides a pattern to match.
302        pattern: Option<String>,
303    },
304    /// Replace occurrences of the given pattern in the given parameter.
305    ReplaceSubstring {
306        /// The parameter.
307        parameter: Parameter,
308        /// Whether to treat the expanded parameter as an indirect
309        /// reference, which should be subsequently dereferenced
310        /// for the expansion.
311        indirect: bool,
312        /// Pattern to match.
313        pattern: String,
314        /// Replacement string.
315        replacement: Option<String>,
316        /// Kind of match to perform.
317        match_kind: SubstringMatchKind,
318    },
319    /// Select variable names from the environment with a given prefix.
320    VariableNames {
321        /// The prefix to match.
322        prefix: String,
323        /// Whether to concatenate the results.
324        concatenate: bool,
325    },
326    /// Select member keys from the named array.
327    MemberKeys {
328        /// Name of the array variable.
329        variable_name: String,
330        /// Whether to concatenate the results.
331        concatenate: bool,
332    },
333}
334
/// Where in the string a substring replacement looks for its pattern.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
pub enum SubstringMatchKind {
    /// The pattern must match at the start of the string.
    Prefix,
    /// The pattern must match at the end of the string.
    Suffix,
    /// Only the first occurrence in the string is matched.
    FirstOccurrence,
    /// Every occurrence in the string is matched.
    Anywhere,
}
348
/// The transformation that a `Transform` parameter expression applies.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
pub enum ParameterTransformOp {
    /// Capitalize initials.
    CapitalizeInitial,
    /// Expand any escape sequences.
    ExpandEscapeSequences,
    /// Possibly quote, with arrays expanded.
    PossiblyQuoteWithArraysExpanded {
        /// Whether separate words should be yielded.
        separate_words: bool,
    },
    /// Perform prompt expansion.
    PromptExpand,
    /// Quote the parameter.
    Quoted,
    /// Render in a form usable in an assignment or declaration.
    ToAssignmentLogic,
    /// Render as the parameter's attribute flags.
    ToAttributeFlags,
    /// Convert to lowercase.
    ToLowerCase,
    /// Convert to uppercase.
    ToUpperCase,
}
375
376/// Represents a sub-word that is either a brace expression or some other word text.
377#[derive(Clone, Debug)]
378#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
379pub enum BraceExpressionOrText {
380    /// A brace expression.
381    Expr(BraceExpression),
382    /// Other word text.
383    Text(String),
384}
385
386/// Represents a brace expression to be expanded.
387pub type BraceExpression = Vec<BraceExpressionMember>;
388
389/// Member of a brace expression.
390#[derive(Clone, Debug)]
391#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
392pub enum BraceExpressionMember {
393    /// An inclusive numerical sequence.
394    NumberSequence {
395        /// Start of the sequence.
396        start: i64,
397        /// Inclusive end of the sequence.
398        end: i64,
399        /// Increment value.
400        increment: i64,
401    },
402    /// An inclusive character sequence.
403    CharSequence {
404        /// Start of the sequence.
405        start: char,
406        /// Inclusive end of the sequence.
407        end: char,
408        /// Increment value.
409        increment: i64,
410    },
411    /// Child text or expressions.
412    Child(Vec<BraceExpressionOrText>),
413}
414
415/// Parse a word into its constituent pieces.
416///
417/// # Arguments
418///
419/// * `word` - The word to parse.
420/// * `options` - The parser options to use.
421pub fn parse(
422    word: &str,
423    options: &ParserOptions,
424) -> Result<Vec<WordPieceWithSource>, error::WordParseError> {
425    cacheable_parse(word.to_owned(), options.to_owned())
426}
427
428#[cached::proc_macro::cached(size = 64, result = true)]
429fn cacheable_parse(
430    word: String,
431    options: ParserOptions,
432) -> Result<Vec<WordPieceWithSource>, error::WordParseError> {
433    tracing::debug!(target: "expansion", "Parsing word '{}'", word);
434
435    let pieces = expansion_parser::unexpanded_word(word.as_str(), &options)
436        .map_err(|err| error::WordParseError::Word(word.clone(), err.into()))?;
437
438    tracing::debug!(target: "expansion", "Parsed word '{}' => {{{:?}}}", word, pieces);
439
440    Ok(pieces)
441}
442
443/// Parse the given word into a parameter expression.
444///
445/// # Arguments
446///
447/// * `word` - The word to parse.
448/// * `options` - The parser options to use.
449pub fn parse_parameter(
450    word: &str,
451    options: &ParserOptions,
452) -> Result<Parameter, error::WordParseError> {
453    expansion_parser::parameter(word, options)
454        .map_err(|err| error::WordParseError::Parameter(word.to_owned(), err.into()))
455}
456
457/// Parse brace expansion from a given word .
458///
459/// # Arguments
460///
461/// * `word` - The word to parse.
462/// * `options` - The parser options to use.
463pub fn parse_brace_expansions(
464    word: &str,
465    options: &ParserOptions,
466) -> Result<Option<Vec<BraceExpressionOrText>>, error::WordParseError> {
467    expansion_parser::brace_expansions(word, options)
468        .map_err(|err| error::WordParseError::BraceExpansion(word.to_owned(), err.into()))
469}
470
471peg::parser! {
472    grammar expansion_parser(parser_options: &ParserOptions) for str {
        // Helper rule that enables pegviz to be used to visualize debug peg traces.
        //
        // Wraps rule `e`: when the `debug-tracing` feature is enabled, the remaining input is
        // printed between start markers before `e` runs, and a stop marker is printed afterwards.
        // Yields whatever `e` yields, and fails whenever `e` fails.
        rule traced<T>(e: rule<T>) -> T =
            // Positive lookahead: captures the rest of the input for printing without consuming it.
            &(input:$([_]*) {
                #[cfg(feature = "debug-tracing")]
                println!("[PEG_INPUT_START]\n{input}\n[PEG_TRACE_START]");
            })
            // `e` is matched optionally so that the stop marker is still printed when it fails;
            // `ok_or` then turns a missing result back into a match failure.
            e:e()? {?
                #[cfg(feature = "debug-tracing")]
                println!("[PEG_TRACE_STOP]");
                e.ok_or("")
            }
484
        // Entry point for parsing a whole word: runs `word` with end-of-input as its stop
        // condition, wrapped in `traced`.
        pub(crate) rule unexpanded_word() -> Vec<WordPieceWithSource> = traced(<word(<![_]>)>)
486
487        rule word<T>(stop_condition: rule<T>) -> Vec<WordPieceWithSource> =
488            tilde:tilde_prefix_with_source()? pieces:word_piece_with_source(<stop_condition()>, false /*in_command*/)* {
489                let mut all_pieces = Vec::new();
490                if let Some(tilde) = tilde {
491                    all_pieces.push(tilde);
492                }
493                all_pieces.extend(pieces);
494                all_pieces
495            }
496
        // Takes a word as input.
        //
        // Yields `Some` list of brace-expression/text pieces when at least one piece matches;
        // otherwise consumes whatever input remains and yields `None`.
        pub(crate) rule brace_expansions() -> Option<Vec<BraceExpressionOrText>> =
            pieces:(brace_expansion_piece(<![_]>)+) { Some(pieces) } /
            [_]* { None }
501
        // Returns either a complete brace expression (without any prefix or suffix), or a
        // non-brace-expression string.
        //
        // A brace expression is tried first; only if that fails is the input taken as one or
        // more runs of non-brace text, captured verbatim as a single `Text` piece.
        rule brace_expansion_piece<T>(stop_condition: rule<T>) -> BraceExpressionOrText =
            expr:brace_expr() {
                BraceExpressionOrText::Expr(expr)
            } /
            text:$(non_brace_expr_text(<stop_condition()>)+) { BraceExpressionOrText::Text(text.to_owned()) }
509
        // Parses text that is not considered to contain a brace expression.
        //
        // Matches either a word piece that does not start with `{` (and which stops at the next
        // `{` or at the stop condition), or a lone `{` that neither begins a valid brace
        // expression nor sits at the stop condition.
        rule non_brace_expr_text<T>(stop_condition: rule<T>) -> () =
            !"{" word_piece(<['{'] {} / stop_condition() {}>, false) {} /
            !brace_expr() !stop_condition() "{" {}
514
        // Parses a complete brace expression, with no prefix or suffix.
        // Yields the members parsed from between the `{` and `}`.
        pub(crate) rule brace_expr() -> BraceExpression =
            "{" inner:brace_expr_inner() "}" { inner }
518
        // Parses the text inside a complete brace expression; basically the complete brace
        // expression without the opening and closing brace characters.
        //
        // A comma-separated member list is tried first; failing that, a single sequence
        // expression is wrapped in a one-element list.
        pub(crate) rule brace_expr_inner() -> BraceExpression =
            brace_text_list_expr() /
            seq:brace_sequence_expr() { vec![seq] }
524
        // Parses a list of brace expression members, including the separating commas; does
        // not include the opening and closing braces.
        //
        // The `**<2,>` repetition requires at least two members, i.e. at least one comma.
        pub(crate) rule brace_text_list_expr() -> BraceExpression =
            brace_text_list_member() **<2,> ","
529
        // Parses an element that can occur in a brace expression member list, not including the
        // terminating comma or closing brace.
        //
        // Always yields a `Child` member: either one holding a single empty `Text`, or one
        // holding the nested pieces that were parsed.
        pub(crate) rule brace_text_list_member() -> BraceExpressionMember =
            // Matches an empty-string member, without consuming the comma or closing brace that terminates it.
            &[',' | '}'] { BraceExpressionMember::Child(vec![BraceExpressionOrText::Text(String::new())]) } /
            // Matches a nested string that may include some combination of concatenated textual strings
            // and brace expressions.
            child_pieces:(brace_expansion_piece(<[',' | '}']>)+) {
                BraceExpressionMember::Child(child_pieces)
            }
540
        // Parses a sequence expression of the form `start..end` or `start..end..increment`,
        // where both endpoints are numbers or both are single ASCII letters. The increment is
        // always a number and defaults to 1 when omitted. Numeric endpoints are tried first.
        pub(crate) rule brace_sequence_expr() -> BraceExpressionMember =
            start:number() ".." end:number() increment:(".." n:number() { n })? {
                BraceExpressionMember::NumberSequence { start, end, increment: increment.unwrap_or(1) }
            } /
            start:character() ".." end:character() increment:(".." n:number() { n })? {
                BraceExpressionMember::CharSequence { start, end, increment: increment.unwrap_or(1) }
            }
548
549        rule number() -> i64 = sign:number_sign()? n:$(['0'..='9']+) {
550            let sign = sign.unwrap_or(1);
551            let num: i64 = n.parse().unwrap();
552            num * sign
553        }
554
        // Parses a sign character, yielding the multiplier it stands for: -1 for `-`, 1 for `+`.
        rule number_sign() -> i64 =
            ['-'] { -1 } /
            ['+'] { 1 }
558
        // Matches a single ASCII letter (`a`-`z` or `A`-`Z`) and yields it.
        rule character() -> char = ['a'..='z' | 'A'..='Z']
560
        // Matches an arithmetic word that runs to the end of the input; yields no value.
        pub(crate) rule is_arithmetic_word() =
            arithmetic_word(<![_]>)
563
        // Matches zero or more arithmetic word pieces, each parsed with the given stop condition.
        //
        // N.B. We don't bother returning the word pieces, as all users of this rule
        // only try to extract the consumed input string and not the parse result.
        rule arithmetic_word<T>(stop_condition: rule<T>) =
            arithmetic_word_piece(<stop_condition()>)* {}
568
        // Matches a single arithmetic word piece, using end of input as its stop condition;
        // yields no value.
        pub(crate) rule is_arithmetic_word_piece() =
            arithmetic_word_piece(<![_]>)
571
        // This rule matches an individual "piece" of an arithmetic expression. It needs to handle
        // matching nested parenthesized expressions as well. We stop consuming the input when
        // we reach the provided stop condition, which typically denotes the end of the containing
        // arithmetic expression.
        //
        // The rule yields no value; callers only care about how much input it consumes.
        rule arithmetic_word_piece<T>(stop_condition: rule<T>) =
            // This branch matches a parenthesized piece; we consume the opening parenthesis and
            // delegate the rest to a helper rule. We don't worry about the stop condition passed
            // into us, because if we see an opening parenthesis then we *must* find its closing
            // partner.
            "(" arithmetic_word_plus_right_paren() {} /
            // This branch matches any standard piece of a word, stopping as soon as we reach
            // either the overall stop condition *OR* an opening parenthesis. We add this latter
            // condition to ensure that *we* handle matching parentheses.
            !"(" word_piece(<param_rule_or_open_paren(<stop_condition()>)>, false /*in_command*/) {}
586
        // This is a helper rule that matches either the provided stop condition or an opening parenthesis.
        // Whatever the stop condition yields is discarded; the rule always yields `()`.
        rule param_rule_or_open_paren<T>(stop_condition: rule<T>) -> () =
            stop_condition() {} /
            "(" {}
591
        // This rule matches an arithmetic word followed by a right parenthesis. It must consume the right parenthesis.
        //
        // NOTE(review): the trailing `/` below introduces an empty second alternative, so this
        // rule can also succeed without consuming anything -- which appears to contradict the
        // statement above. Confirm whether the empty alternative is intentional.
        rule arithmetic_word_plus_right_paren() =
            arithmetic_word(<[')']>) ")" /
595
        // Parses one word piece and records the parser positions immediately before and after
        // it as the piece's start and end indices.
        rule word_piece_with_source<T>(stop_condition: rule<T>, in_command: bool) -> WordPieceWithSource =
            start_index:position!() piece:word_piece(<stop_condition()>, in_command) end_index:position!() {
                WordPieceWithSource { piece, start_index, end_index }
            }
600
        // Parses a single piece of a word. The alternatives are tried in the order written and
        // the first one that matches wins. `stop_condition` and `in_command` only influence the
        // final alternative (unquoted literal text).
        rule word_piece<T>(stop_condition: rule<T>, in_command: bool) -> WordPiece =
            // Rules that match quoted text.
            s:double_quoted_sequence() { WordPiece::DoubleQuotedSequence(s) } /
            s:single_quoted_literal_text() { WordPiece::SingleQuotedText(s.to_owned()) } /
            s:ansi_c_quoted_text() { WordPiece::AnsiCQuotedText(s.to_owned()) } /
            s:gettext_double_quoted_sequence() { WordPiece::GettextDoubleQuotedSequence(s) } /
            // Rules that match pieces starting with a dollar sign ('$').
            arithmetic_expansion() /
            command_substitution() /
            parameter_expansion() /
            // Rules that match unquoted text that doesn't start with an unescaped dollar sign.
            normal_escape_sequence() /
            unquoted_literal_text(<stop_condition()>, in_command)
614
        // Parses a single piece found inside double quotes: an arithmetic expansion, command
        // substitution, parameter expansion, double-quote escape sequence, or plain text, tried
        // in that order.
        rule double_quoted_word_piece() -> WordPiece =
            arithmetic_expansion() /
            command_substitution() /
            parameter_expansion() /
            double_quoted_escape_sequence() /
            double_quoted_text()
621
        // Parses a double-quoted string and yields the pieces between the quotes (possibly none).
        rule double_quoted_sequence() -> Vec<WordPieceWithSource> =
            "\"" i:double_quoted_sequence_inner()* "\"" { i }
624
        // Parses a gettext-style double-quoted string (opened by `$"`) and yields the pieces
        // between the quotes (possibly none).
        rule gettext_double_quoted_sequence() -> Vec<WordPieceWithSource> =
            "$\"" i:double_quoted_sequence_inner()* "\"" { i }
627
        // Parses one piece inside double quotes and records the parser positions immediately
        // before and after it as the piece's start and end indices.
        rule double_quoted_sequence_inner() -> WordPieceWithSource =
            start_index:position!() piece:double_quoted_word_piece() end_index:position!() {
                WordPieceWithSource {
                    piece,
                    start_index,
                    end_index
                }
            }
636
        // Parses a single-quoted string and yields the text between the quotes, which may be
        // empty but cannot itself contain a single quote.
        rule single_quoted_literal_text() -> &'input str =
            "\'" inner:$([^'\'']*) "\'" { inner }
639
        // Parses an ANSI-C quoted string (`$'...'`) and yields the text between the delimiters.
        // A backslash-escaped single quote (`\'`) inside the string does not terminate it.
        rule ansi_c_quoted_text() -> &'input str =
            "$\'" inner:$(("\\'" / [^'\''])*) "\'" { inner }
642
        // Parses one or more consecutive unquoted literal text pieces and yields the consumed
        // input, verbatim, as a single `Text` piece.
        rule unquoted_literal_text<T>(stop_condition: rule<T>, in_command: bool) -> WordPiece =
            s:$(unquoted_literal_text_piece(<stop_condition()>, in_command)+) { WordPiece::Text(s.to_owned()) }
645
        // Matches one unit of unquoted literal text. When `in_command` is true, an extglob
        // pattern or a parenthesized subshell command is accepted as a unit. Otherwise (or if
        // neither matches) a single character is accepted, provided we are not at the stop
        // condition, it does not begin an escape sequence, and it is not `$`, a single quote,
        // a double quote, or a backquote.
        //
        // TODO: Find a way to remove the special-case logic for extglob + subshell commands
        rule unquoted_literal_text_piece<T>(stop_condition: rule<T>, in_command: bool) =
            is_true(in_command) extglob_pattern() /
            is_true(in_command) subshell_command() /
            !stop_condition() !normal_escape_sequence() [^'$' | '\'' | '\"' | '`'] {}
651
        // Guard rule: matches, without consuming input, only when `value` is true. The `&[_]`
        // lookahead additionally requires that at least one character of input remains.
        rule is_true(value: bool) = &[_] {? if value { Ok(()) } else { Err("not true") } }
653
        // Matches an extglob pattern: one of `@`, `!`, `?`, `+`, or `*`, followed by a
        // parenthesized body made of zero or more body pieces. Yields no value.
        rule extglob_pattern() =
            ("@" / "!" / "?" / "+" / "*") "(" extglob_body_piece()* ")" {}
656
        // Matches one word piece inside an extglob body, parsed in command mode and stopping
        // at a closing parenthesis. Yields no value.
        rule extglob_body_piece() =
            word_piece(<[')']>, true /*in_command*/) {}
659
        // Matches a parenthesized command: `(`, then whatever the `command` rule accepts, then
        // `)`. Yields no value.
        rule subshell_command() =
            "(" command() ")" {}
662
        // Parses a run of literal text inside double quotes and yields it as a `Text` piece.
        rule double_quoted_text() -> WordPiece =
            s:double_quote_body_text() { WordPiece::Text(s.to_owned()) }
665
        // Matches one or more characters of double-quoted body text, stopping before `$`, a
        // double quote, a backquote, or the start of a double-quote escape sequence. Yields the
        // matched slice of input.
        rule double_quote_body_text() -> &'input str =
            $((!double_quoted_escape_sequence() [^'$' | '\"' | '`'])+)
668
        // Parses an escape sequence outside of quotes: a backslash followed by any one
        // character. The yielded piece holds both characters.
        rule normal_escape_sequence() -> WordPiece =
            s:$("\\" [c]) { WordPiece::EscapeSequence(s.to_owned()) }
671
        // Parses an escape sequence inside double quotes: a backslash followed by one of `$`,
        // a backquote, a double quote, a single quote, or a backslash. The yielded piece holds
        // both characters.
        rule double_quoted_escape_sequence() -> WordPiece =
            s:$("\\" ['$' | '`' | '\"' | '\'' | '\\']) { WordPiece::EscapeSequence(s.to_owned()) }
674
        // Parses a tilde prefix and records the parser positions immediately before and after
        // it as the piece's start and end indices.
        rule tilde_prefix_with_source() -> WordPieceWithSource =
            start_index:position!() piece:tilde_prefix() end_index:position!() {
                WordPieceWithSource {
                    piece,
                    start_index,
                    end_index
                }
            }
683
        // Parses a tilde prefix, provided `tilde_parsing_enabled()` matches: a `~` followed by
        // every character up to (but not including) the first `/`, `:`, or `;`. The yielded
        // piece holds the text after the `~`, which may be empty.
        //
        // TODO: Handle colon syntax
        rule tilde_prefix() -> WordPiece =
            tilde_parsing_enabled() "~" cs:$((!['/' | ':' | ';'] [c])*) { WordPiece::TildePrefix(cs.to_owned()) }
687
        // Parses a piece that starts with `$`. Tried in order:
        //   1. a braced expansion, `${` expression `}`;
        //   2. `$` followed by an unbraced parameter, yielded as a plain, non-indirect
        //      parameter expression;
        //   3. a lone `$` that is not followed by a single quote, yielded as the literal text `$`.
        //
        // TODO: Deal with fact that there may be a quoted word or escaped closing brace chars.
        // TODO: Improve on how we handle a '$' not followed by a valid variable name or parameter.
        rule parameter_expansion() -> WordPiece =
            "${" e:parameter_expression() "}" {
                WordPiece::ParameterExpansion(e)
            } /
            "$" parameter:unbraced_parameter() {
                WordPiece::ParameterExpansion(ParameterExpr::Parameter { parameter, indirect: false })
            } /
            "$" !['\''] {
                WordPiece::Text("$".to_owned())
            }
700
        // Parses the expression inside `${...}` (without the delimiters). Alternatives are tried
        // in the order written and the first match wins, so the order matters: the two-character
        // operators `%%` and `##` are listed before their one-character forms, and the bare
        // parameter form comes last as the fallback.
        rule parameter_expression() -> ParameterExpr =
            // Conditional forms: an optional `:` (see `parameter_test_type`) followed by one of
            // `-`, `=`, `?`, or `+`, and then an optional word.
            indirect:parameter_indirection() parameter:parameter() test_type:parameter_test_type() "-" default_value:parameter_expression_word()? {
                ParameterExpr::UseDefaultValues { parameter, indirect, test_type, default_value }
            } /
            indirect:parameter_indirection() parameter:parameter() test_type:parameter_test_type() "=" default_value:parameter_expression_word()? {
                ParameterExpr::AssignDefaultValues { parameter, indirect, test_type, default_value }
            } /
            indirect:parameter_indirection() parameter:parameter() test_type:parameter_test_type() "?" error_message:parameter_expression_word()? {
                ParameterExpr::IndicateErrorIfNullOrUnset { parameter, indirect, test_type, error_message }
            } /
            indirect:parameter_indirection() parameter:parameter() test_type:parameter_test_type() "+" alternative_value:parameter_expression_word()? {
                ParameterExpr::UseAlternativeValue { parameter, indirect, test_type, alternative_value }
            } /
            // Length form: a leading `#`; always recorded as non-indirect.
            "#" parameter:parameter() {
                ParameterExpr::ParameterLength { parameter, indirect: false }
            } /
            // Suffix/prefix removal forms; the longer operator is tried before the shorter one.
            indirect:parameter_indirection() parameter:parameter() "%%" pattern:parameter_expression_word()? {
                ParameterExpr::RemoveLargestSuffixPattern { parameter, indirect, pattern }
            } /
            indirect:parameter_indirection() parameter:parameter() "%" pattern:parameter_expression_word()? {
                ParameterExpr::RemoveSmallestSuffixPattern { parameter, indirect, pattern }
            } /
            indirect:parameter_indirection() parameter:parameter() "##" pattern:parameter_expression_word()? {
                ParameterExpr::RemoveLargestPrefixPattern { parameter, indirect, pattern }
            } /
            indirect:parameter_indirection() parameter:parameter() "#" pattern:parameter_expression_word()? {
                ParameterExpr::RemoveSmallestPrefixPattern { parameter, indirect, pattern }
            } /
            // N.B. The following case is for non-sh extensions.
            non_posix_extensions_enabled() e:non_posix_parameter_expression() { e } /
            // Fallback: a bare parameter, with optional indirection.
            indirect:parameter_indirection() parameter:parameter() {
                ParameterExpr::Parameter { parameter, indirect }
            }
734
735        rule parameter_test_type() -> ParameterTestType =
736            colon:":"? {
737                if colon.is_some() {
738                    ParameterTestType::UnsetOrNull
739                } else {
740                    ParameterTestType::Unset
741                }
742            }
743
744        rule non_posix_parameter_expression() -> ParameterExpr =
745            "!" variable_name:variable_name() "[*]" {
746                ParameterExpr::MemberKeys { variable_name: variable_name.to_owned(), concatenate: true }
747            } /
748            "!" variable_name:variable_name() "[@]" {
749                ParameterExpr::MemberKeys { variable_name: variable_name.to_owned(), concatenate: false }
750            } /
751            indirect:parameter_indirection() parameter:parameter() ":" offset:substring_offset() length:(":" l:substring_length() { l })? {
752                ParameterExpr::Substring { parameter, indirect, offset, length }
753            } /
754            indirect:parameter_indirection() parameter:parameter() "@" op:non_posix_parameter_transformation_op() {
755                ParameterExpr::Transform { parameter, indirect, op }
756            } /
757            "!" prefix:variable_name() "*" {
758                ParameterExpr::VariableNames { prefix: prefix.to_owned(), concatenate: true }
759            } /
760            "!" prefix:variable_name() "@" {
761                ParameterExpr::VariableNames { prefix: prefix.to_owned(), concatenate: false }
762            } /
763            indirect:parameter_indirection() parameter:parameter() "/#" pattern:parameter_search_pattern() replacement:parameter_replacement_str()? {
764                ParameterExpr::ReplaceSubstring { parameter, indirect, pattern, replacement, match_kind: SubstringMatchKind::Prefix }
765            } /
766            indirect:parameter_indirection() parameter:parameter() "/%" pattern:parameter_search_pattern() replacement:parameter_replacement_str()? {
767                ParameterExpr::ReplaceSubstring { parameter, indirect, pattern, replacement, match_kind: SubstringMatchKind::Suffix }
768            } /
769            indirect:parameter_indirection() parameter:parameter() "//" pattern:parameter_search_pattern() replacement:parameter_replacement_str()? {
770                ParameterExpr::ReplaceSubstring { parameter, indirect, pattern, replacement, match_kind: SubstringMatchKind::Anywhere }
771            } /
772            indirect:parameter_indirection() parameter:parameter() "/" pattern:parameter_search_pattern() replacement:parameter_replacement_str()? {
773                ParameterExpr::ReplaceSubstring { parameter, indirect, pattern, replacement, match_kind: SubstringMatchKind::FirstOccurrence }
774            } /
775            indirect:parameter_indirection() parameter:parameter() "^^" pattern:parameter_expression_word()? {
776                ParameterExpr::UppercasePattern { parameter, indirect, pattern }
777            } /
778            indirect:parameter_indirection() parameter:parameter() "^" pattern:parameter_expression_word()? {
779                ParameterExpr::UppercaseFirstChar { parameter, indirect, pattern }
780            } /
781            indirect:parameter_indirection() parameter:parameter() ",," pattern:parameter_expression_word()? {
782                ParameterExpr::LowercasePattern { parameter, indirect, pattern }
783            } /
784            indirect:parameter_indirection() parameter:parameter() "," pattern:parameter_expression_word()? {
785                ParameterExpr::LowercaseFirstChar { parameter, indirect, pattern }
786            }
787
788        rule parameter_indirection() -> bool =
789            non_posix_extensions_enabled() "!" { true } /
790            { false }
791
792        rule non_posix_parameter_transformation_op() -> ParameterTransformOp =
793            "U" { ParameterTransformOp::ToUpperCase } /
794            "u" { ParameterTransformOp::CapitalizeInitial } /
795            "L" { ParameterTransformOp::ToLowerCase } /
796            "Q" { ParameterTransformOp::Quoted } /
797            "E" { ParameterTransformOp::ExpandEscapeSequences } /
798            "P" { ParameterTransformOp::PromptExpand } /
799            "A" { ParameterTransformOp::ToAssignmentLogic } /
800            "K" { ParameterTransformOp::PossiblyQuoteWithArraysExpanded { separate_words: false } } /
801            "a" { ParameterTransformOp::ToAttributeFlags } /
802            "k" { ParameterTransformOp::PossiblyQuoteWithArraysExpanded { separate_words: true } }
803
804
805        rule unbraced_parameter() -> Parameter =
806            p:unbraced_positional_parameter() { Parameter::Positional(p) } /
807            p:special_parameter() { Parameter::Special(p) } /
808            p:variable_name() { Parameter::Named(p.to_owned()) }
809
810        // N.B. The indexing syntax is not a standard sh-ism.
811        pub(crate) rule parameter() -> Parameter =
812            p:positional_parameter() { Parameter::Positional(p) } /
813            p:special_parameter() { Parameter::Special(p) } /
814            non_posix_extensions_enabled() p:variable_name() "[@]" { Parameter::NamedWithAllIndices { name: p.to_owned(), concatenate: false } } /
815            non_posix_extensions_enabled() p:variable_name() "[*]" { Parameter::NamedWithAllIndices { name: p.to_owned(), concatenate: true } } /
816            non_posix_extensions_enabled() p:variable_name() "[" index:$(arithmetic_word(<"]">)) "]" {?
817                Ok(Parameter::NamedWithIndex { name: p.to_owned(), index: index.to_owned() })
818            } /
819            p:variable_name() { Parameter::Named(p.to_owned()) }
820
821        rule positional_parameter() -> u32 =
822            n:$(['1'..='9'](['0'..='9']*)) {? n.parse().or(Err("u32")) }
823        rule unbraced_positional_parameter() -> u32 =
824            n:$(['1'..='9']) {? n.parse().or(Err("u32")) }
825
826        rule special_parameter() -> SpecialParameter =
827            "@" { SpecialParameter::AllPositionalParameters { concatenate: false } } /
828            "*" { SpecialParameter::AllPositionalParameters { concatenate: true } } /
829            "#" { SpecialParameter::PositionalParameterCount } /
830            "?" { SpecialParameter::LastExitStatus } /
831            "-" { SpecialParameter::CurrentOptionFlags } /
832            "$" { SpecialParameter::ProcessId } /
833            "!" { SpecialParameter::LastBackgroundProcessId } /
834            "0" { SpecialParameter::ShellName }
835
836        rule variable_name() -> &'input str =
837            $(!['0'..='9'] ['_' | '0'..='9' | 'a'..='z' | 'A'..='Z']+)
838
839        pub(crate) rule command_substitution() -> WordPiece =
840            "$(" c:command() ")" { WordPiece::CommandSubstitution(c.to_owned()) } /
841            "`" c:backquoted_command() "`" { WordPiece::BackquotedCommandSubstitution(c) }
842
843        pub(crate) rule command() -> &'input str =
844            $(command_piece()*)
845
846        pub(crate) rule command_piece() -> () =
847            word_piece(<[')']>, true /*in_command*/) {} /
848            ([' ' | '\t'])+ {}
849
850        rule backquoted_command() -> String =
851            chars:(backquoted_char()*) { chars.into_iter().collect() }
852
853        rule backquoted_char() -> &'input str =
854            "\\`" { "`" } /
855            "\\\\" { "\\\\" } /
856            s:$([^'`']) { s }
857
858        rule arithmetic_expansion() -> WordPiece =
859            "$((" e:$(arithmetic_word(<"))">)) "))" { WordPiece::ArithmeticExpression(ast::UnexpandedArithmeticExpr { value: e.to_owned() } ) }
860
861        rule substring_offset() -> ast::UnexpandedArithmeticExpr =
862            s:$(arithmetic_word(<[':' | '}']>)) { ast::UnexpandedArithmeticExpr { value: s.to_owned() } }
863
864        rule substring_length() -> ast::UnexpandedArithmeticExpr =
865            s:$(arithmetic_word(<[':' | '}']>)) { ast::UnexpandedArithmeticExpr { value: s.to_owned() } }
866
867        rule parameter_replacement_str() -> String =
868            "/" s:$(word(<['}']>)) { s.to_owned() }
869
870        rule parameter_search_pattern() -> String =
871            s:$(word(<['}' | '/']>)) { s.to_owned() }
872
873        rule parameter_expression_word() -> String =
874            s:$(word(<['}']>)) { s.to_owned() }
875
876        rule extglob_enabled() -> () =
877            &[_] {? if parser_options.enable_extended_globbing { Ok(()) } else { Err("no extglob") } }
878
879        rule non_posix_extensions_enabled() -> () =
880            &[_] {? if !parser_options.sh_mode { Ok(()) } else { Err("posix") } }
881
882        rule tilde_parsing_enabled() -> () =
883            &[_] {? if parser_options.tilde_expansion { Ok(()) } else { Err("no tilde expansion") } }
884    }
885}
886
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;
    use insta::assert_ron_snapshot;

    /// Snapshot payload: the input word together with the pieces it parsed into.
    #[derive(serde::Serialize)]
    struct ParseTestResults<'a> {
        input: &'a str,
        result: Vec<WordPieceWithSource>,
    }

    /// Parses `word` with default parser options and bundles the outcome for snapshotting.
    fn test_parse(word: &str) -> Result<ParseTestResults<'_>> {
        let result = super::parse(word, &ParserOptions::default())?;
        Ok(ParseTestResults {
            input: word,
            result,
        })
    }

    #[test]
    fn parse_ansi_c_quoted_text() -> Result<()> {
        assert_ron_snapshot!(test_parse(r"$'hi\nthere\t'")?);
        Ok(())
    }

    #[test]
    fn parse_double_quoted_text() -> Result<()> {
        assert_ron_snapshot!(test_parse(r#""a ${b} c""#)?);
        Ok(())
    }

    #[test]
    fn parse_gettext_double_quoted_text() -> Result<()> {
        assert_ron_snapshot!(test_parse(r#"$"a ${b} c""#)?);
        Ok(())
    }

    #[test]
    fn parse_command_substitution() -> Result<()> {
        let options = ParserOptions::default();

        // Check the building-block rules individually before the full substitution.
        super::expansion_parser::command_piece("echo", &options)?;
        super::expansion_parser::command_piece("hi", &options)?;
        super::expansion_parser::command("echo hi", &options)?;
        super::expansion_parser::command_substitution("$(echo hi)", &options)?;

        assert_ron_snapshot!(test_parse("$(echo hi)")?);

        Ok(())
    }

    #[test]
    fn parse_command_substitution_with_embedded_quotes() -> Result<()> {
        let options = ParserOptions::default();

        // Check the building-block rules individually before the full substitution.
        super::expansion_parser::command_piece("echo", &options)?;
        super::expansion_parser::command_piece(r#""hi""#, &options)?;
        super::expansion_parser::command(r#"echo "hi""#, &options)?;
        super::expansion_parser::command_substitution(r#"$(echo "hi")"#, &options)?;

        assert_ron_snapshot!(test_parse(r#"$(echo "hi")"#)?);
        Ok(())
    }

    #[test]
    fn parse_command_substitution_with_embedded_extglob() -> Result<()> {
        assert_ron_snapshot!(test_parse("$(echo !(x))")?);
        Ok(())
    }

    #[test]
    fn parse_backquoted_command() -> Result<()> {
        assert_ron_snapshot!(test_parse("`echo hi`")?);
        Ok(())
    }

    #[test]
    fn parse_backquoted_command_in_double_quotes() -> Result<()> {
        assert_ron_snapshot!(test_parse(r#""`echo hi`""#)?);
        Ok(())
    }

    #[test]
    fn parse_extglob_with_embedded_parameter() -> Result<()> {
        assert_ron_snapshot!(test_parse("+([$var])")?);
        Ok(())
    }

    #[test]
    fn parse_arithmetic_expansion() -> Result<()> {
        assert_ron_snapshot!(test_parse("$((0))")?);
        Ok(())
    }

    #[test]
    fn parse_arithmetic_expansion_with_parens() -> Result<()> {
        assert_ron_snapshot!(test_parse("$((((1+2)*3)))")?);
        Ok(())
    }

    #[test]
    fn test_arithmetic_word_parsing() {
        let options = ParserOptions::default();

        let accepted = [
            "a",
            "b",
            " a + b ",
            "(a)",
            "((a))",
            "(((a)))",
            "(1+2)",
            "(1+2)*3",
            "((1+2)*3)",
        ];

        for input in accepted {
            assert!(
                super::expansion_parser::is_arithmetic_word(input, &options).is_ok(),
                "expected arithmetic word to parse: {input:?}"
            );
        }
    }

    #[test]
    fn test_arithmetic_word_piece_parsing() {
        let options = ParserOptions::default();

        let accepted = [
            "a",
            "b",
            " a + b ",
            "(a)",
            "((a))",
            "(((a)))",
            "(1+2)",
            "((1+2))",
            "((1+2)*3)",
        ];
        // Unbalanced parentheses must be rejected.
        let rejected = ["(a", "(a))", "((a)"];

        for input in accepted {
            assert!(
                super::expansion_parser::is_arithmetic_word_piece(input, &options).is_ok(),
                "expected arithmetic word piece to parse: {input:?}"
            );
        }

        for input in rejected {
            assert!(
                super::expansion_parser::is_arithmetic_word_piece(input, &options).is_err(),
                "expected arithmetic word piece to be rejected: {input:?}"
            );
        }
    }

    #[test]
    fn test_brace_expansion_parsing() -> Result<()> {
        let options = ParserOptions::default();

        // Each iteration records its own snapshot, so the order of inputs matters.
        for input in ["x{a,b}y", "{a,b{1,2}}"] {
            assert_ron_snapshot!(super::parse_brace_expansions(input, &options)?.ok_or_else(
                || anyhow::anyhow!("Expected brace expansion to be parsed successfully")
            )?);
        }

        Ok(())
    }
}