brush_parser/
word.rs

1//! Parser for shell words, used in expansion and other contexts.
2//!
3//! Implements support for:
4//!
5//! - Text quoting (single, double, ANSI C).
6//! - Escape sequences.
7//! - Tilde prefixes.
8//! - Parameter expansion expressions.
9//! - Command substitution expressions.
10//! - Arithmetic expansion expressions.
11
12use crate::ParserOptions;
13use crate::ast;
14use crate::error;
15
16/// Encapsulates a `WordPiece` together with its position in the string it came from.
17#[derive(Clone, Debug)]
18#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
19pub struct WordPieceWithSource {
20    /// The word piece.
21    pub piece: WordPiece,
22    /// The start index of the piece in the source string.
23    pub start_index: usize,
24    /// The end index of the piece in the source string.
25    pub end_index: usize,
26}
27
28/// Represents a piece of a word.
29#[derive(Clone, Debug)]
30#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
31pub enum WordPiece {
32    /// A simple unquoted, unescaped string.
33    Text(String),
34    /// A string that is single-quoted.
35    SingleQuotedText(String),
36    /// A string that is ANSI-C quoted.
37    AnsiCQuotedText(String),
38    /// A sequence of pieces that are embedded in double quotes.
39    DoubleQuotedSequence(Vec<WordPieceWithSource>),
40    /// Gettext enabled variant of [`WordPiece::DoubleQuotedSequence`].
41    GettextDoubleQuotedSequence(Vec<WordPieceWithSource>),
42    /// A tilde prefix.
43    TildePrefix(String),
44    /// A parameter expansion.
45    ParameterExpansion(ParameterExpr),
46    /// A command substitution.
47    CommandSubstitution(String),
48    /// A backquoted command substitution.
49    BackquotedCommandSubstitution(String),
50    /// An escape sequence.
51    EscapeSequence(String),
52    /// An arithmetic expression.
53    ArithmeticExpression(ast::UnexpandedArithmeticExpr),
54}
55
/// Which condition a conditional parameter expansion tests for.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
pub enum ParameterTestType {
    /// The parameter is unset or null; selected when the operator is
    /// preceded by a colon.
    UnsetOrNull,
    /// The parameter is unset; selected when no colon precedes the operator.
    Unset,
}
65
66/// A parameter, used in a parameter expansion.
67#[derive(Clone, Debug)]
68#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
69pub enum Parameter {
70    /// A 0-indexed positional parameter.
71    Positional(u32),
72    /// A special parameter.
73    Special(SpecialParameter),
74    /// A named variable.
75    Named(String),
76    /// An index into a named variable.
77    NamedWithIndex {
78        /// Variable name.
79        name: String,
80        /// Index.
81        index: String,
82    },
83    /// A named array variable with all indices.
84    NamedWithAllIndices {
85        /// Variable name.
86        name: String,
87        /// Whether to concatenate the values.
88        concatenate: bool,
89    },
90}
91
/// The special parameters that may appear in a parameter expansion.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
pub enum SpecialParameter {
    /// The full set of positional parameters.
    AllPositionalParameters {
        /// Whether the values should be concatenated.
        concatenate: bool,
    },
    /// How many positional parameters there are.
    PositionalParameterCount,
    /// Exit status most recently produced in the shell.
    LastExitStatus,
    /// Option flags currently in effect for the shell.
    CurrentOptionFlags,
    /// Process ID of the current shell.
    ProcessId,
    /// Process ID of the most recent background process managed by the shell.
    LastBackgroundProcessId,
    /// Name of the shell.
    ShellName,
}
114
115/// A parameter expression, used in a parameter expansion.
116#[derive(Clone, Debug)]
117#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
118pub enum ParameterExpr {
119    /// A parameter, with optional indirection.
120    Parameter {
121        /// The parameter.
122        parameter: Parameter,
123        /// Whether to treat the expanded parameter as an indirect
124        /// reference, which should be subsequently dereferenced
125        /// for the expansion.
126        indirect: bool,
127    },
128    /// Conditionally use default values.
129    UseDefaultValues {
130        /// The parameter.
131        parameter: Parameter,
132        /// Whether to treat the expanded parameter as an indirect
133        /// reference, which should be subsequently dereferenced
134        /// for the expansion.
135        indirect: bool,
136        /// The type of test to perform.
137        test_type: ParameterTestType,
138        /// Default value to conditionally use.
139        default_value: Option<String>,
140    },
141    /// Conditionally assign default values.
142    AssignDefaultValues {
143        /// The parameter.
144        parameter: Parameter,
145        /// Whether to treat the expanded parameter as an indirect
146        /// reference, which should be subsequently dereferenced
147        /// for the expansion.
148        indirect: bool,
149        /// The type of test to perform.
150        test_type: ParameterTestType,
151        /// Default value to conditionally assign.
152        default_value: Option<String>,
153    },
154    /// Indicate error if null or unset.
155    IndicateErrorIfNullOrUnset {
156        /// The parameter.
157        parameter: Parameter,
158        /// Whether to treat the expanded parameter as an indirect
159        /// reference, which should be subsequently dereferenced
160        /// for the expansion.
161        indirect: bool,
162        /// The type of test to perform.
163        test_type: ParameterTestType,
164        /// Error message to conditionally yield.
165        error_message: Option<String>,
166    },
167    /// Conditionally use an alternative value.
168    UseAlternativeValue {
169        /// The parameter.
170        parameter: Parameter,
171        /// Whether to treat the expanded parameter as an indirect
172        /// reference, which should be subsequently dereferenced
173        /// for the expansion.
174        indirect: bool,
175        /// The type of test to perform.
176        test_type: ParameterTestType,
177        /// Alternative value to conditionally use.
178        alternative_value: Option<String>,
179    },
180    /// Compute the length of the given parameter.
181    ParameterLength {
182        /// The parameter.
183        parameter: Parameter,
184        /// Whether to treat the expanded parameter as an indirect
185        /// reference, which should be subsequently dereferenced
186        /// for the expansion.
187        indirect: bool,
188    },
189    /// Remove the smallest suffix from the given string matching the given pattern.
190    RemoveSmallestSuffixPattern {
191        /// The parameter.
192        parameter: Parameter,
193        /// Whether to treat the expanded parameter as an indirect
194        /// reference, which should be subsequently dereferenced
195        /// for the expansion.
196        indirect: bool,
197        /// Optionally provides a pattern to match.
198        pattern: Option<String>,
199    },
200    /// Remove the largest suffix from the given string matching the given pattern.
201    RemoveLargestSuffixPattern {
202        /// The parameter.
203        parameter: Parameter,
204        /// Whether to treat the expanded parameter as an indirect
205        /// reference, which should be subsequently dereferenced
206        /// for the expansion.
207        indirect: bool,
208        /// Optionally provides a pattern to match.
209        pattern: Option<String>,
210    },
211    /// Remove the smallest prefix from the given string matching the given pattern.
212    RemoveSmallestPrefixPattern {
213        /// The parameter.
214        parameter: Parameter,
215        /// Whether to treat the expanded parameter as an indirect
216        /// reference, which should be subsequently dereferenced
217        /// for the expansion.
218        indirect: bool,
219        /// Optionally provides a pattern to match.
220        pattern: Option<String>,
221    },
222    /// Remove the largest prefix from the given string matching the given pattern.
223    RemoveLargestPrefixPattern {
224        /// The parameter.
225        parameter: Parameter,
226        /// Whether to treat the expanded parameter as an indirect
227        /// reference, which should be subsequently dereferenced
228        /// for the expansion.
229        indirect: bool,
230        /// Optionally provides a pattern to match.
231        pattern: Option<String>,
232    },
233    /// Extract a substring from the given parameter.
234    Substring {
235        /// The parameter.
236        parameter: Parameter,
237        /// Whether to treat the expanded parameter as an indirect
238        /// reference, which should be subsequently dereferenced
239        /// for the expansion.
240        indirect: bool,
241        /// Arithmetic expression that will be expanded to compute the offset
242        /// at which the substring should be extracted.
243        offset: ast::UnexpandedArithmeticExpr,
244        /// Optionally provides an arithmetic expression that will be expanded
245        /// to compute the length of substring to be extracted; if left
246        /// unspecified, the remainder of the string will be extracted.
247        length: Option<ast::UnexpandedArithmeticExpr>,
248    },
249    /// Transform the given parameter.
250    Transform {
251        /// The parameter.
252        parameter: Parameter,
253        /// Whether to treat the expanded parameter as an indirect
254        /// reference, which should be subsequently dereferenced
255        /// for the expansion.
256        indirect: bool,
257        /// Type of transformation to apply.
258        op: ParameterTransformOp,
259    },
260    /// Uppercase the first character of the given parameter.
261    UppercaseFirstChar {
262        /// The parameter.
263        parameter: Parameter,
264        /// Whether to treat the expanded parameter as an indirect
265        /// reference, which should be subsequently dereferenced
266        /// for the expansion.
267        indirect: bool,
268        /// Optionally provides a pattern to match.
269        pattern: Option<String>,
270    },
271    /// Uppercase the portion of the given parameter matching the given pattern.
272    UppercasePattern {
273        /// The parameter.
274        parameter: Parameter,
275        /// Whether to treat the expanded parameter as an indirect
276        /// reference, which should be subsequently dereferenced
277        /// for the expansion.
278        indirect: bool,
279        /// Optionally provides a pattern to match.
280        pattern: Option<String>,
281    },
282    /// Lowercase the first character of the given parameter.
283    LowercaseFirstChar {
284        /// The parameter.
285        parameter: Parameter,
286        /// Whether to treat the expanded parameter as an indirect
287        /// reference, which should be subsequently dereferenced
288        /// for the expansion.
289        indirect: bool,
290        /// Optionally provides a pattern to match.
291        pattern: Option<String>,
292    },
293    /// Lowercase the portion of the given parameter matching the given pattern.
294    LowercasePattern {
295        /// The parameter.
296        parameter: Parameter,
297        /// Whether to treat the expanded parameter as an indirect
298        /// reference, which should be subsequently dereferenced
299        /// for the expansion.
300        indirect: bool,
301        /// Optionally provides a pattern to match.
302        pattern: Option<String>,
303    },
304    /// Replace occurrences of the given pattern in the given parameter.
305    ReplaceSubstring {
306        /// The parameter.
307        parameter: Parameter,
308        /// Whether to treat the expanded parameter as an indirect
309        /// reference, which should be subsequently dereferenced
310        /// for the expansion.
311        indirect: bool,
312        /// Pattern to match.
313        pattern: String,
314        /// Replacement string.
315        replacement: Option<String>,
316        /// Kind of match to perform.
317        match_kind: SubstringMatchKind,
318    },
319    /// Select variable names from the environment with a given prefix.
320    VariableNames {
321        /// The prefix to match.
322        prefix: String,
323        /// Whether to concatenate the results.
324        concatenate: bool,
325    },
326    /// Select member keys from the named array.
327    MemberKeys {
328        /// Name of the array variable.
329        variable_name: String,
330        /// Whether to concatenate the results.
331        concatenate: bool,
332    },
333}
334
/// Where a substring-replacement pattern is allowed to match.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
pub enum SubstringMatchKind {
    /// Only at the start of the string.
    Prefix,
    /// Only at the end of the string.
    Suffix,
    /// Only the first occurrence within the string.
    FirstOccurrence,
    /// Every occurrence within the string.
    Anywhere,
}
348
/// The transformations that can be applied to a parameter.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
pub enum ParameterTransformOp {
    /// Capitalize initials.
    CapitalizeInitial,
    /// Expand any escape sequences.
    ExpandEscapeSequences,
    /// Possibly quote, with arrays expanded.
    PossiblyQuoteWithArraysExpanded {
        /// Whether the result should be yielded as separate words.
        separate_words: bool,
    },
    /// Perform prompt expansion.
    PromptExpand,
    /// Quote the parameter.
    Quoted,
    /// Render in a form usable in an assignment or declaration.
    ToAssignmentLogic,
    /// Render as the parameter's attribute flags.
    ToAttributeFlags,
    /// Convert to lowercase.
    ToLowerCase,
    /// Convert to uppercase.
    ToUpperCase,
}
375
376/// Represents a sub-word that is either a brace expression or some other word text.
377#[derive(Clone, Debug)]
378#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
379pub enum BraceExpressionOrText {
380    /// A brace expression.
381    Expr(BraceExpression),
382    /// Other word text.
383    Text(String),
384}
385
386/// Represents a brace expression to be expanded.
387pub type BraceExpression = Vec<BraceExpressionMember>;
388
389/// Member of a brace expression.
390#[derive(Clone, Debug)]
391#[cfg_attr(test, derive(PartialEq, Eq, serde::Serialize))]
392pub enum BraceExpressionMember {
393    /// An inclusive numerical sequence.
394    NumberSequence {
395        /// Start of the sequence.
396        start: i64,
397        /// Inclusive end of the sequence.
398        end: i64,
399        /// Increment value.
400        increment: i64,
401    },
402    /// An inclusive character sequence.
403    CharSequence {
404        /// Start of the sequence.
405        start: char,
406        /// Inclusive end of the sequence.
407        end: char,
408        /// Increment value.
409        increment: i64,
410    },
411    /// Child text or expressions.
412    Child(Vec<BraceExpressionOrText>),
413}
414
415/// Parse a word into its constituent pieces.
416///
417/// # Arguments
418///
419/// * `word` - The word to parse.
420/// * `options` - The parser options to use.
421pub fn parse(
422    word: &str,
423    options: &ParserOptions,
424) -> Result<Vec<WordPieceWithSource>, error::WordParseError> {
425    cacheable_parse(word.to_owned(), options.to_owned())
426}
427
428#[cached::proc_macro::cached(size = 64, result = true)]
429fn cacheable_parse(
430    word: String,
431    options: ParserOptions,
432) -> Result<Vec<WordPieceWithSource>, error::WordParseError> {
433    tracing::debug!(target: "expansion", "Parsing word '{}'", word);
434
435    let pieces = expansion_parser::unexpanded_word(word.as_str(), &options)
436        .map_err(|err| error::WordParseError::Word(word.clone(), err.into()))?;
437
438    tracing::debug!(target: "expansion", "Parsed word '{}' => {{{:?}}}", word, pieces);
439
440    Ok(pieces)
441}
442
443/// Parse the given word into a parameter expression.
444///
445/// # Arguments
446///
447/// * `word` - The word to parse.
448/// * `options` - The parser options to use.
449pub fn parse_parameter(
450    word: &str,
451    options: &ParserOptions,
452) -> Result<Parameter, error::WordParseError> {
453    expansion_parser::parameter(word, options)
454        .map_err(|err| error::WordParseError::Parameter(word.to_owned(), err.into()))
455}
456
457/// Parse brace expansion from a given word .
458///
459/// # Arguments
460///
461/// * `word` - The word to parse.
462/// * `options` - The parser options to use.
463pub fn parse_brace_expansions(
464    word: &str,
465    options: &ParserOptions,
466) -> Result<Option<Vec<BraceExpressionOrText>>, error::WordParseError> {
467    expansion_parser::brace_expansions(word, options)
468        .map_err(|err| error::WordParseError::BraceExpansion(word.to_owned(), err.into()))
469}
470
471peg::parser! {
472    grammar expansion_parser(parser_options: &ParserOptions) for str {
473        // Helper rule that enables pegviz to be used to visualize debug peg traces.
474        rule traced<T>(e: rule<T>) -> T =
475            &(input:$([_]*) {
476                #[cfg(feature = "debug-tracing")]
477                println!("[PEG_INPUT_START]\n{input}\n[PEG_TRACE_START]");
478            })
479            e:e()? {?
480                #[cfg(feature = "debug-tracing")]
481                println!("[PEG_TRACE_STOP]");
482                e.ok_or("")
483            }
484
485        pub(crate) rule unexpanded_word() -> Vec<WordPieceWithSource> = traced(<word(<![_]>)>)
486
487        rule word<T>(stop_condition: rule<T>) -> Vec<WordPieceWithSource> =
488            tilde:tilde_prefix_with_source()? pieces:word_piece_with_source(<stop_condition()>, false /*in_command*/)* {
489                let mut all_pieces = Vec::new();
490                if let Some(tilde) = tilde {
491                    all_pieces.push(tilde);
492                }
493                all_pieces.extend(pieces);
494                all_pieces
495            }
496
497        // Takes a word as input.
498        pub(crate) rule brace_expansions() -> Option<Vec<BraceExpressionOrText>> =
499            pieces:(brace_expansion_piece(<![_]>)+) { Some(pieces) } /
500            [_]* { None }
501
502        // Returns either a complete brace expression (without any prefix or suffix), or a
503        // non-brace-expression string.
504        rule brace_expansion_piece<T>(stop_condition: rule<T>) -> BraceExpressionOrText =
505            expr:brace_expr() {
506                BraceExpressionOrText::Expr(expr)
507            } /
508            text:$(non_brace_expr_text(<stop_condition()>)+) { BraceExpressionOrText::Text(text.to_owned()) }
509
510        // Parses text that is not considered to contain a brace expression.
511        rule non_brace_expr_text<T>(stop_condition: rule<T>) -> () =
512            !"{" word_piece(<['{'] {} / stop_condition() {}>, false) {} /
513            !brace_expr() !stop_condition() "{" {}
514
515        // Parses a complete brace expression, with no prefix or suffix.
516        pub(crate) rule brace_expr() -> BraceExpression =
517            "{" inner:brace_expr_inner() "}" { inner }
518
519        // Parses the text inside a complete brace expression; basically the complete brace
520        // expression without the opening and closing brace characters.
521        pub(crate) rule brace_expr_inner() -> BraceExpression =
522            brace_text_list_expr() /
523            seq:brace_sequence_expr() { vec![seq] }
524
525        // Parses a list of brace expression members, including the separating commas; does
526        // not include the opening and closing braces.
527        pub(crate) rule brace_text_list_expr() -> BraceExpression =
528            brace_text_list_member() **<2,> ","
529
530        // Parses an element that can occur in a brace expression member list, not including the
531        // terminating comma or closing brace.
532        pub(crate) rule brace_text_list_member() -> BraceExpressionMember =
533            // Matches an empty-string member, without consuming the comma or closing brace that terminates it.
534            &[',' | '}'] { BraceExpressionMember::Child(vec![BraceExpressionOrText::Text(String::new())]) } /
535            // Matches a nested string that may include some combination of concatenated textual strings
536            // and brace expressions.
537            child_pieces:(brace_expansion_piece(<[',' | '}']>)+) {
538                BraceExpressionMember::Child(child_pieces)
539            }
540
541        pub(crate) rule brace_sequence_expr() -> BraceExpressionMember =
542            start:number() ".." end:number() increment:(".." n:number() { n })? {
543                BraceExpressionMember::NumberSequence { start, end, increment: increment.unwrap_or(1) }
544            } /
545            start:character() ".." end:character() increment:(".." n:number() { n })? {
546                BraceExpressionMember::CharSequence { start, end, increment: increment.unwrap_or(1) }
547            }
548
549        rule number() -> i64 = sign:number_sign()? n:$(['0'..='9']+) {
550            let sign = sign.unwrap_or(1);
551            let num: i64 = n.parse().unwrap();
552            num * sign
553        }
554
555        rule number_sign() -> i64 =
556            ['-'] { -1 } /
557            ['+'] { 1 }
558
559        rule character() -> char = ['a'..='z' | 'A'..='Z']
560
561        pub(crate) rule is_arithmetic_word() =
562            arithmetic_word(<![_]>)
563
564            // N.B. We don't bother returning the word pieces, as all users of this rule
565        // only try to extract the consumed input string and not the parse result.
566        rule arithmetic_word<T>(stop_condition: rule<T>) =
567            arithmetic_word_piece(<stop_condition()>)* {}
568
569        pub(crate) rule is_arithmetic_word_piece() =
570            arithmetic_word_piece(<![_]>)
571
572        // This rule matches an individual "piece" of an arithmetic expression. It needs to handle
573        // matching nested parenthesized expressions as well. We stop consuming the input when
574        // we reach the provided stop condition, which typically denotes the end of the containing
575        // arithmetic expression.
576        rule arithmetic_word_piece<T>(stop_condition: rule<T>) =
577            // This branch matches a parenthesized piece; we consume the opening parenthesis and
578            // delegate the rest to a helper rule. We don't worry about the stop condition passed
579            // into us, because if we see an opening parenthesis then we *must* find its closing
580            // partner.
581            "(" arithmetic_word_plus_right_paren() {} /
582            // This branch matches any standard piece of a word, stopping as soon as we reach
583            // either the overall stop condition *OR* an opening parenthesis. We add this latter
584            // condition to ensure that *we* handle matching parentheses.
585            !"(" word_piece(<param_rule_or_open_paren(<stop_condition()>)>, false /*in_command*/) {}
586
587        // This is a helper rule that matches either the provided stop condition or an opening parenthesis.
588        rule param_rule_or_open_paren<T>(stop_condition: rule<T>) -> () =
589            stop_condition() {} /
590            "(" {}
591
592        // This rule matches an arithmetic word followed by a right parenthesis. It must consume the right parenthesis.
593        rule arithmetic_word_plus_right_paren() =
594            arithmetic_word(<[')']>) ")" /
595
596        rule word_piece_with_source<T>(stop_condition: rule<T>, in_command: bool) -> WordPieceWithSource =
597            start_index:position!() piece:word_piece(<stop_condition()>, in_command) end_index:position!() {
598                WordPieceWithSource { piece, start_index, end_index }
599            }
600
601        rule word_piece<T>(stop_condition: rule<T>, in_command: bool) -> WordPiece =
602            // Rules that match quoted text.
603            s:double_quoted_sequence() { WordPiece::DoubleQuotedSequence(s) } /
604            s:single_quoted_literal_text() { WordPiece::SingleQuotedText(s.to_owned()) } /
605            s:ansi_c_quoted_text() { WordPiece::AnsiCQuotedText(s.to_owned()) } /
606            s:gettext_double_quoted_sequence() { WordPiece::GettextDoubleQuotedSequence(s) } /
607            // Rules that match pieces starting with a dollar sign ('$').
608            dollar_sign_word_piece() /
609            // Rules that match unquoted text that doesn't start with an unescaped dollar sign.
610            normal_escape_sequence() /
611            unquoted_literal_text(<stop_condition()>, in_command)
612
613        rule dollar_sign_word_piece() -> WordPiece =
614            arithmetic_expansion() /
615            command_substitution() /
616            parameter_expansion()
617
618        rule double_quoted_word_piece() -> WordPiece =
619            arithmetic_expansion() /
620            command_substitution() /
621            parameter_expansion() /
622            double_quoted_escape_sequence() /
623            double_quoted_text()
624
625        rule double_quoted_sequence() -> Vec<WordPieceWithSource> =
626            "\"" i:double_quoted_sequence_inner()* "\"" { i }
627
628        rule gettext_double_quoted_sequence() -> Vec<WordPieceWithSource> =
629            "$\"" i:double_quoted_sequence_inner()* "\"" { i }
630
631        rule double_quoted_sequence_inner() -> WordPieceWithSource =
632            start_index:position!() piece:double_quoted_word_piece() end_index:position!() {
633                WordPieceWithSource {
634                    piece,
635                    start_index,
636                    end_index
637                }
638            }
639
640        rule single_quoted_literal_text() -> &'input str =
641            "\'" inner:$([^'\'']*) "\'" { inner }
642
643        rule ansi_c_quoted_text() -> &'input str =
644            "$\'" inner:$(("\\'" / [^'\''])*) "\'" { inner }
645
646        rule unquoted_literal_text<T>(stop_condition: rule<T>, in_command: bool) -> WordPiece =
647            s:$(unquoted_literal_text_piece(<stop_condition()>, in_command)+) { WordPiece::Text(s.to_owned()) }
648
649        // TODO: Find a way to remove the special-case logic for extglob + subshell commands
650        rule unquoted_literal_text_piece<T>(stop_condition: rule<T>, in_command: bool) =
651            is_true(in_command) extglob_pattern() /
652            is_true(in_command) subshell_command() /
653            !stop_condition() !normal_escape_sequence() [^'\'' | '\"' | '$' | '`'] {}
654
655        rule is_true(value: bool) = &[_] {? if value { Ok(()) } else { Err("not true") } }
656
657        rule extglob_pattern() =
658            ("@" / "!" / "?" / "+" / "*") "(" extglob_body_piece()* ")" {}
659
660        rule extglob_body_piece() =
661            word_piece(<[')']>, true /*in_command*/) {}
662
663        rule subshell_command() =
664            "(" command() ")" {}
665
666        rule double_quoted_text() -> WordPiece =
667            s:double_quote_body_text() { WordPiece::Text(s.to_owned()) }
668
669        rule double_quote_body_text() -> &'input str =
670            $((!double_quoted_escape_sequence() !dollar_sign_word_piece() [^'\"'])+)
671
672        rule normal_escape_sequence() -> WordPiece =
673            s:$("\\" [c]) { WordPiece::EscapeSequence(s.to_owned()) }
674
675        rule double_quoted_escape_sequence() -> WordPiece =
676            s:$("\\" ['$' | '`' | '\"' | '\\']) { WordPiece::EscapeSequence(s.to_owned()) }
677
678        rule tilde_prefix_with_source() -> WordPieceWithSource =
679            start_index:position!() piece:tilde_prefix() end_index:position!() {
680                WordPieceWithSource {
681                    piece,
682                    start_index,
683                    end_index
684                }
685            }
686
687        // TODO: Handle colon syntax
688        rule tilde_prefix() -> WordPiece =
689            tilde_parsing_enabled() "~" cs:$((!['/' | ':' | ';'] [c])*) { WordPiece::TildePrefix(cs.to_owned()) }
690
691        // TODO: Deal with fact that there may be a quoted word or escaped closing brace chars.
692        // TODO: Improve on how we handle a '$' not followed by a valid variable name or parameter.
693        rule parameter_expansion() -> WordPiece =
694            "${" e:parameter_expression() "}" {
695                WordPiece::ParameterExpansion(e)
696            } /
697            "$" parameter:unbraced_parameter() {
698                WordPiece::ParameterExpansion(ParameterExpr::Parameter { parameter, indirect: false })
699            } /
700            "$" !['\''] {
701                WordPiece::Text("$".to_owned())
702            }
703
704        rule parameter_expression() -> ParameterExpr =
705            indirect:parameter_indirection() parameter:parameter() test_type:parameter_test_type() "-" default_value:parameter_expression_word()? {
706                ParameterExpr::UseDefaultValues { parameter, indirect, test_type, default_value }
707            } /
708            indirect:parameter_indirection() parameter:parameter() test_type:parameter_test_type() "=" default_value:parameter_expression_word()? {
709                ParameterExpr::AssignDefaultValues { parameter, indirect, test_type, default_value }
710            } /
711            indirect:parameter_indirection() parameter:parameter() test_type:parameter_test_type() "?" error_message:parameter_expression_word()? {
712                ParameterExpr::IndicateErrorIfNullOrUnset { parameter, indirect, test_type, error_message }
713            } /
714            indirect:parameter_indirection() parameter:parameter() test_type:parameter_test_type() "+" alternative_value:parameter_expression_word()? {
715                ParameterExpr::UseAlternativeValue { parameter, indirect, test_type, alternative_value }
716            } /
717            "#" parameter:parameter() {
718                ParameterExpr::ParameterLength { parameter, indirect: false }
719            } /
720            indirect:parameter_indirection() parameter:parameter() "%%" pattern:parameter_expression_word()? {
721                ParameterExpr::RemoveLargestSuffixPattern { parameter, indirect, pattern }
722            } /
723            indirect:parameter_indirection() parameter:parameter() "%" pattern:parameter_expression_word()? {
724                ParameterExpr::RemoveSmallestSuffixPattern { parameter, indirect, pattern }
725            } /
726            indirect:parameter_indirection() parameter:parameter() "##" pattern:parameter_expression_word()? {
727                ParameterExpr::RemoveLargestPrefixPattern { parameter, indirect, pattern }
728            } /
729            indirect:parameter_indirection() parameter:parameter() "#" pattern:parameter_expression_word()? {
730                ParameterExpr::RemoveSmallestPrefixPattern { parameter, indirect, pattern }
731            } /
732            // N.B. The following case is for non-sh extensions.
733            non_posix_extensions_enabled() e:non_posix_parameter_expression() { e } /
734            indirect:parameter_indirection() parameter:parameter() {
735                ParameterExpr::Parameter { parameter, indirect }
736            }
737
738        rule parameter_test_type() -> ParameterTestType =
739            colon:":"? {
740                if colon.is_some() {
741                    ParameterTestType::UnsetOrNull
742                } else {
743                    ParameterTestType::Unset
744                }
745            }
746
747        rule non_posix_parameter_expression() -> ParameterExpr =
748            "!" variable_name:variable_name() "[*]" {
749                ParameterExpr::MemberKeys { variable_name: variable_name.to_owned(), concatenate: true }
750            } /
751            "!" variable_name:variable_name() "[@]" {
752                ParameterExpr::MemberKeys { variable_name: variable_name.to_owned(), concatenate: false }
753            } /
754            indirect:parameter_indirection() parameter:parameter() ":" offset:substring_offset() length:(":" l:substring_length() { l })? {
755                ParameterExpr::Substring { parameter, indirect, offset, length }
756            } /
757            indirect:parameter_indirection() parameter:parameter() "@" op:non_posix_parameter_transformation_op() {
758                ParameterExpr::Transform { parameter, indirect, op }
759            } /
760            "!" prefix:variable_name() "*" {
761                ParameterExpr::VariableNames { prefix: prefix.to_owned(), concatenate: true }
762            } /
763            "!" prefix:variable_name() "@" {
764                ParameterExpr::VariableNames { prefix: prefix.to_owned(), concatenate: false }
765            } /
766            indirect:parameter_indirection() parameter:parameter() "/#" pattern:parameter_search_pattern() replacement:parameter_replacement_str()? {
767                ParameterExpr::ReplaceSubstring { parameter, indirect, pattern, replacement, match_kind: SubstringMatchKind::Prefix }
768            } /
769            indirect:parameter_indirection() parameter:parameter() "/%" pattern:parameter_search_pattern() replacement:parameter_replacement_str()? {
770                ParameterExpr::ReplaceSubstring { parameter, indirect, pattern, replacement, match_kind: SubstringMatchKind::Suffix }
771            } /
772            indirect:parameter_indirection() parameter:parameter() "//" pattern:parameter_search_pattern() replacement:parameter_replacement_str()? {
773                ParameterExpr::ReplaceSubstring { parameter, indirect, pattern, replacement, match_kind: SubstringMatchKind::Anywhere }
774            } /
775            indirect:parameter_indirection() parameter:parameter() "/" pattern:parameter_search_pattern() replacement:parameter_replacement_str()? {
776                ParameterExpr::ReplaceSubstring { parameter, indirect, pattern, replacement, match_kind: SubstringMatchKind::FirstOccurrence }
777            } /
778            indirect:parameter_indirection() parameter:parameter() "^^" pattern:parameter_expression_word()? {
779                ParameterExpr::UppercasePattern { parameter, indirect, pattern }
780            } /
781            indirect:parameter_indirection() parameter:parameter() "^" pattern:parameter_expression_word()? {
782                ParameterExpr::UppercaseFirstChar { parameter, indirect, pattern }
783            } /
784            indirect:parameter_indirection() parameter:parameter() ",," pattern:parameter_expression_word()? {
785                ParameterExpr::LowercasePattern { parameter, indirect, pattern }
786            } /
787            indirect:parameter_indirection() parameter:parameter() "," pattern:parameter_expression_word()? {
788                ParameterExpr::LowercaseFirstChar { parameter, indirect, pattern }
789            }
790
791        rule parameter_indirection() -> bool =
792            non_posix_extensions_enabled() "!" { true } /
793            { false }
794
795        rule non_posix_parameter_transformation_op() -> ParameterTransformOp =
796            "U" { ParameterTransformOp::ToUpperCase } /
797            "u" { ParameterTransformOp::CapitalizeInitial } /
798            "L" { ParameterTransformOp::ToLowerCase } /
799            "Q" { ParameterTransformOp::Quoted } /
800            "E" { ParameterTransformOp::ExpandEscapeSequences } /
801            "P" { ParameterTransformOp::PromptExpand } /
802            "A" { ParameterTransformOp::ToAssignmentLogic } /
803            "K" { ParameterTransformOp::PossiblyQuoteWithArraysExpanded { separate_words: false } } /
804            "a" { ParameterTransformOp::ToAttributeFlags } /
805            "k" { ParameterTransformOp::PossiblyQuoteWithArraysExpanded { separate_words: true } }
806
807
808        rule unbraced_parameter() -> Parameter =
809            p:unbraced_positional_parameter() { Parameter::Positional(p) } /
810            p:special_parameter() { Parameter::Special(p) } /
811            p:variable_name() { Parameter::Named(p.to_owned()) }
812
813        // N.B. The indexing syntax is not a standard sh-ism.
814        pub(crate) rule parameter() -> Parameter =
815            p:positional_parameter() { Parameter::Positional(p) } /
816            p:special_parameter() { Parameter::Special(p) } /
817            non_posix_extensions_enabled() p:variable_name() "[@]" { Parameter::NamedWithAllIndices { name: p.to_owned(), concatenate: false } } /
818            non_posix_extensions_enabled() p:variable_name() "[*]" { Parameter::NamedWithAllIndices { name: p.to_owned(), concatenate: true } } /
819            non_posix_extensions_enabled() p:variable_name() "[" index:$(arithmetic_word(<"]">)) "]" {?
820                Ok(Parameter::NamedWithIndex { name: p.to_owned(), index: index.to_owned() })
821            } /
822            p:variable_name() { Parameter::Named(p.to_owned()) }
823
824        rule positional_parameter() -> u32 =
825            n:$(['1'..='9'](['0'..='9']*)) {? n.parse().or(Err("u32")) }
826        rule unbraced_positional_parameter() -> u32 =
827            n:$(['1'..='9']) {? n.parse().or(Err("u32")) }
828
829        rule special_parameter() -> SpecialParameter =
830            "@" { SpecialParameter::AllPositionalParameters { concatenate: false } } /
831            "*" { SpecialParameter::AllPositionalParameters { concatenate: true } } /
832            "#" { SpecialParameter::PositionalParameterCount } /
833            "?" { SpecialParameter::LastExitStatus } /
834            "-" { SpecialParameter::CurrentOptionFlags } /
835            "$" { SpecialParameter::ProcessId } /
836            "!" { SpecialParameter::LastBackgroundProcessId } /
837            "0" { SpecialParameter::ShellName }
838
839        rule variable_name() -> &'input str =
840            $(!['0'..='9'] ['_' | '0'..='9' | 'a'..='z' | 'A'..='Z']+)
841
842        pub(crate) rule command_substitution() -> WordPiece =
843            "$(" c:command() ")" { WordPiece::CommandSubstitution(c.to_owned()) } /
844            "`" c:backquoted_command() "`" { WordPiece::BackquotedCommandSubstitution(c) }
845
846        pub(crate) rule command() -> &'input str =
847            $(command_piece()*)
848
849        pub(crate) rule command_piece() -> () =
850            word_piece(<[')']>, true /*in_command*/) {} /
851            ([' ' | '\t'])+ {}
852
853        rule backquoted_command() -> String =
854            chars:(backquoted_char()*) { chars.into_iter().collect() }
855
856        rule backquoted_char() -> &'input str =
857            "\\`" { "`" } /
858            "\\\\" { "\\\\" } /
859            s:$([^'`']) { s }
860
861        rule arithmetic_expansion() -> WordPiece =
862            "$((" e:$(arithmetic_word(<"))">)) "))" { WordPiece::ArithmeticExpression(ast::UnexpandedArithmeticExpr { value: e.to_owned() } ) }
863
864        rule substring_offset() -> ast::UnexpandedArithmeticExpr =
865            s:$(arithmetic_word(<[':' | '}']>)) { ast::UnexpandedArithmeticExpr { value: s.to_owned() } }
866
867        rule substring_length() -> ast::UnexpandedArithmeticExpr =
868            s:$(arithmetic_word(<[':' | '}']>)) { ast::UnexpandedArithmeticExpr { value: s.to_owned() } }
869
870        rule parameter_replacement_str() -> String =
871            "/" s:$(word(<['}']>)) { s.to_owned() }
872
873        rule parameter_search_pattern() -> String =
874            s:$(word(<['}' | '/']>)) { s.to_owned() }
875
876        rule parameter_expression_word() -> String =
877            s:$(word(<['}']>)) { s.to_owned() }
878
879        rule extglob_enabled() -> () =
880            &[_] {? if parser_options.enable_extended_globbing { Ok(()) } else { Err("no extglob") } }
881
882        rule non_posix_extensions_enabled() -> () =
883            &[_] {? if !parser_options.sh_mode { Ok(()) } else { Err("posix") } }
884
885        rule tilde_parsing_enabled() -> () =
886            &[_] {? if parser_options.tilde_expansion { Ok(()) } else { Err("no tilde expansion") } }
887    }
888}
889
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;
    use insta::assert_ron_snapshot;

    /// Snapshot payload: the word that was parsed together with the pieces it produced.
    #[derive(serde::Serialize)]
    struct ParseTestResults<'a> {
        input: &'a str,
        result: Vec<WordPieceWithSource>,
    }

    /// Parses `word` with default parser options and pairs the input with the result.
    fn test_parse(word: &str) -> Result<ParseTestResults<'_>> {
        let result = super::parse(word, &ParserOptions::default())?;
        Ok(ParseTestResults {
            input: word,
            result,
        })
    }

    #[test]
    fn parse_ansi_c_quoted_text() -> Result<()> {
        assert_ron_snapshot!(test_parse(r"$'hi\nthere\t'")?);
        Ok(())
    }

    #[test]
    fn parse_double_quoted_text() -> Result<()> {
        assert_ron_snapshot!(test_parse(r#""a ${b} c""#)?);
        Ok(())
    }

    #[test]
    fn parse_gettext_double_quoted_text() -> Result<()> {
        assert_ron_snapshot!(test_parse(r#"$"a ${b} c""#)?);
        Ok(())
    }

    #[test]
    fn parse_command_substitution() -> Result<()> {
        let options = ParserOptions::default();

        // Exercise the grammar bottom-up before snapshotting the full parse.
        for piece in ["echo", "hi"] {
            super::expansion_parser::command_piece(piece, &options)?;
        }
        super::expansion_parser::command("echo hi", &options)?;
        super::expansion_parser::command_substitution("$(echo hi)", &options)?;

        assert_ron_snapshot!(test_parse("$(echo hi)")?);

        Ok(())
    }

    #[test]
    fn parse_command_substitution_with_embedded_quotes() -> Result<()> {
        let options = ParserOptions::default();

        // Exercise the grammar bottom-up before snapshotting the full parse.
        for piece in ["echo", r#""hi""#] {
            super::expansion_parser::command_piece(piece, &options)?;
        }
        super::expansion_parser::command(r#"echo "hi""#, &options)?;
        super::expansion_parser::command_substitution(r#"$(echo "hi")"#, &options)?;

        assert_ron_snapshot!(test_parse(r#"$(echo "hi")"#)?);
        Ok(())
    }

    #[test]
    fn parse_command_substitution_with_embedded_extglob() -> Result<()> {
        assert_ron_snapshot!(test_parse("$(echo !(x))")?);
        Ok(())
    }

    #[test]
    fn parse_backquoted_command() -> Result<()> {
        assert_ron_snapshot!(test_parse("`echo hi`")?);
        Ok(())
    }

    #[test]
    fn parse_backquoted_command_in_double_quotes() -> Result<()> {
        assert_ron_snapshot!(test_parse(r#""`echo hi`""#)?);
        Ok(())
    }

    #[test]
    fn parse_extglob_with_embedded_parameter() -> Result<()> {
        assert_ron_snapshot!(test_parse("+([$var])")?);
        Ok(())
    }

    #[test]
    fn parse_arithmetic_expansion() -> Result<()> {
        assert_ron_snapshot!(test_parse("$((0))")?);
        Ok(())
    }

    #[test]
    fn parse_arithmetic_expansion_with_parens() -> Result<()> {
        assert_ron_snapshot!(test_parse("$((((1+2)*3)))")?);
        Ok(())
    }

    #[test]
    fn test_arithmetic_word_parsing() {
        let options = ParserOptions::default();

        let accepted = [
            "a",
            "b",
            " a + b ",
            "(a)",
            "((a))",
            "(((a)))",
            "(1+2)",
            "(1+2)*3",
            "((1+2)*3)",
        ];

        for input in accepted {
            assert!(
                super::expansion_parser::is_arithmetic_word(input, &options).is_ok(),
                "expected {input:?} to parse as an arithmetic word"
            );
        }
    }

    #[test]
    fn test_arithmetic_word_piece_parsing() {
        let options = ParserOptions::default();

        let accepted = [
            "a",
            "b",
            " a + b ",
            "(a)",
            "((a))",
            "(((a)))",
            "(1+2)",
            "((1+2))",
            "((1+2)*3)",
        ];
        // Inputs with unbalanced parentheses.
        let rejected = ["(a", "(a))", "((a)"];

        for input in accepted {
            assert!(
                super::expansion_parser::is_arithmetic_word_piece(input, &options).is_ok(),
                "expected {input:?} to parse as an arithmetic word piece"
            );
        }
        for input in rejected {
            assert!(
                super::expansion_parser::is_arithmetic_word_piece(input, &options).is_err(),
                "expected {input:?} to be rejected as an arithmetic word piece"
            );
        }
    }

    #[test]
    fn test_brace_expansion_parsing() -> Result<()> {
        let options = ParserOptions::default();

        // One snapshot is recorded per input, in this order.
        for input in ["x{a,b}y", "{a,b{1,2}}"] {
            assert_ron_snapshot!(super::parse_brace_expansions(input, &options)?.ok_or_else(
                || anyhow::anyhow!("Expected brace expansion to be parsed successfully")
            )?);
        }

        Ok(())
    }
}