// mii_http/parse/exec.rs

//! Parser for the Exec mini-language (the value of an `Exec:` directive).
//!
//! Grammar (informal):
//!
//! ```text
//!   exec      := pipeline
//!   pipeline  := stage ("|" stage)*
//!   stage     := source | command
//!   source    := value_ref                       (a single bare ref by itself)
//!   command   := token (ws+ token)*
//!   token     := group | text
//!   group     := "[" piece (ws+ piece)* "]"
//!   piece     := text-without-spaces             (with `{...}` interpolations)
//!   text      := (literal | "{" value_ref "}" | quoted_str)+
//!   value_ref := "%" ident | ":" ident | "^" ident | "@" ident
//!              | "$" | "$." ident ("." ident)*
//! ```
//!
//! This module is purely syntactic: it produces an `ExecStage` AST. Argv
//! construction and process spawning live in [`crate::exec`].

20use crate::diag::Diag;
21use crate::spec::{ExecStage, ExecToken, GroupPiece, TextPart, ValueRef};
22use chumsky::error::Rich;
23use chumsky::prelude::*;
24
25type Extra<'a> = extra::Err<Rich<'a, char>>;
26
27/// Parse the Exec value. `start` is the absolute byte offset of the first
28/// character in the source, used to translate spans for diagnostics.
29pub fn parse_exec(raw: &str, start: usize) -> Result<Vec<ExecStage>, Diag> {
30    let result = pipeline_parser().parse(raw).into_result();
31    match result {
32        Ok(stages) => Ok(stages
33            .into_iter()
34            .map(|s| shift_stage(s, start))
35            .collect()),
36        Err(errs) => {
37            let e = errs
38                .into_iter()
39                .next()
40                .expect("chumsky returns >=1 err on failure");
41            let span = e.span();
42            Err(Diag::error(
43                format!("invalid Exec: {}", e),
44                (start + span.start)..(start + span.end),
45                "syntax error",
46            ))
47        }
48    }
49}
50
// ---------- chumsky grammar ----------

53fn ident_parser<'a>() -> impl Parser<'a, &'a str, String, Extra<'a>> + Clone {
54    any()
55        .filter(|c: &char| c.is_ascii_alphanumeric() || *c == '_' || *c == '-')
56        .repeated()
57        .at_least(1)
58        .collect::<String>()
59}
60
61fn value_ref_parser<'a>() -> impl Parser<'a, &'a str, ValueRef, Extra<'a>> + Clone {
62    let dotted_ident = any()
63        .filter(|c: &char| c.is_ascii_alphanumeric() || *c == '_' || *c == '-')
64        .repeated()
65        .at_least(1)
66        .collect::<String>();
67    let body_path = just('.')
68        .ignore_then(
69            dotted_ident
70                .clone()
71                .separated_by(just('.'))
72                .at_least(1)
73                .collect::<Vec<_>>(),
74        )
75        .or_not();
76    let body = just('$')
77        .ignore_then(body_path)
78        .map(|p| ValueRef::Body {
79            path: p.unwrap_or_default(),
80        });
81
82    let sigil_ref = choice((
83        just('%').ignore_then(ident_parser()).map(ValueRef::Query),
84        just(':').ignore_then(ident_parser()).map(ValueRef::Path),
85        just('^').ignore_then(ident_parser()).map(ValueRef::Header),
86        just('@').ignore_then(ident_parser()).map(ValueRef::Var),
87    ));
88
89    choice((body, sigil_ref))
90}
91
92fn interp_parser<'a>() -> impl Parser<'a, &'a str, ValueRef, Extra<'a>> + Clone {
93    just('{')
94        .ignore_then(value_ref_parser().padded_by(one_of(" \t").repeated()))
95        .then_ignore(just('}'))
96}
97
98fn quoted_str<'a>(quote: char) -> impl Parser<'a, &'a str, String, Extra<'a>> + Clone {
99    let escape = just('\\').ignore_then(any().map(|c: char| c));
100    let normal = any().filter(move |c: &char| *c != quote && *c != '\\');
101    just(quote)
102        .ignore_then(choice((escape, normal)).repeated().collect::<String>())
103        .then_ignore(just(quote))
104}
105
106/// A "text token" is a sequence of literal chunks, interpolations and quoted
107/// strings, terminated by whitespace or a special char (`[`, `|`, `]`).
108fn text_token_parser<'a>() -> impl Parser<'a, &'a str, Vec<TextPart>, Extra<'a>> + Clone {
109    let interp = interp_parser().map(TextPart::Interp);
110    let quoted = choice((quoted_str('"'), quoted_str('\''))).map(TextPart::Literal);
111    let bare = any()
112        .filter(|c: &char| {
113            !c.is_whitespace() && *c != '|' && *c != '[' && *c != ']' && *c != '{' && *c != '"' && *c != '\''
114        })
115        .repeated()
116        .at_least(1)
117        .collect::<String>()
118        .map(TextPart::Literal);
119    choice((interp, quoted, bare))
120        .repeated()
121        .at_least(1)
122        .collect::<Vec<_>>()
123        .map(merge_literals)
124}
125
126/// Inside a `[...]` group: pieces are whitespace-separated. A piece may be a
127/// bare value ref (e.g. `%name`, `:user_id`, `$.user.name`), a quoted string,
128/// or a literal mixed with `{...}` interps.
129fn group_piece_parser<'a>() -> impl Parser<'a, &'a str, GroupPiece, Extra<'a>> + Clone {
130    let interp = interp_parser().map(TextPart::Interp);
131    let bare_ref = value_ref_parser().map(TextPart::Interp);
132    let quoted = choice((quoted_str('"'), quoted_str('\''))).map(TextPart::Literal);
133    let bare = any()
134        .filter(|c: &char| {
135            !c.is_whitespace()
136                && *c != '|'
137                && *c != '['
138                && *c != ']'
139                && *c != '{'
140                && *c != '}'
141                && *c != '"'
142                && *c != '\''
143                && *c != '%'
144                && *c != ':'
145                && *c != '^'
146                && *c != '@'
147                && *c != '$'
148        })
149        .repeated()
150        .at_least(1)
151        .collect::<String>()
152        .map(TextPart::Literal);
153    choice((interp, bare_ref, quoted, bare))
154        .repeated()
155        .at_least(1)
156        .collect::<Vec<_>>()
157        .map(|parts| GroupPiece {
158            parts: merge_literals(parts),
159        })
160}
161
162fn merge_literals(parts: Vec<TextPart>) -> Vec<TextPart> {
163    let mut out: Vec<TextPart> = Vec::with_capacity(parts.len());
164    for p in parts {
165        match (p, out.last_mut()) {
166            (TextPart::Literal(s), Some(TextPart::Literal(prev))) => {
167                prev.push_str(&s);
168            }
169            (p, _) => out.push(p),
170        }
171    }
172    out
173}
174
175fn hws<'a>() -> impl Parser<'a, &'a str, (), Extra<'a>> + Clone {
176    one_of(" \t").repeated().ignored()
177}
178
179fn group_parser<'a>() -> impl Parser<'a, &'a str, ExecToken, Extra<'a>> + Clone {
180    just('[')
181        .ignore_then(hws())
182        .ignore_then(
183            group_piece_parser()
184                .separated_by(hws().then(empty()))
185                .at_least(1)
186                .collect::<Vec<_>>(),
187        )
188        .then_ignore(hws())
189        .then_ignore(just(']'))
190        .map_with(|pieces, e| {
191            let span: SimpleSpan = e.span();
192            ExecToken::Group {
193                pieces,
194                span: span.start..span.end,
195            }
196        })
197}
198
199fn token_parser<'a>() -> impl Parser<'a, &'a str, ExecToken, Extra<'a>> + Clone {
200    choice((
201        group_parser(),
202        text_token_parser().map_with(|parts, e| {
203            let span: SimpleSpan = e.span();
204            ExecToken::Text {
205                parts,
206                span: span.start..span.end,
207            }
208        }),
209    ))
210}
211
212fn stage_parser<'a>() -> impl Parser<'a, &'a str, ExecStage, Extra<'a>> + Clone {
213    // Try a bare value-ref-only stage first (Source). Then fall back to a
214    // command stage. The Source path requires the ref to be alone (only ws
215    // before the next `|` or end).
216    let source_only = hws()
217        .ignore_then(value_ref_parser())
218        .then_ignore(hws())
219        .then_ignore(choice((just('|').rewind().ignored(), end())))
220        .map_with(|reference, e| {
221            let span: SimpleSpan = e.span();
222            ExecStage::Source {
223                reference,
224                span: span.start..span.end,
225            }
226        });
227
228    let command = hws().ignore_then(
229        token_parser()
230            .separated_by(hws().then(empty()).then(hws()))
231            .at_least(1)
232            .collect::<Vec<_>>()
233            .then_ignore(hws())
234            .map_with(|tokens: Vec<ExecToken>, e| {
235                let span: SimpleSpan = e.span();
236                ExecStage::Command {
237                    tokens,
238                    span: span.start..span.end,
239                }
240            }),
241    );
242
243    choice((source_only, command))
244}
245
246fn pipeline_parser<'a>() -> impl Parser<'a, &'a str, Vec<ExecStage>, Extra<'a>> + Clone {
247    stage_parser()
248        .separated_by(just('|'))
249        .at_least(1)
250        .collect::<Vec<_>>()
251        .then_ignore(hws())
252        .then_ignore(end())
253}
254
// ---------- span shifting ----------

257fn shift_stage(s: ExecStage, base: usize) -> ExecStage {
258    match s {
259        ExecStage::Source { reference, span } => ExecStage::Source {
260            reference,
261            span: (span.start + base)..(span.end + base),
262        },
263        ExecStage::Command { tokens, span } => ExecStage::Command {
264            tokens: tokens.into_iter().map(|t| shift_token(t, base)).collect(),
265            span: (span.start + base)..(span.end + base),
266        },
267    }
268}
269
270fn shift_token(t: ExecToken, base: usize) -> ExecToken {
271    match t {
272        ExecToken::Text { parts, span } => ExecToken::Text {
273            parts,
274            span: (span.start + base)..(span.end + base),
275        },
276        ExecToken::Group { pieces, span } => ExecToken::Group {
277            pieces,
278            span: (span.start + base)..(span.end + base),
279        },
280    }
281}