Skip to main content

mii_http/parse/
exec.rs

//! Parser for the Exec mini-language (the value of an `Exec:` directive).
//!
//! Grammar (informal):
//!
//!   exec       := pipeline
//!   pipeline   := stage ("|" stage)*
//!   stage      := source | command
//!   source     := value_ref                      (a single bare ref by itself)
//!   command    := token (ws+ token)*
//!   token      := group | text
//!   group      := "[" piece (ws+ piece)* "]"
//!   piece      := (text-without-spaces | value_ref | quoted_str)+
//!   text       := (literal | quoted_str)+
//!   quoted_str := quote (literal | "\" any-char | "{" value_ref "}")* quote
//!   value_ref  := "%" ident | ":" ident | "^" ident | "@" ident
//!               | "$" | "$." ident ("." ident)*
//!
//! This module is purely syntactic: it produces an `ExecStage` AST. Argv
//! construction and process spawning live in [`crate::exec`].
20
21use crate::diag::Diag;
22use crate::spec::{ExecStage, ExecToken, GroupPiece, TextPart, ValueRef};
23use chumsky::error::Rich;
24use chumsky::prelude::*;
25
26type Extra<'a> = extra::Err<Rich<'a, char>>;
27
28/// Parse the Exec value. `start` is the absolute byte offset of the first
29/// character in the source, used to translate spans for diagnostics.
30pub fn parse_exec(raw: &str, start: usize) -> Result<Vec<ExecStage>, Diag> {
31    let result = pipeline_parser().parse(raw).into_result();
32    match result {
33        Ok(stages) => Ok(stages.into_iter().map(|s| shift_stage(s, start)).collect()),
34        Err(errs) => {
35            let e = errs
36                .into_iter()
37                .next()
38                .expect("chumsky returns >=1 err on failure");
39            let span = e.span();
40            Err(Diag::error(
41                format!("invalid Exec: {}", e),
42                (start + span.start)..(start + span.end),
43                "syntax error",
44            ))
45        }
46    }
47}
48
49// ---------- chumsky grammar ----------
50
51fn ident_parser<'a>() -> impl Parser<'a, &'a str, String, Extra<'a>> + Clone {
52    any()
53        .filter(|c: &char| c.is_ascii_alphanumeric() || *c == '_' || *c == '-')
54        .repeated()
55        .at_least(1)
56        .collect::<String>()
57}
58
59fn value_ref_parser<'a>() -> impl Parser<'a, &'a str, ValueRef, Extra<'a>> + Clone {
60    let dotted_ident = any()
61        .filter(|c: &char| c.is_ascii_alphanumeric() || *c == '_' || *c == '-')
62        .repeated()
63        .at_least(1)
64        .collect::<String>();
65    let body_path = just('.')
66        .ignore_then(
67            dotted_ident
68                .separated_by(just('.'))
69                .at_least(1)
70                .collect::<Vec<_>>(),
71        )
72        .or_not();
73    let body = just('$').ignore_then(body_path).map(|p| ValueRef::Body {
74        path: p.unwrap_or_default(),
75    });
76
77    let sigil_ref = choice((
78        just('%').ignore_then(ident_parser()).map(ValueRef::Query),
79        just(':').ignore_then(ident_parser()).map(ValueRef::Path),
80        just('^').ignore_then(ident_parser()).map(ValueRef::Header),
81        just('@').ignore_then(ident_parser()).map(ValueRef::Var),
82    ));
83
84    choice((body, sigil_ref))
85}
86
87fn interp_parser<'a>() -> impl Parser<'a, &'a str, ValueRef, Extra<'a>> + Clone {
88    just('{')
89        .ignore_then(value_ref_parser().padded_by(one_of(" \t").repeated()))
90        .then_ignore(just('}'))
91}
92
93fn quoted_parts<'a>(quote: char) -> impl Parser<'a, &'a str, Vec<TextPart>, Extra<'a>> + Clone {
94    let interp = interp_parser().map(TextPart::Interp);
95    let escape = just('\\').ignore_then(any().map(|c: char| TextPart::Literal(c.to_string())));
96    let literal = any()
97        .filter(move |c: &char| *c != quote && *c != '\\' && *c != '{')
98        .repeated()
99        .at_least(1)
100        .collect::<String>()
101        .map(TextPart::Literal);
102    just(quote)
103        .ignore_then(
104            choice((interp, escape, literal))
105                .repeated()
106                .collect::<Vec<_>>()
107                .map(merge_literals),
108        )
109        .then_ignore(just(quote))
110}
111
112/// A "text token" is a sequence of literal chunks and quoted strings,
113/// terminated by whitespace or a special char (`[`, `|`, `]`). `{...}`
114/// interpolation is accepted only inside quoted strings.
115fn text_token_parser<'a>() -> impl Parser<'a, &'a str, (Vec<TextPart>, bool), Extra<'a>> + Clone {
116    let quoted = choice((quoted_parts('"'), quoted_parts('\''))).map(|parts| (parts, true));
117    let bare = any()
118        .filter(|c: &char| {
119            !c.is_whitespace()
120                && *c != '|'
121                && *c != '['
122                && *c != ']'
123                && *c != '{'
124                && *c != '"'
125                && *c != '\''
126        })
127        .repeated()
128        .at_least(1)
129        .collect::<String>()
130        .map(|s| (vec![TextPart::Literal(s)], false));
131    choice((quoted, bare))
132        .repeated()
133        .at_least(1)
134        .collect::<Vec<_>>()
135        .map(|chunks| {
136            let mut parts = Vec::new();
137            let mut force_quote = false;
138            for (mut chunk_parts, quoted) in chunks {
139                force_quote |= quoted;
140                parts.append(&mut chunk_parts);
141            }
142            (merge_literals(parts), force_quote)
143        })
144}
145
146/// Inside a `[...]` group: pieces are whitespace-separated. A piece may be a
147/// bare value ref (e.g. `%name`, `:user_id`, `$.user.name`), a quoted string
148/// with `{...}` interpolation, or literal text mixed with those forms.
149fn group_piece_parser<'a>() -> impl Parser<'a, &'a str, GroupPiece, Extra<'a>> + Clone {
150    let bare_ref = value_ref_parser().map(|r| (vec![TextPart::Interp(r)], false));
151    let quoted = choice((quoted_parts('"'), quoted_parts('\''))).map(|parts| (parts, true));
152    let bare = any()
153        .filter(|c: &char| {
154            !c.is_whitespace()
155                && *c != '|'
156                && *c != '['
157                && *c != ']'
158                && *c != '{'
159                && *c != '}'
160                && *c != '"'
161                && *c != '\''
162                && *c != '%'
163                && *c != ':'
164                && *c != '^'
165                && *c != '@'
166                && *c != '$'
167        })
168        .repeated()
169        .at_least(1)
170        .collect::<String>()
171        .map(|s| (vec![TextPart::Literal(s)], false));
172    choice((bare_ref, quoted, bare))
173        .repeated()
174        .at_least(1)
175        .collect::<Vec<_>>()
176        .map(|chunks| {
177            let mut parts = Vec::new();
178            let mut force_quote = false;
179            for (mut chunk_parts, quoted) in chunks {
180                force_quote |= quoted;
181                parts.append(&mut chunk_parts);
182            }
183            GroupPiece {
184                parts: merge_literals(parts),
185                force_quote,
186            }
187        })
188}
189
190fn merge_literals(parts: Vec<TextPart>) -> Vec<TextPart> {
191    let mut out: Vec<TextPart> = Vec::with_capacity(parts.len());
192    for p in parts {
193        match (p, out.last_mut()) {
194            (TextPart::Literal(s), Some(TextPart::Literal(prev))) => {
195                prev.push_str(&s);
196            }
197            (p, _) => out.push(p),
198        }
199    }
200    out
201}
202
203fn hws<'a>() -> impl Parser<'a, &'a str, (), Extra<'a>> + Clone {
204    one_of(" \t").repeated().ignored()
205}
206
207fn group_parser<'a>() -> impl Parser<'a, &'a str, ExecToken, Extra<'a>> + Clone {
208    just('[')
209        .ignore_then(hws())
210        .ignore_then(
211            group_piece_parser()
212                .separated_by(hws().then(empty()))
213                .at_least(1)
214                .collect::<Vec<_>>(),
215        )
216        .then_ignore(hws())
217        .then_ignore(just(']'))
218        .map_with(|pieces, e| {
219            let span: SimpleSpan = e.span();
220            ExecToken::Group {
221                pieces,
222                span: span.start..span.end,
223            }
224        })
225}
226
227fn token_parser<'a>() -> impl Parser<'a, &'a str, ExecToken, Extra<'a>> + Clone {
228    choice((
229        group_parser(),
230        text_token_parser().map_with(|(parts, force_quote), e| {
231            let span: SimpleSpan = e.span();
232            ExecToken::Text {
233                parts,
234                force_quote,
235                span: span.start..span.end,
236            }
237        }),
238    ))
239}
240
241fn stage_parser<'a>() -> impl Parser<'a, &'a str, ExecStage, Extra<'a>> + Clone {
242    // Try a bare value-ref-only stage first (Source). Then fall back to a
243    // command stage. The Source path requires the ref to be alone (only ws
244    // before the next `|` or end).
245    let source_only = hws()
246        .ignore_then(value_ref_parser())
247        .then_ignore(hws())
248        .then_ignore(choice((just('|').rewind().ignored(), end())))
249        .map_with(|reference, e| {
250            let span: SimpleSpan = e.span();
251            ExecStage::Source {
252                reference,
253                span: span.start..span.end,
254            }
255        });
256
257    let command = hws().ignore_then(
258        token_parser()
259            .separated_by(hws().then(empty()).then(hws()))
260            .at_least(1)
261            .collect::<Vec<_>>()
262            .then_ignore(hws())
263            .map_with(|tokens: Vec<ExecToken>, e| {
264                let span: SimpleSpan = e.span();
265                ExecStage::Command {
266                    tokens,
267                    span: span.start..span.end,
268                }
269            }),
270    );
271
272    choice((source_only, command))
273}
274
275fn pipeline_parser<'a>() -> impl Parser<'a, &'a str, Vec<ExecStage>, Extra<'a>> + Clone {
276    stage_parser()
277        .separated_by(just('|'))
278        .at_least(1)
279        .collect::<Vec<_>>()
280        .then_ignore(hws())
281        .then_ignore(end())
282}
283
284// ---------- span shifting ----------
285
286fn shift_stage(s: ExecStage, base: usize) -> ExecStage {
287    match s {
288        ExecStage::Source { reference, span } => ExecStage::Source {
289            reference,
290            span: (span.start + base)..(span.end + base),
291        },
292        ExecStage::Command { tokens, span } => ExecStage::Command {
293            tokens: tokens.into_iter().map(|t| shift_token(t, base)).collect(),
294            span: (span.start + base)..(span.end + base),
295        },
296    }
297}
298
299fn shift_token(t: ExecToken, base: usize) -> ExecToken {
300    match t {
301        ExecToken::Text {
302            parts,
303            force_quote,
304            span,
305        } => ExecToken::Text {
306            parts,
307            force_quote,
308            span: (span.start + base)..(span.end + base),
309        },
310        ExecToken::Group { pieces, span } => ExecToken::Group {
311            pieces,
312            span: (span.start + base)..(span.end + base),
313        },
314    }
315}