Skip to main content

safe_chains/cst/
parse.rs

1use super::*;
2use winnow::ModalResult;
3use winnow::combinator::{alt, delimited, not, opt, preceded, repeat, separated, terminated};
4use winnow::error::{ContextError, ErrMode};
5use winnow::prelude::*;
6use winnow::token::{any, take_while};
7
8pub fn parse(input: &str) -> Option<Script> {
9    script.parse(input).ok()
10}
11
12fn backtrack<T>() -> ModalResult<T> {
13    Err(ErrMode::Backtrack(ContextError::new()))
14}
15
16fn ws(input: &mut &str) -> ModalResult<()> {
17    take_while(0.., [' ', '\t']).void().parse_next(input)
18}
19
20fn sep(input: &mut &str) -> ModalResult<()> {
21    take_while(0.., [' ', '\t', ';', '\n'])
22        .void()
23        .parse_next(input)
24}
25
26fn eat_keyword(input: &mut &str, kw: &str) -> ModalResult<()> {
27    if !input.starts_with(kw) {
28        return backtrack();
29    }
30    if input
31        .as_bytes()
32        .get(kw.len())
33        .is_some_and(|&b| b.is_ascii_alphanumeric() || b == b'_')
34    {
35        return backtrack();
36    }
37    *input = &input[kw.len()..];
38    Ok(())
39}
40
/// Reserved words that terminate the script currently being parsed when they
/// appear where a new pipeline or command would start.
const SCRIPT_STOPS: &[&str] = &["do", "done", "elif", "else", "fi", "then"];

/// Reports whether `input` starts with something that ends the current script:
/// a closing `)` or one of [`SCRIPT_STOPS`] standing as a complete token.
///
/// A stop word only counts when it is followed by the end of input, whitespace,
/// or a shell operator character (`;`, `|`, `&`, `(`, `)`, `<`, `>`). Ordinary
/// command names that merely begin with a stop word — `do-release-upgrade`,
/// `fi.sh`, `done_marker` — are not stops.
fn at_script_stop(input: &str) -> bool {
    /// True when `next` (the byte right after a candidate keyword) ends a token.
    fn ends_token(next: Option<&u8>) -> bool {
        matches!(
            next,
            None | Some(b' ' | b'\t' | b'\n' | b';' | b'|' | b'&' | b'(' | b')' | b'<' | b'>')
        )
    }

    input.starts_with(')')
        || SCRIPT_STOPS.iter().any(|kw| {
            input
                .strip_prefix(*kw)
                .is_some_and(|rest| ends_token(rest.as_bytes().first()))
        })
}
53
/// Returns `true` for characters that end an unquoted word: blanks, newline,
/// `;`, `|`, `&`, `)`, `>` and `<`.
fn is_word_boundary(c: char) -> bool {
    const BOUNDARIES: [char; 9] = [' ', '\t', '\n', ';', '|', '&', ')', '>', '<'];
    BOUNDARIES.contains(&c)
}
57
58fn is_word_literal(c: char) -> bool {
59    !is_word_boundary(c) && !matches!(c, '\'' | '"' | '`' | '\\' | '(' | '$')
60}
61
/// Returns `true` when `c` is plain text inside double quotes, i.e. anything
/// except `"`, `\`, `` ` `` and `$`.
fn is_dq_literal(c: char) -> bool {
    match c {
        '"' | '\\' | '`' | '$' => false,
        _ => true,
    }
}
65
66// === Script ===
67
68fn script(input: &mut &str) -> ModalResult<Script> {
69    let mut stmts = Vec::new();
70    while let Some(pl) = opt(pipeline).parse_next(input)? {
71        ws.parse_next(input)?;
72        let op = opt(list_op).parse_next(input)?;
73        stmts.push(Stmt { pipeline: pl, op });
74        if op.is_none() {
75            break;
76        }
77    }
78    Ok(Script(stmts))
79}
80
81fn list_op(input: &mut &str) -> ModalResult<ListOp> {
82    ws.parse_next(input)?;
83    alt((
84        "&&".value(ListOp::And),
85        "||".value(ListOp::Or),
86        '\n'.value(ListOp::Semi),
87        ';'.value(ListOp::Semi),
88        ('&', not('>')).value(ListOp::Amp),
89    ))
90    .parse_next(input)
91}
92
93fn pipe_sep(input: &mut &str) -> ModalResult<()> {
94    (ws, '|', not('|'), ws).void().parse_next(input)
95}
96
97// === Pipeline ===
98
99fn pipeline(input: &mut &str) -> ModalResult<Pipeline> {
100    ws.parse_next(input)?;
101    if at_script_stop(input) {
102        return backtrack();
103    }
104    let bang = opt(terminated('!', ws)).parse_next(input)?.is_some();
105    let commands: Vec<Cmd> = separated(1.., command, pipe_sep).parse_next(input)?;
106    Ok(Pipeline { bang, commands })
107}
108
109// === Command ===
110
111fn command(input: &mut &str) -> ModalResult<Cmd> {
112    ws.parse_next(input)?;
113    if at_script_stop(input) {
114        return backtrack();
115    }
116    alt((
117        subshell,
118        for_cmd,
119        while_cmd,
120        until_cmd,
121        if_cmd,
122        simple_cmd.map(Cmd::Simple),
123    ))
124    .parse_next(input)
125}
126
127fn subshell(input: &mut &str) -> ModalResult<Cmd> {
128    delimited(('(', ws), script, (ws, ')'))
129        .map(Cmd::Subshell)
130        .parse_next(input)
131}
132
133// === Simple Command ===
134
135fn simple_cmd(input: &mut &str) -> ModalResult<SimpleCmd> {
136    let env: Vec<(String, Word)> =
137        repeat(0.., terminated(assignment, ws)).parse_next(input)?;
138    let mut words = Vec::new();
139    let mut redirs = Vec::new();
140
141    loop {
142        ws.parse_next(input)?;
143        if at_cmd_end(input) {
144            break;
145        }
146        if let Some(r) = opt(redirect).parse_next(input)? {
147            redirs.push(r);
148        } else if let Some(w) = opt(word).parse_next(input)? {
149            words.push(w);
150        } else {
151            break;
152        }
153    }
154
155    if env.is_empty() && words.is_empty() && redirs.is_empty() {
156        return backtrack();
157    }
158    Ok(SimpleCmd { env, words, redirs })
159}
160
/// Returns `true` when a simple command cannot continue: the input is empty
/// or its first byte is a newline, `;`, `|`, `&` or `)`.
fn at_cmd_end(input: &str) -> bool {
    match input.as_bytes().first() {
        None => true,
        Some(&byte) => matches!(byte, b'\n' | b';' | b'|' | b'&' | b')'),
    }
}
168
169fn assignment(input: &mut &str) -> ModalResult<(String, Word)> {
170    let n: &str = take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_')
171        .parse_next(input)?;
172    '='.parse_next(input)?;
173    let value = opt(word)
174        .parse_next(input)?
175        .unwrap_or(Word(vec![WordPart::Lit(String::new())]));
176    Ok((n.to_string(), value))
177}
178
179// === Redirect ===
180
181fn redirect(input: &mut &str) -> ModalResult<Redir> {
182    let fd = opt(fd_prefix).parse_next(input)?;
183    alt((
184        preceded("<<<", (ws, word)).map(|(_, target)| Redir::HereStr(target)),
185        preceded(">>", (ws, word)).map(move |(_, target)| Redir::Write {
186            fd: fd.unwrap_or(1),
187            target,
188            append: true,
189        }),
190        preceded(">&", fd_target).map(move |dst| Redir::DupFd {
191            src: fd.unwrap_or(1),
192            dst,
193        }),
194        preceded('>', (ws, word)).map(move |(_, target)| Redir::Write {
195            fd: fd.unwrap_or(1),
196            target,
197            append: false,
198        }),
199        preceded('<', (ws, word)).map(move |(_, target)| Redir::Read {
200            fd: fd.unwrap_or(0),
201            target,
202        }),
203    ))
204    .parse_next(input)
205}
206
207fn fd_prefix(input: &mut &str) -> ModalResult<u32> {
208    let b = input.as_bytes();
209    if b.len() >= 2 && b[0].is_ascii_digit() && matches!(b[1], b'>' | b'<') {
210        let d = (b[0] - b'0') as u32;
211        *input = &input[1..];
212        Ok(d)
213    } else {
214        backtrack()
215    }
216}
217
218fn fd_target(input: &mut &str) -> ModalResult<String> {
219    alt((
220        '-'.value("-".to_string()),
221        take_while(1.., |c: char| c.is_ascii_digit()).map(|s: &str| s.to_string()),
222    ))
223    .parse_next(input)
224}
225
226// === Word ===
227
228fn word(input: &mut &str) -> ModalResult<Word> {
229    repeat(1.., word_part)
230        .map(Word)
231        .parse_next(input)
232}
233
234fn word_part(input: &mut &str) -> ModalResult<WordPart> {
235    if input.is_empty() || is_word_boundary(input.as_bytes()[0] as char) {
236        return backtrack();
237    }
238    alt((single_quoted, double_quoted, cmd_sub, backtick_part, escaped, dollar_lit(is_word_literal), lit(is_word_literal)))
239        .parse_next(input)
240}
241
242fn single_quoted(input: &mut &str) -> ModalResult<WordPart> {
243    delimited('\'', take_while(0.., |c| c != '\''), '\'')
244        .map(|s: &str| WordPart::SQuote(s.to_string()))
245        .parse_next(input)
246}
247
248fn double_quoted(input: &mut &str) -> ModalResult<WordPart> {
249    delimited('"', repeat(0.., dq_part).map(Word), '"')
250        .map(WordPart::DQuote)
251        .parse_next(input)
252}
253
254fn cmd_sub(input: &mut &str) -> ModalResult<WordPart> {
255    delimited(("$(", ws), script, (ws, ')'))
256        .map(WordPart::CmdSub)
257        .parse_next(input)
258}
259
260fn backtick_part(input: &mut &str) -> ModalResult<WordPart> {
261    delimited('`', backtick_inner, '`')
262        .map(WordPart::Backtick)
263        .parse_next(input)
264}
265
266fn escaped(input: &mut &str) -> ModalResult<WordPart> {
267    preceded('\\', any).map(WordPart::Escape).parse_next(input)
268}
269
270fn lit(pred: fn(char) -> bool) -> impl FnMut(&mut &str) -> ModalResult<WordPart> {
271    move |input: &mut &str| {
272        take_while(1.., pred)
273            .map(|s: &str| WordPart::Lit(s.to_string()))
274            .parse_next(input)
275    }
276}
277
278fn dollar_lit(pred: fn(char) -> bool) -> impl FnMut(&mut &str) -> ModalResult<WordPart> {
279    move |input: &mut &str| {
280        ('$', not('(')).void().parse_next(input)?;
281        let rest: &str = take_while(0.., pred).parse_next(input)?;
282        Ok(WordPart::Lit(format!("${rest}")))
283    }
284}
285
286// === Double-quoted parts ===
287
288fn dq_part(input: &mut &str) -> ModalResult<WordPart> {
289    if input.is_empty() || input.starts_with('"') {
290        return backtrack();
291    }
292    alt((dq_escape, cmd_sub, backtick_part, dollar_lit(is_dq_literal), lit(is_dq_literal)))
293        .parse_next(input)
294}
295
296fn dq_escape(input: &mut &str) -> ModalResult<WordPart> {
297    preceded('\\', any)
298        .map(|c: char| match c {
299            '"' | '\\' | '$' | '`' => WordPart::Escape(c),
300            _ => WordPart::Lit(format!("\\{c}")),
301        })
302        .parse_next(input)
303}
304
305// === Backtick inner content ===
306
307fn backtick_inner(input: &mut &str) -> ModalResult<String> {
308    repeat(0.., alt((bt_escape, bt_literal)))
309        .fold(String::new, |mut acc, chunk: &str| {
310            acc.push_str(chunk);
311            acc
312        })
313        .parse_next(input)
314}
315
316fn bt_escape<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
317    ('\\', any).take().parse_next(input)
318}
319
320fn bt_literal<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
321    take_while(1.., |c: char| c != '`' && c != '\\').parse_next(input)
322}
323
324// === Compound Commands ===
325
326fn for_cmd(input: &mut &str) -> ModalResult<Cmd> {
327    eat_keyword(input, "for")?;
328    ws.parse_next(input)?;
329    let var = name.parse_next(input)?;
330    ws.parse_next(input)?;
331
332    let items = if eat_keyword(input, "in").is_ok() {
333        ws.parse_next(input)?;
334        repeat(0.., terminated(word, ws)).parse_next(input)?
335    } else {
336        vec![]
337    };
338
339    let body = do_done_body.parse_next(input)?;
340    Ok(Cmd::For { var, items, body })
341}
342
343fn while_cmd(input: &mut &str) -> ModalResult<Cmd> {
344    eat_keyword(input, "while")?;
345    ws.parse_next(input)?;
346    let cond = script.parse_next(input)?;
347    let body = do_done_body.parse_next(input)?;
348    Ok(Cmd::While { cond, body })
349}
350
351fn until_cmd(input: &mut &str) -> ModalResult<Cmd> {
352    eat_keyword(input, "until")?;
353    ws.parse_next(input)?;
354    let cond = script.parse_next(input)?;
355    let body = do_done_body.parse_next(input)?;
356    Ok(Cmd::Until { cond, body })
357}
358
359fn do_done_body(input: &mut &str) -> ModalResult<Script> {
360    sep.parse_next(input)?;
361    eat_keyword(input, "do")?;
362    sep.parse_next(input)?;
363    let body = script.parse_next(input)?;
364    sep.parse_next(input)?;
365    eat_keyword(input, "done")?;
366    Ok(body)
367}
368
369fn if_cmd(input: &mut &str) -> ModalResult<Cmd> {
370    eat_keyword(input, "if")?;
371    ws.parse_next(input)?;
372    let mut branches = vec![cond_then_body.parse_next(input)?];
373    let mut else_body = None;
374
375    loop {
376        sep.parse_next(input)?;
377        if eat_keyword(input, "elif").is_ok() {
378            ws.parse_next(input)?;
379            branches.push(cond_then_body.parse_next(input)?);
380        } else if eat_keyword(input, "else").is_ok() {
381            sep.parse_next(input)?;
382            else_body = Some(script.parse_next(input)?);
383            break;
384        } else {
385            break;
386        }
387    }
388
389    sep.parse_next(input)?;
390    eat_keyword(input, "fi")?;
391    Ok(Cmd::If { branches, else_body })
392}
393
394fn cond_then_body(input: &mut &str) -> ModalResult<Branch> {
395    let cond = script.parse_next(input)?;
396    sep.parse_next(input)?;
397    eat_keyword(input, "then")?;
398    sep.parse_next(input)?;
399    let body = script.parse_next(input)?;
400    Ok(Branch { cond, body })
401}
402
403fn name(input: &mut &str) -> ModalResult<String> {
404    take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_')
405        .map(|s: &str| s.to_string())
406        .parse_next(input)
407}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, panicking with the offending text when parsing fails.
    fn p(input: &str) -> Script {
        match parse(input) {
            Some(script) => script,
            None => panic!("failed to parse: {input}"),
        }
    }

    /// First command of the first pipeline of `script`.
    fn first_cmd(script: &Script) -> &Cmd {
        &script.0[0].pipeline.commands[0]
    }

    /// The first command of `script`, which must be a simple command.
    fn simple(script: &Script) -> &SimpleCmd {
        let Cmd::Simple(cmd) = first_cmd(script) else {
            panic!("expected simple command");
        };
        cmd
    }

    /// Evaluated words of the first (simple) command of `script`.
    fn words(script: &Script) -> Vec<String> {
        simple(script).words.iter().map(|w| w.eval()).collect()
    }

    // --- Words and quoting ---

    #[test]
    fn simple_command() {
        let script = p("echo hello");
        assert_eq!(words(&script), ["echo", "hello"]);
    }

    #[test]
    fn flags() {
        let script = p("ls -la");
        assert_eq!(words(&script), ["ls", "-la"]);
    }

    #[test]
    fn single_quoted() {
        let script = p("echo 'hello world'");
        assert_eq!(words(&script), ["echo", "hello world"]);
    }

    #[test]
    fn double_quoted() {
        let script = p("echo \"hello world\"");
        assert_eq!(words(&script), ["echo", "hello world"]);
    }

    #[test]
    fn mixed_quotes() {
        let script = p("jq '.key' file.json");
        assert_eq!(words(&script), ["jq", ".key", "file.json"]);
    }

    // --- Pipelines and list operators ---

    #[test]
    fn pipeline_test() {
        let script = p("grep foo | head -5");
        assert_eq!(script.0[0].pipeline.commands.len(), 2);
    }

    #[test]
    fn sequence_and() {
        let script = p("ls && echo done");
        assert_eq!(script.0[0].op, Some(ListOp::And));
    }

    #[test]
    fn sequence_semi() {
        let script = p("ls; echo done");
        assert_eq!(script.0.len(), 2);
    }

    #[test]
    fn newline_separator() {
        let script = p("echo foo\necho bar");
        assert_eq!(script.0.len(), 2);
    }

    #[test]
    fn background() {
        let script = p("ls & echo done");
        assert_eq!(script.0[0].op, Some(ListOp::Amp));
    }

    // --- Redirections ---

    #[test]
    fn redirect_dev_null() {
        let script = p("echo hello > /dev/null");
        let cmd = simple(&script);
        assert_eq!(cmd.words.len(), 2);
        assert!(matches!(
            &cmd.redirs[0],
            Redir::Write { fd: 1, append: false, .. }
        ));
    }

    #[test]
    fn redirect_stderr() {
        let script = p("echo hello 2>&1");
        let cmd = simple(&script);
        assert!(matches!(&cmd.redirs[0], Redir::DupFd { src: 2, dst } if dst == "1"));
    }

    #[test]
    fn here_string() {
        let script = p("grep -c , <<< 'hello,world,test'");
        let cmd = simple(&script);
        assert!(matches!(&cmd.redirs[0], Redir::HereStr(_)));
    }

    // --- Assignments and substitutions ---

    #[test]
    fn env_prefix() {
        let script = p("FOO='bar baz' ls -la");
        let (key, value) = &simple(&script).env[0];
        assert_eq!(key, "FOO");
        assert_eq!(value.eval(), "bar baz");
    }

    #[test]
    fn cmd_substitution() {
        let script = p("echo $(ls)");
        assert!(matches!(&simple(&script).words[1].0[0], WordPart::CmdSub(_)));
    }

    #[test]
    fn backtick_substitution() {
        let script = p("ls `pwd`");
        assert_eq!(simple(&script).words[1].eval(), "__SAFE_CHAINS_SUB__");
    }

    #[test]
    fn nested_substitution() {
        let script = p("echo $(echo $(ls))");
        let WordPart::CmdSub(inner) = &simple(&script).words[1].0[0] else {
            panic!("expected CmdSub");
        };
        assert!(matches!(&simple(inner).words[1].0[0], WordPart::CmdSub(_)));
    }

    // --- Compound commands ---

    #[test]
    fn subshell_test() {
        let script = p("(echo hello)");
        assert!(matches!(first_cmd(&script), Cmd::Subshell(_)));
    }

    #[test]
    fn negation() {
        let script = p("! echo hello");
        assert!(script.0[0].pipeline.bang);
    }

    #[test]
    fn for_loop() {
        let script = p("for x in 1 2 3; do echo $x; done");
        assert!(matches!(first_cmd(&script), Cmd::For { var, .. } if var == "x"));
    }

    #[test]
    fn while_loop() {
        let script = p("while test -f /tmp/foo; do sleep 1; done");
        assert!(matches!(first_cmd(&script), Cmd::While { .. }));
    }

    #[test]
    fn if_then_fi() {
        let script = p("if test -f foo; then echo exists; fi");
        let Cmd::If { branches, else_body } = first_cmd(&script) else {
            panic!("expected If");
        };
        assert_eq!(branches.len(), 1);
        assert!(else_body.is_none());
    }

    #[test]
    fn if_elif_else() {
        let script = p("if test -f a; then echo a; elif test -f b; then echo b; else echo c; fi");
        let Cmd::If { branches, else_body } = first_cmd(&script) else {
            panic!("expected If");
        };
        assert_eq!(branches.len(), 2);
        assert!(else_body.is_some());
    }

    // --- Escapes ---

    #[test]
    fn escaped_outside_quotes() {
        let script = p("echo hello\\ world");
        assert_eq!(words(&script), ["echo", "hello world"]);
    }

    #[test]
    fn double_quoted_escape() {
        let script = p("echo \"hello\\\"world\"");
        assert_eq!(words(&script), ["echo", "hello\"world"]);
    }

    #[test]
    fn assign_subst() {
        let script = p("out=$(ls)");
        assert_eq!(simple(&script).env[0].0, "out");
    }

    // --- Inputs that must be rejected ---

    #[test]
    fn unmatched_single_quote_fails() {
        assert!(parse("echo 'hello").is_none());
    }

    #[test]
    fn unmatched_double_quote_fails() {
        assert!(parse("echo \"hello").is_none());
    }

    #[test]
    fn unclosed_subshell_fails() {
        assert!(parse("(echo hello").is_none());
    }

    #[test]
    fn unclosed_cmd_sub_fails() {
        assert!(parse("echo $(ls").is_none());
    }

    #[test]
    fn for_missing_do_fails() {
        assert!(parse("for x in 1 2 3; echo $x; done").is_none());
    }

    #[test]
    fn if_missing_fi_fails() {
        assert!(parse("if true; then echo hello").is_none());
    }

    // --- Combinations ---

    #[test]
    fn subshell_for() {
        let script = p("(for x in 1 2; do echo $x; done)");
        let Cmd::Subshell(inner) = first_cmd(&script) else {
            panic!("expected Subshell");
        };
        assert!(matches!(first_cmd(inner), Cmd::For { .. }));
    }

    #[test]
    fn quoted_redirect_in_echo() {
        let script = p("echo 'greater > than' test");
        let cmd = simple(&script);
        assert_eq!(cmd.words.len(), 3);
        assert_eq!(cmd.redirs.len(), 0);
    }

    #[test]
    fn parses_all_safe_commands() {
        let cmds = [
            "grep foo file.txt", "cat /etc/hosts", "jq '.key' file.json", "base64 -d",
            "ls -la", "wc -l file.txt", "ps aux", "echo hello", "cat file.txt",
            "echo $(ls)", "ls `pwd`", "echo $(echo $(ls))", "echo \"$(ls)\"",
            "out=$(ls)", "out=$(git status)", "a=$(ls) b=$(pwd)",
            "(echo hello)", "(ls)", "(ls && echo done)", "(echo hello; echo world)",
            "(ls | grep foo)", "(echo hello) | grep hello", "(ls) && echo done",
            "((echo hello))", "(for x in 1 2; do echo $x; done)",
            "echo 'greater > than' test", "echo '$(safe)' arg",
            "FOO='bar baz' ls -la", "FOO=\"bar baz\" ls -la",
            "RACK_ENV=test bundle exec rspec spec/foo_spec.rb",
            "grep foo file.txt | head -5", "cat file | sort | uniq",
            "ls && echo done", "ls; echo done", "ls & echo done",
            "grep -c , <<< 'hello,world,test'",
            "echo foo\necho bar", "ls\ncat file.txt",
            "git log --oneline -20 | head -5",
            "echo hello > /dev/null", "echo hello 2> /dev/null",
            "echo hello >> /dev/null", "git log > /dev/null 2>&1",
            "ls 2>&1", "cargo clippy 2>&1", "git log < /dev/null",
            "for x in 1 2 3; do echo $x; done",
            "for f in *.txt; do cat $f | grep pattern; done",
            "for x in 1 2 3; do; done",
            "for x in 1 2; do echo $x; done; for y in a b; do echo $y; done",
            "for x in 1 2; do for y in a b; do echo $x $y; done; done",
            "for x in 1 2; do echo $x; done && echo finished",
            "for x in $(seq 1 5); do echo $x; done",
            "while test -f /tmp/foo; do sleep 1; done",
            "while ! test -f /tmp/done; do sleep 1; done",
            "until test -f /tmp/ready; do sleep 1; done",
            "if test -f foo; then echo exists; fi",
            "if test -f foo; then echo yes; else echo no; fi",
            "if test -f a; then echo a; elif test -f b; then echo b; else echo c; fi",
            "for x in 1 2; do if test $x = 1; then echo one; fi; done",
            "if true; then for x in 1 2; do echo $x; done; fi",
            "! echo hello", "! test -f foo",
            "echo for; echo done; echo if; echo fi",
        ];
        // Gather every command the parser rejects so one run reports them all.
        let failures: Vec<&str> = cmds
            .iter()
            .copied()
            .filter(|cmd| parse(cmd).is_none())
            .collect();
        assert!(failures.is_empty(), "failed on {} commands:\n{}", failures.len(), failures.join("\n"));
    }
}
590}