Skip to main content

aft/bash_rewrite/
parser.rs

/// Result of successfully parsing one simple shell command.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedCommand {
    /// Words of the command in order, with quoting and escapes already resolved.
    pub args: Vec<String>,
    /// Body of the here-document attached to the command, when one was present.
    pub heredoc: Option<String>,
    /// Target word of a `>>` redirection, when one was present.
    pub appends_to: Option<String>,
}
7
8pub fn parse(command: &str) -> Option<ParsedCommand> {
9    let command = command.trim();
10    if command.is_empty() {
11        return None;
12    }
13
14    let (header, heredoc) = split_heredoc(command)?;
15    let parsed = tokenize(header, heredoc)?;
16
17    if parsed.args.is_empty() {
18        return None;
19    }
20
21    Some(parsed)
22}
23
24fn split_heredoc(command: &str) -> Option<(&str, Option<String>)> {
25    let Some(op_start) = find_heredoc_operator(command)? else {
26        return Some((command, None));
27    };
28
29    let after_operator = op_start + 2;
30    let after_spaces = skip_horizontal_space(command, after_operator);
31    let (delimiter, delimiter_end) = read_unquoted_word(command, after_spaces)?;
32    if delimiter.is_empty() {
33        return None;
34    }
35
36    let line_start = match command[delimiter_end..].find('\n') {
37        Some(offset) => delimiter_end + offset + 1,
38        None => return None,
39    };
40
41    let body = &command[line_start..];
42    let terminator = format!("\n{delimiter}");
43    let (content, rest_start) = if body == delimiter {
44        ("", line_start + delimiter.len())
45    } else if let Some(stripped) = body.strip_prefix(&format!("{delimiter}\n")) {
46        ("", command.len() - stripped.len())
47    } else if let Some(offset) = body.find(&terminator) {
48        let content = &body[..offset + 1];
49        let rest_start = line_start + offset + terminator.len();
50        (content, rest_start)
51    } else {
52        return None;
53    };
54
55    let rest = &command[rest_start..];
56    let rest = rest.strip_prefix('\n').unwrap_or(rest);
57    if !rest.trim().is_empty() {
58        return None;
59    }
60
61    Some((&command[..op_start], Some(content.to_string())))
62}
63
64fn find_heredoc_operator(command: &str) -> Option<Option<usize>> {
65    let mut quote = Quote::None;
66    let mut chars = command.char_indices().peekable();
67
68    while let Some((idx, ch)) = chars.next() {
69        match quote {
70            Quote::Single => {
71                if ch == '\'' {
72                    quote = Quote::None;
73                }
74            }
75            Quote::Double => match ch {
76                '"' => quote = Quote::None,
77                '`' => return None,
78                '$' if matches!(chars.peek(), Some((_, '(' | '{'))) => return None,
79                '\\' => {
80                    chars.next();
81                }
82                _ => {}
83            },
84            Quote::None => match ch {
85                '\'' => quote = Quote::Single,
86                '"' => quote = Quote::Double,
87                '`' => return None,
88                '$' if is_unsupported_variable_start(chars.peek().map(|(_, c)| *c)) => return None,
89                '\\' => {
90                    chars.next();
91                }
92                '<' if matches!(chars.peek(), Some((_, '<'))) => return Some(Some(idx)),
93                _ => {}
94            },
95        }
96    }
97
98    if quote == Quote::None {
99        Some(None)
100    } else {
101        None
102    }
103}
104
105fn tokenize(header: &str, heredoc: Option<String>) -> Option<ParsedCommand> {
106    let mut args = Vec::new();
107    let mut token = String::new();
108    let mut quote = Quote::None;
109    let mut appends_to = None;
110    let mut chars = header.char_indices().peekable();
111
112    while let Some((_, ch)) = chars.next() {
113        match quote {
114            Quote::Single => {
115                if ch == '\'' {
116                    quote = Quote::None;
117                } else {
118                    token.push(ch);
119                }
120            }
121            Quote::Double => match ch {
122                '"' => quote = Quote::None,
123                '`' => return None,
124                '$' if is_unsupported_variable_start(chars.peek().map(|(_, c)| *c)) => return None,
125                '\\' => match chars.next() {
126                    Some((_, escaped)) => token.push(escaped),
127                    None => token.push('\\'),
128                },
129                _ => token.push(ch),
130            },
131            Quote::None => match ch {
132                c if c.is_whitespace() => push_token(&mut args, &mut token),
133                '\'' => quote = Quote::Single,
134                '"' => quote = Quote::Double,
135                '\\' => match chars.next() {
136                    Some((_, escaped)) => token.push(escaped),
137                    None => token.push('\\'),
138                },
139                '`' => return None,
140                '$' if is_unsupported_variable_start(chars.peek().map(|(_, c)| *c)) => return None,
141                '|' | ';' => return None,
142                '&' if matches!(chars.peek(), Some((_, '&'))) => return None,
143                '>' if matches!(chars.peek(), Some((_, '>'))) => {
144                    chars.next();
145                    push_token(&mut args, &mut token);
146                    if appends_to.is_some() {
147                        return None;
148                    }
149                    appends_to = Some(read_next_redirect_target(header, &mut chars)?);
150                    if has_non_space_remainder(&mut chars) {
151                        return None;
152                    }
153                    break;
154                }
155                '>' | '<' => return None,
156                _ => token.push(ch),
157            },
158        }
159    }
160
161    if quote != Quote::None {
162        return None;
163    }
164    push_token(&mut args, &mut token);
165
166    if heredoc.is_some() && appends_to.is_none() {
167        return None;
168    }
169
170    Some(ParsedCommand {
171        args,
172        heredoc,
173        appends_to,
174    })
175}
176
/// Moves the pending `token` onto `args`, leaving `token` empty for reuse.
///
/// An empty `token` is ignored, so repeated separators add nothing.
fn push_token(args: &mut Vec<String>, token: &mut String) {
    if token.is_empty() {
        return;
    }
    let finished = std::mem::take(token);
    args.push(finished);
}
182
183fn read_next_redirect_target(
184    header: &str,
185    chars: &mut std::iter::Peekable<std::str::CharIndices<'_>>,
186) -> Option<String> {
187    while matches!(chars.peek(), Some((_, c)) if c.is_whitespace()) {
188        chars.next();
189    }
190
191    let start = chars.peek().map(|(idx, _)| *idx).unwrap_or(header.len());
192    let remainder = &header[start..];
193    let mut parsed = tokenize_word(remainder)?;
194    if parsed.0.is_empty() {
195        return None;
196    }
197    while let Some((idx, _)) = chars.peek() {
198        if *idx < start + parsed.1 {
199            chars.next();
200        } else {
201            break;
202        }
203    }
204    Some(std::mem::take(&mut parsed.0))
205}
206
207fn tokenize_word(input: &str) -> Option<(String, usize)> {
208    let mut token = String::new();
209    let mut quote = Quote::None;
210    let mut consumed = 0;
211    let mut chars = input.char_indices().peekable();
212
213    while let Some((idx, ch)) = chars.next() {
214        consumed = idx + ch.len_utf8();
215        match quote {
216            Quote::Single => {
217                if ch == '\'' {
218                    quote = Quote::None;
219                } else {
220                    token.push(ch);
221                }
222            }
223            Quote::Double => match ch {
224                '"' => quote = Quote::None,
225                '`' => return None,
226                '$' if is_unsupported_variable_start(chars.peek().map(|(_, c)| *c)) => return None,
227                '\\' => match chars.next() {
228                    Some((next_idx, escaped)) => {
229                        consumed = next_idx + escaped.len_utf8();
230                        token.push(escaped);
231                    }
232                    None => token.push('\\'),
233                },
234                _ => token.push(ch),
235            },
236            Quote::None => match ch {
237                c if c.is_whitespace() => {
238                    consumed = idx;
239                    break;
240                }
241                '\'' => quote = Quote::Single,
242                '"' => quote = Quote::Double,
243                '\\' => match chars.next() {
244                    Some((next_idx, escaped)) => {
245                        consumed = next_idx + escaped.len_utf8();
246                        token.push(escaped);
247                    }
248                    None => token.push('\\'),
249                },
250                '|' | ';' | '<' | '>' | '`' => return None,
251                '&' if matches!(chars.peek(), Some((_, '&'))) => return None,
252                '$' if is_unsupported_variable_start(chars.peek().map(|(_, c)| *c)) => return None,
253                _ => token.push(ch),
254            },
255        }
256    }
257
258    if quote == Quote::None {
259        Some((token, consumed))
260    } else {
261        None
262    }
263}
264
/// Reports whether any non-whitespace character is left in `chars`.
///
/// The iterator is advanced up to and including the first non-whitespace
/// character, or to its end when there is none.
fn has_non_space_remainder(chars: &mut std::iter::Peekable<std::str::CharIndices<'_>>) -> bool {
    let only_whitespace = chars.all(|(_, ch)| ch.is_whitespace());
    !only_whitespace
}
268
/// Returns the byte offset of the first character at or after `start` that is
/// neither a space nor a tab, or `input.len()` when only blanks remain.
///
/// Newlines are not skipped.
fn skip_horizontal_space(input: &str, start: usize) -> usize {
    let tail = &input[start..];
    let after_blanks = tail.trim_start_matches(|ch: char| matches!(ch, ' ' | '\t'));
    start + (tail.len() - after_blanks.len())
}
275
/// Reads a plain, unquoted word starting at byte offset `start`.
///
/// The word ends at the first whitespace character or at the end of `input`.
/// Returns the word and the byte offset just past it; the word is empty (and
/// the offset equals `start`) when `start` is at whitespace or at the end.
///
/// Returns `None` if the word contains a quoting character (`'`, `"`, `\`), a
/// backtick, `$`, or a shell operator character. The backslash is included
/// because `<<\EOF` is a quoted delimiter in the shell: the real delimiter is
/// `EOF`, so taking `\EOF` literally would look for the wrong terminator.
fn read_unquoted_word(input: &str, start: usize) -> Option<(String, usize)> {
    let tail = &input[start..];
    let len = tail.find(char::is_whitespace).unwrap_or(tail.len());
    let word = &tail[..len];

    let has_special = word
        .chars()
        .any(|ch| matches!(ch, '\'' | '"' | '\\' | '`' | '$' | '|' | ';' | '&' | '<' | '>'));
    if has_special {
        return None;
    }

    Some((word.to_string(), start + len))
}
291
/// Decides whether the character following a `$` begins a shell expansion.
///
/// `next` is the character right after the `$`, or `None` at end of input.
/// Returns `true` for:
/// * `(` and `{` — command/arithmetic substitution and `${...}`;
/// * `_` or an ASCII letter — a named variable;
/// * an ASCII digit — a positional parameter such as `$1`;
/// * `@`, `*`, `#`, `?`, `-`, `$`, `!` — the special parameters.
///
/// Anything else (including `None`) leaves the `$` as a literal character.
fn is_unsupported_variable_start(next: Option<char>) -> bool {
    matches!(
        next,
        Some(
            '(' | '{'
                | '_'
                | 'a'..='z'
                | 'A'..='Z'
                | '0'..='9'
                | '@'
                | '*'
                | '#'
                | '?'
                | '-'
                | '$'
                | '!'
        )
    )
}
295
/// Quoting state tracked while scanning a command character by character.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Quote {
    /// Outside any quotes.
    None,
    /// Inside `'...'`.
    Single,
    /// Inside `"..."`.
    Double,
}