/// Result of successfully parsing a single, simple shell command line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedCommand {
    /// The command words, with quoting and escaping already resolved.
    pub args: Vec<String>,
    /// Raw body of the here-document attached to the command, if there is one.
    pub heredoc: Option<String>,
    /// Target of a `>>` (append) redirection, if there is one.
    pub appends_to: Option<String>,
}
7
8pub fn parse(command: &str) -> Option<ParsedCommand> {
9 let command = command.trim();
10 if command.is_empty() {
11 return None;
12 }
13
14 let (header, heredoc) = split_heredoc(command)?;
15 let parsed = tokenize(header, heredoc)?;
16
17 if parsed.args.is_empty() {
18 return None;
19 }
20
21 Some(parsed)
22}
23
24fn split_heredoc(command: &str) -> Option<(&str, Option<String>)> {
25 let Some(op_start) = find_heredoc_operator(command)? else {
26 return Some((command, None));
27 };
28
29 let after_operator = op_start + 2;
30 let after_spaces = skip_horizontal_space(command, after_operator);
31 let (delimiter, delimiter_end) = read_unquoted_word(command, after_spaces)?;
32 if delimiter.is_empty() {
33 return None;
34 }
35
36 let line_start = match command[delimiter_end..].find('\n') {
37 Some(offset) => delimiter_end + offset + 1,
38 None => return None,
39 };
40
41 let body = &command[line_start..];
42 let terminator = format!("\n{delimiter}");
43 let (content, rest_start) = if body == delimiter {
44 ("", line_start + delimiter.len())
45 } else if let Some(stripped) = body.strip_prefix(&format!("{delimiter}\n")) {
46 ("", command.len() - stripped.len())
47 } else if let Some(offset) = body.find(&terminator) {
48 let content = &body[..offset + 1];
49 let rest_start = line_start + offset + terminator.len();
50 (content, rest_start)
51 } else {
52 return None;
53 };
54
55 let rest = &command[rest_start..];
56 let rest = rest.strip_prefix('\n').unwrap_or(rest);
57 if !rest.trim().is_empty() {
58 return None;
59 }
60
61 Some((&command[..op_start], Some(content.to_string())))
62}
63
64fn find_heredoc_operator(command: &str) -> Option<Option<usize>> {
65 let mut quote = Quote::None;
66 let mut chars = command.char_indices().peekable();
67
68 while let Some((idx, ch)) = chars.next() {
69 match quote {
70 Quote::Single => {
71 if ch == '\'' {
72 quote = Quote::None;
73 }
74 }
75 Quote::Double => match ch {
76 '"' => quote = Quote::None,
77 '`' => return None,
78 '$' if matches!(chars.peek(), Some((_, '(' | '{'))) => return None,
79 '\\' => {
80 chars.next();
81 }
82 _ => {}
83 },
84 Quote::None => match ch {
85 '\'' => quote = Quote::Single,
86 '"' => quote = Quote::Double,
87 '`' => return None,
88 '$' if is_unsupported_variable_start(chars.peek().map(|(_, c)| *c)) => return None,
89 '\\' => {
90 chars.next();
91 }
92 '<' if matches!(chars.peek(), Some((_, '<'))) => return Some(Some(idx)),
93 _ => {}
94 },
95 }
96 }
97
98 if quote == Quote::None {
99 Some(None)
100 } else {
101 None
102 }
103}
104
105fn tokenize(header: &str, heredoc: Option<String>) -> Option<ParsedCommand> {
106 let mut args = Vec::new();
107 let mut token = String::new();
108 let mut quote = Quote::None;
109 let mut appends_to = None;
110 let mut chars = header.char_indices().peekable();
111
112 while let Some((_, ch)) = chars.next() {
113 match quote {
114 Quote::Single => {
115 if ch == '\'' {
116 quote = Quote::None;
117 } else {
118 token.push(ch);
119 }
120 }
121 Quote::Double => match ch {
122 '"' => quote = Quote::None,
123 '`' => return None,
124 '$' if is_unsupported_variable_start(chars.peek().map(|(_, c)| *c)) => return None,
125 '\\' => match chars.next() {
126 Some((_, escaped)) => token.push(escaped),
127 None => token.push('\\'),
128 },
129 _ => token.push(ch),
130 },
131 Quote::None => match ch {
132 c if c.is_whitespace() => push_token(&mut args, &mut token),
133 '\'' => quote = Quote::Single,
134 '"' => quote = Quote::Double,
135 '\\' => match chars.next() {
136 Some((_, escaped)) => token.push(escaped),
137 None => token.push('\\'),
138 },
139 '`' => return None,
140 '$' if is_unsupported_variable_start(chars.peek().map(|(_, c)| *c)) => return None,
141 '|' | ';' => return None,
142 '&' if matches!(chars.peek(), Some((_, '&'))) => return None,
143 '>' if matches!(chars.peek(), Some((_, '>'))) => {
144 chars.next();
145 push_token(&mut args, &mut token);
146 if appends_to.is_some() {
147 return None;
148 }
149 appends_to = Some(read_next_redirect_target(header, &mut chars)?);
150 if has_non_space_remainder(&mut chars) {
151 return None;
152 }
153 break;
154 }
155 '>' | '<' => return None,
156 _ => token.push(ch),
157 },
158 }
159 }
160
161 if quote != Quote::None {
162 return None;
163 }
164 push_token(&mut args, &mut token);
165
166 if heredoc.is_some() && appends_to.is_none() {
167 return None;
168 }
169
170 Some(ParsedCommand {
171 args,
172 heredoc,
173 appends_to,
174 })
175}
176
/// Moves the word accumulated in `token` onto `args`, leaving `token` empty.
///
/// Does nothing when `token` is empty, so runs of separators never produce
/// empty words.
fn push_token(args: &mut Vec<String>, token: &mut String) {
    if token.is_empty() {
        return;
    }
    let completed = std::mem::take(token);
    args.push(completed);
}
182
183fn read_next_redirect_target(
184 header: &str,
185 chars: &mut std::iter::Peekable<std::str::CharIndices<'_>>,
186) -> Option<String> {
187 while matches!(chars.peek(), Some((_, c)) if c.is_whitespace()) {
188 chars.next();
189 }
190
191 let start = chars.peek().map(|(idx, _)| *idx).unwrap_or(header.len());
192 let remainder = &header[start..];
193 let mut parsed = tokenize_word(remainder)?;
194 if parsed.0.is_empty() {
195 return None;
196 }
197 while let Some((idx, _)) = chars.peek() {
198 if *idx < start + parsed.1 {
199 chars.next();
200 } else {
201 break;
202 }
203 }
204 Some(std::mem::take(&mut parsed.0))
205}
206
207fn tokenize_word(input: &str) -> Option<(String, usize)> {
208 let mut token = String::new();
209 let mut quote = Quote::None;
210 let mut consumed = 0;
211 let mut chars = input.char_indices().peekable();
212
213 while let Some((idx, ch)) = chars.next() {
214 consumed = idx + ch.len_utf8();
215 match quote {
216 Quote::Single => {
217 if ch == '\'' {
218 quote = Quote::None;
219 } else {
220 token.push(ch);
221 }
222 }
223 Quote::Double => match ch {
224 '"' => quote = Quote::None,
225 '`' => return None,
226 '$' if is_unsupported_variable_start(chars.peek().map(|(_, c)| *c)) => return None,
227 '\\' => match chars.next() {
228 Some((next_idx, escaped)) => {
229 consumed = next_idx + escaped.len_utf8();
230 token.push(escaped);
231 }
232 None => token.push('\\'),
233 },
234 _ => token.push(ch),
235 },
236 Quote::None => match ch {
237 c if c.is_whitespace() => {
238 consumed = idx;
239 break;
240 }
241 '\'' => quote = Quote::Single,
242 '"' => quote = Quote::Double,
243 '\\' => match chars.next() {
244 Some((next_idx, escaped)) => {
245 consumed = next_idx + escaped.len_utf8();
246 token.push(escaped);
247 }
248 None => token.push('\\'),
249 },
250 '|' | ';' | '<' | '>' | '`' => return None,
251 '&' if matches!(chars.peek(), Some((_, '&'))) => return None,
252 '$' if is_unsupported_variable_start(chars.peek().map(|(_, c)| *c)) => return None,
253 _ => token.push(ch),
254 },
255 }
256 }
257
258 if quote == Quote::None {
259 Some((token, consumed))
260 } else {
261 None
262 }
263}
264
/// Reports whether anything other than whitespace is left in `chars`.
///
/// The iterator is consumed up to and including the first non-whitespace
/// character, or to the end when there is none.
fn has_non_space_remainder(chars: &mut std::iter::Peekable<std::str::CharIndices<'_>>) -> bool {
    for (_, ch) in chars.by_ref() {
        if !ch.is_whitespace() {
            return true;
        }
    }
    false
}
268
/// Returns the byte offset of the first character at or after `start` that is
/// neither a space nor a tab, or `input.len()` when only spaces and tabs remain.
///
/// `start` must lie on a character boundary within `input`.
fn skip_horizontal_space(input: &str, start: usize) -> usize {
    let tail = &input[start..];
    let after_blanks = tail.trim_start_matches(|ch: char| matches!(ch, ' ' | '\t'));
    start + (tail.len() - after_blanks.len())
}
275
/// Reads a plain, unquoted word from `input` starting at byte offset `start`.
///
/// The word runs up to the next whitespace character or the end of `input`.
/// Returns the word and the byte offset just past it. The word is empty, and
/// the offset equals `start`, when `input` has whitespace or nothing at `start`.
///
/// Returns `None` when the word contains a quoting character (`'`, `"`, `\`)
/// or a shell metacharacter (`` ` ``, `$`, `|`, `;`, `&`, `<`, `>`). A backslash
/// counts as quoting: in a shell, `<<\EOF` names the delimiter `EOF`, not `\EOF`.
///
/// `start` must lie on a character boundary within `input`.
fn read_unquoted_word(input: &str, start: usize) -> Option<(String, usize)> {
    let tail = &input[start..];
    let word_len = tail.find(char::is_whitespace).unwrap_or(tail.len());
    let word = &tail[..word_len];

    let has_forbidden_char = word.contains(|ch: char| {
        matches!(
            ch,
            '\'' | '"' | '\\' | '`' | '$' | '|' | ';' | '&' | '<' | '>'
        )
    });
    if has_forbidden_char {
        return None;
    }

    Some((word.to_string(), start + word_len))
}
291
/// Reports whether `next`, the character right after a `$`, starts a shell
/// expansion. The parser does not evaluate expansions, so callers reject them.
///
/// Flagged characters:
///
/// * `(` and `{` — command/arithmetic substitution and `${…}`;
/// * `_` and ASCII letters — named variables;
/// * ASCII digits — positional parameters (`$0`, `$1`, …);
/// * `@ * # ? - $ !` — the special parameters.
///
/// Anything else, including the end of input, leaves the `$` literal.
fn is_unsupported_variable_start(next: Option<char>) -> bool {
    next.is_some_and(|ch| {
        matches!(
            ch,
            '(' | '{' | '_' | '@' | '*' | '#' | '?' | '-' | '$' | '!'
        ) || ch.is_ascii_alphanumeric()
    })
}
295
/// Quoting state of the scanner at the current character.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Quote {
    /// Outside any quotes.
    None,
    /// Inside a `'…'` section.
    Single,
    /// Inside a `"…"` section.
    Double,
}