1use serde::{Deserialize, Serialize};
2
/// Shell dialect used to pick the tokenizer in [`tokenize`].
///
/// With `rename_all = "lowercase"` the serde names are the lowercase variant
/// names (`posix`, `fish`, `powershell`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ShellType {
    /// POSIX-style shells; `FromStr` also maps `bash`, `zsh` and `sh` here.
    Posix,
    /// The fish shell.
    Fish,
    /// PowerShell; `FromStr` also maps `pwsh` here.
    PowerShell,
}
11
12impl std::str::FromStr for ShellType {
13 type Err = String;
14 fn from_str(s: &str) -> Result<Self, Self::Err> {
15 match s.to_lowercase().as_str() {
16 "posix" | "bash" | "zsh" | "sh" => Ok(ShellType::Posix),
17 "fish" => Ok(ShellType::Fish),
18 "powershell" | "pwsh" => Ok(ShellType::PowerShell),
19 _ => Err(format!("unknown shell type: {s}")),
20 }
21 }
22}
23
/// One command of a tokenized command line, as produced by [`tokenize`].
#[derive(Debug, Clone)]
pub struct Segment {
    /// Segment text with surrounding whitespace trimmed.
    pub raw: String,
    /// First word of the segment, with quotes and escape characters kept as written.
    pub command: Option<String>,
    /// Remaining words of the segment, in order, also kept as written.
    pub args: Vec<String>,
    /// Separator text recorded before this segment (for example `|` or `&&`);
    /// `None` when no separator preceded it.
    pub preceding_separator: Option<String>,
}
36
37pub fn tokenize(input: &str, shell: ShellType) -> Vec<Segment> {
39 match shell {
40 ShellType::Posix => tokenize_posix(input),
41 ShellType::Fish => tokenize_fish(input),
42 ShellType::PowerShell => tokenize_powershell(input),
43 }
44}
45
46fn tokenize_posix(input: &str) -> Vec<Segment> {
47 let mut segments = Vec::new();
48 let mut current = String::new();
49 let mut preceding_sep = None;
50 let chars: Vec<char> = input.chars().collect();
51 let len = chars.len();
52 let mut i = 0;
53
54 while i < len {
55 let ch = chars[i];
56
57 match ch {
58 '\\' if i + 1 < len => {
60 current.push(chars[i]);
61 current.push(chars[i + 1]);
62 i += 2;
63 continue;
64 }
65 '\'' => {
67 current.push(ch);
68 i += 1;
69 while i < len && chars[i] != '\'' {
70 current.push(chars[i]);
71 i += 1;
72 }
73 if i < len {
74 current.push(chars[i]); i += 1;
76 }
77 continue;
78 }
79 '"' => {
81 current.push(ch);
82 i += 1;
83 while i < len && chars[i] != '"' {
84 if chars[i] == '\\' && i + 1 < len {
85 current.push(chars[i]);
86 current.push(chars[i + 1]);
87 i += 2;
88 } else {
89 current.push(chars[i]);
90 i += 1;
91 }
92 }
93 if i < len {
94 current.push(chars[i]); i += 1;
96 }
97 continue;
98 }
99 '|' => {
101 if i + 1 < len && chars[i + 1] == '|' {
102 push_segment(&mut segments, ¤t, preceding_sep.take());
104 current.clear();
105 preceding_sep = Some("||".to_string());
106 i += 2;
107 continue;
108 } else if i + 1 < len && chars[i + 1] == '&' {
109 push_segment(&mut segments, ¤t, preceding_sep.take());
111 current.clear();
112 preceding_sep = Some("|&".to_string());
113 i += 2;
114 continue;
115 } else {
116 push_segment(&mut segments, ¤t, preceding_sep.take());
118 current.clear();
119 preceding_sep = Some("|".to_string());
120 i += 1;
121 continue;
122 }
123 }
124 '&' if i + 1 < len && chars[i + 1] == '&' => {
126 push_segment(&mut segments, ¤t, preceding_sep.take());
127 current.clear();
128 preceding_sep = Some("&&".to_string());
129 i += 2;
130 continue;
131 }
132 ';' => {
134 push_segment(&mut segments, ¤t, preceding_sep.take());
135 current.clear();
136 preceding_sep = Some(";".to_string());
137 i += 1;
138 continue;
139 }
140 '\n' => {
142 push_segment(&mut segments, ¤t, preceding_sep.take());
143 current.clear();
144 preceding_sep = Some("\n".to_string());
145 i += 1;
146 continue;
147 }
148 _ => {
149 current.push(ch);
150 i += 1;
151 }
152 }
153 }
154
155 push_segment(&mut segments, ¤t, preceding_sep.take());
156 segments
157}
158
/// Tokenizes fish input by delegating to [`tokenize_posix`]; no fish-specific
/// rules are applied in this function.
fn tokenize_fish(input: &str) -> Vec<Segment> {
    tokenize_posix(input)
}
166
167fn tokenize_powershell(input: &str) -> Vec<Segment> {
168 let mut segments = Vec::new();
169 let mut current = String::new();
170 let mut preceding_sep = None;
171 let chars: Vec<char> = input.chars().collect();
172 let len = chars.len();
173 let mut i = 0;
174
175 while i < len {
176 let ch = chars[i];
177
178 match ch {
179 '`' if i + 1 < len => {
181 current.push(chars[i]);
182 current.push(chars[i + 1]);
183 i += 2;
184 continue;
185 }
186 '\'' => {
188 current.push(ch);
189 i += 1;
190 while i < len && chars[i] != '\'' {
191 current.push(chars[i]);
192 i += 1;
193 }
194 if i < len {
195 current.push(chars[i]);
196 i += 1;
197 }
198 continue;
199 }
200 '"' => {
202 current.push(ch);
203 i += 1;
204 while i < len && chars[i] != '"' {
205 if chars[i] == '`' && i + 1 < len {
206 current.push(chars[i]);
207 current.push(chars[i + 1]);
208 i += 2;
209 } else {
210 current.push(chars[i]);
211 i += 1;
212 }
213 }
214 if i < len {
215 current.push(chars[i]);
216 i += 1;
217 }
218 continue;
219 }
220 '|' => {
222 push_segment(&mut segments, ¤t, preceding_sep.take());
223 current.clear();
224 preceding_sep = Some("|".to_string());
225 i += 1;
226 continue;
227 }
228 ';' => {
230 push_segment(&mut segments, ¤t, preceding_sep.take());
231 current.clear();
232 preceding_sep = Some(";".to_string());
233 i += 1;
234 continue;
235 }
236 '-' if current.ends_with(char::is_whitespace) || current.is_empty() => {
238 let remaining = &input[i..];
239 if remaining.starts_with("-and")
240 && remaining[4..]
241 .chars()
242 .next()
243 .is_none_or(|c| c.is_whitespace())
244 {
245 push_segment(&mut segments, ¤t, preceding_sep.take());
246 current.clear();
247 preceding_sep = Some("-and".to_string());
248 i += 4;
249 continue;
250 } else if remaining.starts_with("-or")
251 && remaining[3..]
252 .chars()
253 .next()
254 .is_none_or(|c| c.is_whitespace())
255 {
256 push_segment(&mut segments, ¤t, preceding_sep.take());
257 current.clear();
258 preceding_sep = Some("-or".to_string());
259 i += 3;
260 continue;
261 }
262 current.push(ch);
263 i += 1;
264 }
265 '\n' => {
266 push_segment(&mut segments, ¤t, preceding_sep.take());
267 current.clear();
268 preceding_sep = Some("\n".to_string());
269 i += 1;
270 continue;
271 }
272 _ => {
273 current.push(ch);
274 i += 1;
275 }
276 }
277 }
278
279 push_segment(&mut segments, ¤t, preceding_sep.take());
280 segments
281}
282
283fn push_segment(segments: &mut Vec<Segment>, raw: &str, preceding_sep: Option<String>) {
284 let trimmed = raw.trim();
285 if trimmed.is_empty() {
286 return;
287 }
288
289 let words = split_words(trimmed);
290 let command = words.first().cloned();
291 let args = if words.len() > 1 {
292 words[1..].to_vec()
293 } else {
294 Vec::new()
295 };
296
297 segments.push(Segment {
298 raw: trimmed.to_string(),
299 command,
300 args,
301 preceding_separator: preceding_sep,
302 });
303}
304
/// Splits `input` into words on runs of spaces and tabs.
///
/// Quoted text (single or double quotes) and backslash-escaped characters
/// stay inside the current word, and the quote and backslash characters
/// themselves are kept in the output. Inside double quotes a backslash
/// protects the next character, so `\"` does not close the string. An
/// unterminated quote extends to the end of the input.
fn split_words(input: &str) -> Vec<String> {
    let mut words = Vec::new();
    let mut word = String::new();
    let mut stream = input.chars();

    while let Some(c) = stream.next() {
        match c {
            // Blank outside quotes: finish the word in progress, if any.
            ' ' | '\t' => {
                if !word.is_empty() {
                    words.push(std::mem::take(&mut word));
                }
            }
            // Single quotes: copy verbatim through the closing quote.
            '\'' => {
                word.push(c);
                for inner in stream.by_ref() {
                    word.push(inner);
                    if inner == '\'' {
                        break;
                    }
                }
            }
            // Double quotes: copy through the closing quote, honoring backslash escapes.
            '"' => {
                word.push(c);
                while let Some(inner) = stream.next() {
                    word.push(inner);
                    match inner {
                        '"' => break,
                        '\\' => word.extend(stream.next()),
                        _ => {}
                    }
                }
            }
            // Backslash: keep it together with the character it escapes (if any).
            '\\' => {
                word.push(c);
                word.extend(stream.next());
            }
            _ => word.push(c),
        }
    }

    if !word.is_empty() {
        words.push(word);
    }

    words
}
376
// Unit tests for `tokenize` covering the POSIX and PowerShell dialects.
#[cfg(test)]
mod tests {
    use super::*;

    // A bare `|` splits two commands and is recorded on the second segment.
    #[test]
    fn test_simple_pipe() {
        let segs = tokenize("echo hello | grep world", ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[0].command.as_deref(), Some("echo"));
        assert_eq!(segs[1].command.as_deref(), Some("grep"));
        assert_eq!(segs[1].preceding_separator.as_deref(), Some("|"));
    }

    // A pipe inside double quotes is literal text, not a separator.
    #[test]
    fn test_quoted_pipe() {
        let segs = tokenize(r#"echo "hello | world" | bash"#, ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[0].raw, r#"echo "hello | world""#);
        assert_eq!(segs[1].command.as_deref(), Some("bash"));
    }

    // `&&` and `||` each start a new segment and are recorded verbatim.
    #[test]
    fn test_and_or() {
        let segs = tokenize("cmd1 && cmd2 || cmd3", ShellType::Posix);
        assert_eq!(segs.len(), 3);
        assert_eq!(segs[1].preceding_separator.as_deref(), Some("&&"));
        assert_eq!(segs[2].preceding_separator.as_deref(), Some("||"));
    }

    // `;` separates commands.
    #[test]
    fn test_semicolon() {
        let segs = tokenize("cmd1; cmd2", ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[1].preceding_separator.as_deref(), Some(";"));
    }

    // `|&` is recognized as a single two-character separator.
    #[test]
    fn test_pipe_ampersand() {
        let segs = tokenize("cmd1 |& cmd2", ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[1].preceding_separator.as_deref(), Some("|&"));
    }

    // PowerShell input is split on `|` as well.
    #[test]
    fn test_powershell_pipe() {
        let segs = tokenize("iwr url | iex", ShellType::PowerShell);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[0].command.as_deref(), Some("iwr"));
        assert_eq!(segs[1].command.as_deref(), Some("iex"));
    }

    // A backtick-escaped pipe in PowerShell does not split the command.
    #[test]
    fn test_powershell_backtick() {
        let segs = tokenize("echo `| not a pipe", ShellType::PowerShell);
        assert_eq!(segs.len(), 1);
    }

    // A pipe inside single quotes is literal; only the unquoted pipe splits.
    #[test]
    fn test_single_quotes() {
        let segs = tokenize("echo 'hello | world' | bash", ShellType::Posix);
        assert_eq!(segs.len(), 2);
    }

    // A backslash-escaped pipe is literal; only the unescaped pipe splits.
    #[test]
    fn test_backslash_escape() {
        let segs = tokenize("echo hello\\|world | bash", ShellType::Posix);
        assert_eq!(segs.len(), 2);
    }

    // Empty input yields no segments.
    #[test]
    fn test_empty_input() {
        let segs = tokenize("", ShellType::Posix);
        assert!(segs.is_empty());
    }

    // Whitespace-only input yields no segments.
    #[test]
    fn test_whitespace_only() {
        let segs = tokenize(" ", ShellType::Posix);
        assert!(segs.is_empty());
    }

    // The first word is the command; the remaining words are the arguments.
    #[test]
    fn test_args_extraction() {
        let segs = tokenize("curl -sSL https://example.com", ShellType::Posix);
        assert_eq!(segs.len(), 1);
        assert_eq!(segs[0].command.as_deref(), Some("curl"));
        assert_eq!(segs[0].args.len(), 2);
    }
}