1use serde::{Deserialize, Serialize};
2
3#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
5#[serde(rename_all = "lowercase")]
6pub enum ShellType {
7 Posix,
8 Fish,
9 PowerShell,
10 Cmd,
11}
12
13impl std::str::FromStr for ShellType {
14 type Err = String;
15 fn from_str(s: &str) -> Result<Self, Self::Err> {
16 match s.to_lowercase().as_str() {
17 "posix" | "bash" | "zsh" | "sh" => Ok(ShellType::Posix),
18 "fish" => Ok(ShellType::Fish),
19 "powershell" | "pwsh" => Ok(ShellType::PowerShell),
20 "cmd" | "cmd.exe" => Ok(ShellType::Cmd),
21 _ => Err(format!("unknown shell type: {s}")),
22 }
23 }
24}
25
/// One command of a tokenized command line, i.e. the text between two
/// separators such as `|`, `&&` or `;`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Segment {
    /// Segment text with surrounding whitespace trimmed.
    pub raw: String,
    /// First word that is not a leading `NAME=value` assignment, or `None`
    /// when the segment consists only of such assignments.
    pub command: Option<String>,
    /// Words following `command`, quotes and escapes left in place.
    pub args: Vec<String>,
    /// Separator that preceded this segment; `None` when there was none.
    pub preceding_separator: Option<String>,
    /// Byte offsets of `raw` within the tokenized input.
    pub byte_range: std::ops::Range<usize>,
}
46
47pub fn tokenize(input: &str, shell: ShellType) -> Vec<Segment> {
49 match shell {
50 ShellType::Posix => tokenize_posix(input),
51 ShellType::Fish => tokenize_fish(input),
52 ShellType::PowerShell => tokenize_powershell(input),
53 ShellType::Cmd => tokenize_cmd(input),
54 }
55}
56
57fn tokenize_posix(input: &str) -> Vec<Segment> {
58 let mut segments = Vec::new();
59 let mut current = String::new();
60 let mut preceding_sep = None;
61 let mut search_cursor: usize = 0;
62 let chars: Vec<char> = input.chars().collect();
63 let len = chars.len();
64 let mut i = 0;
65
66 while i < len {
67 let ch = chars[i];
68
69 match ch {
70 '\\' if i + 1 < len => {
72 current.push(chars[i]);
73 current.push(chars[i + 1]);
74 i += 2;
75 continue;
76 }
77 '\'' => {
79 current.push(ch);
80 i += 1;
81 while i < len && chars[i] != '\'' {
82 current.push(chars[i]);
83 i += 1;
84 }
85 if i < len {
86 current.push(chars[i]); i += 1;
88 }
89 continue;
90 }
91 '"' => {
93 current.push(ch);
94 i += 1;
95 while i < len && chars[i] != '"' {
96 if chars[i] == '\\' && i + 1 < len {
97 current.push(chars[i]);
98 current.push(chars[i + 1]);
99 i += 2;
100 } else {
101 current.push(chars[i]);
102 i += 1;
103 }
104 }
105 if i < len {
106 current.push(chars[i]); i += 1;
108 }
109 continue;
110 }
111 '|' => {
113 if i + 1 < len && chars[i + 1] == '|' {
114 push_segment(
116 &mut segments,
117 ¤t,
118 preceding_sep.take(),
119 input,
120 &mut search_cursor,
121 );
122 current.clear();
123 preceding_sep = Some("||".to_string());
124 i += 2;
125 continue;
126 } else if i + 1 < len && chars[i + 1] == '&' {
127 push_segment(
129 &mut segments,
130 ¤t,
131 preceding_sep.take(),
132 input,
133 &mut search_cursor,
134 );
135 current.clear();
136 preceding_sep = Some("|&".to_string());
137 i += 2;
138 continue;
139 } else {
140 push_segment(
142 &mut segments,
143 ¤t,
144 preceding_sep.take(),
145 input,
146 &mut search_cursor,
147 );
148 current.clear();
149 preceding_sep = Some("|".to_string());
150 i += 1;
151 continue;
152 }
153 }
154 '&' if i + 1 < len && chars[i + 1] == '&' => {
156 push_segment(
157 &mut segments,
158 ¤t,
159 preceding_sep.take(),
160 input,
161 &mut search_cursor,
162 );
163 current.clear();
164 preceding_sep = Some("&&".to_string());
165 i += 2;
166 continue;
167 }
168 ';' => {
170 push_segment(
171 &mut segments,
172 ¤t,
173 preceding_sep.take(),
174 input,
175 &mut search_cursor,
176 );
177 current.clear();
178 preceding_sep = Some(";".to_string());
179 i += 1;
180 continue;
181 }
182 '\n' => {
184 push_segment(
185 &mut segments,
186 ¤t,
187 preceding_sep.take(),
188 input,
189 &mut search_cursor,
190 );
191 current.clear();
192 preceding_sep = Some("\n".to_string());
193 i += 1;
194 continue;
195 }
196 _ => {
197 current.push(ch);
198 i += 1;
199 }
200 }
201 }
202
203 push_segment(
204 &mut segments,
205 ¤t,
206 preceding_sep.take(),
207 input,
208 &mut search_cursor,
209 );
210 segments
211}
212
213fn tokenize_fish(input: &str) -> Vec<Segment> {
214 tokenize_posix(input)
219}
220
221fn tokenize_powershell(input: &str) -> Vec<Segment> {
222 let mut segments = Vec::new();
223 let mut current = String::new();
224 let mut preceding_sep = None;
225 let mut search_cursor: usize = 0;
226 let indexed: Vec<(usize, char)> = input.char_indices().collect();
228 let len = indexed.len();
229 let mut i = 0;
230
231 while i < len {
232 let (byte_off, ch) = indexed[i];
233
234 match ch {
235 '`' if i + 1 < len => {
237 current.push(indexed[i].1);
238 current.push(indexed[i + 1].1);
239 i += 2;
240 continue;
241 }
242 '\'' => {
244 current.push(ch);
245 i += 1;
246 while i < len && indexed[i].1 != '\'' {
247 current.push(indexed[i].1);
248 i += 1;
249 }
250 if i < len {
251 current.push(indexed[i].1);
252 i += 1;
253 }
254 continue;
255 }
256 '"' => {
258 current.push(ch);
259 i += 1;
260 while i < len && indexed[i].1 != '"' {
261 if indexed[i].1 == '`' && i + 1 < len {
262 current.push(indexed[i].1);
263 current.push(indexed[i + 1].1);
264 i += 2;
265 } else {
266 current.push(indexed[i].1);
267 i += 1;
268 }
269 }
270 if i < len {
271 current.push(indexed[i].1);
272 i += 1;
273 }
274 continue;
275 }
276 '|' => {
278 push_segment(
279 &mut segments,
280 ¤t,
281 preceding_sep.take(),
282 input,
283 &mut search_cursor,
284 );
285 current.clear();
286 preceding_sep = Some("|".to_string());
287 i += 1;
288 continue;
289 }
290 ';' => {
292 push_segment(
293 &mut segments,
294 ¤t,
295 preceding_sep.take(),
296 input,
297 &mut search_cursor,
298 );
299 current.clear();
300 preceding_sep = Some(";".to_string());
301 i += 1;
302 continue;
303 }
304 '-' if current.ends_with(char::is_whitespace) || current.is_empty() => {
306 let remaining = &input[byte_off..];
307 if remaining.starts_with("-and")
308 && remaining[4..]
309 .chars()
310 .next()
311 .is_none_or(|c| c.is_whitespace())
312 {
313 push_segment(
314 &mut segments,
315 ¤t,
316 preceding_sep.take(),
317 input,
318 &mut search_cursor,
319 );
320 current.clear();
321 preceding_sep = Some("-and".to_string());
322 i += 4;
323 continue;
324 } else if remaining.starts_with("-or")
325 && remaining[3..]
326 .chars()
327 .next()
328 .is_none_or(|c| c.is_whitespace())
329 {
330 push_segment(
331 &mut segments,
332 ¤t,
333 preceding_sep.take(),
334 input,
335 &mut search_cursor,
336 );
337 current.clear();
338 preceding_sep = Some("-or".to_string());
339 i += 3;
340 continue;
341 }
342 current.push(ch);
343 i += 1;
344 }
345 '\n' => {
346 push_segment(
347 &mut segments,
348 ¤t,
349 preceding_sep.take(),
350 input,
351 &mut search_cursor,
352 );
353 current.clear();
354 preceding_sep = Some("\n".to_string());
355 i += 1;
356 continue;
357 }
358 _ => {
359 current.push(ch);
360 i += 1;
361 }
362 }
363 }
364
365 push_segment(
366 &mut segments,
367 ¤t,
368 preceding_sep.take(),
369 input,
370 &mut search_cursor,
371 );
372 segments
373}
374
375fn tokenize_cmd(input: &str) -> Vec<Segment> {
376 let mut segments = Vec::new();
377 let mut current = String::new();
378 let mut preceding_sep = None;
379 let mut search_cursor: usize = 0;
380 let chars: Vec<char> = input.chars().collect();
381 let len = chars.len();
382 let mut i = 0;
383
384 while i < len {
385 let ch = chars[i];
386 match ch {
387 '^' if i + 1 < len => {
389 current.push(chars[i]);
390 current.push(chars[i + 1]);
391 i += 2;
392 continue;
393 }
394 '"' => {
396 current.push(ch);
397 i += 1;
398 while i < len && chars[i] != '"' {
399 current.push(chars[i]);
400 i += 1;
401 }
402 if i < len {
403 current.push(chars[i]);
404 i += 1;
405 }
406 continue;
407 }
408 '|' => {
410 if i + 1 < len && chars[i + 1] == '|' {
411 push_segment(
412 &mut segments,
413 ¤t,
414 preceding_sep.take(),
415 input,
416 &mut search_cursor,
417 );
418 current.clear();
419 preceding_sep = Some("||".to_string());
420 i += 2;
421 } else {
422 push_segment(
423 &mut segments,
424 ¤t,
425 preceding_sep.take(),
426 input,
427 &mut search_cursor,
428 );
429 current.clear();
430 preceding_sep = Some("|".to_string());
431 i += 1;
432 }
433 continue;
434 }
435 '&' => {
437 if i + 1 < len && chars[i + 1] == '&' {
438 push_segment(
439 &mut segments,
440 ¤t,
441 preceding_sep.take(),
442 input,
443 &mut search_cursor,
444 );
445 current.clear();
446 preceding_sep = Some("&&".to_string());
447 i += 2;
448 } else {
449 push_segment(
450 &mut segments,
451 ¤t,
452 preceding_sep.take(),
453 input,
454 &mut search_cursor,
455 );
456 current.clear();
457 preceding_sep = Some("&".to_string());
458 i += 1;
459 }
460 continue;
461 }
462 '\n' => {
463 push_segment(
464 &mut segments,
465 ¤t,
466 preceding_sep.take(),
467 input,
468 &mut search_cursor,
469 );
470 current.clear();
471 preceding_sep = Some("\n".to_string());
472 i += 1;
473 continue;
474 }
475 _ => {
476 current.push(ch);
477 i += 1;
478 }
479 }
480 }
481 push_segment(
482 &mut segments,
483 ¤t,
484 preceding_sep.take(),
485 input,
486 &mut search_cursor,
487 );
488 segments
489}
490
491fn push_segment(
499 segments: &mut Vec<Segment>,
500 raw: &str,
501 preceding_sep: Option<String>,
502 input: &str,
503 search_cursor: &mut usize,
504) {
505 let trimmed = raw.trim();
506 if trimmed.is_empty() {
507 return;
508 }
509
510 let byte_range = match input.get(*search_cursor..).and_then(|s| s.find(trimmed)) {
515 Some(rel_pos) => {
516 let start = *search_cursor + rel_pos;
517 let end = start + trimmed.len();
518 *search_cursor = end;
519 start..end
520 }
521 None => {
522 let cursor = (*search_cursor).min(input.len());
526 cursor..cursor
527 }
528 };
529
530 let words = split_words(trimmed);
531 let first_non_assign = words.iter().position(|w| !is_env_assignment(w));
533 let (command, args) = match first_non_assign {
534 Some(idx) => {
535 let cmd = Some(words[idx].clone());
536 let args = if idx + 1 < words.len() {
537 words[idx + 1..].to_vec()
538 } else {
539 Vec::new()
540 };
541 (cmd, args)
542 }
543 None => {
544 (None, Vec::new())
546 }
547 };
548
549 segments.push(Segment {
550 raw: trimmed.to_string(),
551 command,
552 args,
553 preceding_separator: preceding_sep,
554 byte_range,
555 });
556}
557
/// Reports whether `word` has the form `NAME=value`.
///
/// Surrounding whitespace is ignored. `NAME` is everything before the first
/// `=`; it must be non-empty, start with an ASCII letter or `_`, and contain
/// only ASCII letters, digits and `_`. The value may be empty (`A=`).
/// Flags such as `--flag=value` and words without `=` return `false`.
pub fn is_env_assignment(word: &str) -> bool {
    let word = word.trim();
    let Some((name, _value)) = word.split_once('=') else {
        return false;
    };

    let mut chars = name.chars();
    match chars.next() {
        // A valid first character rules out digits, `-` and an empty name.
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
579
580pub fn leading_env_assignments(segment_raw: &str) -> Vec<(String, String)> {
583 let mut assignments = Vec::new();
584 for word in split_words(segment_raw.trim()) {
585 if !is_env_assignment(&word) {
586 break;
587 }
588 if let Some((name, value)) = word.split_once('=') {
589 assignments.push((name.to_string(), value.to_string()));
590 }
591 }
592 assignments
593}
594
595pub fn leading_env_assignment_values(segment_raw: &str) -> Vec<String> {
598 leading_env_assignments(segment_raw)
599 .into_iter()
600 .map(|(_, value)| value)
601 .collect()
602}
603
/// Splits `input` into words at unquoted, unescaped spaces and tabs.
///
/// Quotes and backslashes are kept in the returned words. Single-quoted text
/// is copied verbatim; inside double quotes a backslash protects the next
/// character; outside quotes a backslash keeps the next character (including
/// a space) in the current word. An unterminated quote runs to the end of
/// the input.
fn split_words(input: &str) -> Vec<String> {
    let chars: Vec<char> = input.chars().collect();
    let len = chars.len();

    let mut words = Vec::new();
    let mut current = String::new();
    let mut i = 0;

    while i < len {
        let ch = chars[i];
        // `i` now points at the character after `ch`.
        i += 1;

        match ch {
            // Word boundary; runs of blanks simply hit this arm repeatedly.
            ' ' | '\t' => {
                if !current.is_empty() {
                    words.push(std::mem::take(&mut current));
                }
            }
            '\'' => {
                current.push(ch);
                while i < len {
                    let c = chars[i];
                    current.push(c);
                    i += 1;
                    if c == '\'' {
                        break;
                    }
                }
            }
            '"' => {
                current.push(ch);
                while i < len {
                    let c = chars[i];
                    current.push(c);
                    i += 1;
                    if c == '"' {
                        break;
                    }
                    if c == '\\' && i < len {
                        current.push(chars[i]);
                        i += 1;
                    }
                }
            }
            // Escape outside quotes: keep the backslash and what follows.
            '\\' if i < len => {
                current.push(ch);
                current.push(chars[i]);
                i += 1;
            }
            _ => current.push(ch),
        }
    }

    if !current.is_empty() {
        words.push(current);
    }
    words
}
675
#[cfg(test)]
mod tests {
    use super::*;

    // --- POSIX separators and quoting ---

    #[test]
    fn test_simple_pipe() {
        let segs = tokenize("echo hello | grep world", ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[0].command.as_deref(), Some("echo"));
        assert_eq!(segs[1].command.as_deref(), Some("grep"));
        assert_eq!(segs[1].preceding_separator.as_deref(), Some("|"));
    }

    #[test]
    fn test_quoted_pipe() {
        let segs = tokenize(r#"echo "hello | world" | bash"#, ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[0].raw, r#"echo "hello | world""#);
        assert_eq!(segs[1].command.as_deref(), Some("bash"));
    }

    #[test]
    fn test_and_or() {
        let segs = tokenize("cmd1 && cmd2 || cmd3", ShellType::Posix);
        assert_eq!(segs.len(), 3);
        assert_eq!(segs[1].preceding_separator.as_deref(), Some("&&"));
        assert_eq!(segs[2].preceding_separator.as_deref(), Some("||"));
    }

    #[test]
    fn test_semicolon() {
        let segs = tokenize("cmd1; cmd2", ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[1].preceding_separator.as_deref(), Some(";"));
    }

    #[test]
    fn test_pipe_ampersand() {
        let segs = tokenize("cmd1 |& cmd2", ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[1].preceding_separator.as_deref(), Some("|&"));
    }

    // --- PowerShell ---

    #[test]
    fn test_powershell_pipe() {
        let segs = tokenize("iwr url | iex", ShellType::PowerShell);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[0].command.as_deref(), Some("iwr"));
        assert_eq!(segs[1].command.as_deref(), Some("iex"));
    }

    #[test]
    fn test_powershell_backtick() {
        // The backtick escapes the pipe, so no split happens.
        let segs = tokenize("echo `| not a pipe", ShellType::PowerShell);
        assert_eq!(segs.len(), 1);
    }

    // --- POSIX quoting, escapes and degenerate input ---

    #[test]
    fn test_single_quotes() {
        let segs = tokenize("echo 'hello | world' | bash", ShellType::Posix);
        assert_eq!(segs.len(), 2);
    }

    #[test]
    fn test_backslash_escape() {
        // Only the unescaped pipe splits.
        let segs = tokenize("echo hello\\|world | bash", ShellType::Posix);
        assert_eq!(segs.len(), 2);
    }

    #[test]
    fn test_empty_input() {
        let segs = tokenize("", ShellType::Posix);
        assert!(segs.is_empty());
    }

    #[test]
    fn test_whitespace_only() {
        let segs = tokenize(" ", ShellType::Posix);
        assert!(segs.is_empty());
    }

    // --- Command / argument extraction ---

    #[test]
    fn test_args_extraction() {
        let segs = tokenize("curl -sSL https://example.com", ShellType::Posix);
        assert_eq!(segs.len(), 1);
        assert_eq!(segs[0].command.as_deref(), Some("curl"));
        assert_eq!(segs[0].args.len(), 2);
    }

    #[test]
    fn test_env_prefix_skipped() {
        let segs = tokenize("TIRITH=0 curl evil.com", ShellType::Posix);
        assert_eq!(segs.len(), 1);
        assert_eq!(segs[0].command.as_deref(), Some("curl"));
        assert_eq!(segs[0].args, vec!["evil.com"]);
    }

    #[test]
    fn test_multiple_env_prefixes() {
        let segs = tokenize("FOO=bar BAZ=1 python script.py", ShellType::Posix);
        assert_eq!(segs.len(), 1);
        assert_eq!(segs[0].command.as_deref(), Some("python"));
        assert_eq!(segs[0].args, vec!["script.py"]);
    }

    #[test]
    fn test_env_only_no_command() {
        let segs = tokenize("TIRITH=0", ShellType::Posix);
        assert_eq!(segs.len(), 1);
        assert_eq!(segs[0].command, None);
        assert!(segs[0].args.is_empty());
    }

    #[test]
    fn test_is_env_assignment() {
        assert!(is_env_assignment("FOO=bar"));
        assert!(is_env_assignment("TIRITH=0"));
        assert!(is_env_assignment("PATH=/usr/bin"));
        assert!(is_env_assignment("A="));
        assert!(!is_env_assignment("-o"));
        assert!(!is_env_assignment("curl"));
        assert!(!is_env_assignment("=value"));
        assert!(!is_env_assignment("--flag=value"));
        assert!(!is_env_assignment("1FOO=bar"));
    }

    #[test]
    fn test_leading_env_assignment_values() {
        assert_eq!(
            leading_env_assignment_values("URL=https://example.com curl ok"),
            vec!["https://example.com"]
        );
        assert_eq!(
            leading_env_assignments("URL='https://example.com/a' FOO=bar curl ok"),
            vec![
                ("URL".to_string(), "'https://example.com/a'".to_string()),
                ("FOO".to_string(), "bar".to_string())
            ]
        );
        assert_eq!(
            leading_env_assignment_values("URL='https://example.com/a' FOO=bar curl ok"),
            vec!["'https://example.com/a'", "bar"]
        );
        // An assignment after the command word is not "leading".
        assert!(leading_env_assignment_values("env URL=https://example.com curl ok").is_empty());
    }

    // --- cmd.exe ---

    #[test]
    fn test_cmd_pipe() {
        let segs = tokenize("dir | findstr foo", ShellType::Cmd);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[0].command.as_deref(), Some("dir"));
        assert_eq!(segs[1].command.as_deref(), Some("findstr"));
    }

    #[test]
    fn test_cmd_ampersand_separator() {
        let segs = tokenize("dir & echo done", ShellType::Cmd);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[1].preceding_separator.as_deref(), Some("&"));
    }

    #[test]
    fn test_cmd_double_ampersand() {
        let segs = tokenize("cmd1 && cmd2", ShellType::Cmd);
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[1].preceding_separator.as_deref(), Some("&&"));
    }

    #[test]
    fn test_cmd_caret_escape() {
        // Only the un-careted pipe splits.
        let segs = tokenize("echo hello^|world | findstr x", ShellType::Cmd);
        assert_eq!(segs.len(), 2);
    }

    #[test]
    fn test_cmd_double_quotes() {
        let segs = tokenize(r#"echo "hello | world" | findstr x"#, ShellType::Cmd);
        assert_eq!(segs.len(), 2);
    }

    #[test]
    fn test_powershell_multibyte_and_operator_no_panic() {
        // Multibyte characters near `-and`; the call only has to return.
        let input = " ?]BB\u{07E7}\u{07E7} -\n-\r-and-~\0\u{c}-and-~\u{1d}";
        let _ = tokenize(input, ShellType::PowerShell);
    }

    // --- byte_range ---

    /// Asserts that slicing `input` with each segment's `byte_range` yields
    /// exactly that segment's `raw` text.
    fn assert_byte_ranges_match_raw(input: &str, segs: &[Segment]) {
        for (i, seg) in segs.iter().enumerate() {
            assert_eq!(
                &input[seg.byte_range.clone()],
                seg.raw,
                "segment {i} byte_range {:?} does not match raw {:?} in input {:?}",
                seg.byte_range,
                seg.raw,
                input
            );
        }
    }

    #[test]
    fn test_byte_range_posix_simple_pipe() {
        let input = "foo bar | baz";
        let segs = tokenize(input, ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_byte_ranges_match_raw(input, &segs);
        assert_eq!(&input[segs[0].byte_range.clone()], "foo bar");
        assert_eq!(&input[segs[1].byte_range.clone()], "baz");
    }

    #[test]
    fn test_byte_range_posix_leading_trailing_whitespace() {
        // Padding is significant: the asserted offsets require two leading
        // spaces and `baz` starting at byte 13.
        let input = "  foo bar  | baz  ";
        let segs = tokenize(input, ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_byte_ranges_match_raw(input, &segs);
        assert_eq!(segs[0].byte_range, 2..9);
        assert_eq!(segs[1].byte_range, 13..16);
    }

    #[test]
    fn test_byte_range_posix_duplicate_segments() {
        // Identical segment text must still map to distinct ranges.
        let input = "foo | foo | foo";
        let segs = tokenize(input, ShellType::Posix);
        assert_eq!(segs.len(), 3);
        assert_byte_ranges_match_raw(input, &segs);
        assert_eq!(segs[0].byte_range, 0..3);
        assert_eq!(segs[1].byte_range, 6..9);
        assert_eq!(segs[2].byte_range, 12..15);
    }

    #[test]
    fn test_byte_range_posix_with_quoted_pipe() {
        let input = r#"echo "a | b" | grep x"#;
        let segs = tokenize(input, ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_byte_ranges_match_raw(input, &segs);
        assert_eq!(segs[0].raw, r#"echo "a | b""#);
    }

    #[test]
    fn test_byte_range_posix_multibyte_content() {
        // Ranges are byte offsets, so multibyte text must still slice cleanly.
        let input = "echo 日本語 | grep x";
        let segs = tokenize(input, ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_byte_ranges_match_raw(input, &segs);
        assert_eq!(segs[0].raw, "echo 日本語");
    }

    #[test]
    fn test_byte_range_powershell_simple_pipe() {
        let input = "Get-Process | Where-Object { $_.Name -eq 'x' }";
        let segs = tokenize(input, ShellType::PowerShell);
        assert!(segs.len() >= 2);
        assert_byte_ranges_match_raw(input, &segs);
    }

    #[test]
    fn test_byte_range_cmd_pipe() {
        let input = "dir | findstr foo";
        let segs = tokenize(input, ShellType::Cmd);
        assert_eq!(segs.len(), 2);
        assert_byte_ranges_match_raw(input, &segs);
    }

    #[test]
    fn test_byte_range_fish_delegates_to_posix() {
        let input = "echo hi | cat";
        let segs = tokenize(input, ShellType::Fish);
        assert_eq!(segs.len(), 2);
        assert_byte_ranges_match_raw(input, &segs);
    }

    #[test]
    fn test_byte_range_empty_input() {
        let segs = tokenize("", ShellType::Posix);
        assert!(segs.is_empty());
    }

    #[test]
    fn test_byte_range_whitespace_only() {
        let segs = tokenize(" \t ", ShellType::Posix);
        assert!(segs.is_empty());
    }

    #[test]
    fn test_byte_range_sequence_operators() {
        let input = "ls && echo done";
        let segs = tokenize(input, ShellType::Posix);
        assert_eq!(segs.len(), 2);
        assert_byte_ranges_match_raw(input, &segs);
        assert_eq!(segs[0].byte_range, 0..2);
        assert_eq!(segs[1].byte_range, 6..15);
    }
}