agcodex_core/
parse_command.rs

1use crate::bash::try_parse_bash;
2use crate::bash::try_parse_word_only_commands_sequence;
3use serde::Deserialize;
4use serde::Serialize;
5use shlex::split as shlex_split;
6use shlex::try_join as shlex_try_join;
7
/// Structured summary of a single command, as returned by [`parse_command`].
///
/// Every variant carries `cmd`, the command text the summary was derived from.
/// The per-variant examples below are taken from the unit tests in this file.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub enum ParsedCommand {
    /// A file read, e.g. `cat`, `head -n`, `tail -n` or `sed -n` applied to a file.
    Read {
        cmd: String,
        /// Display name of the file; the tests expect the final path component
        /// (`webview/README.md` is reported as `README.md`).
        name: String,
    },
    /// A directory listing, e.g. `ls -la`.
    ListFiles {
        cmd: String,
        /// Listed path, when one was identified.
        path: Option<String>,
    },
    /// A search, e.g. `rg` or `grep`.
    Search {
        cmd: String,
        /// Search pattern, when one was identified (`None` for `rg --files`).
        query: Option<String>,
        /// Path the search was restricted to, when one was identified.
        path: Option<String>,
    },
    /// A formatter run, e.g. `cargo fmt`, `rustfmt`, `prettier`, `black`, `go fmt`.
    Format {
        cmd: String,
        /// Name of the formatter (e.g. `"cargo fmt"`), when recognized.
        tool: Option<String>,
        /// Files or directories passed to the formatter, when any were identified.
        targets: Option<Vec<String>>,
    },
    /// A test run, e.g. `cargo test`, `pytest`, `go test`, `jest`, `npm test`.
    Test {
        cmd: String,
    },
    /// A linter run, e.g. `cargo clippy`, `eslint`, `ruff check`, `npm run lint`.
    Lint {
        cmd: String,
        /// Name of the linter (e.g. `"eslint"`, `"npm-script:lint"`), when recognized.
        tool: Option<String>,
        /// Files or directories passed to the linter, when any were identified.
        targets: Option<Vec<String>>,
    },
    // NOTE(review): no test visible in this part of the file produces `Noop`;
    // presumably it marks a command with no meaningful effect — confirm against
    // the implementation.
    Noop {
        cmd: String,
    },
    /// Any command that was not classified as one of the variants above
    /// (e.g. `git status`, `node -v`).
    Unknown {
        cmd: String,
    },
}
43
44// Convert core's parsed command enum into the protocol's simplified type so
45// events can carry the canonical representation across process boundaries.
46impl From<ParsedCommand> for agcodex_protocol::parse_command::ParsedCommand {
47    fn from(v: ParsedCommand) -> Self {
48        use agcodex_protocol::parse_command::ParsedCommand as P;
49        match v {
50            ParsedCommand::Read { cmd, name } => P::Read { cmd, name },
51            ParsedCommand::ListFiles { cmd, path } => P::ListFiles { cmd, path },
52            ParsedCommand::Search { cmd, query, path } => P::Search { cmd, query, path },
53            ParsedCommand::Format { cmd, tool, targets } => P::Format { cmd, tool, targets },
54            ParsedCommand::Test { cmd } => P::Test { cmd },
55            ParsedCommand::Lint { cmd, tool, targets } => P::Lint { cmd, tool, targets },
56            ParsedCommand::Noop { cmd } => P::Noop { cmd },
57            ParsedCommand::Unknown { cmd } => P::Unknown { cmd },
58        }
59    }
60}
61
62fn shlex_join(tokens: &[String]) -> String {
63    shlex_try_join(tokens.iter().map(|s| s.as_str()))
64        .unwrap_or_else(|_| "<command included NUL byte>".to_string())
65}
66
67/// DO NOT REVIEW THIS CODE BY HAND
68/// This parsing code is quite complex and not easy to hand-modify.
69/// The easiest way to iterate is to add unit tests and have Codex fix the implementation.
70/// To encourage this, the tests have been put directly below this function rather than at the bottom of the
71///
72/// Parses metadata out of an arbitrary command.
73/// These commands are model driven and could include just about anything.
74/// The parsing is slightly lossy due to the ~infinite expressiveness of an arbitrary command.
75/// The goal of the parsed metadata is to be able to provide the user with a human readable gis
76/// of what it is doing.
77pub fn parse_command(command: &[String]) -> Vec<ParsedCommand> {
78    // Parse and then collapse consecutive duplicate commands to avoid redundant summaries.
79    let parsed = parse_command_impl(command);
80    let mut deduped: Vec<ParsedCommand> = Vec::with_capacity(parsed.len());
81    for cmd in parsed.into_iter() {
82        if deduped.last().is_some_and(|prev| prev == &cmd) {
83            continue;
84        }
85        deduped.push(cmd);
86    }
87    deduped
88}
89
90#[cfg(test)]
91#[allow(clippy::items_after_test_module)]
92/// Tests are at the top to encourage using TDD + Codex to fix the implementation.
93mod tests {
94    use super::*;
95
96    fn shlex_split_safe(s: &str) -> Vec<String> {
97        shlex_split(s).unwrap_or_else(|| s.split_whitespace().map(|s| s.to_string()).collect())
98    }
99
100    fn vec_str(args: &[&str]) -> Vec<String> {
101        args.iter().map(|s| (*s).to_string()).collect()
102    }
103
104    fn assert_parsed(args: &[String], expected: Vec<ParsedCommand>) {
105        let out = parse_command(args);
106        assert_eq!(out, expected);
107    }
108
109    #[test]
110    fn git_status_is_unknown() {
111        assert_parsed(
112            &vec_str(&["git", "status"]),
113            vec![ParsedCommand::Unknown {
114                cmd: "git status".to_string(),
115            }],
116        );
117    }
118
119    #[test]
120    fn handles_git_pipe_wc() {
121        let inner = "git status | wc -l";
122        assert_parsed(
123            &vec_str(&["bash", "-lc", inner]),
124            vec![ParsedCommand::Unknown {
125                cmd: "git status | wc -l".to_string(),
126            }],
127        );
128    }
129
130    #[test]
131    fn bash_lc_redirect_not_quoted() {
132        let inner = "echo foo > bar";
133        assert_parsed(
134            &vec_str(&["bash", "-lc", inner]),
135            vec![ParsedCommand::Unknown {
136                cmd: "echo foo > bar".to_string(),
137            }],
138        );
139    }
140
141    #[test]
142    fn handles_complex_bash_command_head() {
143        let inner =
144            "rg --version && node -v && pnpm -v && rg --files | wc -l && rg --files | head -n 40";
145        assert_parsed(
146            &vec_str(&["bash", "-lc", inner]),
147            vec![
148                // Expect commands in left-to-right execution order
149                ParsedCommand::Search {
150                    cmd: "rg --version".to_string(),
151                    query: None,
152                    path: None,
153                },
154                ParsedCommand::Unknown {
155                    cmd: "node -v".to_string(),
156                },
157                ParsedCommand::Unknown {
158                    cmd: "pnpm -v".to_string(),
159                },
160                ParsedCommand::Search {
161                    cmd: "rg --files".to_string(),
162                    query: None,
163                    path: None,
164                },
165                ParsedCommand::Unknown {
166                    cmd: "head -n 40".to_string(),
167                },
168            ],
169        );
170    }
171
172    #[test]
173    fn supports_searching_for_navigate_to_route()
174    -> std::result::Result<(), Box<dyn std::error::Error>> {
175        let inner = "rg -n \"navigate-to-route\" -S";
176        assert_parsed(
177            &vec_str(&["bash", "-lc", inner]),
178            vec![ParsedCommand::Search {
179                cmd: "rg -n navigate-to-route -S".to_string(),
180                query: Some("navigate-to-route".to_string()),
181                path: None,
182            }],
183        );
184        Ok(())
185    }
186
187    #[test]
188    fn handles_complex_bash_command() {
189        let inner = "rg -n \"BUG|FIXME|TODO|XXX|HACK\" -S | head -n 200";
190        assert_parsed(
191            &vec_str(&["bash", "-lc", inner]),
192            vec![
193                ParsedCommand::Search {
194                    cmd: "rg -n 'BUG|FIXME|TODO|XXX|HACK' -S".to_string(),
195                    query: Some("BUG|FIXME|TODO|XXX|HACK".to_string()),
196                    path: None,
197                },
198                ParsedCommand::Unknown {
199                    cmd: "head -n 200".to_string(),
200                },
201            ],
202        );
203    }
204
205    #[test]
206    fn supports_rg_files_with_path_and_pipe() {
207        let inner = "rg --files webview/src | sed -n";
208        assert_parsed(
209            &vec_str(&["bash", "-lc", inner]),
210            vec![ParsedCommand::Search {
211                cmd: "rg --files webview/src".to_string(),
212                query: None,
213                path: Some("webview".to_string()),
214            }],
215        );
216    }
217
218    #[test]
219    fn supports_rg_files_then_head() {
220        let inner = "rg --files | head -n 50";
221        assert_parsed(
222            &vec_str(&["bash", "-lc", inner]),
223            vec![
224                ParsedCommand::Search {
225                    cmd: "rg --files".to_string(),
226                    query: None,
227                    path: None,
228                },
229                ParsedCommand::Unknown {
230                    cmd: "head -n 50".to_string(),
231                },
232            ],
233        );
234    }
235
236    #[test]
237    fn supports_cat() {
238        let inner = "cat webview/README.md";
239        assert_parsed(
240            &vec_str(&["bash", "-lc", inner]),
241            vec![ParsedCommand::Read {
242                cmd: inner.to_string(),
243                name: "README.md".to_string(),
244            }],
245        );
246    }
247
248    #[test]
249    fn supports_ls_with_pipe() {
250        let inner = "ls -la | sed -n '1,120p'";
251        assert_parsed(
252            &vec_str(&["bash", "-lc", inner]),
253            vec![ParsedCommand::ListFiles {
254                cmd: "ls -la".to_string(),
255                path: None,
256            }],
257        );
258    }
259
260    #[test]
261    fn supports_head_n() {
262        let inner = "head -n 50 Cargo.toml";
263        assert_parsed(
264            &vec_str(&["bash", "-lc", inner]),
265            vec![ParsedCommand::Read {
266                cmd: inner.to_string(),
267                name: "Cargo.toml".to_string(),
268            }],
269        );
270    }
271
272    #[test]
273    fn supports_cat_sed_n() {
274        let inner = "cat tui/Cargo.toml | sed -n '1,200p'";
275        assert_parsed(
276            &vec_str(&["bash", "-lc", inner]),
277            vec![ParsedCommand::Read {
278                cmd: inner.to_string(),
279                name: "Cargo.toml".to_string(),
280            }],
281        );
282    }
283
284    #[test]
285    fn supports_tail_n_plus() {
286        let inner = "tail -n +522 README.md";
287        assert_parsed(
288            &vec_str(&["bash", "-lc", inner]),
289            vec![ParsedCommand::Read {
290                cmd: inner.to_string(),
291                name: "README.md".to_string(),
292            }],
293        );
294    }
295
296    #[test]
297    fn supports_tail_n_last_lines() {
298        let inner = "tail -n 30 README.md";
299        let out = parse_command(&vec_str(&["bash", "-lc", inner]));
300        assert_eq!(
301            out,
302            vec![ParsedCommand::Read {
303                cmd: inner.to_string(),
304                name: "README.md".to_string(),
305            }]
306        );
307    }
308
309    #[test]
310    fn supports_npm_run_build_is_unknown() {
311        assert_parsed(
312            &vec_str(&["npm", "run", "build"]),
313            vec![ParsedCommand::Unknown {
314                cmd: "npm run build".to_string(),
315            }],
316        );
317    }
318
319    #[test]
320    fn supports_npm_run_with_forwarded_args() {
321        assert_parsed(
322            &vec_str(&[
323                "npm",
324                "run",
325                "lint",
326                "--",
327                "--max-warnings",
328                "0",
329                "--format",
330                "json",
331            ]),
332            vec![ParsedCommand::Lint {
333                cmd: "npm run lint -- --max-warnings 0 --format json".to_string(),
334                tool: Some("npm-script:lint".to_string()),
335                targets: None,
336            }],
337        );
338    }
339
340    #[test]
341    fn supports_grep_recursive_current_dir() {
342        assert_parsed(
343            &vec_str(&["grep", "-R", "CODEX_SANDBOX_ENV_VAR", "-n", "."]),
344            vec![ParsedCommand::Search {
345                cmd: "grep -R CODEX_SANDBOX_ENV_VAR -n .".to_string(),
346                query: Some("CODEX_SANDBOX_ENV_VAR".to_string()),
347                path: Some(".".to_string()),
348            }],
349        );
350    }
351
352    #[test]
353    fn supports_grep_recursive_specific_file() {
354        assert_parsed(
355            &vec_str(&[
356                "grep",
357                "-R",
358                "CODEX_SANDBOX_ENV_VAR",
359                "-n",
360                "core/src/spawn.rs",
361            ]),
362            vec![ParsedCommand::Search {
363                cmd: "grep -R CODEX_SANDBOX_ENV_VAR -n core/src/spawn.rs".to_string(),
364                query: Some("CODEX_SANDBOX_ENV_VAR".to_string()),
365                path: Some("spawn.rs".to_string()),
366            }],
367        );
368    }
369
370    #[test]
371    fn supports_grep_query_with_slashes_not_shortened() {
372        // Query strings may contain slashes and should not be shortened to the basename.
373        // Previously, grep queries were passed through short_display_path, which is incorrect.
374        assert_parsed(
375            &shlex_split_safe("grep -R src/main.rs -n ."),
376            vec![ParsedCommand::Search {
377                cmd: "grep -R src/main.rs -n .".to_string(),
378                query: Some("src/main.rs".to_string()),
379                path: Some(".".to_string()),
380            }],
381        );
382    }
383
384    #[test]
385    fn supports_grep_weird_backtick_in_query() {
386        assert_parsed(
387            &shlex_split_safe("grep -R COD`EX_SANDBOX -n"),
388            vec![ParsedCommand::Search {
389                cmd: "grep -R 'COD`EX_SANDBOX' -n".to_string(),
390                query: Some("COD`EX_SANDBOX".to_string()),
391                path: None,
392            }],
393        );
394    }
395
396    #[test]
397    fn supports_cd_and_rg_files() {
398        assert_parsed(
399            &shlex_split_safe("cd codex-rs && rg --files"),
400            vec![
401                ParsedCommand::Unknown {
402                    cmd: "cd codex-rs".to_string(),
403                },
404                ParsedCommand::Search {
405                    cmd: "rg --files".to_string(),
406                    query: None,
407                    path: None,
408                },
409            ],
410        );
411    }
412
413    #[test]
414    fn echo_then_cargo_test_sequence() {
415        assert_parsed(
416            &shlex_split_safe("echo Running tests... && cargo test --all-features --quiet"),
417            vec![ParsedCommand::Test {
418                cmd: "cargo test --all-features --quiet".to_string(),
419            }],
420        );
421    }
422
423    #[test]
424    fn supports_cargo_fmt_and_test_with_config() {
425        assert_parsed(
426            &shlex_split_safe(
427                "cargo fmt -- --config imports_granularity=Item && cargo test -p core --all-features",
428            ),
429            vec![
430                ParsedCommand::Format {
431                    cmd: "cargo fmt -- --config 'imports_granularity=Item'".to_string(),
432                    tool: Some("cargo fmt".to_string()),
433                    targets: None,
434                },
435                ParsedCommand::Test {
436                    cmd: "cargo test -p core --all-features".to_string(),
437                },
438            ],
439        );
440    }
441
442    #[test]
443    fn recognizes_rustfmt_and_clippy() {
444        assert_parsed(
445            &shlex_split_safe("rustfmt src/main.rs"),
446            vec![ParsedCommand::Format {
447                cmd: "rustfmt src/main.rs".to_string(),
448                tool: Some("rustfmt".to_string()),
449                targets: Some(vec!["src/main.rs".to_string()]),
450            }],
451        );
452
453        assert_parsed(
454            &shlex_split_safe("cargo clippy -p core --all-features -- -D warnings"),
455            vec![ParsedCommand::Lint {
456                cmd: "cargo clippy -p core --all-features -- -D warnings".to_string(),
457                tool: Some("cargo clippy".to_string()),
458                targets: None,
459            }],
460        );
461    }
462
463    #[test]
464    fn recognizes_pytest_go_and_tools() {
465        assert_parsed(
466            &shlex_split_safe(
467                "pytest -k 'Login and not slow' tests/test_login.py::TestLogin::test_ok",
468            ),
469            vec![ParsedCommand::Test {
470                cmd: "pytest -k 'Login and not slow' tests/test_login.py::TestLogin::test_ok"
471                    .to_string(),
472            }],
473        );
474
475        assert_parsed(
476            &shlex_split_safe("go fmt ./..."),
477            vec![ParsedCommand::Format {
478                cmd: "go fmt ./...".to_string(),
479                tool: Some("go fmt".to_string()),
480                targets: Some(vec!["./...".to_string()]),
481            }],
482        );
483
484        assert_parsed(
485            &shlex_split_safe("go test ./pkg -run TestThing"),
486            vec![ParsedCommand::Test {
487                cmd: "go test ./pkg -run TestThing".to_string(),
488            }],
489        );
490
491        assert_parsed(
492            &shlex_split_safe("eslint . --max-warnings 0"),
493            vec![ParsedCommand::Lint {
494                cmd: "eslint . --max-warnings 0".to_string(),
495                tool: Some("eslint".to_string()),
496                targets: Some(vec![".".to_string()]),
497            }],
498        );
499
500        assert_parsed(
501            &shlex_split_safe("prettier -w ."),
502            vec![ParsedCommand::Format {
503                cmd: "prettier -w .".to_string(),
504                tool: Some("prettier".to_string()),
505                targets: Some(vec![".".to_string()]),
506            }],
507        );
508    }
509
510    #[test]
511    fn recognizes_jest_and_vitest_filters() {
512        assert_parsed(
513            &shlex_split_safe("jest -t 'should work' src/foo.test.ts"),
514            vec![ParsedCommand::Test {
515                cmd: "jest -t 'should work' src/foo.test.ts".to_string(),
516            }],
517        );
518
519        assert_parsed(
520            &shlex_split_safe("vitest -t 'runs' src/foo.test.tsx"),
521            vec![ParsedCommand::Test {
522                cmd: "vitest -t runs src/foo.test.tsx".to_string(),
523            }],
524        );
525    }
526
527    #[test]
528    fn recognizes_npx_and_scripts() {
529        assert_parsed(
530            &shlex_split_safe("npx eslint src"),
531            vec![ParsedCommand::Lint {
532                cmd: "npx eslint src".to_string(),
533                tool: Some("eslint".to_string()),
534                targets: Some(vec!["src".to_string()]),
535            }],
536        );
537
538        assert_parsed(
539            &shlex_split_safe("npx prettier -c ."),
540            vec![ParsedCommand::Format {
541                cmd: "npx prettier -c .".to_string(),
542                tool: Some("prettier".to_string()),
543                targets: Some(vec![".".to_string()]),
544            }],
545        );
546
547        assert_parsed(
548            &shlex_split_safe("pnpm run lint -- --max-warnings 0"),
549            vec![ParsedCommand::Lint {
550                cmd: "pnpm run lint -- --max-warnings 0".to_string(),
551                tool: Some("pnpm-script:lint".to_string()),
552                targets: None,
553            }],
554        );
555
556        assert_parsed(
557            &shlex_split_safe("npm test"),
558            vec![ParsedCommand::Test {
559                cmd: "npm test".to_string(),
560            }],
561        );
562
563        assert_parsed(
564            &shlex_split_safe("yarn test"),
565            vec![ParsedCommand::Test {
566                cmd: "yarn test".to_string(),
567            }],
568        );
569    }
570
571    // ---- is_small_formatting_command unit tests ----
572    #[test]
573    fn small_formatting_always_true_commands() {
574        for cmd in [
575            "wc", "tr", "cut", "sort", "uniq", "xargs", "tee", "column", "awk",
576        ] {
577            assert!(is_small_formatting_command(&shlex_split_safe(cmd)));
578            assert!(is_small_formatting_command(&shlex_split_safe(&format!(
579                "{cmd} -x"
580            ))));
581        }
582    }
583
584    #[test]
585    fn head_behavior() {
586        // No args -> small formatting
587        assert!(is_small_formatting_command(&vec_str(&["head"])));
588        // Numeric count only -> not considered small formatting by implementation
589        assert!(!is_small_formatting_command(&shlex_split_safe(
590            "head -n 40"
591        )));
592        // With explicit file -> not small formatting
593        assert!(!is_small_formatting_command(&shlex_split_safe(
594            "head -n 40 file.txt"
595        )));
596        // File only (no count) -> treated as small formatting by implementation
597        assert!(is_small_formatting_command(&vec_str(&["head", "file.txt"])));
598    }
599
600    #[test]
601    fn tail_behavior() {
602        // No args -> small formatting
603        assert!(is_small_formatting_command(&vec_str(&["tail"])));
604        // Numeric with plus offset -> not small formatting
605        assert!(!is_small_formatting_command(&shlex_split_safe(
606            "tail -n +10"
607        )));
608        assert!(!is_small_formatting_command(&shlex_split_safe(
609            "tail -n +10 file.txt"
610        )));
611        // Numeric count
612        assert!(!is_small_formatting_command(&shlex_split_safe(
613            "tail -n 30"
614        )));
615        assert!(!is_small_formatting_command(&shlex_split_safe(
616            "tail -n 30 file.txt"
617        )));
618        // File only -> small formatting by implementation
619        assert!(is_small_formatting_command(&vec_str(&["tail", "file.txt"])));
620    }
621
622    #[test]
623    fn sed_behavior() {
624        // Plain sed -> small formatting
625        assert!(is_small_formatting_command(&vec_str(&["sed"])));
626        // sed -n <range> (no file) -> still small formatting
627        assert!(is_small_formatting_command(&vec_str(&["sed", "-n", "10p"])));
628        // Valid range with file -> not small formatting
629        assert!(!is_small_formatting_command(&shlex_split_safe(
630            "sed -n 10p file.txt"
631        )));
632        assert!(!is_small_formatting_command(&shlex_split_safe(
633            "sed -n 1,200p file.txt"
634        )));
635        // Invalid ranges with file -> small formatting
636        assert!(is_small_formatting_command(&shlex_split_safe(
637            "sed -n p file.txt"
638        )));
639        assert!(is_small_formatting_command(&shlex_split_safe(
640            "sed -n +10p file.txt"
641        )));
642    }
643
644    #[test]
645    fn empty_tokens_is_not_small() {
646        let empty: Vec<String> = Vec::new();
647        assert!(!is_small_formatting_command(&empty));
648    }
649
650    #[test]
651    fn supports_nl_then_sed_reading() {
652        let inner = "nl -ba core/src/parse_command.rs | sed -n '1200,1720p'";
653        assert_parsed(
654            &vec_str(&["bash", "-lc", inner]),
655            vec![ParsedCommand::Read {
656                cmd: inner.to_string(),
657                name: "parse_command.rs".to_string(),
658            }],
659        );
660    }
661
662    #[test]
663    fn supports_sed_n() {
664        let inner = "sed -n '2000,2200p' tui/src/history_cell.rs";
665        assert_parsed(
666            &vec_str(&["bash", "-lc", inner]),
667            vec![ParsedCommand::Read {
668                cmd: inner.to_string(),
669                name: "history_cell.rs".to_string(),
670            }],
671        );
672    }
673
674    #[test]
675    fn filters_out_printf() {
676        let inner =
677            r#"printf "\n===== ansi-escape/Cargo.toml =====\n"; cat -- ansi-escape/Cargo.toml"#;
678        assert_parsed(
679            &vec_str(&["bash", "-lc", inner]),
680            vec![ParsedCommand::Read {
681                cmd: "cat -- ansi-escape/Cargo.toml".to_string(),
682                name: "Cargo.toml".to_string(),
683            }],
684        );
685    }
686
687    #[test]
688    fn drops_yes_in_pipelines() {
689        // Inside bash -lc, `yes | rg --files` should focus on the primary command.
690        let inner = "yes | rg --files";
691        assert_parsed(
692            &vec_str(&["bash", "-lc", inner]),
693            vec![ParsedCommand::Search {
694                cmd: "rg --files".to_string(),
695                query: None,
696                path: None,
697            }],
698        );
699    }
700
    #[test]
    fn supports_sed_n_then_nl_as_search() {
        // Ensure `sed -n '<range>' <file> | nl -ba` is summarized as a read of that file,
        // with the trailing `nl -ba` stage omitted from the summary.
        // NOTE(review): the test name says `as_search`, but the expected value below is
        // `ParsedCommand::Read`; the name appears to be out of date.
        let args = shlex_split_safe(
            "sed -n '260,640p' exec/src/event_processor_with_human_output.rs | nl -ba",
        );
        assert_parsed(
            &args,
            vec![ParsedCommand::Read {
                cmd: "sed -n '260,640p' exec/src/event_processor_with_human_output.rs".to_string(),
                name: "event_processor_with_human_output.rs".to_string(),
            }],
        );
    }
715
716    #[test]
717    fn preserves_rg_with_spaces() {
718        assert_parsed(
719            &shlex_split_safe("yes | rg -n 'foo bar' -S"),
720            vec![ParsedCommand::Search {
721                cmd: "rg -n 'foo bar' -S".to_string(),
722                query: Some("foo bar".to_string()),
723                path: None,
724            }],
725        );
726    }
727
728    #[test]
729    fn ls_with_glob() {
730        assert_parsed(
731            &shlex_split_safe("ls -I '*.test.js'"),
732            vec![ParsedCommand::ListFiles {
733                cmd: "ls -I '*.test.js'".to_string(),
734                path: None,
735            }],
736        );
737    }
738
739    #[test]
740    fn trim_on_semicolon() {
741        assert_parsed(
742            &shlex_split_safe("rg foo ; echo done"),
743            vec![
744                ParsedCommand::Search {
745                    cmd: "rg foo".to_string(),
746                    query: Some("foo".to_string()),
747                    path: None,
748                },
749                ParsedCommand::Unknown {
750                    cmd: "echo done".to_string(),
751                },
752            ],
753        );
754    }
755
756    #[test]
757    fn split_on_or_connector() {
758        // Ensure we split commands on the logical OR operator as well.
759        assert_parsed(
760            &shlex_split_safe("rg foo || echo done"),
761            vec![
762                ParsedCommand::Search {
763                    cmd: "rg foo".to_string(),
764                    query: Some("foo".to_string()),
765                    path: None,
766                },
767                ParsedCommand::Unknown {
768                    cmd: "echo done".to_string(),
769                },
770            ],
771        );
772    }
773
774    #[test]
775    fn strips_true_in_sequence() {
776        // `true` should be dropped from parsed sequences
777        assert_parsed(
778            &shlex_split_safe("true && rg --files"),
779            vec![ParsedCommand::Search {
780                cmd: "rg --files".to_string(),
781                query: None,
782                path: None,
783            }],
784        );
785
786        assert_parsed(
787            &shlex_split_safe("rg --files && true"),
788            vec![ParsedCommand::Search {
789                cmd: "rg --files".to_string(),
790                query: None,
791                path: None,
792            }],
793        );
794    }
795
796    #[test]
797    fn strips_true_inside_bash_lc() {
798        let inner = "true && rg --files";
799        assert_parsed(
800            &vec_str(&["bash", "-lc", inner]),
801            vec![ParsedCommand::Search {
802                cmd: "rg --files".to_string(),
803                query: None,
804                path: None,
805            }],
806        );
807
808        let inner2 = "rg --files || true";
809        assert_parsed(
810            &vec_str(&["bash", "-lc", inner2]),
811            vec![ParsedCommand::Search {
812                cmd: "rg --files".to_string(),
813                query: None,
814                path: None,
815            }],
816        );
817    }
818
819    #[test]
820    fn shorten_path_on_windows() {
821        assert_parsed(
822            &shlex_split_safe(r#"cat "pkg\src\main.rs""#),
823            vec![ParsedCommand::Read {
824                cmd: r#"cat "pkg\\src\\main.rs""#.to_string(),
825                name: "main.rs".to_string(),
826            }],
827        );
828    }
829
830    #[test]
831    fn head_with_no_space() {
832        assert_parsed(
833            &shlex_split_safe("bash -lc 'head -n50 Cargo.toml'"),
834            vec![ParsedCommand::Read {
835                cmd: "head -n50 Cargo.toml".to_string(),
836                name: "Cargo.toml".to_string(),
837            }],
838        );
839    }
840
    #[test]
    fn bash_dash_c_pipeline_parsing() {
        // A bare `rg --files | head -n 1` pipeline is split into its two stages.
        // NOTE(review): despite the test name, no `bash -c` wrapper is passed here; the
        // pipeline is tokenized directly and handed to `parse_command`. Confirm whether
        // a `["bash", "-c", inner]` invocation was the intended input.
        let inner = "rg --files | head -n 1";
        assert_parsed(
            &shlex_split_safe(inner),
            vec![
                ParsedCommand::Search {
                    cmd: "rg --files".to_string(),
                    query: None,
                    path: None,
                },
                ParsedCommand::Unknown {
                    cmd: "head -n 1".to_string(),
                },
            ],
        );
    }
859
860    #[test]
861    fn tail_with_no_space() {
862        assert_parsed(
863            &shlex_split_safe("bash -lc 'tail -n+10 README.md'"),
864            vec![ParsedCommand::Read {
865                cmd: "tail -n+10 README.md".to_string(),
866                name: "README.md".to_string(),
867            }],
868        );
869    }
870
871    #[test]
872    fn pnpm_test_is_parsed_as_test() {
873        assert_parsed(
874            &shlex_split_safe("pnpm test"),
875            vec![ParsedCommand::Test {
876                cmd: "pnpm test".to_string(),
877            }],
878        );
879    }
880
881    #[test]
882    fn pnpm_exec_vitest_is_unknown() {
883        // From commands_combined: cd codex-cli && pnpm exec vitest run tests/... --threads=false --passWithNoTests
884        let inner = "cd codex-cli && pnpm exec vitest run tests/file-tag-utils.test.ts --threads=false --passWithNoTests";
885        assert_parsed(
886            &shlex_split_safe(inner),
887            vec![
888                ParsedCommand::Unknown {
889                    cmd: "cd codex-cli".to_string(),
890                },
891                ParsedCommand::Unknown {
892                    cmd: "pnpm exec vitest run tests/file-tag-utils.test.ts '--threads=false' --passWithNoTests".to_string(),
893                },
894            ],
895        );
896    }
897
898    #[test]
899    fn cargo_test_with_crate() {
900        assert_parsed(
901            &shlex_split_safe("cargo test -p codex-core parse_command::"),
902            vec![ParsedCommand::Test {
903                cmd: "cargo test -p codex-core parse_command::".to_string(),
904            }],
905        );
906    }
907
908    #[test]
909    fn cargo_test_with_crate_2() {
910        assert_parsed(
911            &shlex_split_safe(
912                "cd core && cargo test -q parse_command::tests::bash_dash_c_pipeline_parsing parse_command::tests::fd_file_finder_variants",
913            ),
914            vec![ParsedCommand::Test {
915                cmd: "cargo test -q parse_command::tests::bash_dash_c_pipeline_parsing parse_command::tests::fd_file_finder_variants".to_string(),
916            }],
917        );
918    }
919
920    #[test]
921    fn cargo_test_with_crate_3() {
922        assert_parsed(
923            &shlex_split_safe("cd core && cargo test -q parse_command::tests"),
924            vec![ParsedCommand::Test {
925                cmd: "cargo test -q parse_command::tests".to_string(),
926            }],
927        );
928    }
929
930    #[test]
931    fn cargo_test_with_crate_4() {
932        assert_parsed(
933            &shlex_split_safe("cd core && cargo test --all-features parse_command -- --nocapture"),
934            vec![ParsedCommand::Test {
935                cmd: "cargo test --all-features parse_command -- --nocapture".to_string(),
936            }],
937        );
938    }
939
940    // Additional coverage for other common tools/frameworks
941    #[test]
942    fn recognizes_black_and_ruff() {
943        // black formats Python code
944        assert_parsed(
945            &shlex_split_safe("black src"),
946            vec![ParsedCommand::Format {
947                cmd: "black src".to_string(),
948                tool: Some("black".to_string()),
949                targets: Some(vec!["src".to_string()]),
950            }],
951        );
952
953        // ruff check is a linter; ensure we collect targets
954        assert_parsed(
955            &shlex_split_safe("ruff check ."),
956            vec![ParsedCommand::Lint {
957                cmd: "ruff check .".to_string(),
958                tool: Some("ruff".to_string()),
959                targets: Some(vec![".".to_string()]),
960            }],
961        );
962
963        // ruff format is a formatter
964        assert_parsed(
965            &shlex_split_safe("ruff format pkg/"),
966            vec![ParsedCommand::Format {
967                cmd: "ruff format pkg/".to_string(),
968                tool: Some("ruff".to_string()),
969                targets: Some(vec!["pkg/".to_string()]),
970            }],
971        );
972    }
973
974    #[test]
975    fn recognizes_pnpm_monorepo_test_and_npm_format_script() {
976        // pnpm -r test in a monorepo should still parse as a test action
977        assert_parsed(
978            &shlex_split_safe("pnpm -r test"),
979            vec![ParsedCommand::Test {
980                cmd: "pnpm -r test".to_string(),
981            }],
982        );
983
984        // npm run format should be recognized as a format action
985        assert_parsed(
986            &shlex_split_safe("npm run format -- -w ."),
987            vec![ParsedCommand::Format {
988                cmd: "npm run format -- -w .".to_string(),
989                tool: Some("npm-script:format".to_string()),
990                targets: None,
991            }],
992        );
993    }
994
995    #[test]
996    fn yarn_test_is_parsed_as_test() {
997        assert_parsed(
998            &shlex_split_safe("yarn test"),
999            vec![ParsedCommand::Test {
1000                cmd: "yarn test".to_string(),
1001            }],
1002        );
1003    }
1004
1005    #[test]
1006    fn pytest_file_only_and_go_run_regex() {
1007        // pytest invoked with a file path should be captured as a filter
1008        assert_parsed(
1009            &shlex_split_safe("pytest tests/test_example.py"),
1010            vec![ParsedCommand::Test {
1011                cmd: "pytest tests/test_example.py".to_string(),
1012            }],
1013        );
1014
1015        // go test with -run regex should capture the filter
1016        assert_parsed(
1017            &shlex_split_safe("go test ./... -run '^TestFoo$'"),
1018            vec![ParsedCommand::Test {
1019                cmd: "go test ./... -run '^TestFoo$'".to_string(),
1020            }],
1021        );
1022    }
1023
1024    #[test]
1025    fn grep_with_query_and_path() {
1026        assert_parsed(
1027            &shlex_split_safe("grep -R TODO src"),
1028            vec![ParsedCommand::Search {
1029                cmd: "grep -R TODO src".to_string(),
1030                query: Some("TODO".to_string()),
1031                path: Some("src".to_string()),
1032            }],
1033        );
1034    }
1035
1036    #[test]
1037    fn rg_with_equals_style_flags() {
1038        assert_parsed(
1039            &shlex_split_safe("rg --colors=never -n foo src"),
1040            vec![ParsedCommand::Search {
1041                cmd: "rg '--colors=never' -n foo src".to_string(),
1042                query: Some("foo".to_string()),
1043                path: Some("src".to_string()),
1044            }],
1045        );
1046    }
1047
1048    #[test]
1049    fn cat_with_double_dash_and_sed_ranges() {
1050        // cat -- <file> should be treated as a read of that file
1051        assert_parsed(
1052            &shlex_split_safe("cat -- ./-strange-file-name"),
1053            vec![ParsedCommand::Read {
1054                cmd: "cat -- ./-strange-file-name".to_string(),
1055                name: "-strange-file-name".to_string(),
1056            }],
1057        );
1058
1059        // sed -n <range> <file> should be treated as a read of <file>
1060        assert_parsed(
1061            &shlex_split_safe("sed -n '12,20p' Cargo.toml"),
1062            vec![ParsedCommand::Read {
1063                cmd: "sed -n '12,20p' Cargo.toml".to_string(),
1064                name: "Cargo.toml".to_string(),
1065            }],
1066        );
1067    }
1068
1069    #[test]
1070    fn drop_trailing_nl_in_pipeline() {
1071        // When an `nl` stage has only flags, it should be dropped from the summary
1072        assert_parsed(
1073            &shlex_split_safe("rg --files | nl -ba"),
1074            vec![ParsedCommand::Search {
1075                cmd: "rg --files".to_string(),
1076                query: None,
1077                path: None,
1078            }],
1079        );
1080    }
1081
1082    #[test]
1083    fn ls_with_time_style_and_path() {
1084        assert_parsed(
1085            &shlex_split_safe("ls --time-style=long-iso ./dist"),
1086            vec![ParsedCommand::ListFiles {
1087                cmd: "ls '--time-style=long-iso' ./dist".to_string(),
1088                // short_display_path drops "dist" and shows "." as the last useful segment
1089                path: Some(".".to_string()),
1090            }],
1091        );
1092    }
1093
1094    #[test]
1095    fn eslint_with_config_path_and_target() {
1096        assert_parsed(
1097            &shlex_split_safe("eslint -c .eslintrc.json src"),
1098            vec![ParsedCommand::Lint {
1099                cmd: "eslint -c .eslintrc.json src".to_string(),
1100                tool: Some("eslint".to_string()),
1101                targets: Some(vec!["src".to_string()]),
1102            }],
1103        );
1104    }
1105
1106    #[test]
1107    fn npx_eslint_with_config_path_and_target() {
1108        assert_parsed(
1109            &shlex_split_safe("npx eslint -c .eslintrc src"),
1110            vec![ParsedCommand::Lint {
1111                cmd: "npx eslint -c .eslintrc src".to_string(),
1112                tool: Some("eslint".to_string()),
1113                targets: Some(vec!["src".to_string()]),
1114            }],
1115        );
1116    }
1117
1118    #[test]
1119    fn fd_file_finder_variants() {
1120        assert_parsed(
1121            &shlex_split_safe("fd -t f src/"),
1122            vec![ParsedCommand::Search {
1123                cmd: "fd -t f src/".to_string(),
1124                query: None,
1125                path: Some("src".to_string()),
1126            }],
1127        );
1128
1129        // fd with query and path should capture both
1130        assert_parsed(
1131            &shlex_split_safe("fd main src"),
1132            vec![ParsedCommand::Search {
1133                cmd: "fd main src".to_string(),
1134                query: Some("main".to_string()),
1135                path: Some("src".to_string()),
1136            }],
1137        );
1138    }
1139
1140    #[test]
1141    fn find_basic_name_filter() {
1142        assert_parsed(
1143            &shlex_split_safe("find . -name '*.rs'"),
1144            vec![ParsedCommand::Search {
1145                cmd: "find . -name '*.rs'".to_string(),
1146                query: Some("*.rs".to_string()),
1147                path: Some(".".to_string()),
1148            }],
1149        );
1150    }
1151
1152    #[test]
1153    fn find_type_only_path() {
1154        assert_parsed(
1155            &shlex_split_safe("find src -type f"),
1156            vec![ParsedCommand::Search {
1157                cmd: "find src -type f".to_string(),
1158                query: None,
1159                path: Some("src".to_string()),
1160            }],
1161        );
1162    }
1163}
1164
1165pub fn parse_command_impl(command: &[String]) -> Vec<ParsedCommand> {
1166    if let Some(commands) = parse_bash_lc_commands(command) {
1167        return commands;
1168    }
1169
1170    let normalized = normalize_tokens(command);
1171
1172    let parts = if contains_connectors(&normalized) {
1173        split_on_connectors(&normalized)
1174    } else {
1175        vec![normalized.clone()]
1176    };
1177
1178    // Preserve left-to-right execution order for all commands, including bash -c/-lc
1179    // so summaries reflect the order they will run.
1180
1181    // Map each pipeline segment to its parsed summary.
1182    let mut commands: Vec<ParsedCommand> = parts
1183        .iter()
1184        .map(|tokens| summarize_main_tokens(tokens))
1185        .collect();
1186
1187    while let Some(next) = simplify_once(&commands) {
1188        commands = next;
1189    }
1190
1191    commands
1192}
1193
1194fn simplify_once(commands: &[ParsedCommand]) -> Option<Vec<ParsedCommand>> {
1195    if commands.len() <= 1 {
1196        return None;
1197    }
1198
1199    // echo ... && ...rest => ...rest
1200    if let ParsedCommand::Unknown { cmd } = &commands[0]
1201        && shlex_split(cmd).is_some_and(|t| t.first().map(|s| s.as_str()) == Some("echo"))
1202    {
1203        return Some(commands[1..].to_vec());
1204    }
1205
1206    // cd foo && [any Test command] => [any Test command]
1207    if let Some(idx) = commands.iter().position(|pc| match pc {
1208        ParsedCommand::Unknown { cmd } => {
1209            shlex_split(cmd).is_some_and(|t| t.first().map(|s| s.as_str()) == Some("cd"))
1210        }
1211        _ => false,
1212    }) && commands
1213        .iter()
1214        .skip(idx + 1)
1215        .any(|pc| matches!(pc, ParsedCommand::Test { .. }))
1216    {
1217        let mut out = Vec::with_capacity(commands.len() - 1);
1218        out.extend_from_slice(&commands[..idx]);
1219        out.extend_from_slice(&commands[idx + 1..]);
1220        return Some(out);
1221    }
1222
1223    // cmd || true => cmd
1224    if let Some(idx) = commands.iter().position(|pc| match pc {
1225        ParsedCommand::Noop { cmd } => cmd == "true",
1226        _ => false,
1227    }) {
1228        let mut out = Vec::with_capacity(commands.len() - 1);
1229        out.extend_from_slice(&commands[..idx]);
1230        out.extend_from_slice(&commands[idx + 1..]);
1231        return Some(out);
1232    }
1233
1234    // nl -[any_flags] && ...rest => ...rest
1235    if let Some(idx) = commands.iter().position(|pc| match pc {
1236        ParsedCommand::Unknown { cmd } => {
1237            if let Some(tokens) = shlex_split(cmd) {
1238                tokens.first().is_some_and(|s| s.as_str() == "nl")
1239                    && tokens.iter().skip(1).all(|t| t.starts_with('-'))
1240            } else {
1241                false
1242            }
1243        }
1244        _ => false,
1245    }) {
1246        let mut out = Vec::with_capacity(commands.len() - 1);
1247        out.extend_from_slice(&commands[..idx]);
1248        out.extend_from_slice(&commands[idx + 1..]);
1249        return Some(out);
1250    }
1251
1252    None
1253}
1254
/// Check that `arg` is a `sed -n` print address: `<N>p` or `<N>,<M>p`, where
/// `N` and `M` are non-empty runs of ASCII digits.
///
/// Returns `false` for `None`, for a missing `p` suffix, for an empty number
/// and for more than one comma.
fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
    fn is_line_number(text: &str) -> bool {
        !text.is_empty() && text.chars().all(|c| c.is_ascii_digit())
    }

    let Some(address) = arg.and_then(|s| s.strip_suffix('p')) else {
        return false;
    };
    match address.split_once(',') {
        // A second comma ends up in `last`, which then fails the digit check.
        Some((first, last)) => is_line_number(first) && is_line_number(last),
        None => is_line_number(address),
    }
}
1277
1278/// Normalize a command by:
1279/// - Removing `yes`/`no`/`bash -c`/`bash -lc` prefixes.
1280/// - Splitting on `|` and `&&`/`||`/`;
1281fn normalize_tokens(cmd: &[String]) -> Vec<String> {
1282    match cmd {
1283        [first, pipe, rest @ ..] if (first == "yes" || first == "y") && pipe == "|" => {
1284            // Do not re-shlex already-tokenized input; just drop the prefix.
1285            rest.to_vec()
1286        }
1287        [first, pipe, rest @ ..] if (first == "no" || first == "n") && pipe == "|" => {
1288            // Do not re-shlex already-tokenized input; just drop the prefix.
1289            rest.to_vec()
1290        }
1291        [bash, flag, script] if bash == "bash" && (flag == "-c" || flag == "-lc") => {
1292            shlex_split(script)
1293                .unwrap_or_else(|| vec!["bash".to_string(), flag.clone(), script.clone()])
1294        }
1295        _ => cmd.to_vec(),
1296    }
1297}
1298
/// Report whether any token is a shell connector (`&&`, `||`, `|` or `;`).
fn contains_connectors(tokens: &[String]) -> bool {
    tokens
        .iter()
        .map(String::as_str)
        .any(|token| matches!(token, "&&" | "||" | "|" | ";"))
}
1304
/// Split `tokens` into the command segments found between shell connectors
/// (`&&`, `||`, `|`, `;`).
///
/// The connectors themselves are discarded, and so are empty segments, such
/// as those produced by leading, trailing or consecutive connectors.
fn split_on_connectors(tokens: &[String]) -> Vec<Vec<String>> {
    tokens
        .split(|token| matches!(token.as_str(), "&&" | "||" | "|" | ";"))
        .filter(|segment| !segment.is_empty())
        .map(|segment| segment.to_vec())
        .collect()
}
1322
/// Return the tokens that precede the first shell connector (`|`, `&&`, `||`,
/// `;`), or every token when there is no connector.
fn trim_at_connector(tokens: &[String]) -> Vec<String> {
    tokens
        .iter()
        .take_while(|token| !matches!(token.as_str(), "|" | "&&" | "||" | ";"))
        .cloned()
        .collect()
}
1330
/// Reduce a path to its last meaningful component for display.
///
/// Backslashes are treated as separators and trailing slashes are ignored.
/// Components named `build`, `dist`, `node_modules` or `src` are skipped, so
/// the closest enclosing directory is shown instead:
/// - webview/src -> webview
/// - foo/src/ -> foo
/// - packages/app/node_modules/ -> app
///
/// When every component is skipped, the path is returned with only its
/// separators normalized and trailing slashes removed.
fn short_display_path(path: &str) -> String {
    const SKIPPED_COMPONENTS: [&str; 4] = ["build", "dist", "node_modules", "src"];

    let unified = path.replace('\\', "/");
    let display = unified.trim_end_matches('/');
    display
        .rsplit('/')
        .find(|component| !component.is_empty() && !SKIPPED_COMPONENTS.contains(component))
        .unwrap_or(display)
        .to_string()
}
1348
/// Filter `args` down to the arguments that are neither a value-taking flag
/// nor the value it consumes.
///
/// - A flag listed in `flags_with_vals` is dropped together with the argument
///   after it.
/// - Any `--flag=value` argument is dropped.
/// - After a literal `--`, every remaining argument is kept verbatim; the `--`
///   itself is not returned.
///
/// Flags that are not listed are kept, so callers that want only operands
/// still have to filter out arguments starting with `-`.
fn skip_flag_values<'a>(args: &'a [String], flags_with_vals: &[&str]) -> Vec<&'a String> {
    let mut kept: Vec<&'a String> = Vec::new();
    let mut remaining = args.iter();
    while let Some(arg) = remaining.next() {
        if arg == "--" {
            // Everything after the separator is a positional operand.
            kept.extend(remaining.by_ref());
            break;
        }
        if arg.starts_with("--") && arg.contains('=') {
            // The value is embedded in the flag itself.
            continue;
        }
        if flags_with_vals.contains(&arg.as_str()) {
            // Consume the flag's value as well (a no-op when the flag is last).
            remaining.next();
            continue;
        }
        kept.push(arg);
    }
    kept
}
1380
/// Common flags for ESLint that take a following value and should not be
/// considered positional targets.
///
/// Passed to `collect_non_flag_targets_with_flags` when an `eslint` invocation
/// is summarized, so that e.g. the `.eslintrc.json` in `-c .eslintrc.json src`
/// is not reported as a lint target. Only the space-separated form needs to be
/// listed here: `skip_flag_values` already drops every `--flag=value` argument.
const ESLINT_FLAGS_WITH_VALUES: &[&str] = &[
    "-c",
    "--config",
    "--parser",
    "--parser-options",
    "--rulesdir",
    "--plugin",
    "--max-warnings",
    "--format",
];
1393
/// Collect the positional targets of a formatter/linter style invocation.
///
/// Scanning stops at a literal `--`. Arguments starting with `-` are ignored,
/// and a small set of value-taking flags (`-p`, `--config`, `-o`, ...) also
/// swallow the argument that follows them. Returns `None` when no target
/// remains.
fn collect_non_flag_targets(args: &[String]) -> Option<Vec<String>> {
    // Flags whose value is passed as a separate argument.
    const FLAGS_WITH_VALUES: [&str; 11] = [
        "-p",
        "--package",
        "--features",
        "-C",
        "--config",
        "--config-path",
        "--out-dir",
        "-o",
        "--run",
        "--max-warnings",
        "--format",
    ];

    let mut targets = Vec::new();
    // `--` ends the scan even when it sits where a flag value is expected.
    let mut before_separator = args.iter().take_while(|arg| arg.as_str() != "--");
    while let Some(arg) = before_separator.next() {
        if FLAGS_WITH_VALUES.contains(&arg.as_str()) {
            // Discard the flag's value along with the flag.
            before_separator.next();
        } else if !arg.starts_with('-') {
            targets.push(arg.clone());
        }
    }
    (!targets.is_empty()).then_some(targets)
}
1433
1434fn collect_non_flag_targets_with_flags(
1435    args: &[String],
1436    flags_with_vals: &[&str],
1437) -> Option<Vec<String>> {
1438    let targets: Vec<String> = skip_flag_values(args, flags_with_vals)
1439        .into_iter()
1440        .filter(|a| !a.starts_with('-'))
1441        .cloned()
1442        .collect();
1443    if targets.is_empty() {
1444        None
1445    } else {
1446        Some(targets)
1447    }
1448}
1449
/// Heuristic: does `s` look like a filesystem path rather than a search term?
///
/// True for `.`, `..`, and anything containing a `/` or `\` separator (which
/// covers `./...` and `../...` prefixes as well).
fn is_pathish(s: &str) -> bool {
    matches!(s, "." | "..") || s.chars().any(|c| c == '/' || c == '\\')
}
1458
1459fn parse_fd_query_and_path(tail: &[String]) -> (Option<String>, Option<String>) {
1460    let args_no_connector = trim_at_connector(tail);
1461    // fd has several flags that take values (e.g., -t/--type, -e/--extension).
1462    // Skip those values when extracting positional operands.
1463    let candidates = skip_flag_values(
1464        &args_no_connector,
1465        &[
1466            "-t",
1467            "--type",
1468            "-e",
1469            "--extension",
1470            "-E",
1471            "--exclude",
1472            "--search-path",
1473        ],
1474    );
1475    let non_flags: Vec<&String> = candidates
1476        .into_iter()
1477        .filter(|p| !p.starts_with('-'))
1478        .collect();
1479    match non_flags.as_slice() {
1480        [one] => {
1481            if is_pathish(one) {
1482                (None, Some(short_display_path(one)))
1483            } else {
1484                (Some((*one).clone()), None)
1485            }
1486        }
1487        [q, p, ..] => (Some((*q).clone()), Some(short_display_path(p))),
1488        _ => (None, None),
1489    }
1490}
1491
1492fn parse_find_query_and_path(tail: &[String]) -> (Option<String>, Option<String>) {
1493    let args_no_connector = trim_at_connector(tail);
1494    // First positional argument (excluding common unary operators) is the root path
1495    let mut path: Option<String> = None;
1496    for a in &args_no_connector {
1497        if !a.starts_with('-') && *a != "!" && *a != "(" && *a != ")" {
1498            path = Some(short_display_path(a));
1499            break;
1500        }
1501    }
1502    // Extract a common name/path/regex pattern if present
1503    let mut query: Option<String> = None;
1504    let mut i = 0;
1505    while i < args_no_connector.len() {
1506        let a = &args_no_connector[i];
1507        if a == "-name" || a == "-iname" || a == "-path" || a == "-regex" {
1508            if i + 1 < args_no_connector.len() {
1509                query = Some(args_no_connector[i + 1].clone());
1510            }
1511            break;
1512        }
1513        i += 1;
1514    }
1515    (query, path)
1516}
1517
1518fn classify_npm_like(tool: &str, tail: &[String], full_cmd: &[String]) -> Option<ParsedCommand> {
1519    let mut r = tail;
1520    if tool == "pnpm" && r.first().map(|s| s.as_str()) == Some("-r") {
1521        r = &r[1..];
1522    }
1523    let mut script_name: Option<String> = None;
1524    if r.first().map(|s| s.as_str()) == Some("run") {
1525        script_name = r.get(1).cloned();
1526    } else {
1527        let is_test_cmd = (tool == "npm" && r.first().map(|s| s.as_str()) == Some("t"))
1528            || ((tool == "npm" || tool == "pnpm" || tool == "yarn")
1529                && r.first().map(|s| s.as_str()) == Some("test"));
1530        if is_test_cmd {
1531            script_name = Some("test".to_string());
1532        }
1533    }
1534    if let Some(name) = script_name {
1535        let lname = name.to_lowercase();
1536        if lname == "test" || lname == "unit" || lname == "jest" || lname == "vitest" {
1537            return Some(ParsedCommand::Test {
1538                cmd: shlex_join(full_cmd),
1539            });
1540        }
1541        if lname == "lint" || lname == "eslint" {
1542            return Some(ParsedCommand::Lint {
1543                cmd: shlex_join(full_cmd),
1544                tool: Some(format!("{tool}-script:{name}")),
1545                targets: None,
1546            });
1547        }
1548        if lname == "format" || lname == "fmt" || lname == "prettier" {
1549            return Some(ParsedCommand::Format {
1550                cmd: shlex_join(full_cmd),
1551                tool: Some(format!("{tool}-script:{name}")),
1552                targets: None,
1553            });
1554        }
1555    }
1556    None
1557}
1558
1559fn parse_bash_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
1560    let [bash, flag, script] = original else {
1561        return None;
1562    };
1563    if bash != "bash" || flag != "-lc" {
1564        return None;
1565    }
1566    if let Some(tree) = try_parse_bash(script)
1567        && let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script)
1568        && !all_commands.is_empty()
1569    {
1570        let script_tokens = shlex_split(script)
1571            .unwrap_or_else(|| vec!["bash".to_string(), flag.clone(), script.clone()]);
1572        // Strip small formatting helpers (e.g., head/tail/awk/wc/etc) so we
1573        // bias toward the primary command when pipelines are present.
1574        // First, drop obvious small formatting helpers (e.g., wc/awk/etc).
1575        let had_multiple_commands = all_commands.len() > 1;
1576        // The bash AST walker yields commands in right-to-left order for
1577        // connector/pipeline sequences. Reverse to reflect actual execution order.
1578        let mut filtered_commands = drop_small_formatting_commands(all_commands);
1579        filtered_commands.reverse();
1580        if filtered_commands.is_empty() {
1581            return Some(vec![ParsedCommand::Unknown {
1582                cmd: script.clone(),
1583            }]);
1584        }
1585        let mut commands: Vec<ParsedCommand> = filtered_commands
1586            .into_iter()
1587            .map(|tokens| summarize_main_tokens(&tokens))
1588            .collect();
1589        if commands.len() > 1 {
1590            commands.retain(|pc| !matches!(pc, ParsedCommand::Noop { .. }));
1591        }
1592        if commands.len() == 1 {
1593            // If we reduced to a single command, attribute the full original script
1594            // for clearer UX in file-reading and listing scenarios, or when there were
1595            // no connectors in the original script. For search commands that came from
1596            // a pipeline (e.g. `rg --files | sed -n`), keep only the primary command.
1597            let had_connectors = had_multiple_commands
1598                || script_tokens
1599                    .iter()
1600                    .any(|t| t == "|" || t == "&&" || t == "||" || t == ";");
1601            commands = commands
1602                .into_iter()
1603                .map(|pc| match pc {
1604                    ParsedCommand::Read { name, cmd, .. } => {
1605                        if had_connectors {
1606                            let has_pipe = script_tokens.iter().any(|t| t == "|");
1607                            let has_sed_n = script_tokens.windows(2).any(|w| {
1608                                w.first().map(|s| s.as_str()) == Some("sed")
1609                                    && w.get(1).map(|s| s.as_str()) == Some("-n")
1610                            });
1611                            if has_pipe && has_sed_n {
1612                                ParsedCommand::Read {
1613                                    cmd: script.clone(),
1614                                    name,
1615                                }
1616                            } else {
1617                                ParsedCommand::Read {
1618                                    cmd: cmd.clone(),
1619                                    name,
1620                                }
1621                            }
1622                        } else {
1623                            ParsedCommand::Read {
1624                                cmd: shlex_join(&script_tokens),
1625                                name,
1626                            }
1627                        }
1628                    }
1629                    ParsedCommand::ListFiles { path, cmd, .. } => {
1630                        if had_connectors {
1631                            ParsedCommand::ListFiles {
1632                                cmd: cmd.clone(),
1633                                path,
1634                            }
1635                        } else {
1636                            ParsedCommand::ListFiles {
1637                                cmd: shlex_join(&script_tokens),
1638                                path,
1639                            }
1640                        }
1641                    }
1642                    ParsedCommand::Search {
1643                        query, path, cmd, ..
1644                    } => {
1645                        if had_connectors {
1646                            ParsedCommand::Search {
1647                                cmd: cmd.clone(),
1648                                query,
1649                                path,
1650                            }
1651                        } else {
1652                            ParsedCommand::Search {
1653                                cmd: shlex_join(&script_tokens),
1654                                query,
1655                                path,
1656                            }
1657                        }
1658                    }
1659                    ParsedCommand::Format {
1660                        tool, targets, cmd, ..
1661                    } => ParsedCommand::Format {
1662                        cmd: cmd.clone(),
1663                        tool,
1664                        targets,
1665                    },
1666                    ParsedCommand::Test { cmd, .. } => ParsedCommand::Test { cmd: cmd.clone() },
1667                    ParsedCommand::Lint {
1668                        tool, targets, cmd, ..
1669                    } => ParsedCommand::Lint {
1670                        cmd: cmd.clone(),
1671                        tool,
1672                        targets,
1673                    },
1674                    ParsedCommand::Unknown { .. } => ParsedCommand::Unknown {
1675                        cmd: script.clone(),
1676                    },
1677                    ParsedCommand::Noop { .. } => ParsedCommand::Noop {
1678                        cmd: script.clone(),
1679                    },
1680                })
1681                .collect();
1682        }
1683        return Some(commands);
1684    }
1685    Some(vec![ParsedCommand::Unknown {
1686        cmd: script.clone(),
1687    }])
1688}
1689
1690/// Return true if this looks like a small formatting helper in a pipeline.
1691/// Examples: `head -n 40`, `tail -n +10`, `wc -l`, `awk ...`, `cut ...`, `tr ...`.
1692/// We try to keep variants that clearly include a file path (e.g. `tail -n 30 file`).
1693fn is_small_formatting_command(tokens: &[String]) -> bool {
1694    if tokens.is_empty() {
1695        return false;
1696    }
1697    let cmd = tokens[0].as_str();
1698    match cmd {
1699        // Always formatting; typically used in pipes.
1700        // `nl` is special-cased below to allow `nl <file>` to be treated as a read command.
1701        "wc" | "tr" | "cut" | "sort" | "uniq" | "xargs" | "tee" | "column" | "awk" | "yes"
1702        | "printf" => true,
1703        "head" => {
1704            // Treat as formatting when no explicit file operand is present.
1705            // Common forms: `head -n 40`, `head -c 100`.
1706            // Keep cases like `head -n 40 file`.
1707            tokens.len() < 3
1708        }
1709        "tail" => {
1710            // Treat as formatting when no explicit file operand is present.
1711            // Common forms: `tail -n +10`, `tail -n 30`.
1712            // Keep cases like `tail -n 30 file`.
1713            tokens.len() < 3
1714        }
1715        "sed" => {
1716            // Keep `sed -n <range> file` (treated as a file read elsewhere);
1717            // otherwise consider it a formatting helper in a pipeline.
1718            tokens.len() < 4
1719                || !(tokens[1] == "-n" && is_valid_sed_n_arg(tokens.get(2).map(|s| s.as_str())))
1720        }
1721        _ => false,
1722    }
1723}
1724
1725fn drop_small_formatting_commands(mut commands: Vec<Vec<String>>) -> Vec<Vec<String>> {
1726    commands.retain(|tokens| !is_small_formatting_command(tokens));
1727    commands
1728}
1729
1730fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand {
1731    match main_cmd.split_first() {
1732        Some((head, tail)) if head == "true" && tail.is_empty() => ParsedCommand::Noop {
1733            cmd: shlex_join(main_cmd),
1734        },
1735        // (sed-specific logic handled below in dedicated arm returning Read)
1736        Some((head, tail))
1737            if head == "cargo" && tail.first().map(|s| s.as_str()) == Some("fmt") =>
1738        {
1739            ParsedCommand::Format {
1740                cmd: shlex_join(main_cmd),
1741                tool: Some("cargo fmt".to_string()),
1742                targets: collect_non_flag_targets(&tail[1..]),
1743            }
1744        }
1745        Some((head, tail))
1746            if head == "cargo" && tail.first().map(|s| s.as_str()) == Some("clippy") =>
1747        {
1748            ParsedCommand::Lint {
1749                cmd: shlex_join(main_cmd),
1750                tool: Some("cargo clippy".to_string()),
1751                targets: collect_non_flag_targets(&tail[1..]),
1752            }
1753        }
1754        Some((head, tail))
1755            if head == "cargo" && tail.first().map(|s| s.as_str()) == Some("test") =>
1756        {
1757            ParsedCommand::Test {
1758                cmd: shlex_join(main_cmd),
1759            }
1760        }
1761        Some((head, tail)) if head == "rustfmt" => ParsedCommand::Format {
1762            cmd: shlex_join(main_cmd),
1763            tool: Some("rustfmt".to_string()),
1764            targets: collect_non_flag_targets(tail),
1765        },
1766        Some((head, tail)) if head == "go" && tail.first().map(|s| s.as_str()) == Some("fmt") => {
1767            ParsedCommand::Format {
1768                cmd: shlex_join(main_cmd),
1769                tool: Some("go fmt".to_string()),
1770                targets: collect_non_flag_targets(&tail[1..]),
1771            }
1772        }
1773        Some((head, tail)) if head == "go" && tail.first().map(|s| s.as_str()) == Some("test") => {
1774            ParsedCommand::Test {
1775                cmd: shlex_join(main_cmd),
1776            }
1777        }
1778        Some((head, _)) if head == "pytest" => ParsedCommand::Test {
1779            cmd: shlex_join(main_cmd),
1780        },
1781        Some((head, tail)) if head == "eslint" => {
1782            // Treat configuration flags with values (e.g. `-c .eslintrc`) as non-targets.
1783            let targets = collect_non_flag_targets_with_flags(tail, ESLINT_FLAGS_WITH_VALUES);
1784            ParsedCommand::Lint {
1785                cmd: shlex_join(main_cmd),
1786                tool: Some("eslint".to_string()),
1787                targets,
1788            }
1789        }
1790        Some((head, tail)) if head == "prettier" => ParsedCommand::Format {
1791            cmd: shlex_join(main_cmd),
1792            tool: Some("prettier".to_string()),
1793            targets: collect_non_flag_targets(tail),
1794        },
1795        Some((head, tail)) if head == "black" => ParsedCommand::Format {
1796            cmd: shlex_join(main_cmd),
1797            tool: Some("black".to_string()),
1798            targets: collect_non_flag_targets(tail),
1799        },
1800        Some((head, tail))
1801            if head == "ruff" && tail.first().map(|s| s.as_str()) == Some("check") =>
1802        {
1803            ParsedCommand::Lint {
1804                cmd: shlex_join(main_cmd),
1805                tool: Some("ruff".to_string()),
1806                targets: collect_non_flag_targets(&tail[1..]),
1807            }
1808        }
1809        Some((head, tail))
1810            if head == "ruff" && tail.first().map(|s| s.as_str()) == Some("format") =>
1811        {
1812            ParsedCommand::Format {
1813                cmd: shlex_join(main_cmd),
1814                tool: Some("ruff".to_string()),
1815                targets: collect_non_flag_targets(&tail[1..]),
1816            }
1817        }
1818        Some((head, _)) if (head == "jest" || head == "vitest") => ParsedCommand::Test {
1819            cmd: shlex_join(main_cmd),
1820        },
1821        Some((head, tail))
1822            if head == "npx" && tail.first().map(|s| s.as_str()) == Some("eslint") =>
1823        {
1824            let targets = collect_non_flag_targets_with_flags(&tail[1..], ESLINT_FLAGS_WITH_VALUES);
1825            ParsedCommand::Lint {
1826                cmd: shlex_join(main_cmd),
1827                tool: Some("eslint".to_string()),
1828                targets,
1829            }
1830        }
1831        Some((head, tail))
1832            if head == "npx" && tail.first().map(|s| s.as_str()) == Some("prettier") =>
1833        {
1834            ParsedCommand::Format {
1835                cmd: shlex_join(main_cmd),
1836                tool: Some("prettier".to_string()),
1837                targets: collect_non_flag_targets(&tail[1..]),
1838            }
1839        }
1840        // NPM-like scripts including yarn
1841        Some((tool, tail)) if (tool == "pnpm" || tool == "npm" || tool == "yarn") => {
1842            if let Some(cmd) = classify_npm_like(tool, tail, main_cmd) {
1843                cmd
1844            } else {
1845                ParsedCommand::Unknown {
1846                    cmd: shlex_join(main_cmd),
1847                }
1848            }
1849        }
1850        Some((head, tail)) if head == "ls" => {
1851            // Avoid treating option values as paths (e.g., ls -I "*.test.js").
1852            let candidates = skip_flag_values(
1853                tail,
1854                &[
1855                    "-I",
1856                    "-w",
1857                    "--block-size",
1858                    "--format",
1859                    "--time-style",
1860                    "--color",
1861                    "--quoting-style",
1862                ],
1863            );
1864            let path = candidates
1865                .into_iter()
1866                .find(|p| !p.starts_with('-'))
1867                .map(|p| short_display_path(p));
1868            ParsedCommand::ListFiles {
1869                cmd: shlex_join(main_cmd),
1870                path,
1871            }
1872        }
1873        Some((head, tail)) if head == "rg" => {
1874            let args_no_connector = trim_at_connector(tail);
1875            let has_files_flag = args_no_connector.iter().any(|a| a == "--files");
1876            let non_flags: Vec<&String> = args_no_connector
1877                .iter()
1878                .filter(|p| !p.starts_with('-'))
1879                .collect();
1880            let (query, path) = if has_files_flag {
1881                (None, non_flags.first().map(|s| short_display_path(s)))
1882            } else {
1883                (
1884                    non_flags.first().cloned().map(|s| s.to_string()),
1885                    non_flags.get(1).map(|s| short_display_path(s)),
1886                )
1887            };
1888            ParsedCommand::Search {
1889                cmd: shlex_join(main_cmd),
1890                query,
1891                path,
1892            }
1893        }
1894        Some((head, tail)) if head == "fd" => {
1895            let (query, path) = parse_fd_query_and_path(tail);
1896            ParsedCommand::Search {
1897                cmd: shlex_join(main_cmd),
1898                query,
1899                path,
1900            }
1901        }
1902        Some((head, tail)) if head == "find" => {
1903            // Basic find support: capture path and common name filter
1904            let (query, path) = parse_find_query_and_path(tail);
1905            ParsedCommand::Search {
1906                cmd: shlex_join(main_cmd),
1907                query,
1908                path,
1909            }
1910        }
1911        Some((head, tail)) if head == "grep" => {
1912            let args_no_connector = trim_at_connector(tail);
1913            let non_flags: Vec<&String> = args_no_connector
1914                .iter()
1915                .filter(|p| !p.starts_with('-'))
1916                .collect();
1917            // Do not shorten the query: grep patterns may legitimately contain slashes
1918            // and should be preserved verbatim. Only paths should be shortened.
1919            let query = non_flags.first().cloned().map(|s| s.to_string());
1920            let path = non_flags.get(1).map(|s| short_display_path(s));
1921            ParsedCommand::Search {
1922                cmd: shlex_join(main_cmd),
1923                query,
1924                path,
1925            }
1926        }
1927        Some((head, tail)) if head == "cat" => {
1928            // Support both `cat <file>` and `cat -- <file>` forms.
1929            let effective_tail: &[String] = if tail.first().map(|s| s.as_str()) == Some("--") {
1930                &tail[1..]
1931            } else {
1932                tail
1933            };
1934            if effective_tail.len() == 1 {
1935                let name = short_display_path(&effective_tail[0]);
1936                ParsedCommand::Read {
1937                    cmd: shlex_join(main_cmd),
1938                    name,
1939                }
1940            } else {
1941                ParsedCommand::Unknown {
1942                    cmd: shlex_join(main_cmd),
1943                }
1944            }
1945        }
1946        Some((head, tail)) if head == "head" => {
1947            // Support `head -n 50 file` and `head -n50 file` forms.
1948            let has_valid_n = match tail.split_first() {
1949                Some((first, rest)) if first == "-n" => rest
1950                    .first()
1951                    .is_some_and(|n| n.chars().all(|c| c.is_ascii_digit())),
1952                Some((first, _)) if first.starts_with("-n") => {
1953                    first[2..].chars().all(|c| c.is_ascii_digit())
1954                }
1955                _ => false,
1956            };
1957            if has_valid_n {
1958                // Build candidates skipping the numeric value consumed by `-n` when separated.
1959                let mut candidates: Vec<&String> = Vec::new();
1960                let mut i = 0;
1961                while i < tail.len() {
1962                    if i == 0 && tail[i] == "-n" && i + 1 < tail.len() {
1963                        let n = &tail[i + 1];
1964                        if n.chars().all(|c| c.is_ascii_digit()) {
1965                            i += 2;
1966                            continue;
1967                        }
1968                    }
1969                    candidates.push(&tail[i]);
1970                    i += 1;
1971                }
1972                if let Some(p) = candidates.into_iter().find(|p| !p.starts_with('-')) {
1973                    let name = short_display_path(p);
1974                    return ParsedCommand::Read {
1975                        cmd: shlex_join(main_cmd),
1976                        name,
1977                    };
1978                }
1979            }
1980            ParsedCommand::Unknown {
1981                cmd: shlex_join(main_cmd),
1982            }
1983        }
1984        Some((head, tail)) if head == "tail" => {
1985            // Support `tail -n +10 file` and `tail -n+10 file` forms.
1986            let has_valid_n = match tail.split_first() {
1987                Some((first, rest)) if first == "-n" => rest.first().is_some_and(|n| {
1988                    let s = n.strip_prefix('+').unwrap_or(n);
1989                    !s.is_empty() && s.chars().all(|c| c.is_ascii_digit())
1990                }),
1991                Some((first, _)) if first.starts_with("-n") => {
1992                    let v = &first[2..];
1993                    let s = v.strip_prefix('+').unwrap_or(v);
1994                    !s.is_empty() && s.chars().all(|c| c.is_ascii_digit())
1995                }
1996                _ => false,
1997            };
1998            if has_valid_n {
1999                // Build candidates skipping the numeric value consumed by `-n` when separated.
2000                let mut candidates: Vec<&String> = Vec::new();
2001                let mut i = 0;
2002                while i < tail.len() {
2003                    if i == 0 && tail[i] == "-n" && i + 1 < tail.len() {
2004                        let n = &tail[i + 1];
2005                        let s = n.strip_prefix('+').unwrap_or(n);
2006                        if !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()) {
2007                            i += 2;
2008                            continue;
2009                        }
2010                    }
2011                    candidates.push(&tail[i]);
2012                    i += 1;
2013                }
2014                if let Some(p) = candidates.into_iter().find(|p| !p.starts_with('-')) {
2015                    let name = short_display_path(p);
2016                    return ParsedCommand::Read {
2017                        cmd: shlex_join(main_cmd),
2018                        name,
2019                    };
2020                }
2021            }
2022            ParsedCommand::Unknown {
2023                cmd: shlex_join(main_cmd),
2024            }
2025        }
2026        Some((head, tail)) if head == "nl" => {
2027            // Avoid treating option values as paths (e.g., nl -s "  ").
2028            let candidates = skip_flag_values(tail, &["-s", "-w", "-v", "-i", "-b"]);
2029            if let Some(p) = candidates.into_iter().find(|p| !p.starts_with('-')) {
2030                let name = short_display_path(p);
2031                ParsedCommand::Read {
2032                    cmd: shlex_join(main_cmd),
2033                    name,
2034                }
2035            } else {
2036                ParsedCommand::Unknown {
2037                    cmd: shlex_join(main_cmd),
2038                }
2039            }
2040        }
2041        Some((head, tail))
2042            if head == "sed"
2043                && tail.len() >= 3
2044                && tail[0] == "-n"
2045                && is_valid_sed_n_arg(tail.get(1).map(|s| s.as_str())) =>
2046        {
2047            if let Some(path) = tail.get(2) {
2048                let name = short_display_path(path);
2049                ParsedCommand::Read {
2050                    cmd: shlex_join(main_cmd),
2051                    name,
2052                }
2053            } else {
2054                ParsedCommand::Unknown {
2055                    cmd: shlex_join(main_cmd),
2056                }
2057            }
2058        }
2059        // Other commands
2060        _ => ParsedCommand::Unknown {
2061            cmd: shlex_join(main_cmd),
2062        },
2063    }
2064}