lean_ctx/shell/compress/
classification.rs1use super::passthrough::{BUILTIN_PASSTHROUGH, DEV_SCRIPT_KEYWORDS, SCRIPT_RUNNER_PREFIXES};
2
3fn is_dev_script_runner(cmd: &str) -> bool {
4 for prefix in SCRIPT_RUNNER_PREFIXES {
5 if let Some(rest) = cmd.strip_prefix(prefix) {
6 let script_name = rest.split_whitespace().next().unwrap_or("");
7 for kw in DEV_SCRIPT_KEYWORDS {
8 if script_name.contains(kw) {
9 return true;
10 }
11 }
12 }
13 }
14 false
15}
16
17pub(in crate::shell) fn is_excluded_command(command: &str, excluded: &[String]) -> bool {
18 let cmd = command.trim().to_lowercase();
19 for pattern in BUILTIN_PASSTHROUGH {
20 if pattern.starts_with("--") {
21 if cmd.contains(pattern) {
22 return true;
23 }
24 } else if pattern.ends_with(' ') || pattern.ends_with('\t') {
25 if cmd == pattern.trim() || cmd.starts_with(pattern) {
26 return true;
27 }
28 } else if cmd == *pattern
29 || cmd.starts_with(&format!("{pattern} "))
30 || cmd.starts_with(&format!("{pattern}\t"))
31 || cmd.contains(&format!(" {pattern} "))
32 || cmd.contains(&format!(" {pattern}\t"))
33 || cmd.contains(&format!("|{pattern} "))
34 || cmd.contains(&format!("|{pattern}\t"))
35 || cmd.ends_with(&format!(" {pattern}"))
36 || cmd.ends_with(&format!("|{pattern}"))
37 {
38 return true;
39 }
40 }
41
42 if is_dev_script_runner(&cmd) {
43 return true;
44 }
45
46 if excluded.is_empty() {
47 return false;
48 }
49 excluded.iter().any(|excl| {
50 let excl_lower = excl.trim().to_lowercase();
51 cmd == excl_lower || cmd.starts_with(&format!("{excl_lower} "))
52 })
53}
54
55pub(super) fn is_search_output(command: &str) -> bool {
56 let c = command.trim_start();
57 c.starts_with("grep ")
58 || c.starts_with("rg ")
59 || c.starts_with("find ")
60 || c.starts_with("fd ")
61 || c.starts_with("ag ")
62 || c.starts_with("ack ")
63}
64
65pub fn has_structural_output(command: &str) -> bool {
71 if is_verbatim_output(command) {
72 return true;
73 }
74 if is_standalone_diff_command(command) {
75 return true;
76 }
77 is_structural_git_command(command)
78}
79
80pub fn is_verbatim_output(command: &str) -> bool {
84 is_verbatim_single(command) || is_verbatim_pipe_tail(command)
85}
86
87fn is_verbatim_single(command: &str) -> bool {
88 is_http_client(command)
89 || is_file_viewer(command)
90 || is_data_format_tool(command)
91 || is_binary_viewer(command)
92 || is_infra_inspection(command)
93 || is_crypto_command(command)
94 || is_database_query(command)
95 || is_dns_network_inspection(command)
96 || is_language_one_liner(command)
97 || is_container_listing(command)
98 || is_file_listing(command)
99 || is_system_query(command)
100 || is_cloud_cli_query(command)
101 || is_cli_api_data_command(command)
102 || is_package_manager_info(command)
103 || is_version_or_help(command)
104 || is_config_viewer(command)
105 || is_log_viewer(command)
106 || is_archive_listing(command)
107 || is_clipboard_tool(command)
108 || is_git_data_command(command)
109 || is_git_write_command(command)
110 || is_task_dry_run(command)
111 || is_env_dump(command)
112}
113
114fn is_cli_api_data_command(command: &str) -> bool {
117 let cl = command.trim().to_ascii_lowercase();
118
119 if cl.starts_with("gh ")
121 && (cl.starts_with("gh api ")
122 || cl.starts_with("gh api\t")
123 || cl.contains(" --json")
124 || cl.contains(" --jq ")
125 || cl.contains(" --template ")
126 || (cl.contains("run view") && (cl.contains("--log") || cl.contains("log-failed")))
127 || cl.starts_with("gh search ")
128 || cl.starts_with("gh release view")
129 || cl.starts_with("gh gist view")
130 || cl.starts_with("gh gist list"))
131 {
132 return true;
133 }
134
135 if cl.starts_with("glab ") && cl.starts_with("glab api ") {
137 return true;
138 }
139
140 if cl.starts_with("jira ") && (cl.contains(" view") || cl.contains(" list")) {
142 return true;
143 }
144
145 if cl.starts_with("linear ") {
147 return true;
148 }
149
150 let first = first_binary(command);
152 if matches!(
153 first,
154 "stripe" | "twilio" | "vercel" | "netlify" | "flyctl" | "fly" | "railway" | "supabase"
155 ) && (cl.contains(" list")
156 || cl.contains(" get")
157 || cl.contains(" show")
158 || cl.contains(" status")
159 || cl.contains(" info")
160 || cl.contains(" logs")
161 || cl.contains(" inspect")
162 || cl.contains(" export")
163 || cl.contains(" describe"))
164 {
165 return true;
166 }
167
168 if cl.starts_with("wrangler ")
170 && !cl.starts_with("wrangler dev")
171 && (cl.contains(" tail") || cl.contains(" secret list") || cl.contains(" kv "))
172 {
173 return true;
174 }
175
176 if cl.starts_with("heroku ")
178 && (cl.contains(" config")
179 || cl.contains(" logs")
180 || cl.contains(" ps")
181 || cl.contains(" info"))
182 {
183 return true;
184 }
185
186 false
187}
188
189fn is_verbatim_pipe_tail(command: &str) -> bool {
192 if !command.contains('|') {
193 return false;
194 }
195 let last_segment = command.rsplit('|').next().unwrap_or("").trim();
196 if last_segment.is_empty() {
197 return false;
198 }
199 is_verbatim_single(last_segment)
200}
201
202fn is_http_client(command: &str) -> bool {
203 let first = first_binary(command);
204 matches!(
205 first,
206 "curl" | "wget" | "http" | "https" | "xh" | "curlie" | "grpcurl" | "grpc_cli"
207 )
208}
209
210fn is_file_viewer(command: &str) -> bool {
211 let first = first_binary(command);
212 match first {
213 "cat" | "bat" | "batcat" | "pygmentize" | "highlight" => true,
214 "head" | "tail" => !command.contains("-f") && !command.contains("--follow"),
215 _ => false,
216 }
217}
218
219fn is_data_format_tool(command: &str) -> bool {
220 let first = first_binary(command);
221 matches!(
222 first,
223 "jq" | "yq"
224 | "xq"
225 | "fx"
226 | "gron"
227 | "mlr"
228 | "miller"
229 | "dasel"
230 | "csvlook"
231 | "csvcut"
232 | "csvgrep"
233 | "csvjson"
234 | "in2csv"
235 | "sql2csv"
236 )
237}
238
239fn is_binary_viewer(command: &str) -> bool {
240 let first = first_binary(command);
241 matches!(first, "xxd" | "hexdump" | "od" | "strings" | "file")
242}
243
/// Reports whether `command` (trimmed, ASCII-lower-cased) is a read-only
/// infrastructure inspection: selected `terraform`/`tofu`/`pulumi`
/// sub-commands, `docker`/`podman inspect`, `kubectl get` with a YAML/JSON
/// output flag, `kubectl describe`, or `helm get`/`helm template`.
/// `k` is accepted as a stand-in for `kubectl`.
fn is_infra_inspection(command: &str) -> bool {
    fn starts_with_any(text: &str, prefixes: &[&str]) -> bool {
        prefixes.iter().any(|prefix| text.starts_with(*prefix))
    }

    const IAC_READS: &[&str] = &[
        "terraform output",
        "terraform show",
        "terraform state show",
        "terraform state list",
        "terraform state pull",
        "tofu output",
        "tofu show",
        "tofu state show",
        "tofu state list",
        "tofu state pull",
        "pulumi stack output",
        "pulumi stack export",
    ];
    const STRUCTURED_OUTPUT_FLAGS: &[&str] = &[
        "-o yaml",
        "-o json",
        "-oyaml",
        "-ojson",
        "--output yaml",
        "--output json",
        "--output=yaml",
        "--output=json",
    ];

    let cl = command.trim().to_ascii_lowercase();

    let structured_kubectl_get = starts_with_any(&cl, &["kubectl get", "k get"])
        && STRUCTURED_OUTPUT_FLAGS
            .iter()
            .any(|flag| cl.contains(*flag));

    starts_with_any(&cl, IAC_READS)
        || starts_with_any(&cl, &["docker inspect", "podman inspect"])
        || structured_kubectl_get
        || starts_with_any(&cl, &["kubectl describe", "k describe"])
        || starts_with_any(&cl, &["helm get", "helm template"])
}
284
285fn is_crypto_command(command: &str) -> bool {
286 let first = first_binary(command);
287 if first == "openssl" {
288 return true;
289 }
290 matches!(first, "gpg" | "age" | "ssh-keygen" | "certutil")
291}
292
/// Reports whether `command` runs a database client with an inline query:
/// `psql -c/--command`, `mysql`/`mariadb -e/--execute`, `sqlite3` with a
/// double-quoted argument, or `mongosh --eval`.
///
/// Leading whitespace is ignored, matching the sibling classifiers in this
/// file; without that, an indented command would fail every prefix check.
/// Only the start is trimmed so that the ` -c ` / ` -e ` checks, which rely
/// on a following space, see the rest of the text unchanged.
fn is_database_query(command: &str) -> bool {
    let cl = command.trim_start().to_ascii_lowercase();

    if cl.starts_with("psql ") {
        return cl.contains(" -c ") || cl.contains("--command");
    }
    if cl.starts_with("mysql ") || cl.starts_with("mariadb ") {
        return cl.contains(" -e ") || cl.contains("--execute");
    }
    if cl.starts_with("sqlite3 ") {
        // A double quote is taken as the sign of an inline SQL statement.
        return cl.contains('"');
    }
    cl.starts_with("mongosh ") && cl.contains("--eval")
}
312
313fn is_dns_network_inspection(command: &str) -> bool {
314 let first = first_binary(command);
315 matches!(
316 first,
317 "dig" | "nslookup" | "host" | "whois" | "drill" | "resolvectl"
318 )
319}
320
/// Reports whether `command` runs an interpreter with inline code:
/// `python`/`python3 -c`, `node -e/--eval`, `ruby -e`, `perl -e` or `php -r`.
///
/// Leading whitespace is ignored, matching the sibling classifiers in this
/// file; without that, an indented command would fail every prefix check.
/// Only the start is trimmed because the flag checks depend on the character
/// that follows the flag.
fn is_language_one_liner(command: &str) -> bool {
    let cl = command.trim_start().to_ascii_lowercase();

    // Pick the inline-code flags that belong to the interpreter in use.
    let inline_flags: &[&str] = if cl.starts_with("python ") || cl.starts_with("python3 ") {
        &[" -c ", " -c\"", " -c'"]
    } else if cl.starts_with("node ") {
        &[" -e ", " --eval"]
    } else if cl.starts_with("ruby ") || cl.starts_with("perl ") {
        &[" -e "]
    } else if cl.starts_with("php ") {
        &[" -r "]
    } else {
        return false;
    };

    inline_flags.iter().any(|flag| cl.contains(*flag))
}
330
/// Reports whether `command` (trimmed, ASCII-lower-cased) starts with one of
/// the container/cluster listing prefixes: `docker`/`podman` `ps`/`images`,
/// `kubectl get` (or `k get`), `helm list`/`ls`, or compose `ps`.
fn is_container_listing(command: &str) -> bool {
    const LISTING_PREFIXES: &[&str] = &[
        "docker ps",
        "docker images",
        "podman ps",
        "podman images",
        "kubectl get",
        "k get",
        "helm list",
        "helm ls",
        "docker compose ps",
        "docker-compose ps",
    ];

    let cl = command.trim().to_ascii_lowercase();
    LISTING_PREFIXES
        .iter()
        .any(|prefix| cl.starts_with(*prefix))
}
350
351fn is_file_listing(command: &str) -> bool {
352 let first = first_binary(command);
353 matches!(
354 first,
355 "find" | "fd" | "fdfind" | "ls" | "exa" | "eza" | "lsd"
356 )
357}
358
359fn is_system_query(command: &str) -> bool {
360 let first = first_binary(command);
361 matches!(
362 first,
363 "stat"
364 | "wc"
365 | "du"
366 | "df"
367 | "free"
368 | "uname"
369 | "id"
370 | "whoami"
371 | "hostname"
372 | "uptime"
373 | "lscpu"
374 | "lsblk"
375 | "ip"
376 | "ifconfig"
377 | "route"
378 | "ss"
379 | "netstat"
380 | "base64"
381 | "sha256sum"
382 | "sha1sum"
383 | "md5sum"
384 | "cksum"
385 | "readlink"
386 | "realpath"
387 | "which"
388 | "type"
389 | "command"
390 )
391}
392
/// Reports whether `command` (trimmed, ASCII-lower-cased) is an `aws`,
/// `gcloud` or `az` invocation that contains a query verb preceded by a
/// space.
///
/// `aws configure…`, `gcloud auth…`, any `gcloud` command containing
/// ` deploy`, and `az login…` are rejected up front.
fn is_cloud_cli_query(command: &str) -> bool {
    const QUERY_VERBS: &[&str] = &[
        "describe",
        "get",
        "list",
        "show",
        "export",
        "inspect",
        "info",
        "status",
        "whoami",
        "caller-identity",
        "account",
    ];

    let cl = command.trim().to_ascii_lowercase();

    // The three provider prefixes are mutually exclusive, so a chain is
    // equivalent to testing each provider independently.
    let queryable_provider = if cl.starts_with("aws ") {
        !cl.starts_with("aws configure")
    } else if cl.starts_with("gcloud ") {
        !(cl.starts_with("gcloud auth") || cl.contains(" deploy"))
    } else if cl.starts_with("az ") {
        !cl.starts_with("az login")
    } else {
        false
    };

    queryable_provider
        && QUERY_VERBS
            .iter()
            .map(|verb| format!(" {verb}"))
            .any(|needle| cl.contains(&needle))
}
422
/// Reports whether `command` (trimmed, ASCII-lower-cased) is a read-only
/// package-manager query such as `npm ls`, `pip freeze`, `cargo tree` or
/// `dpkg -l`.
///
/// The tool is the text before the first space; each tool has its own list
/// of accepted command prefixes. `pip`/`pip3` are matched by sub-command
/// anywhere in the command instead of by prefix.
fn is_package_manager_info(command: &str) -> bool {
    let cl = command.trim().to_ascii_lowercase();

    // No space means there is no sub-command, so no prefix below can match.
    let tool = match cl.split_once(' ') {
        Some((tool, _)) => tool,
        None => return false,
    };

    let accepted: &[&str] = match tool {
        "npm" => &[
            "npm list",
            "npm ls",
            "npm info",
            "npm view",
            "npm show",
            "npm outdated",
            "npm audit",
        ],
        "yarn" => &[
            "yarn list",
            "yarn info",
            "yarn why",
            "yarn outdated",
            "yarn audit",
        ],
        "pnpm" => &[
            "pnpm list",
            "pnpm ls",
            "pnpm why",
            "pnpm outdated",
            "pnpm audit",
        ],
        "pip" | "pip3" => {
            return cl.contains(" list") || cl.contains(" show") || cl.contains(" freeze");
        }
        "gem" => &["gem list", "gem info", "gem specification"],
        "cargo" => &["cargo metadata", "cargo tree", "cargo pkgid"],
        "go" => &["go list", "go version"],
        "composer" => &["composer show", "composer info", "composer outdated"],
        "brew" => &["brew list", "brew info", "brew deps", "brew outdated"],
        "apt" | "dpkg" => &["apt list", "apt show", "dpkg -l", "dpkg --list", "dpkg -s"],
        _ => return false,
    };

    accepted.iter().any(|prefix| cl.starts_with(*prefix))
}
485
/// Reports whether `command` has two or three whitespace-separated words and
/// any of them is a version/help request.
///
/// The flags `--version`, `-V`, `--help` and `-h` are matched exactly; the
/// bare words `version` and `help` are matched ignoring ASCII case.
fn is_version_or_help(command: &str) -> bool {
    let words: Vec<&str> = command.split_whitespace().collect();
    if !(2..=3).contains(&words.len()) {
        return false;
    }

    words.iter().any(|word| {
        matches!(*word, "--version" | "-V" | "--help" | "-h")
            || word.eq_ignore_ascii_case("version")
            || word.eq_ignore_ascii_case("help")
    })
}
500
/// Reports whether `command` (trimmed, ASCII-lower-cased) reads tool
/// configuration: `git config` without `--set`/`--unset`, `npm config
/// list|get`, `yarn config` without ` set`, `pip`/`pip3 config list`,
/// `rustup show`/`target list`, `docker context ls|list`, or `kubectl config`
/// with `view`, `get-contexts` or `current-context`.
fn is_config_viewer(command: &str) -> bool {
    let cl = command.trim().to_ascii_lowercase();
    let begins = |prefix: &str| cl.starts_with(prefix);
    let mentions = |needle: &str| cl.contains(needle);

    let git_read = begins("git config") && !mentions("--set") && !mentions("--unset");
    let npm_read = begins("npm config list") || begins("npm config get");
    let yarn_read = begins("yarn config") && !mentions(" set");
    let pip_read = begins("pip config list") || begins("pip3 config list");
    let rustup_read = begins("rustup show") || begins("rustup target list");
    let docker_read = begins("docker context ls") || begins("docker context list");
    let kubectl_read = begins("kubectl config")
        && (mentions("view") || mentions("get-contexts") || mentions("current-context"));

    git_read || npm_read || yarn_read || pip_read || rustup_read || docker_read || kubectl_read
}
528
/// Reports whether `command` (trimmed, ASCII-lower-cased) prints logs and
/// then exits: `journalctl`, `dmesg`, `docker logs`, `kubectl logs` or
/// `docker compose logs` without a follow flag.
///
/// Follow flags are matched per argument rather than as a substring of the
/// whole command. The substring test treated any name containing `-f`
/// (`docker logs my-frontend`, `journalctl -u nginx-fpm`,
/// `journalctl --file=…`) as a follow request and wrongly rejected it.
/// An argument counts as a follow flag when it starts with `--follow`, or
/// with the tool's short flag: `-w` for `dmesg`, `-f` for the others.
fn is_log_viewer(command: &str) -> bool {
    const FOLLOWS_WITH_F: &[&str] = &[
        "journalctl",
        "docker logs",
        "kubectl logs",
        "docker compose logs",
    ];

    let cl = command.trim().to_ascii_lowercase();

    // The first word is the binary itself, so it is never a flag.
    let follows = |short_flag: &str| {
        cl.split_whitespace()
            .skip(1)
            .any(|arg| arg.starts_with(short_flag) || arg.starts_with("--follow"))
    };

    if cl.starts_with("dmesg") {
        // The text is lower-cased, so `-W` (follow new messages) is caught too.
        return !follows("-w");
    }

    FOLLOWS_WITH_F.iter().any(|prefix| cl.starts_with(*prefix)) && !follows("-f")
}
548
549fn is_archive_listing(command: &str) -> bool {
550 let cl = command.trim().to_ascii_lowercase();
551 if cl.starts_with("tar ") && (cl.contains(" -tf") || cl.contains(" -t") || cl.contains(" tf")) {
552 return true;
553 }
554 if cl.starts_with("unzip -l") || cl.starts_with("unzip -Z") {
555 return true;
556 }
557 let first = first_binary(command);
558 matches!(first, "zipinfo" | "lsar" | "7z" if cl.contains(" l ") || cl.contains(" l\t"))
559 || first == "zipinfo"
560 || first == "lsar"
561}
562
563fn is_clipboard_tool(command: &str) -> bool {
564 let first = first_binary(command);
565 if matches!(first, "pbpaste" | "wl-paste") {
566 return true;
567 }
568 let cl = command.trim().to_ascii_lowercase();
569 if cl.starts_with("xclip") && cl.contains("-o") {
570 return true;
571 }
572 if cl.starts_with("xsel") && (cl.contains("-o") || cl.contains("--output")) {
573 return true;
574 }
575 false
576}
577
/// Reports whether `command` (trimmed, ASCII-lower-cased) starts with `git `
/// and its sub-command is `commit`, `push`, `pull`, `merge`, `rebase`,
/// `cherry-pick`, `tag` or `reset`.
///
/// The sub-command is the first argument that is neither a flag nor the
/// value of `-c`, `-C`, `--git-dir` or `--work-tree`.
fn is_git_write_command(command: &str) -> bool {
    let cl = command.trim().to_ascii_lowercase();
    if !cl.starts_with("git ") {
        return false;
    }

    let mut args = cl.split_whitespace().skip(1);
    while let Some(arg) = args.next() {
        if matches!(arg, "-c" | "-C" | "--git-dir" | "--work-tree") {
            // These global options take a separate value; drop it as well.
            let _ = args.next();
            continue;
        }
        if arg.starts_with('-') {
            continue;
        }
        return matches!(
            arg,
            "commit" | "push" | "pull" | "merge" | "rebase" | "cherry-pick" | "tag" | "reset"
        );
    }
    false
}
613
614pub(super) fn is_git_data_command(command: &str) -> bool {
615 let cl = command.trim().to_ascii_lowercase();
616 if !cl.contains("git") {
617 return false;
618 }
619 let exact_data_subs = [
620 "remote",
621 "rev-parse",
622 "rev-list",
623 "ls-files",
624 "ls-tree",
625 "ls-remote",
626 "shortlog",
627 "for-each-ref",
628 "cat-file",
629 "name-rev",
630 "describe",
631 "merge-base",
632 ];
633
634 let mut tokens = cl.split_whitespace();
635 while let Some(tok) = tokens.next() {
636 let base = tok.rsplit('/').next().unwrap_or(tok);
637 if base != "git" {
638 continue;
639 }
640 let mut skip_next = false;
641 for arg in tokens.by_ref() {
642 if skip_next {
643 skip_next = false;
644 continue;
645 }
646 if arg == "-c" || arg == "-C" || arg == "--git-dir" || arg == "--work-tree" {
647 skip_next = true;
648 continue;
649 }
650 if arg.starts_with('-') {
651 continue;
652 }
653 return exact_data_subs.contains(&arg);
654 }
655 return false;
656 }
657 false
658}
659
/// Reports whether `command` (trimmed, ASCII-lower-cased) is a dry run:
/// `make` with ` -n` or ` --dry-run`, or a command starting with `ansible`
/// that contains `--check` or `--diff`.
fn is_task_dry_run(command: &str) -> bool {
    let cl = command.trim().to_ascii_lowercase();

    let make_dry_run = cl.starts_with("make ") && (cl.contains(" -n") || cl.contains(" --dry-run"));
    let ansible_check = cl.starts_with("ansible") && (cl.contains("--check") || cl.contains("--diff"));

    make_dry_run || ansible_check
}
670
671fn is_env_dump(command: &str) -> bool {
672 let first = first_binary(command);
673 matches!(first, "env" | "printenv" | "set" | "export" | "locale")
674}
675
/// Returns the basename of the first whitespace-delimited word of `command`:
/// everything after the word's last `/`, or the whole word when it has none.
/// Returns `""` when `command` has no words.
fn first_binary(command: &str) -> &str {
    let word = match command.split_whitespace().next() {
        Some(word) => word,
        None => return "",
    };
    match word.rfind('/') {
        Some(slash) => &word[slash + 1..],
        None => word,
    }
}
681
/// Reports whether the first word of `command`, reduced to its basename, is
/// `diff`, `colordiff`, `icdiff` or `delta` (ignoring ASCII case).
fn is_standalone_diff_command(command: &str) -> bool {
    const DIFF_TOOLS: &[&str] = &["diff", "colordiff", "icdiff", "delta"];

    let word = command.split_whitespace().next().unwrap_or("");
    let binary = match word.rfind('/') {
        Some(slash) => &word[slash + 1..],
        None => word,
    };
    DIFF_TOOLS
        .iter()
        .any(|tool| binary.eq_ignore_ascii_case(tool))
}
691
692fn is_structural_git_command(command: &str) -> bool {
694 let mut tokens = command.split_whitespace();
695 while let Some(tok) = tokens.next() {
696 let base = tok.rsplit('/').next().unwrap_or(tok);
697 if !base.eq_ignore_ascii_case("git") {
698 continue;
699 }
700 let mut skip_next = false;
701 let remaining: Vec<&str> = tokens.collect();
702 for arg in &remaining {
703 if skip_next {
704 skip_next = false;
705 continue;
706 }
707 if *arg == "-C" || *arg == "-c" || *arg == "--git-dir" || *arg == "--work-tree" {
708 skip_next = true;
709 continue;
710 }
711 if arg.starts_with('-') {
712 continue;
713 }
714 let sub = arg.to_ascii_lowercase();
715 return match sub.as_str() {
716 "diff" | "show" | "blame" => true,
717 "log" => has_patch_flag(&remaining) || has_stat_flag(&remaining),
718 "stash" => remaining.iter().any(|a| a.eq_ignore_ascii_case("show")),
719 _ => false,
720 };
721 }
722 return false;
723 }
724 false
725}
726
/// Reports whether any argument is `--patch` or starts with `-p`
/// (which includes the bare `-p`).
fn has_patch_flag(args: &[&str]) -> bool {
    for arg in args {
        if arg.starts_with("-p") || *arg == "--patch" {
            return true;
        }
    }
    false
}
732
/// Reports whether any argument is `--stat` or starts with `--stat=`.
fn has_stat_flag(args: &[&str]) -> bool {
    for arg in args {
        if *arg == "--stat" || arg.starts_with("--stat=") {
            return true;
        }
    }
    false
}