Skip to main content

lean_ctx/shell/compress/
classification.rs

1use super::passthrough::{BUILTIN_PASSTHROUGH, DEV_SCRIPT_KEYWORDS, SCRIPT_RUNNER_PREFIXES};
2
3fn is_dev_script_runner(cmd: &str) -> bool {
4    for prefix in SCRIPT_RUNNER_PREFIXES {
5        if let Some(rest) = cmd.strip_prefix(prefix) {
6            let script_name = rest.split_whitespace().next().unwrap_or("");
7            for kw in DEV_SCRIPT_KEYWORDS {
8                if script_name.contains(kw) {
9                    return true;
10                }
11            }
12        }
13    }
14    false
15}
16
17pub(in crate::shell) fn is_excluded_command(command: &str, excluded: &[String]) -> bool {
18    let cmd = command.trim().to_lowercase();
19    for pattern in BUILTIN_PASSTHROUGH {
20        if pattern.starts_with("--") {
21            if cmd.contains(pattern) {
22                return true;
23            }
24        } else if pattern.ends_with(' ') || pattern.ends_with('\t') {
25            if cmd == pattern.trim() || cmd.starts_with(pattern) {
26                return true;
27            }
28        } else if cmd == *pattern
29            || cmd.starts_with(&format!("{pattern} "))
30            || cmd.starts_with(&format!("{pattern}\t"))
31            || cmd.contains(&format!(" {pattern} "))
32            || cmd.contains(&format!(" {pattern}\t"))
33            || cmd.contains(&format!("|{pattern} "))
34            || cmd.contains(&format!("|{pattern}\t"))
35            || cmd.ends_with(&format!(" {pattern}"))
36            || cmd.ends_with(&format!("|{pattern}"))
37        {
38            return true;
39        }
40    }
41
42    if is_dev_script_runner(&cmd) {
43        return true;
44    }
45
46    if excluded.is_empty() {
47        return false;
48    }
49    excluded.iter().any(|excl| {
50        let excl_lower = excl.trim().to_lowercase();
51        cmd == excl_lower || cmd.starts_with(&format!("{excl_lower} "))
52    })
53}
54
55pub(super) fn is_search_output(command: &str) -> bool {
56    let c = command.trim_start();
57    c.starts_with("grep ")
58        || c.starts_with("rg ")
59        || c.starts_with("find ")
60        || c.starts_with("fd ")
61        || c.starts_with("ag ")
62        || c.starts_with("ack ")
63}
64
65/// Returns true for commands whose output structure is critical for developer
66/// readability. Pattern compression (light cleanup like removing `index` lines
67/// or limiting context) still applies, but the terse pipeline and generic
68/// compressors are skipped so diff hunks, blame annotations, etc. remain
69/// fully readable.
70pub fn has_structural_output(command: &str) -> bool {
71    if is_verbatim_output(command) {
72        return true;
73    }
74    if is_standalone_diff_command(command) {
75        return true;
76    }
77    is_structural_git_command(command)
78}
79
80/// Returns true for commands where the output IS the purpose of the command.
81/// These must never have their content transformed — only size-limited if huge.
82/// Checks both the full command AND the last pipe segment for comprehensive coverage.
83pub fn is_verbatim_output(command: &str) -> bool {
84    is_verbatim_single(command) || is_verbatim_pipe_tail(command)
85}
86
87fn is_verbatim_single(command: &str) -> bool {
88    is_http_client(command)
89        || is_file_viewer(command)
90        || is_data_format_tool(command)
91        || is_binary_viewer(command)
92        || is_infra_inspection(command)
93        || is_crypto_command(command)
94        || is_database_query(command)
95        || is_dns_network_inspection(command)
96        || is_language_one_liner(command)
97        || is_container_listing(command)
98        || is_file_listing(command)
99        || is_system_query(command)
100        || is_cloud_cli_query(command)
101        || is_cli_api_data_command(command)
102        || is_package_manager_info(command)
103        || is_version_or_help(command)
104        || is_config_viewer(command)
105        || is_log_viewer(command)
106        || is_archive_listing(command)
107        || is_clipboard_tool(command)
108        || is_git_data_command(command)
109        || is_task_dry_run(command)
110        || is_env_dump(command)
111}
112
113/// CLI tools that fetch or output raw API/structured data.
114/// These MUST never be compressed -- compression destroys the payload.
115fn is_cli_api_data_command(command: &str) -> bool {
116    let cl = command.trim().to_ascii_lowercase();
117
118    // gh (GitHub CLI) -- api, run view --log, search, release view, gist view
119    if cl.starts_with("gh ")
120        && (cl.starts_with("gh api ")
121            || cl.starts_with("gh api\t")
122            || cl.contains(" --json")
123            || cl.contains(" --jq ")
124            || cl.contains(" --template ")
125            || (cl.contains("run view") && (cl.contains("--log") || cl.contains("log-failed")))
126            || cl.starts_with("gh search ")
127            || cl.starts_with("gh release view")
128            || cl.starts_with("gh gist view")
129            || cl.starts_with("gh gist list"))
130    {
131        return true;
132    }
133
134    // GitLab CLI (glab)
135    if cl.starts_with("glab ") && cl.starts_with("glab api ") {
136        return true;
137    }
138
139    // Jira CLI
140    if cl.starts_with("jira ") && (cl.contains(" view") || cl.contains(" list")) {
141        return true;
142    }
143
144    // Linear CLI
145    if cl.starts_with("linear ") {
146        return true;
147    }
148
149    // Stripe, Twilio, Vercel, Netlify, Fly, Railway, Supabase CLIs
150    let first = first_binary(command);
151    if matches!(
152        first,
153        "stripe" | "twilio" | "vercel" | "netlify" | "flyctl" | "fly" | "railway" | "supabase"
154    ) && (cl.contains(" list")
155        || cl.contains(" get")
156        || cl.contains(" show")
157        || cl.contains(" status")
158        || cl.contains(" info")
159        || cl.contains(" logs")
160        || cl.contains(" inspect")
161        || cl.contains(" export")
162        || cl.contains(" describe"))
163    {
164        return true;
165    }
166
167    // Cloudflare (wrangler)
168    if cl.starts_with("wrangler ")
169        && !cl.starts_with("wrangler dev")
170        && (cl.contains(" tail") || cl.contains(" secret list") || cl.contains(" kv "))
171    {
172        return true;
173    }
174
175    // Heroku
176    if cl.starts_with("heroku ")
177        && (cl.contains(" config")
178            || cl.contains(" logs")
179            || cl.contains(" ps")
180            || cl.contains(" info"))
181    {
182        return true;
183    }
184
185    false
186}
187
188/// For piped commands like `kubectl get pods -o json | jq '.items[]'`,
189/// check if the LAST command in the pipe is a verbatim tool.
190fn is_verbatim_pipe_tail(command: &str) -> bool {
191    if !command.contains('|') {
192        return false;
193    }
194    let last_segment = command.rsplit('|').next().unwrap_or("").trim();
195    if last_segment.is_empty() {
196        return false;
197    }
198    is_verbatim_single(last_segment)
199}
200
201fn is_http_client(command: &str) -> bool {
202    let first = first_binary(command);
203    matches!(
204        first,
205        "curl" | "wget" | "http" | "https" | "xh" | "curlie" | "grpcurl" | "grpc_cli"
206    )
207}
208
209fn is_file_viewer(command: &str) -> bool {
210    let first = first_binary(command);
211    match first {
212        "cat" | "bat" | "batcat" | "pygmentize" | "highlight" => true,
213        "head" | "tail" => !command.contains("-f") && !command.contains("--follow"),
214        _ => false,
215    }
216}
217
218fn is_data_format_tool(command: &str) -> bool {
219    let first = first_binary(command);
220    matches!(
221        first,
222        "jq" | "yq"
223            | "xq"
224            | "fx"
225            | "gron"
226            | "mlr"
227            | "miller"
228            | "dasel"
229            | "csvlook"
230            | "csvcut"
231            | "csvgrep"
232            | "csvjson"
233            | "in2csv"
234            | "sql2csv"
235    )
236}
237
238fn is_binary_viewer(command: &str) -> bool {
239    let first = first_binary(command);
240    matches!(first, "xxd" | "hexdump" | "od" | "strings" | "file")
241}
242
/// Reports whether the command inspects infrastructure state.
///
/// Matching is done on the trimmed, ASCII-lowercased command. Terraform /
/// OpenTofu / Pulumi output commands, `docker|podman inspect`,
/// `kubectl|k describe` and `helm get|template` match by prefix alone.
/// `kubectl|k get` matches only together with a YAML or JSON output flag.
fn is_infra_inspection(command: &str) -> bool {
    const ALWAYS_VERBATIM: &[&str] = &[
        "terraform output",
        "terraform show",
        "terraform state show",
        "terraform state list",
        "terraform state pull",
        "tofu output",
        "tofu show",
        "tofu state show",
        "tofu state list",
        "tofu state pull",
        "pulumi stack output",
        "pulumi stack export",
        "docker inspect",
        "podman inspect",
        "kubectl describe",
        "k describe",
        "helm get",
        "helm template",
    ];
    const KUBECTL_GET: &[&str] = &["kubectl get", "k get"];
    const STRUCTURED_OUTPUT_FLAGS: &[&str] = &[
        "-o yaml",
        "-o json",
        "-oyaml",
        "-ojson",
        "--output yaml",
        "--output json",
        "--output=yaml",
        "--output=json",
    ];

    let cl = command.trim().to_ascii_lowercase();
    let begins_any = |prefixes: &[&str]| prefixes.iter().any(|p| cl.starts_with(*p));

    if begins_any(ALWAYS_VERBATIM) {
        return true;
    }
    begins_any(KUBECTL_GET) && STRUCTURED_OUTPUT_FLAGS.iter().any(|flag| cl.contains(*flag))
}
283
284fn is_crypto_command(command: &str) -> bool {
285    let first = first_binary(command);
286    if first == "openssl" {
287        return true;
288    }
289    matches!(first, "gpg" | "age" | "ssh-keygen" | "certutil")
290}
291
/// Reports whether the command runs an inline query through a database CLI.
///
/// Recognized forms (matched on the ASCII-lowercased command):
/// - `psql` with ` -c ` or `--command`
/// - `mysql` / `mariadb` with ` -e ` or `--execute`
/// - `sqlite3` with a double-quoted argument
/// - `mongosh` with `--eval`
///
/// Leading whitespace is stripped before matching so that an indented
/// command is classified the same way as by the sibling classifiers, which
/// trim their input. Trailing whitespace is deliberately kept because the
/// ` -c ` / ` -e ` checks rely on the space after the flag.
fn is_database_query(command: &str) -> bool {
    let cl = command.trim_start().to_ascii_lowercase();

    let psql = cl.starts_with("psql ") && (cl.contains(" -c ") || cl.contains("--command"));
    let mysql_like = (cl.starts_with("mysql ") || cl.starts_with("mariadb "))
        && (cl.contains(" -e ") || cl.contains("--execute"));
    let sqlite = cl.starts_with("sqlite3 ") && cl.contains('"');
    let mongo = cl.starts_with("mongosh ") && cl.contains("--eval");

    psql || mysql_like || sqlite || mongo
}
311
312fn is_dns_network_inspection(command: &str) -> bool {
313    let first = first_binary(command);
314    matches!(
315        first,
316        "dig" | "nslookup" | "host" | "whois" | "drill" | "resolvectl"
317    )
318}
319
/// Reports whether the command runs an inline script through an interpreter.
///
/// Recognized forms (matched on the ASCII-lowercased command):
/// - `python` / `python3` with ` -c ` (or `-c` directly followed by a quote)
/// - `node` with ` -e ` or ` --eval`
/// - `ruby` / `perl` with ` -e `
/// - `php` with ` -r `
///
/// Leading whitespace is stripped before matching so that an indented
/// command is classified the same way as by the sibling classifiers, which
/// trim their input. Trailing whitespace is kept because the flag checks rely
/// on the space after the flag. Each interpreter is evaluated as its own
/// named condition so the result does not depend on `&&`/`||` precedence.
fn is_language_one_liner(command: &str) -> bool {
    let cl = command.trim_start().to_ascii_lowercase();

    let python = (cl.starts_with("python ") || cl.starts_with("python3 "))
        && (cl.contains(" -c ") || cl.contains(" -c\"") || cl.contains(" -c'"));
    let node = cl.starts_with("node ") && (cl.contains(" -e ") || cl.contains(" --eval"));
    let ruby = cl.starts_with("ruby ") && cl.contains(" -e ");
    let perl = cl.starts_with("perl ") && cl.contains(" -e ");
    let php = cl.starts_with("php ") && cl.contains(" -r ");

    python || node || ruby || perl || php
}
329
/// Reports whether the command lists containers, images, Kubernetes
/// resources or Helm releases. Matching is by prefix on the trimmed,
/// ASCII-lowercased command.
fn is_container_listing(command: &str) -> bool {
    const LISTING_PREFIXES: &[&str] = &[
        "docker ps",
        "docker images",
        "podman ps",
        "podman images",
        "kubectl get",
        "k get",
        "helm list",
        "helm ls",
        "docker compose ps",
        "docker-compose ps",
    ];

    let cl = command.trim().to_ascii_lowercase();
    LISTING_PREFIXES
        .iter()
        .any(|prefix| cl.starts_with(*prefix))
}
349
350fn is_file_listing(command: &str) -> bool {
351    let first = first_binary(command);
352    matches!(
353        first,
354        "find" | "fd" | "fdfind" | "ls" | "exa" | "eza" | "lsd"
355    )
356}
357
358fn is_system_query(command: &str) -> bool {
359    let first = first_binary(command);
360    matches!(
361        first,
362        "stat"
363            | "wc"
364            | "du"
365            | "df"
366            | "free"
367            | "uname"
368            | "id"
369            | "whoami"
370            | "hostname"
371            | "uptime"
372            | "lscpu"
373            | "lsblk"
374            | "ip"
375            | "ifconfig"
376            | "route"
377            | "ss"
378            | "netstat"
379            | "base64"
380            | "sha256sum"
381            | "sha1sum"
382            | "md5sum"
383            | "cksum"
384            | "readlink"
385            | "realpath"
386            | "which"
387            | "type"
388            | "command"
389    )
390}
391
/// Reports whether the command is a read-style query against a cloud CLI.
///
/// The trimmed, ASCII-lowercased command must start with `aws ` (but not
/// `aws configure`), `gcloud ` (but not `gcloud auth`, and without
/// ` deploy` anywhere), or `az ` (but not `az login`), and must contain one
/// of the query verbs preceded by a space.
fn is_cloud_cli_query(command: &str) -> bool {
    const QUERY_VERBS: &[&str] = &[
        "describe",
        "get",
        "list",
        "show",
        "export",
        "inspect",
        "info",
        "status",
        "whoami",
        "caller-identity",
        "account",
    ];

    let cl = command.trim().to_ascii_lowercase();

    // The three provider prefixes are mutually exclusive, so a chain works.
    let eligible_provider = if cl.starts_with("aws ") {
        !cl.starts_with("aws configure")
    } else if cl.starts_with("gcloud ") {
        !cl.starts_with("gcloud auth") && !cl.contains(" deploy")
    } else if cl.starts_with("az ") {
        !cl.starts_with("az login")
    } else {
        false
    };

    eligible_provider
        && QUERY_VERBS
            .iter()
            .any(|verb| cl.contains(&format!(" {verb}")))
}
421
/// Reports whether the command is a read-only package-manager query
/// (listing, info, dependency tree, outdated/audit reports).
///
/// Matching is done on the trimmed, ASCII-lowercased command. Once a package
/// manager prefix matches, only that manager's subcommands are considered.
fn is_package_manager_info(command: &str) -> bool {
    let cl = command.trim().to_ascii_lowercase();
    let begins = |prefix: &str| cl.starts_with(prefix);
    let begins_any = |prefixes: &[&str]| prefixes.iter().any(|p| cl.starts_with(*p));
    let has_any = |needles: &[&str]| needles.iter().any(|n| cl.contains(*n));

    if begins("npm ") {
        begins_any(&[
            "npm list",
            "npm ls",
            "npm info",
            "npm view",
            "npm show",
            "npm outdated",
            "npm audit",
        ])
    } else if begins("yarn ") {
        begins_any(&[
            "yarn list",
            "yarn info",
            "yarn why",
            "yarn outdated",
            "yarn audit",
        ])
    } else if begins("pnpm ") {
        begins_any(&[
            "pnpm list",
            "pnpm ls",
            "pnpm why",
            "pnpm outdated",
            "pnpm audit",
        ])
    } else if begins("pip ") || begins("pip3 ") {
        // pip subcommands are matched anywhere after the binary name.
        has_any(&[" list", " show", " freeze"])
    } else if begins("gem ") {
        begins_any(&["gem list", "gem info", "gem specification"])
    } else if begins("cargo ") {
        begins_any(&["cargo metadata", "cargo tree", "cargo pkgid"])
    } else if begins("go ") {
        begins_any(&["go list", "go version"])
    } else if begins("composer ") {
        begins_any(&["composer show", "composer info", "composer outdated"])
    } else if begins("brew ") {
        begins_any(&["brew list", "brew info", "brew deps", "brew outdated"])
    } else if begins("apt ") || begins("dpkg ") {
        begins_any(&["apt list", "apt show", "dpkg -l", "dpkg --list", "dpkg -s"])
    } else {
        false
    }
}
484
/// Reports whether the command is a short version / help invocation.
///
/// The command must consist of two or three whitespace-separated words, and
/// any one of them (including the first) must be `--version`, `-V`,
/// `--help`, `-h` (all case-sensitive), or `version` / `help` in any ASCII
/// case.
fn is_version_or_help(command: &str) -> bool {
    // Counting at most four words is enough to tell "too long" apart.
    let word_count = command.split_whitespace().take(4).count();
    if !(2..=3).contains(&word_count) {
        return false;
    }

    command.split_whitespace().any(|word| match word {
        "--version" | "-V" | "--help" | "-h" => true,
        other => other.eq_ignore_ascii_case("version") || other.eq_ignore_ascii_case("help"),
    })
}
499
/// Reports whether the command displays configuration without changing it.
///
/// Matching is done on the trimmed, ASCII-lowercased command:
/// - `git config` unless `--set` or `--unset` appears anywhere
/// - `yarn config` unless ` set` appears anywhere
/// - `kubectl config` with `view`, `get-contexts` or `current-context`
/// - a fixed set of read-only prefixes for npm, pip, rustup and docker
fn is_config_viewer(command: &str) -> bool {
    const READ_ONLY_PREFIXES: &[&str] = &[
        "npm config list",
        "npm config get",
        "pip config list",
        "pip3 config list",
        "rustup show",
        "rustup target list",
        "docker context ls",
        "docker context list",
    ];
    const KUBECTL_READ_SUBCOMMANDS: &[&str] = &["view", "get-contexts", "current-context"];

    let cl = command.trim().to_ascii_lowercase();

    let git_read =
        cl.starts_with("git config") && !(cl.contains("--set") || cl.contains("--unset"));
    let yarn_read = cl.starts_with("yarn config") && !cl.contains(" set");
    let kubectl_read = cl.starts_with("kubectl config")
        && KUBECTL_READ_SUBCOMMANDS
            .iter()
            .any(|sub| cl.contains(*sub));
    let fixed_prefix = READ_ONLY_PREFIXES
        .iter()
        .any(|prefix| cl.starts_with(*prefix));

    git_read || yarn_read || kubectl_read || fixed_prefix
}
527
/// Reports whether the command prints logs once and exits (as opposed to
/// streaming them).
///
/// Recognized commands, matched by prefix on the trimmed, ASCII-lowercased
/// command: `journalctl`, `dmesg`, `docker logs`, `kubectl logs` and
/// `docker compose logs`. A command is rejected when it carries a follow
/// flag: `--follow…`, or the short flag `-f` (`-w` for `dmesg`).
///
/// Follow detection looks at whole whitespace-separated tokens, counting
/// only tokens that START with the flag. A plain substring search would
/// also hit container names, unit names and unrelated long options, e.g.
/// `docker logs my-frontend` or `journalctl --full`, and wrongly treat them
/// as streaming commands.
fn is_log_viewer(command: &str) -> bool {
    const FOLLOW_WITH_F: &[&str] = &[
        "journalctl",
        "docker logs",
        "kubectl logs",
        "docker compose logs",
    ];

    let cl = command.trim().to_ascii_lowercase();

    // Pick the short follow flag that belongs to the matched tool.
    let short_follow = if cl.starts_with("dmesg") {
        "-w"
    } else if FOLLOW_WITH_F.iter().any(|prefix| cl.starts_with(*prefix)) {
        "-f"
    } else {
        return false;
    };

    // `--follow` as a prefix also covers `--follow=true` and `--follow-new`.
    let streams = cl
        .split_whitespace()
        .any(|tok| tok.starts_with(short_follow) || tok.starts_with("--follow"));

    !streams
}
547
548fn is_archive_listing(command: &str) -> bool {
549    let cl = command.trim().to_ascii_lowercase();
550    if cl.starts_with("tar ") && (cl.contains(" -tf") || cl.contains(" -t") || cl.contains(" tf")) {
551        return true;
552    }
553    if cl.starts_with("unzip -l") || cl.starts_with("unzip -Z") {
554        return true;
555    }
556    let first = first_binary(command);
557    matches!(first, "zipinfo" | "lsar" | "7z" if cl.contains(" l ") || cl.contains(" l\t"))
558        || first == "zipinfo"
559        || first == "lsar"
560}
561
562fn is_clipboard_tool(command: &str) -> bool {
563    let first = first_binary(command);
564    if matches!(first, "pbpaste" | "wl-paste") {
565        return true;
566    }
567    let cl = command.trim().to_ascii_lowercase();
568    if cl.starts_with("xclip") && cl.contains("-o") {
569        return true;
570    }
571    if cl.starts_with("xsel") && (cl.contains("-o") || cl.contains("--output")) {
572        return true;
573    }
574    false
575}
576
577pub(super) fn is_git_data_command(command: &str) -> bool {
578    let cl = command.trim().to_ascii_lowercase();
579    if !cl.contains("git") {
580        return false;
581    }
582    let exact_data_subs = [
583        "remote",
584        "rev-parse",
585        "rev-list",
586        "ls-files",
587        "ls-tree",
588        "ls-remote",
589        "shortlog",
590        "for-each-ref",
591        "cat-file",
592        "name-rev",
593        "describe",
594        "merge-base",
595    ];
596
597    let mut tokens = cl.split_whitespace();
598    while let Some(tok) = tokens.next() {
599        let base = tok.rsplit('/').next().unwrap_or(tok);
600        if base != "git" {
601            continue;
602        }
603        let mut skip_next = false;
604        for arg in tokens.by_ref() {
605            if skip_next {
606                skip_next = false;
607                continue;
608            }
609            if arg == "-c" || arg == "-C" || arg == "--git-dir" || arg == "--work-tree" {
610                skip_next = true;
611                continue;
612            }
613            if arg.starts_with('-') {
614                continue;
615            }
616            return exact_data_subs.contains(&arg);
617        }
618        return false;
619    }
620    false
621}
622
/// Reports whether the command is a dry run of a task runner.
///
/// Matching is done on the trimmed, ASCII-lowercased command: `make ` with
/// ` -n` or ` --dry-run`, or anything starting with `ansible` that contains
/// `--check` or `--diff`.
fn is_task_dry_run(command: &str) -> bool {
    let cl = command.trim().to_ascii_lowercase();
    let has_any = |needles: &[&str]| needles.iter().any(|n| cl.contains(*n));

    let make_dry_run = cl.starts_with("make ") && has_any(&[" -n", " --dry-run"]);
    let ansible_dry_run = cl.starts_with("ansible") && has_any(&["--check", "--diff"]);

    make_dry_run || ansible_dry_run
}
633
634fn is_env_dump(command: &str) -> bool {
635    let first = first_binary(command);
636    matches!(first, "env" | "printenv" | "set" | "export" | "locale")
637}
638
/// Returns the basename of the command's first whitespace-separated token:
/// everything after the token's last `/`, or the whole token when it has no
/// `/`. Yields an empty string for an empty or all-whitespace command.
fn first_binary(command: &str) -> &str {
    command.split_whitespace().next().map_or("", |token| {
        token
            .rsplit_once('/')
            .map_or(token, |(_, basename)| basename)
    })
}
644
/// Non-git diff tools: `diff`, `colordiff`, `icdiff`, `delta`.
///
/// Compares the basename of the command's first token against those names,
/// ignoring ASCII case.
fn is_standalone_diff_command(command: &str) -> bool {
    const DIFF_TOOLS: &[&str] = &["diff", "colordiff", "icdiff", "delta"];

    let Some(token) = command.split_whitespace().next() else {
        return false;
    };
    let basename = token.rsplit('/').next().unwrap_or(token);

    DIFF_TOOLS
        .iter()
        .any(|tool| basename.eq_ignore_ascii_case(tool))
}
654
655/// Git subcommands that produce structural output the developer must read verbatim.
656fn is_structural_git_command(command: &str) -> bool {
657    let mut tokens = command.split_whitespace();
658    while let Some(tok) = tokens.next() {
659        let base = tok.rsplit('/').next().unwrap_or(tok);
660        if !base.eq_ignore_ascii_case("git") {
661            continue;
662        }
663        let mut skip_next = false;
664        let remaining: Vec<&str> = tokens.collect();
665        for arg in &remaining {
666            if skip_next {
667                skip_next = false;
668                continue;
669            }
670            if *arg == "-C" || *arg == "-c" || *arg == "--git-dir" || *arg == "--work-tree" {
671                skip_next = true;
672                continue;
673            }
674            if arg.starts_with('-') {
675                continue;
676            }
677            let sub = arg.to_ascii_lowercase();
678            return match sub.as_str() {
679                "diff" | "show" | "blame" => true,
680                "log" => has_patch_flag(&remaining) || has_stat_flag(&remaining),
681                "stash" => remaining.iter().any(|a| a.eq_ignore_ascii_case("show")),
682                _ => false,
683            };
684        }
685        return false;
686    }
687    false
688}
689
/// Returns true if any argument is `--patch` or begins with `-p`
/// (which covers a bare `-p` as well as forms with attached characters).
fn has_patch_flag(args: &[&str]) -> bool {
    for arg in args {
        if arg.starts_with("-p") || *arg == "--patch" {
            return true;
        }
    }
    false
}
695
/// Returns true if any argument is exactly `--stat` or has the form
/// `--stat=<value>`.
fn has_stat_flag(args: &[&str]) -> bool {
    args.iter().any(|arg| {
        matches!(
            arg.strip_prefix("--stat"),
            Some(rest) if rest.is_empty() || rest.starts_with('=')
        )
    })
}