Skip to main content

lean_ctx/shell/compress/
classification.rs

1use super::passthrough::{BUILTIN_PASSTHROUGH, DEV_SCRIPT_KEYWORDS, SCRIPT_RUNNER_PREFIXES};
2
3fn is_dev_script_runner(cmd: &str) -> bool {
4    for prefix in SCRIPT_RUNNER_PREFIXES {
5        if let Some(rest) = cmd.strip_prefix(prefix) {
6            let script_name = rest.split_whitespace().next().unwrap_or("");
7            for kw in DEV_SCRIPT_KEYWORDS {
8                if script_name.contains(kw) {
9                    return true;
10                }
11            }
12        }
13    }
14    false
15}
16
17pub(in crate::shell) fn is_excluded_command(command: &str, excluded: &[String]) -> bool {
18    let cmd = command.trim().to_lowercase();
19    for pattern in BUILTIN_PASSTHROUGH {
20        if pattern.starts_with("--") {
21            if cmd.contains(pattern) {
22                return true;
23            }
24        } else if pattern.ends_with(' ') || pattern.ends_with('\t') {
25            if cmd == pattern.trim() || cmd.starts_with(pattern) {
26                return true;
27            }
28        } else if cmd == *pattern
29            || cmd.starts_with(&format!("{pattern} "))
30            || cmd.starts_with(&format!("{pattern}\t"))
31            || cmd.contains(&format!(" {pattern} "))
32            || cmd.contains(&format!(" {pattern}\t"))
33            || cmd.contains(&format!("|{pattern} "))
34            || cmd.contains(&format!("|{pattern}\t"))
35            || cmd.ends_with(&format!(" {pattern}"))
36            || cmd.ends_with(&format!("|{pattern}"))
37        {
38            return true;
39        }
40    }
41
42    if is_dev_script_runner(&cmd) {
43        return true;
44    }
45
46    if excluded.is_empty() {
47        return false;
48    }
49    excluded.iter().any(|excl| {
50        let excl_lower = excl.trim().to_lowercase();
51        cmd == excl_lower || cmd.starts_with(&format!("{excl_lower} "))
52    })
53}
54
55pub(super) fn is_search_output(command: &str) -> bool {
56    let c = command.trim_start();
57    c.starts_with("grep ")
58        || c.starts_with("rg ")
59        || c.starts_with("find ")
60        || c.starts_with("fd ")
61        || c.starts_with("ag ")
62        || c.starts_with("ack ")
63}
64
65/// Returns true for commands whose output structure is critical for developer
66/// readability. Pattern compression (light cleanup like removing `index` lines
67/// or limiting context) still applies, but the terse pipeline and generic
68/// compressors are skipped so diff hunks, blame annotations, etc. remain
69/// fully readable.
70pub fn has_structural_output(command: &str) -> bool {
71    if is_verbatim_output(command) {
72        return true;
73    }
74    if is_standalone_diff_command(command) {
75        return true;
76    }
77    is_structural_git_command(command)
78}
79
80/// Returns true for commands where the output IS the purpose of the command.
81/// These must never have their content transformed — only size-limited if huge.
82/// Checks both the full command AND the last pipe segment for comprehensive coverage.
83pub fn is_verbatim_output(command: &str) -> bool {
84    is_verbatim_single(command) || is_verbatim_pipe_tail(command)
85}
86
87fn is_verbatim_single(command: &str) -> bool {
88    is_http_client(command)
89        || is_file_viewer(command)
90        || is_data_format_tool(command)
91        || is_binary_viewer(command)
92        || is_infra_inspection(command)
93        || is_crypto_command(command)
94        || is_database_query(command)
95        || is_dns_network_inspection(command)
96        || is_language_one_liner(command)
97        || is_container_listing(command)
98        || is_file_listing(command)
99        || is_system_query(command)
100        || is_cloud_cli_query(command)
101        || is_cli_api_data_command(command)
102        || is_package_manager_info(command)
103        || is_version_or_help(command)
104        || is_config_viewer(command)
105        || is_log_viewer(command)
106        || is_archive_listing(command)
107        || is_clipboard_tool(command)
108        || is_git_data_command(command)
109        || is_git_write_command(command)
110        || is_task_dry_run(command)
111        || is_env_dump(command)
112}
113
114/// CLI tools that fetch or output raw API/structured data.
115/// These MUST never be compressed -- compression destroys the payload.
116fn is_cli_api_data_command(command: &str) -> bool {
117    let cl = command.trim().to_ascii_lowercase();
118
119    // gh (GitHub CLI) -- api, run view --log, search, release view, gist view
120    if cl.starts_with("gh ")
121        && (cl.starts_with("gh api ")
122            || cl.starts_with("gh api\t")
123            || cl.contains(" --json")
124            || cl.contains(" --jq ")
125            || cl.contains(" --template ")
126            || (cl.contains("run view") && (cl.contains("--log") || cl.contains("log-failed")))
127            || cl.starts_with("gh search ")
128            || cl.starts_with("gh release view")
129            || cl.starts_with("gh gist view")
130            || cl.starts_with("gh gist list"))
131    {
132        return true;
133    }
134
135    // GitLab CLI (glab)
136    if cl.starts_with("glab ") && cl.starts_with("glab api ") {
137        return true;
138    }
139
140    // Jira CLI
141    if cl.starts_with("jira ") && (cl.contains(" view") || cl.contains(" list")) {
142        return true;
143    }
144
145    // Linear CLI
146    if cl.starts_with("linear ") {
147        return true;
148    }
149
150    // Stripe, Twilio, Vercel, Netlify, Fly, Railway, Supabase CLIs
151    let first = first_binary(command);
152    if matches!(
153        first,
154        "stripe" | "twilio" | "vercel" | "netlify" | "flyctl" | "fly" | "railway" | "supabase"
155    ) && (cl.contains(" list")
156        || cl.contains(" get")
157        || cl.contains(" show")
158        || cl.contains(" status")
159        || cl.contains(" info")
160        || cl.contains(" logs")
161        || cl.contains(" inspect")
162        || cl.contains(" export")
163        || cl.contains(" describe"))
164    {
165        return true;
166    }
167
168    // Cloudflare (wrangler)
169    if cl.starts_with("wrangler ")
170        && !cl.starts_with("wrangler dev")
171        && (cl.contains(" tail") || cl.contains(" secret list") || cl.contains(" kv "))
172    {
173        return true;
174    }
175
176    // Heroku
177    if cl.starts_with("heroku ")
178        && (cl.contains(" config")
179            || cl.contains(" logs")
180            || cl.contains(" ps")
181            || cl.contains(" info"))
182    {
183        return true;
184    }
185
186    false
187}
188
189/// For piped commands like `kubectl get pods -o json | jq '.items[]'`,
190/// check if the LAST command in the pipe is a verbatim tool.
191fn is_verbatim_pipe_tail(command: &str) -> bool {
192    if !command.contains('|') {
193        return false;
194    }
195    let last_segment = command.rsplit('|').next().unwrap_or("").trim();
196    if last_segment.is_empty() {
197        return false;
198    }
199    is_verbatim_single(last_segment)
200}
201
202fn is_http_client(command: &str) -> bool {
203    let first = first_binary(command);
204    matches!(
205        first,
206        "curl" | "wget" | "http" | "https" | "xh" | "curlie" | "grpcurl" | "grpc_cli"
207    )
208}
209
/// Returns true when the command prints file contents: `cat`, `bat`,
/// `batcat`, `pygmentize`, `highlight`, or a non-following `head` / `tail`.
///
/// The binary is the basename of the first whitespace-delimited token. For
/// `head` / `tail`, follow mode is detected from the remaining tokens rather
/// than by substring search, so a file name such as `app-frontend.log` is not
/// mistaken for the `-f` flag. A token counts as a follow flag when it starts
/// with `--follow`, or is a short-option token (single leading dash)
/// containing `f` or `F`.
fn is_file_viewer(command: &str) -> bool {
    let mut tokens = command.split_whitespace();
    let first = tokens.next().unwrap_or("");
    let binary = first.rsplit('/').next().unwrap_or(first);

    match binary {
        "cat" | "bat" | "batcat" | "pygmentize" | "highlight" => true,
        "head" | "tail" => !tokens.any(|tok| {
            tok.starts_with("--follow")
                || (tok.starts_with('-')
                    && !tok.starts_with("--")
                    && tok.contains(|c: char| c == 'f' || c == 'F'))
        }),
        _ => false,
    }
}
218
219fn is_data_format_tool(command: &str) -> bool {
220    let first = first_binary(command);
221    matches!(
222        first,
223        "jq" | "yq"
224            | "xq"
225            | "fx"
226            | "gron"
227            | "mlr"
228            | "miller"
229            | "dasel"
230            | "csvlook"
231            | "csvcut"
232            | "csvgrep"
233            | "csvjson"
234            | "in2csv"
235            | "sql2csv"
236    )
237}
238
239fn is_binary_viewer(command: &str) -> bool {
240    let first = first_binary(command);
241    matches!(first, "xxd" | "hexdump" | "od" | "strings" | "file")
242}
243
/// Returns true for read-only infrastructure inspection commands.
///
/// Matches (on the trimmed, lowercased command) Terraform / OpenTofu / Pulumi
/// output and state queries, `docker|podman inspect`, `kubectl|k describe`,
/// `helm get|template`, and `kubectl|k get` when a YAML or JSON output flag
/// is present.
fn is_infra_inspection(command: &str) -> bool {
    const INSPECTION_PREFIXES: &[&str] = &[
        "terraform output",
        "terraform show",
        "terraform state show",
        "terraform state list",
        "terraform state pull",
        "tofu output",
        "tofu show",
        "tofu state show",
        "tofu state list",
        "tofu state pull",
        "pulumi stack output",
        "pulumi stack export",
        "docker inspect",
        "podman inspect",
        "kubectl describe",
        "k describe",
        "helm get",
        "helm template",
    ];
    const STRUCTURED_OUTPUT_FLAGS: &[&str] = &[
        "-o yaml",
        "-o json",
        "-oyaml",
        "-ojson",
        "--output yaml",
        "--output json",
        "--output=yaml",
        "--output=json",
    ];

    let cl = command.trim().to_ascii_lowercase();

    if INSPECTION_PREFIXES.iter().any(|&p| cl.starts_with(p)) {
        return true;
    }

    // Plain `kubectl get` is not verbatim; only its YAML/JSON forms are.
    let is_kubectl_get = cl.starts_with("kubectl get") || cl.starts_with("k get");
    is_kubectl_get && STRUCTURED_OUTPUT_FLAGS.iter().any(|&flag| cl.contains(flag))
}
284
285fn is_crypto_command(command: &str) -> bool {
286    let first = first_binary(command);
287    if first == "openssl" {
288        return true;
289    }
290    matches!(first, "gpg" | "age" | "ssh-keygen" | "certutil")
291}
292
/// Returns true when the command runs an inline query through a database CLI.
///
/// Recognised forms (matched on the lowercased command with leading
/// whitespace removed, consistent with the sibling classifiers):
/// - `psql` with ` -c ` or `--command`
/// - `mysql` / `mariadb` with ` -e ` or `--execute`
/// - `sqlite3` with a quoted argument (single or double quotes)
/// - `mongosh` with `--eval`
fn is_database_query(command: &str) -> bool {
    let cl = command.trim_start().to_ascii_lowercase();
    let has = |needle: &str| cl.contains(needle);

    if cl.starts_with("psql ") {
        has(" -c ") || has("--command")
    } else if cl.starts_with("mysql ") || cl.starts_with("mariadb ") {
        has(" -e ") || has("--execute")
    } else if cl.starts_with("sqlite3 ") {
        // Inline SQL is passed as a quoted shell argument; either quote style counts.
        cl.contains('"') || cl.contains('\'')
    } else if cl.starts_with("mongosh ") {
        has("--eval")
    } else {
        false
    }
}
312
313fn is_dns_network_inspection(command: &str) -> bool {
314    let first = first_binary(command);
315    matches!(
316        first,
317        "dig" | "nslookup" | "host" | "whois" | "drill" | "resolvectl"
318    )
319}
320
/// Returns true when the command evaluates inline code through an
/// interpreter flag.
///
/// Recognised forms (matched on the lowercased command with leading
/// whitespace removed, consistent with the sibling classifiers):
/// - `python` / `python3` with `-c`
/// - `node` with `-e` or `--eval`
/// - `ruby` / `perl` with `-e`
/// - `php` with `-r`
fn is_language_one_liner(command: &str) -> bool {
    let cl = command.trim_start().to_ascii_lowercase();
    let has = |flag: &str| cl.contains(flag);

    if cl.starts_with("python ") || cl.starts_with("python3 ") {
        // `-c` may be followed directly by the opening quote of the script.
        has(" -c ") || has(" -c\"") || has(" -c'")
    } else if cl.starts_with("node ") {
        has(" -e ") || has(" --eval")
    } else if cl.starts_with("ruby ") || cl.starts_with("perl ") {
        has(" -e ")
    } else if cl.starts_with("php ") {
        has(" -r ")
    } else {
        false
    }
}
330
/// Returns true for container / release listing commands: `docker` and
/// `podman` `ps` / `images`, `helm list` / `helm ls`, and Compose `ps`.
///
/// `kubectl get` is deliberately absent: it is handled by the kubectl pattern
/// compressor rather than treated as verbatim.
fn is_container_listing(command: &str) -> bool {
    const LISTING_PREFIXES: &[&str] = &[
        "docker ps",
        "docker images",
        "podman ps",
        "podman images",
        "helm list",
        "helm ls",
        "docker compose ps",
        "docker-compose ps",
    ];

    let cl = command.trim().to_ascii_lowercase();
    LISTING_PREFIXES.iter().any(|&prefix| cl.starts_with(prefix))
}
348
349fn is_file_listing(command: &str) -> bool {
350    let first = first_binary(command);
351    matches!(
352        first,
353        "find" | "fd" | "fdfind" | "ls" | "exa" | "eza" | "lsd"
354    )
355}
356
357fn is_system_query(command: &str) -> bool {
358    let first = first_binary(command);
359    matches!(
360        first,
361        "stat"
362            | "wc"
363            | "du"
364            | "df"
365            | "free"
366            | "uname"
367            | "id"
368            | "whoami"
369            | "hostname"
370            | "uptime"
371            | "lscpu"
372            | "lsblk"
373            | "ip"
374            | "ifconfig"
375            | "route"
376            | "ss"
377            | "netstat"
378            | "base64"
379            | "sha256sum"
380            | "sha1sum"
381            | "md5sum"
382            | "cksum"
383            | "readlink"
384            | "realpath"
385            | "which"
386            | "type"
387            | "command"
388    )
389}
390
/// Returns true for `aws`, `gcloud`, or `az` invocations that contain a
/// query-style verb (each verb is matched with a leading space).
///
/// `aws configure`, `gcloud auth`, any `gcloud` command containing ` deploy`,
/// and `az login` are never treated as queries.
fn is_cloud_cli_query(command: &str) -> bool {
    const QUERY_VERBS: [&str; 11] = [
        " describe",
        " get",
        " list",
        " show",
        " export",
        " inspect",
        " info",
        " status",
        " whoami",
        " caller-identity",
        " account",
    ];

    let cl = command.trim().to_ascii_lowercase();

    let eligible_provider = (cl.starts_with("aws ") && !cl.starts_with("aws configure"))
        || (cl.starts_with("gcloud ")
            && !cl.starts_with("gcloud auth")
            && !cl.contains(" deploy"))
        || (cl.starts_with("az ") && !cl.starts_with("az login"));

    eligible_provider && QUERY_VERBS.iter().any(|&verb| cl.contains(verb))
}
420
/// Returns true for read-only package-manager queries (list, info, outdated,
/// audit, dependency tree, and similar) across npm, yarn, pnpm, pip, gem,
/// cargo, go, composer, brew, apt, and dpkg.
///
/// Matching is prefix-based on the trimmed, lowercased command, except for
/// `pip` / `pip3`, where ` list`, ` show`, or ` freeze` may appear anywhere.
fn is_package_manager_info(command: &str) -> bool {
    const INFO_PREFIXES: &[&str] = &[
        // npm
        "npm list",
        "npm ls",
        "npm info",
        "npm view",
        "npm show",
        "npm outdated",
        "npm audit",
        // yarn
        "yarn list",
        "yarn info",
        "yarn why",
        "yarn outdated",
        "yarn audit",
        // pnpm
        "pnpm list",
        "pnpm ls",
        "pnpm why",
        "pnpm outdated",
        "pnpm audit",
        // gem
        "gem list",
        "gem info",
        "gem specification",
        // cargo
        "cargo metadata",
        "cargo tree",
        "cargo pkgid",
        // go
        "go list",
        "go version",
        // composer
        "composer show",
        "composer info",
        "composer outdated",
        // brew
        "brew list",
        "brew info",
        "brew deps",
        "brew outdated",
        // apt / dpkg
        "apt list",
        "apt show",
        "dpkg -l",
        "dpkg --list",
        "dpkg -s",
    ];
    const PIP_QUERY_VERBS: &[&str] = &[" list", " show", " freeze"];

    let cl = command.trim().to_ascii_lowercase();

    // pip accepts global options before the subcommand, so search the whole line.
    if cl.starts_with("pip ") || cl.starts_with("pip3 ") {
        return PIP_QUERY_VERBS.iter().any(|&verb| cl.contains(verb));
    }

    INFO_PREFIXES.iter().any(|&prefix| cl.starts_with(prefix))
}
483
/// Returns true for short version / help invocations.
///
/// The command must consist of two or three whitespace-delimited tokens, and
/// at least one token (in any position) must be `--version`, `-V`, `--help`,
/// `-h`, or the word `version` / `help` in any ASCII case.
fn is_version_or_help(command: &str) -> bool {
    let token_count = command.split_whitespace().count();
    if !(2..=3).contains(&token_count) {
        return false;
    }

    command.split_whitespace().any(|token| {
        matches!(token, "--version" | "-V" | "--help" | "-h")
            || token.eq_ignore_ascii_case("version")
            || token.eq_ignore_ascii_case("help")
    })
}
498
/// Returns true for commands that display configuration rather than change it.
///
/// Covers `git config` (without `--set` / `--unset`), `npm config list|get`,
/// `yarn config` (without ` set`), `pip|pip3 config list`, `rustup show` and
/// `rustup target list`, `docker context ls|list`, and `kubectl config` with
/// `view`, `get-contexts`, or `current-context`.
fn is_config_viewer(command: &str) -> bool {
    const READ_ONLY_PREFIXES: &[&str] = &[
        "npm config list",
        "npm config get",
        "pip config list",
        "pip3 config list",
        "rustup show",
        "rustup target list",
        "docker context ls",
        "docker context list",
    ];

    let cl = command.trim().to_ascii_lowercase();
    let begins = |prefix: &str| cl.starts_with(prefix);
    let has = |needle: &str| cl.contains(needle);

    (begins("git config") && !has("--set") && !has("--unset"))
        || (begins("yarn config") && !has(" set"))
        || (begins("kubectl config")
            && (has("view") || has("get-contexts") || has("current-context")))
        || READ_ONLY_PREFIXES.iter().any(|&prefix| begins(prefix))
}
526
/// Returns true for log-dumping commands that are not in streaming mode:
/// `journalctl`, `dmesg`, `docker logs`, `kubectl logs`, and
/// `docker compose logs`.
///
/// Streaming mode is detected from flag tokens of the lowercased command, not
/// by substring search, so names such as `my-frontend` or options such as
/// `--file` are not mistaken for `-f`. A token counts as a streaming flag
/// when it starts with `--follow`, or is a short-option token (single leading
/// dash) containing the tool's follow letter: `w` for `dmesg`, `f` for the
/// others.
fn is_log_viewer(command: &str) -> bool {
    const FOLLOWABLE_PREFIXES: [&str; 4] = [
        "journalctl",
        "docker logs",
        "kubectl logs",
        "docker compose logs",
    ];

    let cl = command.trim().to_ascii_lowercase();

    // Scans every token after the binary for a long or short follow flag.
    let streams = |short: char| {
        cl.split_whitespace().skip(1).any(|tok| {
            tok.starts_with("--follow")
                || (tok.starts_with('-') && !tok.starts_with("--") && tok.contains(short))
        })
    };

    if cl.starts_with("dmesg") {
        return !streams('w');
    }

    FOLLOWABLE_PREFIXES
        .iter()
        .any(|&prefix| cl.starts_with(prefix))
        && !streams('f')
}
546
/// Returns true for commands that list archive contents without extracting:
/// `tar` in list mode, `unzip -l`, `unzip -Z`, `zipinfo`, `lsar`, and `7z l`.
///
/// Most checks run on the lowercased command. The `unzip -Z` check looks at
/// the flag as typed, because the lowercased text can never contain an
/// uppercase `Z`.
fn is_archive_listing(command: &str) -> bool {
    let trimmed = command.trim();
    let cl = trimmed.to_ascii_lowercase();

    if cl.starts_with("tar ") && (cl.contains(" -t") || cl.contains(" tf")) {
        return true;
    }
    if cl.starts_with("unzip -l") {
        return true;
    }
    // ASCII lowercasing preserves byte offsets, so the prefix length found in
    // `cl` indexes the same position in `trimmed`.
    if cl.starts_with("unzip ")
        && trimmed
            .get("unzip ".len()..)
            .map_or(false, |rest| rest.starts_with("-Z"))
    {
        return true;
    }

    let first = trimmed.split_whitespace().next().unwrap_or("");
    let binary = first.rsplit('/').next().unwrap_or(first);
    match binary {
        "zipinfo" | "lsar" => true,
        // `7z` only lists when given the `l` subcommand.
        "7z" => cl.contains(" l ") || cl.contains(" l\t"),
        _ => false,
    }
}
560
561fn is_clipboard_tool(command: &str) -> bool {
562    let first = first_binary(command);
563    if matches!(first, "pbpaste" | "wl-paste") {
564        return true;
565    }
566    let cl = command.trim().to_ascii_lowercase();
567    if cl.starts_with("xclip") && cl.contains("-o") {
568        return true;
569    }
570    if cl.starts_with("xsel") && (cl.contains("-o") || cl.contains("--output")) {
571        return true;
572    }
573    false
574}
575
/// Returns true when `command` is a `git` invocation whose subcommand writes
/// to the repository or its remotes: `commit`, `push`, `pull`, `merge`,
/// `rebase`, `cherry-pick`, `tag`, or `reset`.
///
/// These commands print little, and agents need to see that output verbatim:
/// compression risks abbreviating subcommand names (e.g. "commit" → "cmt"),
/// which an agent may then mistake for a real command.
///
/// The subcommand is the first argument that is neither an option nor the
/// value of a global option taking a separate argument (`-c`, `-C`,
/// `--git-dir`, `--work-tree`).
fn is_git_write_command(command: &str) -> bool {
    const WRITE_SUBCOMMANDS: &[&str] = &[
        "commit",
        "push",
        "pull",
        "merge",
        "rebase",
        "cherry-pick",
        "tag",
        "reset",
    ];

    let cl = command.trim().to_ascii_lowercase();
    if !cl.starts_with("git ") {
        return false;
    }

    let mut args = cl.split_whitespace().skip(1);
    while let Some(arg) = args.next() {
        match arg {
            // These global options consume the following token as their value.
            "-c" | "-C" | "--git-dir" | "--work-tree" => {
                args.next();
            }
            option if option.starts_with('-') => {}
            subcommand => return WRITE_SUBCOMMANDS.contains(&subcommand),
        }
    }
    false
}
611
612pub(super) fn is_git_data_command(command: &str) -> bool {
613    let cl = command.trim().to_ascii_lowercase();
614    if !cl.contains("git") {
615        return false;
616    }
617    let exact_data_subs = [
618        "remote",
619        "rev-parse",
620        "rev-list",
621        "ls-files",
622        "ls-tree",
623        "ls-remote",
624        "shortlog",
625        "for-each-ref",
626        "cat-file",
627        "name-rev",
628        "describe",
629        "merge-base",
630    ];
631
632    let mut tokens = cl.split_whitespace();
633    while let Some(tok) = tokens.next() {
634        let base = tok.rsplit('/').next().unwrap_or(tok);
635        if base != "git" {
636            continue;
637        }
638        let mut skip_next = false;
639        for arg in tokens.by_ref() {
640            if skip_next {
641                skip_next = false;
642                continue;
643            }
644            if arg == "-c" || arg == "-C" || arg == "--git-dir" || arg == "--work-tree" {
645                skip_next = true;
646                continue;
647            }
648            if arg.starts_with('-') {
649                continue;
650            }
651            return exact_data_subs.contains(&arg);
652        }
653        return false;
654    }
655    false
656}
657
/// Returns true for task-runner invocations that only preview their work:
/// `make` with ` -n` or ` --dry-run`, and `ansible*` commands with `--check`
/// or `--diff`.
fn is_task_dry_run(command: &str) -> bool {
    let cl = command.trim().to_ascii_lowercase();

    let make_preview =
        cl.starts_with("make ") && (cl.contains(" -n") || cl.contains(" --dry-run"));
    let ansible_preview =
        cl.starts_with("ansible") && (cl.contains("--check") || cl.contains("--diff"));

    make_preview || ansible_preview
}
668
669fn is_env_dump(command: &str) -> bool {
670    let first = first_binary(command);
671    matches!(first, "env" | "printenv" | "set" | "export" | "locale")
672}
673
/// Returns the basename of the command's first whitespace-delimited token,
/// i.e. the binary name with any leading directory path removed.
///
/// Yields an empty string when `command` has no tokens.
fn first_binary(command: &str) -> &str {
    command
        .split_whitespace()
        .next()
        .map_or("", |token| token.rsplit('/').next().unwrap_or(token))
}
679
/// Returns true when the command's binary (basename of the first token,
/// compared ASCII-case-insensitively) is a non-git diff tool: `diff`,
/// `colordiff`, `icdiff`, or `delta`.
fn is_standalone_diff_command(command: &str) -> bool {
    const DIFF_TOOLS: [&str; 4] = ["diff", "colordiff", "icdiff", "delta"];

    command
        .split_whitespace()
        .next()
        .map(|token| token.rsplit('/').next().unwrap_or(token))
        .map_or(false, |binary| {
            DIFF_TOOLS
                .iter()
                .any(|&tool| binary.eq_ignore_ascii_case(tool))
        })
}
689
690/// Git subcommands that produce structural output the developer must read verbatim.
691fn is_structural_git_command(command: &str) -> bool {
692    let mut tokens = command.split_whitespace();
693    while let Some(tok) = tokens.next() {
694        let base = tok.rsplit('/').next().unwrap_or(tok);
695        if !base.eq_ignore_ascii_case("git") {
696            continue;
697        }
698        let mut skip_next = false;
699        let remaining: Vec<&str> = tokens.collect();
700        for arg in &remaining {
701            if skip_next {
702                skip_next = false;
703                continue;
704            }
705            if *arg == "-C" || *arg == "-c" || *arg == "--git-dir" || *arg == "--work-tree" {
706                skip_next = true;
707                continue;
708            }
709            if arg.starts_with('-') {
710                continue;
711            }
712            let sub = arg.to_ascii_lowercase();
713            return match sub.as_str() {
714                "diff" | "show" | "blame" => true,
715                "log" => has_patch_flag(&remaining) || has_stat_flag(&remaining),
716                "stash" => remaining.iter().any(|a| a.eq_ignore_ascii_case("show")),
717                _ => false,
718            };
719        }
720        return false;
721    }
722    false
723}
724
/// Returns true if any argument is `--patch` or begins with `-p` (which
/// includes a bare `-p`).
fn has_patch_flag(args: &[&str]) -> bool {
    for arg in args {
        if arg.starts_with("-p") || *arg == "--patch" {
            return true;
        }
    }
    false
}
730
/// Returns true if any argument is exactly `--stat` or has the form
/// `--stat=<value>`.
fn has_stat_flag(args: &[&str]) -> bool {
    args.iter().any(|arg| {
        arg.strip_prefix("--stat")
            .map_or(false, |rest| rest.is_empty() || rest.starts_with('='))
    })
}