Skip to main content

lean_ctx/shell/compress/
classification.rs

1use super::passthrough::{BUILTIN_PASSTHROUGH, DEV_SCRIPT_KEYWORDS, SCRIPT_RUNNER_PREFIXES};
2
3fn is_dev_script_runner(cmd: &str) -> bool {
4    for prefix in SCRIPT_RUNNER_PREFIXES {
5        if let Some(rest) = cmd.strip_prefix(prefix) {
6            let script_name = rest.split_whitespace().next().unwrap_or("");
7            for kw in DEV_SCRIPT_KEYWORDS {
8                if script_name.contains(kw) {
9                    return true;
10                }
11            }
12        }
13    }
14    false
15}
16
17pub(in crate::shell) fn is_excluded_command(command: &str, excluded: &[String]) -> bool {
18    let cmd = command.trim().to_lowercase();
19    for pattern in BUILTIN_PASSTHROUGH {
20        if pattern.starts_with("--") {
21            if cmd.contains(pattern) {
22                return true;
23            }
24        } else if pattern.ends_with(' ') || pattern.ends_with('\t') {
25            if cmd == pattern.trim() || cmd.starts_with(pattern) {
26                return true;
27            }
28        } else if cmd == *pattern
29            || cmd.starts_with(&format!("{pattern} "))
30            || cmd.starts_with(&format!("{pattern}\t"))
31            || cmd.contains(&format!(" {pattern} "))
32            || cmd.contains(&format!(" {pattern}\t"))
33            || cmd.contains(&format!("|{pattern} "))
34            || cmd.contains(&format!("|{pattern}\t"))
35            || cmd.ends_with(&format!(" {pattern}"))
36            || cmd.ends_with(&format!("|{pattern}"))
37        {
38            return true;
39        }
40    }
41
42    if is_dev_script_runner(&cmd) {
43        return true;
44    }
45
46    if excluded.is_empty() {
47        return false;
48    }
49    excluded.iter().any(|excl| {
50        let excl_lower = excl.trim().to_lowercase();
51        cmd == excl_lower || cmd.starts_with(&format!("{excl_lower} "))
52    })
53}
54
55pub(super) fn is_search_output(command: &str) -> bool {
56    let c = command.trim_start();
57    c.starts_with("grep ")
58        || c.starts_with("rg ")
59        || c.starts_with("find ")
60        || c.starts_with("fd ")
61        || c.starts_with("ag ")
62        || c.starts_with("ack ")
63}
64
65/// Returns true for commands whose output structure is critical for developer
66/// readability. Pattern compression (light cleanup like removing `index` lines
67/// or limiting context) still applies, but the terse pipeline and generic
68/// compressors are skipped so diff hunks, blame annotations, etc. remain
69/// fully readable.
70pub fn has_structural_output(command: &str) -> bool {
71    if is_verbatim_output(command) {
72        return true;
73    }
74    if is_standalone_diff_command(command) {
75        return true;
76    }
77    is_structural_git_command(command)
78}
79
80/// Returns true for commands where the output IS the purpose of the command.
81/// These must never have their content transformed — only size-limited if huge.
82/// Checks both the full command AND the last pipe segment for comprehensive coverage.
83pub fn is_verbatim_output(command: &str) -> bool {
84    is_verbatim_single(command) || is_verbatim_pipe_tail(command)
85}
86
87fn is_verbatim_single(command: &str) -> bool {
88    is_http_client(command)
89        || is_file_viewer(command)
90        || is_data_format_tool(command)
91        || is_binary_viewer(command)
92        || is_infra_inspection(command)
93        || is_crypto_command(command)
94        || is_database_query(command)
95        || is_dns_network_inspection(command)
96        || is_language_one_liner(command)
97        || is_container_listing(command)
98        || is_file_listing(command)
99        || is_system_query(command)
100        || is_cloud_cli_query(command)
101        || is_cli_api_data_command(command)
102        || is_package_manager_info(command)
103        || is_version_or_help(command)
104        || is_config_viewer(command)
105        || is_log_viewer(command)
106        || is_archive_listing(command)
107        || is_clipboard_tool(command)
108        || is_git_data_command(command)
109        || is_git_write_command(command)
110        || is_task_dry_run(command)
111        || is_env_dump(command)
112}
113
114/// CLI tools that fetch or output raw API/structured data.
115/// These MUST never be compressed -- compression destroys the payload.
116fn is_cli_api_data_command(command: &str) -> bool {
117    let cl = command.trim().to_ascii_lowercase();
118
119    // gh (GitHub CLI) -- api, run view --log, search, release view, gist view
120    if cl.starts_with("gh ")
121        && (cl.starts_with("gh api ")
122            || cl.starts_with("gh api\t")
123            || cl.contains(" --json")
124            || cl.contains(" --jq ")
125            || cl.contains(" --template ")
126            || (cl.contains("run view") && (cl.contains("--log") || cl.contains("log-failed")))
127            || cl.starts_with("gh search ")
128            || cl.starts_with("gh release view")
129            || cl.starts_with("gh gist view")
130            || cl.starts_with("gh gist list"))
131    {
132        return true;
133    }
134
135    // GitLab CLI (glab)
136    if cl.starts_with("glab ") && cl.starts_with("glab api ") {
137        return true;
138    }
139
140    // Jira CLI
141    if cl.starts_with("jira ") && (cl.contains(" view") || cl.contains(" list")) {
142        return true;
143    }
144
145    // Linear CLI
146    if cl.starts_with("linear ") {
147        return true;
148    }
149
150    // Stripe, Twilio, Vercel, Netlify, Fly, Railway, Supabase CLIs
151    let first = first_binary(command);
152    if matches!(
153        first,
154        "stripe" | "twilio" | "vercel" | "netlify" | "flyctl" | "fly" | "railway" | "supabase"
155    ) && (cl.contains(" list")
156        || cl.contains(" get")
157        || cl.contains(" show")
158        || cl.contains(" status")
159        || cl.contains(" info")
160        || cl.contains(" logs")
161        || cl.contains(" inspect")
162        || cl.contains(" export")
163        || cl.contains(" describe"))
164    {
165        return true;
166    }
167
168    // Cloudflare (wrangler)
169    if cl.starts_with("wrangler ")
170        && !cl.starts_with("wrangler dev")
171        && (cl.contains(" tail") || cl.contains(" secret list") || cl.contains(" kv "))
172    {
173        return true;
174    }
175
176    // Heroku
177    if cl.starts_with("heroku ")
178        && (cl.contains(" config")
179            || cl.contains(" logs")
180            || cl.contains(" ps")
181            || cl.contains(" info"))
182    {
183        return true;
184    }
185
186    false
187}
188
189/// For piped commands like `kubectl get pods -o json | jq '.items[]'`,
190/// check if the LAST command in the pipe is a verbatim tool.
191fn is_verbatim_pipe_tail(command: &str) -> bool {
192    if !command.contains('|') {
193        return false;
194    }
195    let last_segment = command.rsplit('|').next().unwrap_or("").trim();
196    if last_segment.is_empty() {
197        return false;
198    }
199    is_verbatim_single(last_segment)
200}
201
202fn is_http_client(command: &str) -> bool {
203    let first = first_binary(command);
204    matches!(
205        first,
206        "curl" | "wget" | "http" | "https" | "xh" | "curlie" | "grpcurl" | "grpc_cli"
207    )
208}
209
210fn is_file_viewer(command: &str) -> bool {
211    let first = first_binary(command);
212    match first {
213        "cat" | "bat" | "batcat" | "pygmentize" | "highlight" => true,
214        "head" | "tail" => !command.contains("-f") && !command.contains("--follow"),
215        _ => false,
216    }
217}
218
219fn is_data_format_tool(command: &str) -> bool {
220    let first = first_binary(command);
221    matches!(
222        first,
223        "jq" | "yq"
224            | "xq"
225            | "fx"
226            | "gron"
227            | "mlr"
228            | "miller"
229            | "dasel"
230            | "csvlook"
231            | "csvcut"
232            | "csvgrep"
233            | "csvjson"
234            | "in2csv"
235            | "sql2csv"
236    )
237}
238
239fn is_binary_viewer(command: &str) -> bool {
240    let first = first_binary(command);
241    matches!(first, "xxd" | "hexdump" | "od" | "strings" | "file")
242}
243
/// Returns true for infrastructure inspection commands: Terraform / OpenTofu /
/// Pulumi state and output queries, container `inspect`, `kubectl describe`,
/// `helm get` / `helm template`, and `kubectl get` when YAML or JSON output
/// is requested.
///
/// Matching is prefix-based on the trimmed, ASCII-lowercased command.
fn is_infra_inspection(command: &str) -> bool {
    // Prefixes that qualify on their own.
    const INSPECTION_PREFIXES: &[&str] = &[
        "terraform output",
        "terraform show",
        "terraform state show",
        "terraform state list",
        "terraform state pull",
        "tofu output",
        "tofu show",
        "tofu state show",
        "tofu state list",
        "tofu state pull",
        "pulumi stack output",
        "pulumi stack export",
        "docker inspect",
        "podman inspect",
        "kubectl describe",
        "k describe",
        "helm get",
        "helm template",
    ];
    // `kubectl get` only qualifies together with a structured output flag.
    const KUBECTL_GET_PREFIXES: &[&str] = &["kubectl get", "k get"];
    const STRUCTURED_OUTPUT_FLAGS: &[&str] = &[
        "-o yaml",
        "-o json",
        "-oyaml",
        "-ojson",
        "--output yaml",
        "--output json",
        "--output=yaml",
        "--output=json",
    ];

    let lowered = command.trim().to_ascii_lowercase();
    let begins_with_any =
        |prefixes: &[&str]| prefixes.iter().any(|prefix| lowered.starts_with(*prefix));

    begins_with_any(INSPECTION_PREFIXES)
        || (begins_with_any(KUBECTL_GET_PREFIXES)
            && STRUCTURED_OUTPUT_FLAGS
                .iter()
                .any(|flag| lowered.contains(*flag)))
}
284
285fn is_crypto_command(command: &str) -> bool {
286    let first = first_binary(command);
287    if first == "openssl" {
288        return true;
289    }
290    matches!(first, "gpg" | "age" | "ssh-keygen" | "certutil")
291}
292
/// Returns true for database client invocations that carry an inline query,
/// whose result set is the purpose of the command.
///
/// Recognised forms (matched on the ASCII-lowercased command):
/// - `psql` with ` -c ` or `--command`
/// - `mysql` / `mariadb` with ` -e ` or `--execute`
/// - `sqlite3` with a quoted argument (single or double quotes)
/// - `mongosh` with `--eval`
///
/// Leading whitespace is ignored, consistent with the other classifiers in
/// this module, so an indented command is classified like an unindented one.
fn is_database_query(command: &str) -> bool {
    let cl = command.trim_start().to_ascii_lowercase();
    let has = |needle: &str| cl.contains(needle);

    if cl.starts_with("psql ") {
        return has(" -c ") || has("--command");
    }
    if cl.starts_with("mysql ") || cl.starts_with("mariadb ") {
        return has(" -e ") || has("--execute");
    }
    if cl.starts_with("sqlite3 ") {
        // Inline SQL is normally shell-quoted; accept either quote style.
        return cl.contains('"') || cl.contains('\'');
    }
    if cl.starts_with("mongosh ") {
        return has("--eval");
    }
    false
}
312
313fn is_dns_network_inspection(command: &str) -> bool {
314    let first = first_binary(command);
315    matches!(
316        first,
317        "dig" | "nslookup" | "host" | "whois" | "drill" | "resolvectl"
318    )
319}
320
/// Returns true for interpreter invocations that evaluate an inline snippet:
/// `python` / `python3 -c`, `node -e` / `--eval`, `ruby -e`, `perl -e` and
/// `php -r`.
///
/// Matching is done on the ASCII-lowercased command. Leading whitespace is
/// ignored, consistent with the other classifiers in this module, so an
/// indented command is classified like an unindented one.
fn is_language_one_liner(command: &str) -> bool {
    let cl = command.trim_start().to_ascii_lowercase();
    let has = |needle: &str| cl.contains(needle);

    // Python also accepts the snippet glued to the flag: -c"..." / -c'...'.
    let python = (cl.starts_with("python ") || cl.starts_with("python3 "))
        && (has(" -c ") || has(" -c\"") || has(" -c'"));
    let node = cl.starts_with("node ") && (has(" -e ") || has(" --eval"));
    let ruby = cl.starts_with("ruby ") && has(" -e ");
    let perl = cl.starts_with("perl ") && has(" -e ");
    let php = cl.starts_with("php ") && has(" -r ");

    python || node || ruby || perl || php
}
330
/// Returns true for container / cluster listing commands, matched by prefix
/// on the trimmed, ASCII-lowercased command.
fn is_container_listing(command: &str) -> bool {
    const LISTING_PREFIXES: &[&str] = &[
        "docker ps",
        "docker images",
        "podman ps",
        "podman images",
        "kubectl get",
        "k get",
        "helm list",
        "helm ls",
        "docker compose ps",
        "docker-compose ps",
    ];

    let lowered = command.trim().to_ascii_lowercase();
    LISTING_PREFIXES
        .iter()
        .any(|prefix| lowered.starts_with(*prefix))
}
350
351fn is_file_listing(command: &str) -> bool {
352    let first = first_binary(command);
353    matches!(
354        first,
355        "find" | "fd" | "fdfind" | "ls" | "exa" | "eza" | "lsd"
356    )
357}
358
359fn is_system_query(command: &str) -> bool {
360    let first = first_binary(command);
361    matches!(
362        first,
363        "stat"
364            | "wc"
365            | "du"
366            | "df"
367            | "free"
368            | "uname"
369            | "id"
370            | "whoami"
371            | "hostname"
372            | "uptime"
373            | "lscpu"
374            | "lsblk"
375            | "ip"
376            | "ifconfig"
377            | "route"
378            | "ss"
379            | "netstat"
380            | "base64"
381            | "sha256sum"
382            | "sha1sum"
383            | "md5sum"
384            | "cksum"
385            | "readlink"
386            | "realpath"
387            | "which"
388            | "type"
389            | "command"
390    )
391}
392
/// Returns true for read-style invocations of the `aws`, `gcloud` and `az`
/// CLIs.
///
/// A provider matches by prefix on the trimmed, ASCII-lowercased command,
/// excluding `aws configure`, `gcloud auth`, any `gcloud` command containing
/// ` deploy`, and `az login`. The command must additionally contain a space
/// followed by one of the query verbs.
fn is_cloud_cli_query(command: &str) -> bool {
    const QUERY_VERBS: [&str; 11] = [
        "describe",
        "get",
        "list",
        "show",
        "export",
        "inspect",
        "info",
        "status",
        "whoami",
        "caller-identity",
        "account",
    ];

    let lowered = command.trim().to_ascii_lowercase();
    let begins = |prefix: &str| lowered.starts_with(prefix);

    let provider_matches = (begins("aws ") && !begins("aws configure"))
        || (begins("gcloud ") && !begins("gcloud auth") && !lowered.contains(" deploy"))
        || (begins("az ") && !begins("az login"));

    provider_matches
        && QUERY_VERBS
            .iter()
            .any(|verb| lowered.contains(&format!(" {verb}")))
}
422
/// Returns true for read-only package-manager queries (listing, info,
/// dependency trees, audits) as opposed to install / build style commands.
///
/// Matching is done on the trimmed, ASCII-lowercased command. `pip` / `pip3`
/// are matched by verb anywhere in the command; every other manager is
/// matched by the exact leading subcommand.
fn is_package_manager_info(command: &str) -> bool {
    // Each entry already starts with its manager name, so a single prefix
    // scan covers every manager.
    const QUERY_PREFIXES: &[&str] = &[
        // npm
        "npm list",
        "npm ls",
        "npm info",
        "npm view",
        "npm show",
        "npm outdated",
        "npm audit",
        // yarn
        "yarn list",
        "yarn info",
        "yarn why",
        "yarn outdated",
        "yarn audit",
        // pnpm
        "pnpm list",
        "pnpm ls",
        "pnpm why",
        "pnpm outdated",
        "pnpm audit",
        // gem
        "gem list",
        "gem info",
        "gem specification",
        // cargo
        "cargo metadata",
        "cargo tree",
        "cargo pkgid",
        // go
        "go list",
        "go version",
        // composer
        "composer show",
        "composer info",
        "composer outdated",
        // brew
        "brew list",
        "brew info",
        "brew deps",
        "brew outdated",
        // apt / dpkg
        "apt list",
        "apt show",
        "dpkg -l",
        "dpkg --list",
        "dpkg -s",
    ];
    const PIP_VERBS: &[&str] = &[" list", " show", " freeze"];

    let lowered = command.trim().to_ascii_lowercase();

    if lowered.starts_with("pip ") || lowered.starts_with("pip3 ") {
        return PIP_VERBS.iter().any(|verb| lowered.contains(*verb));
    }

    QUERY_PREFIXES
        .iter()
        .any(|prefix| lowered.starts_with(*prefix))
}
485
/// Returns true for short version / help requests.
///
/// The command must consist of two or three whitespace-separated words, and
/// any of them (including the first) must be `--version`, `-V`, `--help`,
/// `-h`, or the word `version` / `help` in any ASCII case.
fn is_version_or_help(command: &str) -> bool {
    let words: Vec<&str> = command.split_whitespace().collect();
    if !(2..=3).contains(&words.len()) {
        return false;
    }

    words.iter().any(|word| {
        matches!(*word, "--version" | "-V" | "--help" | "-h")
            || word.eq_ignore_ascii_case("version")
            || word.eq_ignore_ascii_case("help")
    })
}
500
/// Returns true for commands that display configuration rather than change
/// it, matched on the trimmed, ASCII-lowercased command.
///
/// - `git config` without `--set` / `--unset`
/// - `yarn config` without ` set`
/// - `kubectl config` containing `view`, `get-contexts` or `current-context`
/// - a fixed set of read-only prefixes for npm, pip, rustup and docker
fn is_config_viewer(command: &str) -> bool {
    const READ_ONLY_PREFIXES: &[&str] = &[
        "npm config list",
        "npm config get",
        "pip config list",
        "pip3 config list",
        "rustup show",
        "rustup target list",
        "docker context ls",
        "docker context list",
    ];

    let lowered = command.trim().to_ascii_lowercase();
    let begins = |prefix: &str| lowered.starts_with(prefix);
    let has = |needle: &str| lowered.contains(needle);

    let git = begins("git config") && !has("--set") && !has("--unset");
    let yarn = begins("yarn config") && !has(" set");
    let kubectl = begins("kubectl config")
        && (has("view") || has("get-contexts") || has("current-context"));

    git || yarn || kubectl || READ_ONLY_PREFIXES.iter().any(|prefix| begins(*prefix))
}
528
/// Returns true for log-reading commands that terminate on their own:
/// `journalctl`, `dmesg`, `docker logs`, `kubectl logs` and
/// `docker compose logs`, unless they are asked to stream.
///
/// Streaming is detected per argument: a word beginning with the tool's
/// short follow flag (`-w` for `dmesg`, `-f` for the others) or with
/// `--follow`. A substring search over the whole command line would misfire
/// on resource names that merely contain `-f` / `-w`, such as a container
/// called `my-frontend` or a namespace called `kube-flannel`.
///
/// Matching is done on the trimmed, ASCII-lowercased command, so uppercase
/// short flags are treated like their lowercase form.
fn is_log_viewer(command: &str) -> bool {
    /// True when some argument starts with `short_flag` or `--follow`.
    fn is_streaming(cl: &str, short_flag: &str) -> bool {
        cl.split_whitespace()
            .any(|arg| arg.starts_with(short_flag) || arg.starts_with("--follow"))
    }

    const FOLLOW_WITH_F: &[&str] = &[
        "journalctl",
        "docker logs",
        "kubectl logs",
        "docker compose logs",
    ];

    let cl = command.trim().to_ascii_lowercase();

    if cl.starts_with("dmesg") {
        return !is_streaming(&cl, "-w");
    }
    if FOLLOW_WITH_F.iter().any(|prefix| cl.starts_with(*prefix)) {
        return !is_streaming(&cl, "-f");
    }
    false
}
548
549fn is_archive_listing(command: &str) -> bool {
550    let cl = command.trim().to_ascii_lowercase();
551    if cl.starts_with("tar ") && (cl.contains(" -tf") || cl.contains(" -t") || cl.contains(" tf")) {
552        return true;
553    }
554    if cl.starts_with("unzip -l") || cl.starts_with("unzip -Z") {
555        return true;
556    }
557    let first = first_binary(command);
558    matches!(first, "zipinfo" | "lsar" | "7z" if cl.contains(" l ") || cl.contains(" l\t"))
559        || first == "zipinfo"
560        || first == "lsar"
561}
562
563fn is_clipboard_tool(command: &str) -> bool {
564    let first = first_binary(command);
565    if matches!(first, "pbpaste" | "wl-paste") {
566        return true;
567    }
568    let cl = command.trim().to_ascii_lowercase();
569    if cl.starts_with("xclip") && cl.contains("-o") {
570        return true;
571    }
572    if cl.starts_with("xsel") && (cl.contains("-o") || cl.contains("--output")) {
573        return true;
574    }
575    false
576}
577
/// Git write-commands produce minimal output that agents must see verbatim.
/// Compressing these risks abbreviating subcommand names (e.g. "commit" → "cmt")
/// which agents then misinterpret as valid commands.
///
/// The command must start with `git ` (after trimming and ASCII-lowercasing).
/// Global options are skipped, together with the value of the options that
/// take one as a separate word, and the first remaining word is compared
/// against the write subcommands.
fn is_git_write_command(command: &str) -> bool {
    const WRITE_SUBCOMMANDS: &[&str] = &[
        "commit",
        "push",
        "pull",
        "merge",
        "rebase",
        "cherry-pick",
        "tag",
        "reset",
    ];

    let lowered = command.trim().to_ascii_lowercase();
    let rest = match lowered.strip_prefix("git ") {
        Some(rest) => rest,
        None => return false,
    };

    let mut args = rest.split_whitespace();
    while let Some(arg) = args.next() {
        // The text is lowercased, so `-c` also stands for `-C`.
        if matches!(arg, "-c" | "--git-dir" | "--work-tree") {
            // Discard the option's value, if any.
            args.next();
        } else if !arg.starts_with('-') {
            // First non-option word is the subcommand.
            return WRITE_SUBCOMMANDS.contains(&arg);
        }
    }
    false
}
613
614pub(super) fn is_git_data_command(command: &str) -> bool {
615    let cl = command.trim().to_ascii_lowercase();
616    if !cl.contains("git") {
617        return false;
618    }
619    let exact_data_subs = [
620        "remote",
621        "rev-parse",
622        "rev-list",
623        "ls-files",
624        "ls-tree",
625        "ls-remote",
626        "shortlog",
627        "for-each-ref",
628        "cat-file",
629        "name-rev",
630        "describe",
631        "merge-base",
632    ];
633
634    let mut tokens = cl.split_whitespace();
635    while let Some(tok) = tokens.next() {
636        let base = tok.rsplit('/').next().unwrap_or(tok);
637        if base != "git" {
638            continue;
639        }
640        let mut skip_next = false;
641        for arg in tokens.by_ref() {
642            if skip_next {
643                skip_next = false;
644                continue;
645            }
646            if arg == "-c" || arg == "-C" || arg == "--git-dir" || arg == "--work-tree" {
647                skip_next = true;
648                continue;
649            }
650            if arg.starts_with('-') {
651                continue;
652            }
653            return exact_data_subs.contains(&arg);
654        }
655        return false;
656    }
657    false
658}
659
/// Returns true for dry-run style task invocations: `make` with ` -n` or
/// ` --dry-run`, and any `ansible*` command with `--check` or `--diff`.
///
/// Matching is done on the trimmed, ASCII-lowercased command.
fn is_task_dry_run(command: &str) -> bool {
    let lowered = command.trim().to_ascii_lowercase();
    let has = |needle: &str| lowered.contains(needle);

    let make_dry_run = lowered.starts_with("make ") && (has(" -n") || has(" --dry-run"));
    let ansible_check = lowered.starts_with("ansible") && (has("--check") || has("--diff"));

    make_dry_run || ansible_check
}
670
671fn is_env_dump(command: &str) -> bool {
672    let first = first_binary(command);
673    matches!(first, "env" | "printenv" | "set" | "export" | "locale")
674}
675
/// Extracts the binary name (basename, no path) from the first token of a command.
///
/// Returns the text after the last `/` of the first whitespace-delimited
/// word, or an empty string when the command has no words.
fn first_binary(command: &str) -> &str {
    command
        .split_whitespace()
        .next()
        .map_or("", |word| word.rsplit('/').next().unwrap_or(word))
}
681
/// Non-git diff tools: `diff`, `colordiff`, `icdiff`, `delta`.
///
/// Compares the basename of the command's first word against those names,
/// ignoring ASCII case.
fn is_standalone_diff_command(command: &str) -> bool {
    const DIFF_TOOLS: [&str; 4] = ["diff", "colordiff", "icdiff", "delta"];

    let binary = command
        .split_whitespace()
        .next()
        .map_or("", |word| word.rsplit('/').next().unwrap_or(word));

    DIFF_TOOLS
        .iter()
        .any(|tool| binary.eq_ignore_ascii_case(tool))
}
691
692/// Git subcommands that produce structural output the developer must read verbatim.
693fn is_structural_git_command(command: &str) -> bool {
694    let mut tokens = command.split_whitespace();
695    while let Some(tok) = tokens.next() {
696        let base = tok.rsplit('/').next().unwrap_or(tok);
697        if !base.eq_ignore_ascii_case("git") {
698            continue;
699        }
700        let mut skip_next = false;
701        let remaining: Vec<&str> = tokens.collect();
702        for arg in &remaining {
703            if skip_next {
704                skip_next = false;
705                continue;
706            }
707            if *arg == "-C" || *arg == "-c" || *arg == "--git-dir" || *arg == "--work-tree" {
708                skip_next = true;
709                continue;
710            }
711            if arg.starts_with('-') {
712                continue;
713            }
714            let sub = arg.to_ascii_lowercase();
715            return match sub.as_str() {
716                "diff" | "show" | "blame" => true,
717                "log" => has_patch_flag(&remaining) || has_stat_flag(&remaining),
718                "stash" => remaining.iter().any(|a| a.eq_ignore_ascii_case("show")),
719                _ => false,
720            };
721        }
722        return false;
723    }
724    false
725}
726
/// Returns true if the argument list contains `--patch` or any argument
/// beginning with `-p` (which includes plain `-p`).
fn has_patch_flag(args: &[&str]) -> bool {
    for arg in args {
        if arg.starts_with("-p") || *arg == "--patch" {
            return true;
        }
    }
    false
}
732
/// Returns true if the argument list contains `--stat`, either bare or in
/// the `--stat=<value>` form.
fn has_stat_flag(args: &[&str]) -> bool {
    args.iter().any(|arg| {
        arg.strip_prefix("--stat")
            .map_or(false, |rest| rest.is_empty() || rest.starts_with('='))
    })
}