Skip to main content

codewhale_execpolicy/
bash_arity.rs

//! Bash arity dictionary for command-prefix allow rule matching.
//!
//! [`BashArityDict`] maps a command prefix (space-separated, lowercase) to the
//! number of positional (non-flag) words, *including the base command word*,
//! that form the canonical prefix.
//!
//! ## Invariant
//!
//! Flags (tokens starting with `-`) are **never** counted toward arity.
//! `auto_allow = ["git status"]` must match `git status -s` and
//! `git status --porcelain`, but **not** `git push`.
//!
//! ## Coverage
//!
//! Well over 30 common command prefixes are covered, across: git, npm, yarn,
//! pnpm, cargo, docker, kubectl, go, python/pip, gh, rustup, deno, bun, aws,
//! terraform, helm, make, and more.
18
/// Static arity table: `(prefix, arity)`.
///
/// `prefix` is a lowercase command prefix whose words are separated by a
/// single space.  `arity` is the total number of tokens (including the base
/// command) that form the canonical prefix.  For example:
///
/// * `("git status", 2)` — 2 positional tokens: `git` + `status`.
/// * `("npm run", 3)` — 3 positional tokens: `npm` + `run` + `<script>`.
/// * `("make", 1)` — only the base command, no sub-command.
/// * `("python -m", 3)` — `python` + `-m` + `<module>`; here the `-m` switch
///   is part of the key itself, so it is one of the three counted tokens.
///
/// Every key is unique, and `pip` / `pip3` carry the same set of sub-commands.
pub static BASH_ARITY_TABLE: &[(&str, u8)] = &[
    // ── git ──────────────────────────────────────────────────────────────────
    ("git add", 2),
    ("git am", 2),
    ("git apply", 2),
    ("git bisect", 2),
    ("git blame", 2),
    ("git branch", 2),
    ("git cat-file", 2),
    ("git checkout", 2),
    ("git cherry-pick", 2),
    ("git clean", 2),
    ("git clone", 2),
    ("git commit", 2),
    ("git config", 2),
    ("git describe", 2),
    ("git diff", 2),
    ("git fetch", 2),
    ("git format-patch", 2),
    ("git grep", 2),
    ("git init", 2),
    ("git log", 2),
    ("git ls-files", 2),
    ("git merge", 2),
    ("git mv", 2),
    ("git notes", 2),
    ("git pull", 2),
    ("git push", 2),
    ("git rebase", 2),
    ("git reflog", 2),
    ("git remote", 2),
    ("git reset", 2),
    ("git restore", 2),
    ("git revert", 2),
    ("git rm", 2),
    ("git show", 2),
    ("git stash", 2),
    ("git status", 2),
    ("git submodule", 2),
    ("git switch", 2),
    ("git tag", 2),
    ("git worktree", 2),
    // ── npm ──────────────────────────────────────────────────────────────────
    ("npm audit", 2),
    ("npm build", 2),
    ("npm cache", 2),
    ("npm ci", 2),
    ("npm dedupe", 2),
    ("npm fund", 2),
    ("npm help", 2),
    ("npm info", 2),
    ("npm init", 2),
    ("npm install", 2),
    ("npm link", 2),
    ("npm list", 2),
    ("npm ls", 2),
    ("npm outdated", 2),
    ("npm pack", 2),
    ("npm prune", 2),
    ("npm publish", 2),
    ("npm rebuild", 2),
    ("npm run", 3),
    ("npm start", 2),
    ("npm stop", 2),
    ("npm test", 2),
    ("npm uninstall", 2),
    ("npm update", 2),
    ("npm version", 2),
    ("npm view", 2),
    // ── yarn ─────────────────────────────────────────────────────────────────
    ("yarn add", 2),
    ("yarn audit", 2),
    ("yarn build", 2),
    ("yarn install", 2),
    ("yarn run", 3),
    ("yarn start", 2),
    ("yarn test", 2),
    ("yarn upgrade", 2),
    ("yarn workspace", 3),
    // ── pnpm ─────────────────────────────────────────────────────────────────
    ("pnpm add", 2),
    ("pnpm build", 2),
    ("pnpm install", 2),
    ("pnpm run", 3),
    ("pnpm start", 2),
    ("pnpm test", 2),
    ("pnpm update", 2),
    // ── cargo ────────────────────────────────────────────────────────────────
    ("cargo add", 2),
    ("cargo bench", 2),
    ("cargo build", 2),
    ("cargo check", 2),
    ("cargo clean", 2),
    ("cargo clippy", 2),
    ("cargo doc", 2),
    ("cargo fix", 2),
    ("cargo fmt", 2),
    ("cargo generate", 2),
    ("cargo install", 2),
    ("cargo metadata", 2),
    ("cargo package", 2),
    ("cargo publish", 2),
    ("cargo remove", 2),
    ("cargo run", 2),
    ("cargo search", 2),
    ("cargo test", 2),
    ("cargo tree", 2),
    ("cargo uninstall", 2),
    ("cargo update", 2),
    ("cargo yank", 2),
    // ── docker ───────────────────────────────────────────────────────────────
    ("docker build", 2),
    ("docker compose", 3),
    ("docker container", 3),
    ("docker cp", 2),
    ("docker exec", 2),
    ("docker image", 3),
    ("docker images", 2),
    ("docker inspect", 2),
    ("docker kill", 2),
    ("docker logs", 2),
    ("docker network", 3),
    ("docker ps", 2),
    ("docker pull", 2),
    ("docker push", 2),
    ("docker rm", 2),
    ("docker rmi", 2),
    ("docker run", 2),
    ("docker start", 2),
    ("docker stop", 2),
    ("docker system", 3),
    ("docker tag", 2),
    ("docker volume", 3),
    // ── kubectl ──────────────────────────────────────────────────────────────
    ("kubectl apply", 2),
    ("kubectl create", 3),
    ("kubectl delete", 3),
    ("kubectl describe", 3),
    ("kubectl exec", 2),
    ("kubectl explain", 2),
    ("kubectl get", 3),
    ("kubectl label", 2),
    ("kubectl logs", 2),
    ("kubectl patch", 2),
    ("kubectl port-forward", 2),
    ("kubectl rollout", 3),
    ("kubectl scale", 2),
    ("kubectl set", 2),
    ("kubectl top", 3),
    // ── go ───────────────────────────────────────────────────────────────────
    ("go build", 2),
    ("go clean", 2),
    ("go env", 2),
    ("go fmt", 2),
    ("go generate", 2),
    ("go get", 2),
    ("go install", 2),
    ("go list", 2),
    ("go mod", 3),
    ("go run", 2),
    ("go test", 2),
    ("go vet", 2),
    ("go work", 3),
    // ── python / pip ─────────────────────────────────────────────────────────
    ("pip install", 2),
    ("pip uninstall", 2),
    ("pip list", 2),
    ("pip show", 2),
    ("pip freeze", 2),
    ("pip3 install", 2),
    ("pip3 uninstall", 2),
    ("pip3 list", 2),
    ("pip3 show", 2),
    ("pip3 freeze", 2),
    ("python -m", 3),
    ("python3 -m", 3),
    // ── make / cmake ─────────────────────────────────────────────────────────
    ("make", 1),
    ("cmake", 1),
    // ── gh (GitHub CLI) ──────────────────────────────────────────────────────
    ("gh pr", 3),
    ("gh issue", 3),
    ("gh repo", 3),
    ("gh release", 3),
    ("gh workflow", 3),
    ("gh run", 3),
    ("gh secret", 3),
    // ── rustup ───────────────────────────────────────────────────────────────
    ("rustup default", 2),
    ("rustup install", 2),
    ("rustup show", 2),
    ("rustup target", 3),
    ("rustup toolchain", 3),
    ("rustup update", 2),
    // ── deno / bun ───────────────────────────────────────────────────────────
    ("deno run", 2),
    ("deno test", 2),
    ("deno fmt", 2),
    ("deno lint", 2),
    ("bun add", 2),
    ("bun build", 2),
    ("bun install", 2),
    ("bun run", 3),
    ("bun test", 2),
    // ── npx ──────────────────────────────────────────────────────────────────
    // `npx <package>`: the package name is part of the canonical prefix.
    ("npx", 2),
    // ── aws CLI ──────────────────────────────────────────────────────────────
    ("aws s3", 3),
    ("aws ec2", 3),
    ("aws iam", 3),
    ("aws lambda", 3),
    ("aws cloudformation", 3),
    ("aws ecs", 3),
    ("aws eks", 3),
    ("aws rds", 3),
    ("aws sts", 3),
    ("aws configure", 2),
    // ── terraform ────────────────────────────────────────────────────────────
    ("terraform init", 2),
    ("terraform plan", 2),
    ("terraform apply", 2),
    ("terraform destroy", 2),
    ("terraform validate", 2),
    ("terraform output", 2),
    ("terraform state", 3),
    ("terraform workspace", 3),
    // ── helm ─────────────────────────────────────────────────────────────────
    ("helm install", 2),
    ("helm upgrade", 2),
    ("helm uninstall", 2),
    ("helm list", 2),
    ("helm repo", 3),
    ("helm status", 2),
    ("helm template", 2),
];
260
261/// Arity dictionary for bash command-prefix allow rules.
262///
263/// Provides arity-aware prefix extraction so that `auto_allow = ["git status"]`
264/// correctly matches `git status -s` and `git status --porcelain` without
265/// also matching `git push`.
266///
267/// # Example
268///
269/// ```rust
270/// use codewhale_execpolicy::bash_arity::BashArityDict;
271///
272/// let dict = BashArityDict::new();
273/// assert_eq!(dict.classify(&["git", "status", "-s"]),   "git status");
274/// assert_eq!(dict.classify(&["git", "push", "origin"]), "git push");
275/// assert_eq!(dict.classify(&["npm", "run", "dev"]),     "npm run dev");
276/// assert_eq!(dict.classify(&["ls", "-la"]),             "ls");
277/// ```
278#[derive(Debug, Clone)]
279pub struct BashArityDict {
280    /// Internal table sorted longest-prefix-first for greedy matching.
281    entries: Vec<(&'static str, u8)>,
282}
283
284impl BashArityDict {
285    /// Construct a new dictionary pre-loaded with [`BASH_ARITY_TABLE`].
286    #[must_use]
287    pub fn new() -> Self {
288        let mut entries: Vec<(&'static str, u8)> = BASH_ARITY_TABLE.to_vec();
289        // Longest prefix first so greedy matching works correctly.
290        entries.sort_by_key(|entry| std::cmp::Reverse(entry.0.len()));
291        Self { entries }
292    }
293
294    /// Return the canonical command prefix for a slice of command tokens.
295    ///
296    /// # Algorithm
297    ///
298    /// 1. Strip all flag tokens (tokens that start with `-`).
299    /// 2. Build candidates of depth 1..=3 from positional tokens (longest first).
300    /// 3. If a candidate matches a dictionary entry, return `arity` positional
301    ///    tokens joined with spaces.
302    /// 4. If no dictionary entry matches, return the single base command name.
303    #[must_use]
304    pub fn classify(&self, tokens: &[&str]) -> String {
305        if tokens.is_empty() {
306            return String::new();
307        }
308
309        // Collect positional (non-flag) tokens, lowercased.
310        let positional: Vec<String> = tokens
311            .iter()
312            .filter(|t| !t.starts_with('-'))
313            .map(|t| t.to_ascii_lowercase())
314            .collect();
315
316        if positional.is_empty() {
317            return String::new();
318        }
319
320        // Try candidates from longest to shortest (max depth 3).
321        let max_depth = positional.len().min(3);
322        for depth in (1..=max_depth).rev() {
323            let candidate = positional[..depth].join(" ");
324            if let Some(&(_key, arity)) = self
325                .entries
326                .iter()
327                .find(|(key, _)| *key == candidate.as_str())
328            {
329                let take = (arity as usize).min(positional.len());
330                return positional[..take].join(" ");
331            }
332        }
333
334        // No match: return base command name only.
335        positional[0].clone()
336    }
337
338    /// Return `true` if the allow-rule `pattern` (a command prefix string such
339    /// as `"git status"`) matches the concrete command `command`.
340    ///
341    /// Matching is arity-aware:
342    /// - `"git status"` matches `"git status -s"` and `"git status --porcelain"`.
343    /// - `"git status"` does **not** match `"git push origin main"`.
344    /// - Exact string patterns (e.g. `"ls"`) still work as before.
345    ///
346    /// For patterns that are not in the arity table, the function falls back to
347    /// a plain prefix test on the normalised command so that existing exact-match
348    /// rules continue to work unchanged.
349    #[must_use]
350    pub fn allow_rule_matches(&self, pattern: &str, command: &str) -> bool {
351        let pattern_lower = pattern.trim().to_ascii_lowercase();
352        let command_tokens: Vec<&str> = command.split_whitespace().collect();
353
354        // Classify the concrete command through the arity dictionary.
355        let canonical = self.classify(&command_tokens);
356
357        // Primary check: the classified prefix equals the allow-rule pattern.
358        if canonical == pattern_lower {
359            return true;
360        }
361
362        // Fallback: word-boundary prefix match for patterns not in the arity table.
363        // Matches the exact pattern or the pattern followed by a space (i.e., at
364        // word boundary), so "ls" matches "ls" and "ls -la" but NOT "lsof".
365        let command_lower = command.trim().to_ascii_lowercase();
366        // Normalise whitespace in both sides before comparing.
367        let pattern_norm: String = pattern_lower
368            .split_whitespace()
369            .collect::<Vec<_>>()
370            .join(" ");
371        let command_norm: String = command_lower
372            .split_whitespace()
373            .collect::<Vec<_>>()
374            .join(" ");
375        command_norm == pattern_norm
376            || (command_norm.starts_with(&pattern_norm)
377                && command_norm.as_bytes().get(pattern_norm.len()) == Some(&b' '))
378    }
379
380    /// Iterate over all entries in the dictionary.
381    pub fn entries(&self) -> impl Iterator<Item = (&str, u8)> {
382        self.entries.iter().map(|(k, v)| (*k, *v))
383    }
384
385    /// Return the number of entries in the dictionary.
386    #[must_use]
387    pub fn len(&self) -> usize {
388        self.entries.len()
389    }
390
391    /// Return `true` if the dictionary is empty.
392    #[must_use]
393    pub fn is_empty(&self) -> bool {
394        self.entries.is_empty()
395    }
396}
397
398impl Default for BashArityDict {
399    fn default() -> Self {
400        Self::new()
401    }
402}
403
#[cfg(test)]
mod tests {
    use super::*;

    /// Classify `tokens` with a freshly built dictionary.
    fn prefix_of(tokens: &[&str]) -> String {
        BashArityDict::new().classify(tokens)
    }

    /// Ask a freshly built dictionary whether `pattern` allows `command`.
    fn allowed(pattern: &str, command: &str) -> bool {
        BashArityDict::new().allow_rule_matches(pattern, command)
    }

    // ── classify ─────────────────────────────────────────────────────────────

    #[test]
    fn classify_git_status_bare() {
        let prefix = prefix_of(&["git", "status"]);
        assert_eq!(prefix, "git status");
    }

    #[test]
    fn classify_git_status_with_short_flag() {
        let prefix = prefix_of(&["git", "status", "-s"]);
        assert_eq!(prefix, "git status");
    }

    #[test]
    fn classify_git_status_with_long_flag() {
        let prefix = prefix_of(&["git", "status", "--porcelain"]);
        assert_eq!(prefix, "git status");
    }

    #[test]
    fn classify_git_push() {
        let prefix = prefix_of(&["git", "push", "origin", "main"]);
        assert_eq!(prefix, "git push");
    }

    #[test]
    fn classify_git_push_force() {
        let prefix = prefix_of(&["git", "push", "--force"]);
        assert_eq!(prefix, "git push");
    }

    #[test]
    fn classify_npm_run_dev_arity_3() {
        let prefix = prefix_of(&["npm", "run", "dev"]);
        assert_eq!(prefix, "npm run dev");
    }

    #[test]
    fn classify_npm_install() {
        let prefix = prefix_of(&["npm", "install"]);
        assert_eq!(prefix, "npm install");
    }

    #[test]
    fn classify_cargo_check_with_flag() {
        let prefix = prefix_of(&["cargo", "check", "--workspace"]);
        assert_eq!(prefix, "cargo check");
    }

    #[test]
    fn classify_docker_compose_up_arity_3() {
        let prefix = prefix_of(&["docker", "compose", "up"]);
        assert_eq!(prefix, "docker compose up");
    }

    #[test]
    fn classify_kubectl_get_pods_arity_3() {
        let prefix = prefix_of(&["kubectl", "get", "pods"]);
        assert_eq!(prefix, "kubectl get pods");
    }

    #[test]
    fn classify_go_mod_tidy_arity_3() {
        let prefix = prefix_of(&["go", "mod", "tidy"]);
        assert_eq!(prefix, "go mod tidy");
    }

    #[test]
    fn classify_make_no_subcommand() {
        let prefix = prefix_of(&["make", "all"]);
        assert_eq!(prefix, "make");
    }

    #[test]
    fn classify_aws_s3_arity_3() {
        let prefix = prefix_of(&["aws", "s3", "ls"]);
        assert_eq!(prefix, "aws s3 ls");
    }

    #[test]
    fn classify_terraform_plan() {
        let prefix = prefix_of(&["terraform", "plan", "-out=tfplan"]);
        assert_eq!(prefix, "terraform plan");
    }

    #[test]
    fn classify_unknown_falls_back_to_base() {
        let prefix = prefix_of(&["ls", "-la"]);
        assert_eq!(prefix, "ls");
    }

    #[test]
    fn classify_empty_returns_empty() {
        let prefix = prefix_of(&[]);
        assert_eq!(prefix, "");
    }

    // ── allow_rule_matches ────────────────────────────────────────────────────

    #[test]
    fn allow_rule_git_status_matches_with_flag() {
        assert!(allowed("git status", "git status -s"));
    }

    #[test]
    fn allow_rule_git_status_matches_porcelain() {
        assert!(allowed("git status", "git status --porcelain"));
    }

    #[test]
    fn allow_rule_git_status_does_not_match_push() {
        assert!(!allowed("git status", "git push origin main"));
    }

    #[test]
    fn allow_rule_git_status_does_not_match_checkout() {
        assert!(!allowed("git status", "git checkout main"));
    }

    #[test]
    fn allow_rule_npm_run_matches_dev() {
        assert!(allowed("npm run dev", "npm run dev"));
    }

    #[test]
    fn allow_rule_npm_run_dev_does_not_match_build() {
        assert!(!allowed("npm run dev", "npm run build"));
    }

    #[test]
    fn allow_rule_cargo_check_matches_with_flags() {
        assert!(allowed("cargo check", "cargo check --workspace"));
    }

    #[test]
    fn allow_rule_exact_match_still_works() {
        // "ls" has no dictionary entry, so this exercises the unknown-command path.
        assert!(allowed("ls", "ls -la"));
    }

    #[test]
    fn allow_rule_make_matches_with_target() {
        for command in ["make all", "make clean"] {
            assert!(allowed("make", command));
        }
    }

    #[test]
    fn allow_rule_aws_s3_ls() {
        assert!(allowed("aws s3 ls", "aws s3 ls"));
        // A different third word ("cp") must not satisfy the "aws s3 ls" rule.
        assert!(!allowed("aws s3 ls", "aws s3 cp src dst"));
    }

    // ── coverage count ────────────────────────────────────────────────────────

    #[test]
    fn dict_covers_at_least_30_commands() {
        // The issue requires 30+ common commands covered.
        let covered = BashArityDict::new().len();
        assert!(
            covered >= 30,
            "expected at least 30 entries, got {}",
            covered
        );
    }
}