Skip to main content

alint_rules/
command.rs

1//! `command` — shell out to an external CLI per matched file.
2//!
3//! Per-file rule: for every file matching `paths`, spawn the
4//! given `command:` argv with path-template substitution, capture
5//! exit code and stdout/stderr. Exit `0` is a pass; non-zero is
6//! one violation whose message is the (truncated) stdout+stderr.
7//! Spawn / wait failures and timeouts produce a violation with a
8//! clear cause line.
9//!
10//! ```yaml
11//! - id: workflows-clean
12//!   kind: command
13//!   paths: ".github/workflows/*.{yml,yaml}"
14//!   command: ["actionlint", "{path}"]
15//!   level: error
16//! ```
17//!
18//! Path templates supported in argv tokens (and in the alint-injected
19//! `ALINT_PATH` env var): `{path}`, `{dir}`, `{stem}`, `{ext}`,
20//! `{basename}`, `{parent_name}`. Working directory is the alint
21//! root. Stdin is closed (`/dev/null`).
22//!
23//! Environment threaded into the child:
24//!
25//! - `ALINT_PATH` — relative path of the matched file.
26//! - `ALINT_ROOT` — absolute repo root.
27//! - `ALINT_RULE_ID` — the rule's `id:`.
28//! - `ALINT_LEVEL` — `error` / `warning` / `info`.
//! - `ALINT_VAR_<NAME>` — one per top-level `vars:` entry. The
//!   `<NAME>` is the YAML key uppercased and otherwise passed
//!   through verbatim (no character substitution). For portable
//!   `command:` rules use only `[A-Za-z0-9_]` in `vars:` keys:
//!   keys containing other characters (e.g. `my-var`) still set
//!   an env var, but its name cannot be referenced as `$NAME`
//!   by POSIX shells, so shell-based commands cannot read it.
36//! - `ALINT_FACT_<NAME>` — one per resolved fact, stringified.
37//!   Same naming contract as `ALINT_VAR_<NAME>`.
38//!
39//! Trust model: `command` rules are only allowed in the user's own
40//! top-level config. Any extended source (local file, HTTPS URL,
41//! `alint://bundled/`) declaring `kind: command` is rejected at
42//! load time by `alint_dsl::reject_command_rules_in` — otherwise a
43//! malicious or compromised ruleset would gain arbitrary process
44//! execution simply by being fetched. Mirrors the existing
45//! `custom:` fact gate.
46
47use std::io::Read;
48use std::path::Path;
49use std::process::{Command as StdCommand, Stdio};
50use std::time::{Duration, Instant};
51
52use alint_core::template::{PathTokens, render_path};
53use alint_core::{Context, Error, FactValue, Level, Result, Rule, RuleSpec, Scope, Violation};
54use serde::Deserialize;
55
/// Default per-file timeout. Generous for slow tools (kubeconform
/// pulling schemas, slow shellcheck on large files) but bounded
/// enough to not stall a CI run on a hung child indefinitely.
/// Applied by `build` when the rule omits `timeout:`.
const DEFAULT_TIMEOUT_SECS: u64 = 30;

/// Cap on each of stdout / stderr captured into a violation
/// message. Tools like cargo can emit tens of MB on a single
/// failed file; bound it to keep reports legible and memory low.
/// The cap is per stream, so a single message can carry up to
/// twice this many bytes of tool output.
const OUTPUT_CAP_BYTES: usize = 16 * 1024;

/// Granularity of the wait-loop. 10ms is short enough that fast
/// tools (10–50ms typical for shellcheck per file) don't see
/// noticeable polling overhead, and long enough to keep CPU
/// idle while the child runs. It is also the resolution at which
/// the per-file timeout is checked.
const POLL_INTERVAL: Duration = Duration::from_millis(10);
71
/// Rule-specific options deserialized from the rule spec by `build`.
///
/// `deny_unknown_fields` makes any key other than the ones listed
/// here a deserialization error instead of being silently ignored.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// Argv to spawn for each matched file: the program first, then
    /// its arguments. Tokens may contain path templates such as
    /// `{path}`; `build` rejects an empty list.
    command: Vec<String>,
    /// Per-file timeout in seconds. Default
    /// [`DEFAULT_TIMEOUT_SECS`].
    #[serde(default)]
    timeout: Option<u64>,
}
81
/// A built `kind: command` rule: runs an external program once per
/// file matched by `scope` and reports one violation per failing run.
#[derive(Debug)]
pub struct CommandRule {
    /// The rule's `id:`; exported to the child as `ALINT_RULE_ID`.
    id: String,
    /// Severity of the rule; exported to the child as `ALINT_LEVEL`.
    level: Level,
    /// Copied from the spec's `policy_url`. Not read in this file
    /// directly — presumably consumed via `rule_common_impl!`.
    policy_url: Option<String>,
    /// Optional user-supplied message. When set, it replaces the
    /// generated failure text (tool output) in every violation.
    message: Option<String>,
    /// Which indexed files the command is run for.
    scope: Scope,
    /// Un-rendered argv; path templates are substituted per file.
    argv: Vec<String>,
    /// Maximum wall-clock time a single invocation may run.
    timeout: Duration,
}
92
93impl Rule for CommandRule {
94    alint_core::rule_common_impl!();
95
96    fn path_scope(&self) -> Option<&Scope> {
97        Some(&self.scope)
98    }
99    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
100        let mut violations = Vec::new();
101        for entry in ctx.index.files() {
102            if !self.scope.matches(&entry.path, ctx.index) {
103                continue;
104            }
105            let tokens = PathTokens::from_path(&entry.path);
106            let rendered: Vec<String> = self.argv.iter().map(|s| render_path(s, &tokens)).collect();
107            if let Outcome::Fail(msg) = run_one(
108                &rendered,
109                ctx.root,
110                &entry.path,
111                &self.id,
112                self.level,
113                ctx,
114                self.timeout,
115            ) {
116                let final_msg = self.message.clone().unwrap_or(msg);
117                violations.push(Violation::new(final_msg).with_path(entry.path.clone()));
118            }
119        }
120        Ok(violations)
121    }
122}
123
/// Outcome of one per-file invocation. `Pass` produces no
/// violation; `Fail(message)` becomes a single violation
/// anchored on the file path.
enum Outcome {
    /// The child was spawned and exited with a success status.
    Pass,
    /// Anything else: non-zero / signal exit, spawn failure, wait
    /// error, or timeout. The string is the human-readable cause.
    Fail(String),
}
131
132#[allow(clippy::too_many_arguments)] // Fewer args = more state-keeping; this is the natural shape.
133fn run_one(
134    argv: &[String],
135    root: &Path,
136    rel_path: &Path,
137    rule_id: &str,
138    level: Level,
139    ctx: &Context<'_>,
140    timeout: Duration,
141) -> Outcome {
142    let Some((program, rest)) = argv.split_first() else {
143        return Outcome::Fail("command rule's argv is empty".to_string());
144    };
145
146    let mut cmd = StdCommand::new(program);
147    cmd.args(rest)
148        .current_dir(root)
149        .stdin(Stdio::null())
150        .stdout(Stdio::piped())
151        .stderr(Stdio::piped())
152        .env("ALINT_PATH", rel_path.to_string_lossy().as_ref())
153        .env("ALINT_ROOT", root.to_string_lossy().as_ref())
154        .env("ALINT_RULE_ID", rule_id)
155        .env("ALINT_LEVEL", level.as_str());
156
157    if let Some(vars) = ctx.vars {
158        for (k, v) in vars {
159            cmd.env(format!("ALINT_VAR_{}", k.to_uppercase()), v);
160        }
161    }
162    if let Some(facts) = ctx.facts {
163        for (k, v) in facts.as_map() {
164            cmd.env(format!("ALINT_FACT_{}", k.to_uppercase()), fact_to_env(v));
165        }
166    }
167
168    let mut child = match cmd.spawn() {
169        Ok(c) => c,
170        Err(e) => {
171            return Outcome::Fail(format!(
172                "could not spawn `{}`: {} \
173                 (is it on PATH? working dir: {})",
174                program,
175                e,
176                root.display()
177            ));
178        }
179    };
180
181    let start = Instant::now();
182    loop {
183        match child.try_wait() {
184            Ok(Some(status)) => {
185                let stdout_bytes = drain(child.stdout.take());
186                let stderr_bytes = drain(child.stderr.take());
187                if status.success() {
188                    return Outcome::Pass;
189                }
190                return Outcome::Fail(format_failure(
191                    program,
192                    status.code(),
193                    &stdout_bytes,
194                    &stderr_bytes,
195                ));
196            }
197            Ok(None) => {
198                if start.elapsed() >= timeout {
199                    let _ = child.kill();
200                    let _ = child.wait();
201                    return Outcome::Fail(format!(
202                        "`{}` did not exit within {}s (raise `timeout:` on the rule to extend)",
203                        program,
204                        timeout.as_secs()
205                    ));
206                }
207                std::thread::sleep(POLL_INTERVAL);
208            }
209            Err(e) => {
210                let _ = child.kill();
211                let _ = child.wait();
212                return Outcome::Fail(format!("`{program}` wait error: {e}"));
213            }
214        }
215    }
216}
217
218/// Read up to [`OUTPUT_CAP_BYTES`] from a captured pipe. Errors
219/// drain to an empty buffer so the failure-message render still
220/// produces something useful for the user.
221fn drain(pipe: Option<impl Read>) -> Vec<u8> {
222    let Some(mut p) = pipe else {
223        return Vec::new();
224    };
225    let mut buf = Vec::with_capacity(1024);
226    let _ = p
227        .by_ref()
228        .take(OUTPUT_CAP_BYTES as u64)
229        .read_to_end(&mut buf);
230    buf
231}
232
233fn format_failure(program: &str, code: Option<i32>, stdout: &[u8], stderr: &[u8]) -> String {
234    let stdout_s = lossy_trim(stdout);
235    let stderr_s = lossy_trim(stderr);
236    let exit = code.map_or_else(|| "killed by signal".to_string(), |c| format!("exit {c}"));
237    match (stdout_s.is_empty(), stderr_s.is_empty()) {
238        (true, true) => format!("`{program}` failed ({exit}); no output"),
239        (false, true) => format!("`{program}` failed ({exit}):\n{stdout_s}"),
240        (true, false) => format!("`{program}` failed ({exit}):\n{stderr_s}"),
241        (false, false) => format!("`{program}` failed ({exit}):\n{stdout_s}\n{stderr_s}"),
242    }
243}
244
/// Decode `bytes` as UTF-8 (invalid sequences become U+FFFD) and drop
/// trailing whitespace; leading whitespace is kept.
fn lossy_trim(bytes: &[u8]) -> String {
    let decoded = String::from_utf8_lossy(bytes);
    decoded.trim_end().to_owned()
}
248
249fn fact_to_env(v: &FactValue) -> String {
250    match v {
251        FactValue::Bool(b) => b.to_string(),
252        FactValue::Int(i) => i.to_string(),
253        FactValue::String(s) => s.clone(),
254    }
255}
256
257pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
258    let Some(_paths) = &spec.paths else {
259        return Err(Error::rule_config(
260            &spec.id,
261            "command requires a `paths` field",
262        ));
263    };
264    let opts: Options = spec
265        .deserialize_options()
266        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
267    if opts.command.is_empty() {
268        return Err(Error::rule_config(
269            &spec.id,
270            "command rule's `command:` argv must not be empty",
271        ));
272    }
273    if spec.fix.is_some() {
274        return Err(Error::rule_config(
275            &spec.id,
276            "command rules do not support `fix:` blocks in v0.5.x — \
277             wire a paired fix-on-save tool via a separate `command` \
278             rule (or another rule kind) for now",
279        ));
280    }
281    let timeout = Duration::from_secs(opts.timeout.unwrap_or(DEFAULT_TIMEOUT_SECS));
282    Ok(Box::new(CommandRule {
283        id: spec.id.clone(),
284        level: spec.level,
285        policy_url: spec.policy_url.clone(),
286        message: spec.message.clone(),
287        scope: Scope::from_spec(spec)?,
288        argv: opts.command,
289        timeout,
290    }))
291}
292
// Tests below shell out to `/bin/sh` (and name `/bin/true` in
// build-only specs that never spawn it) to exercise the spawn /
// argv-template / timeout paths without pulling in a per-OS test
// fixture. That doesn't translate to Windows (`/bin/sh` doesn't
// exist), so the whole module is gated to Unix targets —
// Cross-Platform / windows-latest skips it cleanly while Linux +
// macOS continue to exercise it.
#[cfg(all(test, unix))]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex};

    /// Build an in-memory index of regular files (size 1) from
    /// relative path strings.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex::from_entries(
            paths
                .iter()
                .map(|p| FileEntry {
                    path: std::path::Path::new(p).into(),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Construct a `CommandRule` directly (bypassing `build`) with
    /// id `t`, error level, no custom message, and a single scope
    /// pattern.
    fn rule(argv: Vec<&str>, scope: &str, timeout: Duration) -> CommandRule {
        CommandRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            scope: Scope::from_patterns(&[scope.to_string()]).unwrap(),
            argv: argv.into_iter().map(String::from).collect(),
            timeout,
        }
    }

    /// Minimal evaluation context: only `root` and `index` are set,
    /// so no `ALINT_VAR_*` / `ALINT_FACT_*` variables are exported.
    fn ctx<'a>(root: &'a Path, index: &'a FileIndex) -> Context<'a> {
        Context {
            root,
            index,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        }
    }

    /// A zero exit status produces no violation.
    #[test]
    fn pass_on_zero_exit() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"hello").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["/bin/sh", "-c", "exit 0"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "unexpected violations: {v:?}");
    }

    /// A non-zero exit yields one violation anchored on the file,
    /// carrying both the exit code and the child's stderr.
    #[test]
    fn fail_on_nonzero_exit_carries_stderr() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["/bin/sh", "-c", "echo problem >&2; exit 7"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("a.txt")));
        assert!(v[0].message.contains("exit 7"), "msg: {}", v[0].message);
        assert!(v[0].message.contains("problem"), "msg: {}", v[0].message);
    }

    /// `{path}` in an argv token is replaced by the matched file's
    /// relative path before the child is spawned.
    #[test]
    fn path_template_substitutes_in_argv() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"hi").unwrap();
        let index = idx(&["a.txt"]);
        // The rendered `{path}` arrives in the script as `$1` (the `_`
        // token fills `$0`). The script exits 1 unless `$1` is exactly
        // `a.txt`, so an empty violation list proves the substitution.
        let r = rule(
            vec![
                "/bin/sh",
                "-c",
                "[ \"$1\" = a.txt ] || exit 1",
                "_",
                "{path}",
            ],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "argv substitution failed: {v:?}");
    }

    /// A child outliving the rule's timeout is reported as a
    /// violation whose message mentions that it did not exit.
    #[test]
    fn timeout_emits_violation() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["/bin/sh", "-c", "sleep 5"],
            "*.txt",
            Duration::from_millis(150),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(
            v[0].message.contains("did not exit"),
            "msg: {}",
            v[0].message
        );
    }

    /// A program that cannot be spawned becomes a violation rather
    /// than an evaluation error.
    #[test]
    fn unknown_program_produces_spawn_error_violation() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["alint-no-such-program-xyzzy"],
            "*.txt",
            Duration::from_secs(2),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("could not spawn"));
    }

    /// The child sees `ALINT_PATH` set to the matched file's
    /// relative path.
    #[test]
    fn alint_path_env_set_for_child() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        // Child fails unless ALINT_PATH matches.
        let r = rule(
            vec!["/bin/sh", "-c", "[ \"$ALINT_PATH\" = a.txt ] || exit 1"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "ALINT_PATH not set: {v:?}");
    }

    /// `command: []` is a configuration error at build time.
    #[test]
    fn empty_argv_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
paths: "*.txt"
command: []
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("empty argv must error");
        assert!(format!("{err}").contains("argv must not be empty"));
    }

    /// A spec without `paths` is a configuration error at build time.
    #[test]
    fn missing_paths_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
command: ["/bin/true"]
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("missing paths must error");
        assert!(format!("{err}").contains("requires a `paths` field"));
    }

    /// A `fix:` block on a command rule is a configuration error at
    /// build time.
    #[test]
    fn fix_block_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
paths: "*.txt"
command: ["/bin/true"]
fix:
  file_remove: {}
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("fix on command rule must error");
        assert!(format!("{err}").contains("do not support `fix:`"));
    }

    #[test]
    fn scope_filter_narrows() {
        // Two failing files; only the one inside a directory
        // with `marker.lock` as ancestor should fire. Build via
        // YAML so the `scope_filter:` is bundled into the rule's
        // Scope by `Scope::from_spec` — same path real configs take.
        let tmp = tempfile::tempdir().unwrap();
        std::fs::create_dir_all(tmp.path().join("pkg")).unwrap();
        std::fs::create_dir_all(tmp.path().join("other")).unwrap();
        std::fs::write(tmp.path().join("pkg/marker.lock"), b"").unwrap();
        std::fs::write(tmp.path().join("pkg/a.txt"), b"x").unwrap();
        std::fs::write(tmp.path().join("other/a.txt"), b"x").unwrap();
        let index = idx(&["pkg/marker.lock", "pkg/a.txt", "other/a.txt"]);
        let yaml = r#"
id: t
kind: command
level: error
paths: "**/a.txt"
scope_filter:
  has_ancestor: marker.lock
command: ["/bin/sh", "-c", "exit 1"]
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let r = build(&spec).unwrap();
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1, "only in-scope file should fire: {v:?}");
        assert_eq!(v[0].path.as_deref(), Some(Path::new("pkg/a.txt")));
    }
}
514}