Skip to main content

alint_rules/
command.rs

1//! `command` — shell out to an external CLI per matched file.
2//!
3//! Per-file rule: for every file matching `paths`, spawn the
4//! given `command:` argv with path-template substitution, capture
5//! exit code and stdout/stderr. Exit `0` is a pass; non-zero is
6//! one violation whose message is the (truncated) stdout+stderr.
7//! Spawn / wait failures and timeouts produce a violation with a
8//! clear cause line.
9//!
10//! ```yaml
11//! - id: workflows-clean
12//!   kind: command
13//!   paths: ".github/workflows/*.{yml,yaml}"
14//!   command: ["actionlint", "{path}"]
15//!   level: error
16//! ```
17//!
18//! Path templates supported in argv tokens (and in the alint-injected
19//! `ALINT_PATH` env var): `{path}`, `{dir}`, `{stem}`, `{ext}`,
20//! `{basename}`, `{parent_name}`. Working directory is the alint
21//! root. Stdin is closed (`/dev/null`).
22//!
23//! Environment threaded into the child:
24//!
25//! - `ALINT_PATH` — relative path of the matched file.
26//! - `ALINT_ROOT` — absolute repo root.
27//! - `ALINT_RULE_ID` — the rule's `id:`.
28//! - `ALINT_LEVEL` — `error` / `warning` / `info`.
29//! - `ALINT_VAR_<NAME>` — one per top-level `vars:` entry,
30//!   uppercased.
31//! - `ALINT_FACT_<NAME>` — one per resolved fact, stringified.
32//!
33//! Trust model: `command` rules are only allowed in the user's own
34//! top-level config. Any extended source (local file, HTTPS URL,
35//! `alint://bundled/`) declaring `kind: command` is rejected at
36//! load time by `alint_dsl::reject_command_rules_in` — otherwise a
37//! malicious or compromised ruleset would gain arbitrary process
38//! execution simply by being fetched. Mirrors the existing
39//! `custom:` fact gate.
40
41use std::io::Read;
42use std::path::Path;
43use std::process::{Command as StdCommand, Stdio};
44use std::time::{Duration, Instant};
45
46use alint_core::template::{PathTokens, render_path};
47use alint_core::{Context, Error, FactValue, Level, Result, Rule, RuleSpec, Scope, Violation};
48use serde::Deserialize;
49
/// Default per-file timeout, in seconds, applied by `build` when a
/// rule omits `timeout:`. Generous for slow tools (kubeconform
/// pulling schemas, slow shellcheck on large files) but bounded
/// enough to not stall a CI run on a hung child indefinitely.
const DEFAULT_TIMEOUT_SECS: u64 = 30;

/// Cap, in bytes, on each of stdout / stderr captured into a
/// violation message (the two streams are capped independently).
/// Tools like cargo can emit tens of MB on a single
/// failed file; bound it to keep reports legible and memory low.
const OUTPUT_CAP_BYTES: usize = 16 * 1024;

/// Granularity of the wait-loop: how long `run_one` sleeps between
/// `try_wait` polls. 10ms is short enough that fast
/// tools (10–50ms typical for shellcheck per file) don't see
/// noticeable polling overhead, and long enough to keep CPU
/// idle while the child runs.
const POLL_INTERVAL: Duration = Duration::from_millis(10);
65
/// Rule-specific options for `kind: command`, deserialized by
/// `build` via `RuleSpec::deserialize_options`. Unknown keys are
/// rejected (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// Argv to spawn: program first, then its arguments. `build`
    /// rejects an empty list. Tokens are passed through
    /// `render_path` for every matched file.
    command: Vec<String>,
    /// Per-file timeout in seconds. Default
    /// [`DEFAULT_TIMEOUT_SECS`].
    #[serde(default)]
    timeout: Option<u64>,
}
75
/// A configured `command` rule: runs an external program once per
/// file matched by `scope` and reports a violation for every
/// invocation that does not pass.
#[derive(Debug)]
pub struct CommandRule {
    /// The rule's `id:`; also exported to the child as `ALINT_RULE_ID`.
    id: String,
    /// Severity of the rule; also exported to the child as `ALINT_LEVEL`.
    level: Level,
    /// Optional policy link returned by `Rule::policy_url`.
    policy_url: Option<String>,
    /// Optional user-supplied message. When set it replaces the
    /// generated failure text on every violation.
    message: Option<String>,
    /// Path filter built from the rule's `paths:` field.
    scope: Scope,
    /// Unrendered argv (program + arguments); path templates are
    /// substituted per matched file before spawning.
    argv: Vec<String>,
    /// Maximum time a single invocation may run before it is killed.
    timeout: Duration,
}
86
87impl Rule for CommandRule {
88    fn id(&self) -> &str {
89        &self.id
90    }
91    fn level(&self) -> Level {
92        self.level
93    }
94    fn policy_url(&self) -> Option<&str> {
95        self.policy_url.as_deref()
96    }
97
98    fn path_scope(&self) -> Option<&Scope> {
99        Some(&self.scope)
100    }
101
102    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
103        let mut violations = Vec::new();
104        for entry in ctx.index.files() {
105            if !self.scope.matches(&entry.path) {
106                continue;
107            }
108            let tokens = PathTokens::from_path(&entry.path);
109            let rendered: Vec<String> = self.argv.iter().map(|s| render_path(s, &tokens)).collect();
110            if let Outcome::Fail(msg) = run_one(
111                &rendered,
112                ctx.root,
113                &entry.path,
114                &self.id,
115                self.level,
116                ctx,
117                self.timeout,
118            ) {
119                let final_msg = self.message.clone().unwrap_or(msg);
120                violations.push(Violation::new(final_msg).with_path(&entry.path));
121            }
122        }
123        Ok(violations)
124    }
125}
126
/// Outcome of one per-file invocation. `Pass` produces no
/// violation; `Fail(message)` becomes a single violation
/// anchored on the file path.
enum Outcome {
    /// The child exited successfully.
    Pass,
    /// The invocation did not pass: non-zero exit, spawn or wait
    /// failure, or timeout. Carries the human-readable cause.
    Fail(String),
}
134
135#[allow(clippy::too_many_arguments)] // Fewer args = more state-keeping; this is the natural shape.
136fn run_one(
137    argv: &[String],
138    root: &Path,
139    rel_path: &Path,
140    rule_id: &str,
141    level: Level,
142    ctx: &Context<'_>,
143    timeout: Duration,
144) -> Outcome {
145    let Some((program, rest)) = argv.split_first() else {
146        return Outcome::Fail("command rule's argv is empty".to_string());
147    };
148
149    let mut cmd = StdCommand::new(program);
150    cmd.args(rest)
151        .current_dir(root)
152        .stdin(Stdio::null())
153        .stdout(Stdio::piped())
154        .stderr(Stdio::piped())
155        .env("ALINT_PATH", rel_path.to_string_lossy().as_ref())
156        .env("ALINT_ROOT", root.to_string_lossy().as_ref())
157        .env("ALINT_RULE_ID", rule_id)
158        .env("ALINT_LEVEL", level.as_str());
159
160    if let Some(vars) = ctx.vars {
161        for (k, v) in vars {
162            cmd.env(format!("ALINT_VAR_{}", k.to_uppercase()), v);
163        }
164    }
165    if let Some(facts) = ctx.facts {
166        for (k, v) in facts.as_map() {
167            cmd.env(format!("ALINT_FACT_{}", k.to_uppercase()), fact_to_env(v));
168        }
169    }
170
171    let mut child = match cmd.spawn() {
172        Ok(c) => c,
173        Err(e) => {
174            return Outcome::Fail(format!(
175                "could not spawn `{}`: {} \
176                 (is it on PATH? working dir: {})",
177                program,
178                e,
179                root.display()
180            ));
181        }
182    };
183
184    let start = Instant::now();
185    loop {
186        match child.try_wait() {
187            Ok(Some(status)) => {
188                let stdout_bytes = drain(child.stdout.take());
189                let stderr_bytes = drain(child.stderr.take());
190                if status.success() {
191                    return Outcome::Pass;
192                }
193                return Outcome::Fail(format_failure(
194                    program,
195                    status.code(),
196                    &stdout_bytes,
197                    &stderr_bytes,
198                ));
199            }
200            Ok(None) => {
201                if start.elapsed() >= timeout {
202                    let _ = child.kill();
203                    let _ = child.wait();
204                    return Outcome::Fail(format!(
205                        "`{}` did not exit within {}s (raise `timeout:` on the rule to extend)",
206                        program,
207                        timeout.as_secs()
208                    ));
209                }
210                std::thread::sleep(POLL_INTERVAL);
211            }
212            Err(e) => {
213                let _ = child.kill();
214                let _ = child.wait();
215                return Outcome::Fail(format!("`{program}` wait error: {e}"));
216            }
217        }
218    }
219}
220
221/// Read up to [`OUTPUT_CAP_BYTES`] from a captured pipe. Errors
222/// drain to an empty buffer so the failure-message render still
223/// produces something useful for the user.
224fn drain(pipe: Option<impl Read>) -> Vec<u8> {
225    let Some(mut p) = pipe else {
226        return Vec::new();
227    };
228    let mut buf = Vec::with_capacity(1024);
229    let _ = p
230        .by_ref()
231        .take(OUTPUT_CAP_BYTES as u64)
232        .read_to_end(&mut buf);
233    buf
234}
235
236fn format_failure(program: &str, code: Option<i32>, stdout: &[u8], stderr: &[u8]) -> String {
237    let stdout_s = lossy_trim(stdout);
238    let stderr_s = lossy_trim(stderr);
239    let exit = code.map_or_else(|| "killed by signal".to_string(), |c| format!("exit {c}"));
240    match (stdout_s.is_empty(), stderr_s.is_empty()) {
241        (true, true) => format!("`{program}` failed ({exit}); no output"),
242        (false, true) => format!("`{program}` failed ({exit}):\n{stdout_s}"),
243        (true, false) => format!("`{program}` failed ({exit}):\n{stderr_s}"),
244        (false, false) => format!("`{program}` failed ({exit}):\n{stdout_s}\n{stderr_s}"),
245    }
246}
247
/// Decode `bytes` as UTF-8, replacing invalid sequences with
/// U+FFFD, and drop trailing whitespace. Leading whitespace is kept.
fn lossy_trim(bytes: &[u8]) -> String {
    let decoded = String::from_utf8_lossy(bytes);
    decoded.trim_end().to_owned()
}
251
252fn fact_to_env(v: &FactValue) -> String {
253    match v {
254        FactValue::Bool(b) => b.to_string(),
255        FactValue::Int(i) => i.to_string(),
256        FactValue::String(s) => s.clone(),
257    }
258}
259
260pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
261    let Some(paths) = &spec.paths else {
262        return Err(Error::rule_config(
263            &spec.id,
264            "command requires a `paths` field",
265        ));
266    };
267    let opts: Options = spec
268        .deserialize_options()
269        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
270    if opts.command.is_empty() {
271        return Err(Error::rule_config(
272            &spec.id,
273            "command rule's `command:` argv must not be empty",
274        ));
275    }
276    if spec.fix.is_some() {
277        return Err(Error::rule_config(
278            &spec.id,
279            "command rules do not support `fix:` blocks in v0.5.x — \
280             wire a paired fix-on-save tool via a separate `command` \
281             rule (or another rule kind) for now",
282        ));
283    }
284    let timeout = Duration::from_secs(opts.timeout.unwrap_or(DEFAULT_TIMEOUT_SECS));
285    Ok(Box::new(CommandRule {
286        id: spec.id.clone(),
287        level: spec.level,
288        policy_url: spec.policy_url.clone(),
289        message: spec.message.clone(),
290        scope: Scope::from_paths_spec(paths)?,
291        argv: opts.command,
292        timeout,
293    }))
294}
295
296// Tests below shell out to `/bin/sh` and `/bin/true` to
297// exercise the spawn / argv-template / timeout paths without
298// pulling in a per-OS test fixture. That doesn't translate to
299// Windows (`/bin/sh` doesn't exist), so the whole module is
300// gated to Unix targets — Cross-Platform / windows-latest skips
301// it cleanly while Linux + macOS continue to exercise it.
#[cfg(all(test, unix))]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex};
    use std::path::PathBuf;

    /// Build an in-memory index listing `paths` as one-byte regular files.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex {
            entries: paths
                .iter()
                .map(|p| FileEntry {
                    path: PathBuf::from(p),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        }
    }

    /// Construct an error-level rule with id `t`, no custom message,
    /// the given argv, a single scope pattern, and the given timeout.
    fn rule(argv: Vec<&str>, scope: &str, timeout: Duration) -> CommandRule {
        CommandRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            scope: Scope::from_patterns(&[scope.to_string()]).unwrap(),
            argv: argv.into_iter().map(String::from).collect(),
            timeout,
        }
    }

    /// Minimal evaluation context: only `root` and `index` are set.
    fn ctx<'a>(root: &'a Path, index: &'a FileIndex) -> Context<'a> {
        Context {
            root,
            index,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        }
    }

    #[test]
    fn pass_on_zero_exit() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"hello").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["/bin/sh", "-c", "exit 0"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "unexpected violations: {v:?}");
    }

    #[test]
    fn fail_on_nonzero_exit_carries_stderr() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["/bin/sh", "-c", "echo problem >&2; exit 7"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("a.txt")));
        assert!(v[0].message.contains("exit 7"), "msg: {}", v[0].message);
        assert!(v[0].message.contains("problem"), "msg: {}", v[0].message);
    }

    #[test]
    fn path_template_substitutes_in_argv() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"hi").unwrap();
        let index = idx(&["a.txt"]);
        // The rendered `{path}` reaches the script as `$1` (`_` fills
        // `$0`); the shell test exits 0 only when it equals `a.txt`.
        let r = rule(
            vec![
                "/bin/sh",
                "-c",
                "[ \"$1\" = a.txt ] || exit 1",
                "_",
                "{path}",
            ],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "argv substitution failed: {v:?}");
    }

    #[test]
    fn timeout_emits_violation() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        // The child sleeps far longer than the 150ms timeout.
        let r = rule(
            vec!["/bin/sh", "-c", "sleep 5"],
            "*.txt",
            Duration::from_millis(150),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(
            v[0].message.contains("did not exit"),
            "msg: {}",
            v[0].message
        );
    }

    #[test]
    fn unknown_program_produces_spawn_error_violation() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["alint-no-such-program-xyzzy"],
            "*.txt",
            Duration::from_secs(2),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("could not spawn"));
    }

    #[test]
    fn alint_path_env_set_for_child() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        // Child fails unless ALINT_PATH matches.
        let r = rule(
            vec!["/bin/sh", "-c", "[ \"$ALINT_PATH\" = a.txt ] || exit 1"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "ALINT_PATH not set: {v:?}");
    }

    #[test]
    fn empty_argv_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
paths: "*.txt"
command: []
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("empty argv must error");
        assert!(format!("{err}").contains("argv must not be empty"));
    }

    #[test]
    fn missing_paths_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
command: ["/bin/true"]
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("missing paths must error");
        assert!(format!("{err}").contains("requires a `paths` field"));
    }

    #[test]
    fn fix_block_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
paths: "*.txt"
command: ["/bin/true"]
fix:
  file_remove: {}
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("fix on command rule must error");
        assert!(format!("{err}").contains("do not support `fix:`"));
    }
}
489}