Skip to main content

alint_rules/
command.rs

//! `command` — shell out to an external CLI per matched file.
//!
//! Per-file rule: for every file matching `paths`, spawn the
//! given `command:` argv with path-template substitution, capture
//! exit code and stdout/stderr. Exit `0` is a pass; non-zero is
//! one violation whose message is the (truncated) stdout+stderr.
//! Spawn / wait failures and timeouts produce a violation with a
//! clear cause line. A rule-level `message:`, when set, replaces
//! the generated message in all of these cases.
//!
//! ```yaml
//! - id: workflows-clean
//!   kind: command
//!   paths: ".github/workflows/*.{yml,yaml}"
//!   command: ["actionlint", "{path}"]
//!   level: error
//! ```
//!
//! Path templates supported in argv tokens (and in the alint-injected
//! `ALINT_PATH` env var): `{path}`, `{dir}`, `{stem}`, `{ext}`,
//! `{basename}`, `{parent_name}`. Working directory is the alint
//! root. Stdin is redirected from the null device (`/dev/null`),
//! so the child sees end-of-file on its first read.
//!
//! Environment threaded into the child:
//!
//! - `ALINT_PATH` — relative path of the matched file.
//! - `ALINT_ROOT` — absolute repo root.
//! - `ALINT_RULE_ID` — the rule's `id:`.
//! - `ALINT_LEVEL` — `error` / `warning` / `info`.
//! - `ALINT_VAR_<NAME>` — one per top-level `vars:` entry,
//!   uppercased.
//! - `ALINT_FACT_<NAME>` — one per resolved fact, stringified.
//!
//! Trust model: `command` rules are only allowed in the user's own
//! top-level config. Any extended source (local file, HTTPS URL,
//! `alint://bundled/`) declaring `kind: command` is rejected at
//! load time by `alint_dsl::reject_command_rules_in` — otherwise a
//! malicious or compromised ruleset would gain arbitrary process
//! execution simply by being fetched. Mirrors the existing
//! `custom:` fact gate.
40
41use std::io::Read;
42use std::path::Path;
43use std::process::{Command as StdCommand, Stdio};
44use std::time::{Duration, Instant};
45
46use alint_core::template::{PathTokens, render_path};
47use alint_core::{
48    Context, Error, FactValue, Level, Result, Rule, RuleSpec, Scope, ScopeFilter, Violation,
49};
50use serde::Deserialize;
51
/// Default per-file timeout. Generous for slow tools (kubeconform
/// pulling schemas, slow shellcheck on large files) but bounded
/// enough to not stall a CI run on a hung child indefinitely.
/// Used by `build` when the rule does not set `timeout:`.
const DEFAULT_TIMEOUT_SECS: u64 = 30;

/// Cap on each of stdout / stderr captured into a violation
/// message. Tools like cargo can emit tens of MB on a single
/// failed file; bound it to keep reports legible and memory low.
/// The cap is applied to each stream separately, so one message
/// can carry up to twice this many bytes of tool output.
const OUTPUT_CAP_BYTES: usize = 16 * 1024;

/// Granularity of the wait-loop. 10ms is short enough that fast
/// tools (10–50ms typical for shellcheck per file) don't see
/// noticeable polling overhead, and long enough to keep CPU
/// idle while the child runs. Because the deadline is only
/// checked between sleeps, timeout detection can lag the
/// configured limit by up to one interval.
const POLL_INTERVAL: Duration = Duration::from_millis(10);
67
/// Rule-specific options, deserialized in `build` via
/// `RuleSpec::deserialize_options`. Unrecognised keys are
/// rejected (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// Argv to spawn: the program first, then its arguments.
    /// Each token may contain path templates, which are rendered
    /// per matched file before the spawn. Must not be empty
    /// (checked in `build`).
    command: Vec<String>,
    /// Per-file timeout in seconds. Default
    /// [`DEFAULT_TIMEOUT_SECS`].
    #[serde(default)]
    timeout: Option<u64>,
}
77
/// A configured `command` rule: runs an external program once
/// per in-scope file and reports a violation for each failing
/// invocation. Constructed by `build`.
#[derive(Debug)]
pub struct CommandRule {
    /// The rule's `id:`; also exported to the child as
    /// `ALINT_RULE_ID`.
    id: String,
    /// Severity of the rule; also exported to the child as
    /// `ALINT_LEVEL`.
    level: Level,
    /// Optional policy link copied from the rule spec.
    policy_url: Option<String>,
    /// Optional fixed violation message. When set it replaces the
    /// message generated from the tool's exit status and output.
    message: Option<String>,
    /// Path matcher built from the rule's `paths` field.
    scope: Scope,
    /// Optional extra filter applied after `scope` matches.
    scope_filter: Option<ScopeFilter>,
    /// Unrendered argv; path templates are substituted per file.
    argv: Vec<String>,
    /// Maximum time a single invocation may run before it is
    /// killed and reported as a violation.
    timeout: Duration,
}
89
90impl Rule for CommandRule {
91    fn id(&self) -> &str {
92        &self.id
93    }
94    fn level(&self) -> Level {
95        self.level
96    }
97    fn policy_url(&self) -> Option<&str> {
98        self.policy_url.as_deref()
99    }
100
101    fn path_scope(&self) -> Option<&Scope> {
102        Some(&self.scope)
103    }
104
105    fn scope_filter(&self) -> Option<&ScopeFilter> {
106        self.scope_filter.as_ref()
107    }
108
109    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
110        let mut violations = Vec::new();
111        for entry in ctx.index.files() {
112            if !self.scope.matches(&entry.path) {
113                continue;
114            }
115            if let Some(filter) = &self.scope_filter
116                && !filter.matches(&entry.path, ctx.index)
117            {
118                continue;
119            }
120            let tokens = PathTokens::from_path(&entry.path);
121            let rendered: Vec<String> = self.argv.iter().map(|s| render_path(s, &tokens)).collect();
122            if let Outcome::Fail(msg) = run_one(
123                &rendered,
124                ctx.root,
125                &entry.path,
126                &self.id,
127                self.level,
128                ctx,
129                self.timeout,
130            ) {
131                let final_msg = self.message.clone().unwrap_or(msg);
132                violations.push(Violation::new(final_msg).with_path(entry.path.clone()));
133            }
134        }
135        Ok(violations)
136    }
137}
138
/// Outcome of one per-file invocation. `Pass` produces no
/// violation; `Fail(message)` becomes a single violation
/// anchored on the file path.
enum Outcome {
    /// The child ran and exited successfully.
    Pass,
    /// The invocation failed: non-zero / signalled exit, spawn or
    /// wait error, or timeout. Carries the generated message.
    Fail(String),
}
146
147#[allow(clippy::too_many_arguments)] // Fewer args = more state-keeping; this is the natural shape.
148fn run_one(
149    argv: &[String],
150    root: &Path,
151    rel_path: &Path,
152    rule_id: &str,
153    level: Level,
154    ctx: &Context<'_>,
155    timeout: Duration,
156) -> Outcome {
157    let Some((program, rest)) = argv.split_first() else {
158        return Outcome::Fail("command rule's argv is empty".to_string());
159    };
160
161    let mut cmd = StdCommand::new(program);
162    cmd.args(rest)
163        .current_dir(root)
164        .stdin(Stdio::null())
165        .stdout(Stdio::piped())
166        .stderr(Stdio::piped())
167        .env("ALINT_PATH", rel_path.to_string_lossy().as_ref())
168        .env("ALINT_ROOT", root.to_string_lossy().as_ref())
169        .env("ALINT_RULE_ID", rule_id)
170        .env("ALINT_LEVEL", level.as_str());
171
172    if let Some(vars) = ctx.vars {
173        for (k, v) in vars {
174            cmd.env(format!("ALINT_VAR_{}", k.to_uppercase()), v);
175        }
176    }
177    if let Some(facts) = ctx.facts {
178        for (k, v) in facts.as_map() {
179            cmd.env(format!("ALINT_FACT_{}", k.to_uppercase()), fact_to_env(v));
180        }
181    }
182
183    let mut child = match cmd.spawn() {
184        Ok(c) => c,
185        Err(e) => {
186            return Outcome::Fail(format!(
187                "could not spawn `{}`: {} \
188                 (is it on PATH? working dir: {})",
189                program,
190                e,
191                root.display()
192            ));
193        }
194    };
195
196    let start = Instant::now();
197    loop {
198        match child.try_wait() {
199            Ok(Some(status)) => {
200                let stdout_bytes = drain(child.stdout.take());
201                let stderr_bytes = drain(child.stderr.take());
202                if status.success() {
203                    return Outcome::Pass;
204                }
205                return Outcome::Fail(format_failure(
206                    program,
207                    status.code(),
208                    &stdout_bytes,
209                    &stderr_bytes,
210                ));
211            }
212            Ok(None) => {
213                if start.elapsed() >= timeout {
214                    let _ = child.kill();
215                    let _ = child.wait();
216                    return Outcome::Fail(format!(
217                        "`{}` did not exit within {}s (raise `timeout:` on the rule to extend)",
218                        program,
219                        timeout.as_secs()
220                    ));
221                }
222                std::thread::sleep(POLL_INTERVAL);
223            }
224            Err(e) => {
225                let _ = child.kill();
226                let _ = child.wait();
227                return Outcome::Fail(format!("`{program}` wait error: {e}"));
228            }
229        }
230    }
231}
232
233/// Read up to [`OUTPUT_CAP_BYTES`] from a captured pipe. Errors
234/// drain to an empty buffer so the failure-message render still
235/// produces something useful for the user.
236fn drain(pipe: Option<impl Read>) -> Vec<u8> {
237    let Some(mut p) = pipe else {
238        return Vec::new();
239    };
240    let mut buf = Vec::with_capacity(1024);
241    let _ = p
242        .by_ref()
243        .take(OUTPUT_CAP_BYTES as u64)
244        .read_to_end(&mut buf);
245    buf
246}
247
248fn format_failure(program: &str, code: Option<i32>, stdout: &[u8], stderr: &[u8]) -> String {
249    let stdout_s = lossy_trim(stdout);
250    let stderr_s = lossy_trim(stderr);
251    let exit = code.map_or_else(|| "killed by signal".to_string(), |c| format!("exit {c}"));
252    match (stdout_s.is_empty(), stderr_s.is_empty()) {
253        (true, true) => format!("`{program}` failed ({exit}); no output"),
254        (false, true) => format!("`{program}` failed ({exit}):\n{stdout_s}"),
255        (true, false) => format!("`{program}` failed ({exit}):\n{stderr_s}"),
256        (false, false) => format!("`{program}` failed ({exit}):\n{stdout_s}\n{stderr_s}"),
257    }
258}
259
/// Decode `bytes` as UTF-8 (invalid sequences become U+FFFD) and
/// drop trailing whitespace. Leading whitespace is preserved.
fn lossy_trim(bytes: &[u8]) -> String {
    let decoded = String::from_utf8_lossy(bytes);
    decoded.trim_end().to_owned()
}
263
264fn fact_to_env(v: &FactValue) -> String {
265    match v {
266        FactValue::Bool(b) => b.to_string(),
267        FactValue::Int(i) => i.to_string(),
268        FactValue::String(s) => s.clone(),
269    }
270}
271
272pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
273    let Some(paths) = &spec.paths else {
274        return Err(Error::rule_config(
275            &spec.id,
276            "command requires a `paths` field",
277        ));
278    };
279    let opts: Options = spec
280        .deserialize_options()
281        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
282    if opts.command.is_empty() {
283        return Err(Error::rule_config(
284            &spec.id,
285            "command rule's `command:` argv must not be empty",
286        ));
287    }
288    if spec.fix.is_some() {
289        return Err(Error::rule_config(
290            &spec.id,
291            "command rules do not support `fix:` blocks in v0.5.x — \
292             wire a paired fix-on-save tool via a separate `command` \
293             rule (or another rule kind) for now",
294        ));
295    }
296    let timeout = Duration::from_secs(opts.timeout.unwrap_or(DEFAULT_TIMEOUT_SECS));
297    Ok(Box::new(CommandRule {
298        id: spec.id.clone(),
299        level: spec.level,
300        policy_url: spec.policy_url.clone(),
301        message: spec.message.clone(),
302        scope: Scope::from_paths_spec(paths)?,
303        scope_filter: spec.parse_scope_filter()?,
304        argv: opts.command,
305        timeout,
306    }))
307}
308
// Tests below shell out to `/bin/sh` and `/bin/true` to
// exercise the spawn / argv-template / timeout paths without
// pulling in a per-OS test fixture. That doesn't translate to
// Windows (`/bin/sh` doesn't exist), so the whole module is
// gated to Unix targets — Cross-Platform / windows-latest skips
// it cleanly while Linux + macOS continue to exercise it.
#[cfg(all(test, unix))]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex};

    /// Build an in-memory index holding one regular-file entry
    /// (`is_dir: false`, `size: 1`) per given relative path.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex::from_entries(
            paths
                .iter()
                .map(|p| FileEntry {
                    path: std::path::Path::new(p).into(),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Construct a rule with id `t`, level `Error`, no message
    /// override and no scope filter, scoped to the single glob
    /// `scope`.
    fn rule(argv: Vec<&str>, scope: &str, timeout: Duration) -> CommandRule {
        CommandRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            scope: Scope::from_patterns(&[scope.to_string()]).unwrap(),
            scope_filter: None,
            argv: argv.into_iter().map(String::from).collect(),
            timeout,
        }
    }

    /// Minimal evaluation context: only `root` and `index` are
    /// populated, every optional input is `None`.
    fn ctx<'a>(root: &'a Path, index: &'a FileIndex) -> Context<'a> {
        Context {
            root,
            index,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        }
    }

    #[test]
    fn pass_on_zero_exit() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"hello").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["/bin/sh", "-c", "exit 0"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "unexpected violations: {v:?}");
    }

    #[test]
    fn fail_on_nonzero_exit_carries_stderr() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["/bin/sh", "-c", "echo problem >&2; exit 7"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("a.txt")));
        assert!(v[0].message.contains("exit 7"), "msg: {}", v[0].message);
        assert!(v[0].message.contains("problem"), "msg: {}", v[0].message);
    }

    #[test]
    fn path_template_substitutes_in_argv() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"hi").unwrap();
        let index = idx(&["a.txt"]);
        // With `sh -c SCRIPT _ {path}` the `_` placeholder becomes
        // `$0` and the rendered `{path}` becomes `$1`. The script
        // exits non-zero unless `$1` equals `a.txt`, so a missing
        // or wrong substitution shows up as a violation.
        let r = rule(
            vec![
                "/bin/sh",
                "-c",
                "[ \"$1\" = a.txt ] || exit 1",
                "_",
                "{path}",
            ],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "argv substitution failed: {v:?}");
    }

    #[test]
    fn timeout_emits_violation() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        // The child sleeps far longer than the 150ms limit, so the
        // wait-loop must kill it and report a timeout.
        let r = rule(
            vec!["/bin/sh", "-c", "sleep 5"],
            "*.txt",
            Duration::from_millis(150),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(
            v[0].message.contains("did not exit"),
            "msg: {}",
            v[0].message
        );
    }

    #[test]
    fn unknown_program_produces_spawn_error_violation() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["alint-no-such-program-xyzzy"],
            "*.txt",
            Duration::from_secs(2),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("could not spawn"));
    }

    #[test]
    fn alint_path_env_set_for_child() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        // Child fails unless ALINT_PATH matches.
        let r = rule(
            vec!["/bin/sh", "-c", "[ \"$ALINT_PATH\" = a.txt ] || exit 1"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "ALINT_PATH not set: {v:?}");
    }

    #[test]
    fn empty_argv_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
paths: "*.txt"
command: []
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("empty argv must error");
        assert!(format!("{err}").contains("argv must not be empty"));
    }

    #[test]
    fn missing_paths_rejected_at_build_time() {
        // `build` fails before anything is spawned, so `/bin/true`
        // only needs to be a syntactically valid argv here.
        let yaml = r#"
id: t
kind: command
level: error
command: ["/bin/true"]
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("missing paths must error");
        assert!(format!("{err}").contains("requires a `paths` field"));
    }

    #[test]
    fn fix_block_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
paths: "*.txt"
command: ["/bin/true"]
fix:
  file_remove: {}
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("fix on command rule must error");
        assert!(format!("{err}").contains("do not support `fix:`"));
    }

    #[test]
    fn scope_filter_narrows() {
        // Two failing files; only the one inside a directory
        // with `marker.lock` as ancestor should fire.
        let tmp = tempfile::tempdir().unwrap();
        std::fs::create_dir_all(tmp.path().join("pkg")).unwrap();
        std::fs::create_dir_all(tmp.path().join("other")).unwrap();
        std::fs::write(tmp.path().join("pkg/marker.lock"), b"").unwrap();
        std::fs::write(tmp.path().join("pkg/a.txt"), b"x").unwrap();
        std::fs::write(tmp.path().join("other/a.txt"), b"x").unwrap();
        let index = idx(&["pkg/marker.lock", "pkg/a.txt", "other/a.txt"]);
        let r = CommandRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            scope: Scope::from_patterns(&["**/a.txt".into()]).unwrap(),
            scope_filter: Some(ScopeFilter::has_ancestor_unchecked(vec!["marker.lock"])),
            argv: vec!["/bin/sh".into(), "-c".into(), "exit 1".into()],
            timeout: Duration::from_secs(5),
        };
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1, "only in-scope file should fire: {v:?}");
        assert_eq!(v[0].path.as_deref(), Some(Path::new("pkg/a.txt")));
    }
}
528}