alint_rules/
command.rs

1//! `command` — shell out to an external CLI per matched file.
2//!
3//! Per-file rule: for every file matching `paths`, spawn the
4//! given `command:` argv with path-template substitution, capture
5//! exit code and stdout/stderr. Exit `0` is a pass; non-zero is
6//! one violation whose message is the (truncated) stdout+stderr.
7//! Spawn / wait failures and timeouts produce a violation with a
8//! clear cause line.
9//!
10//! ```yaml
11//! - id: workflows-clean
12//!   kind: command
13//!   paths: ".github/workflows/*.{yml,yaml}"
14//!   command: ["actionlint", "{path}"]
15//!   level: error
16//! ```
17//!
18//! Path templates supported in argv tokens (and in the alint-injected
19//! `ALINT_PATH` env var): `{path}`, `{dir}`, `{stem}`, `{ext}`,
20//! `{basename}`, `{parent_name}`. Working directory is the alint
21//! root. Stdin is closed (`/dev/null`).
22//!
23//! Environment threaded into the child:
24//!
25//! - `ALINT_PATH` — relative path of the matched file.
26//! - `ALINT_ROOT` — absolute repo root.
27//! - `ALINT_RULE_ID` — the rule's `id:`.
28//! - `ALINT_LEVEL` — `error` / `warning` / `info`.
29//! - `ALINT_VAR_<NAME>` — one per top-level `vars:` entry,
30//!   uppercased.
31//! - `ALINT_FACT_<NAME>` — one per resolved fact, stringified.
32//!
33//! Trust model: `command` rules are only allowed in the user's own
34//! top-level config. Any extended source (local file, HTTPS URL,
35//! `alint://bundled/`) declaring `kind: command` is rejected at
36//! load time by `alint_dsl::reject_command_rules_in` — otherwise a
37//! malicious or compromised ruleset would gain arbitrary process
38//! execution simply by being fetched. Mirrors the existing
39//! `custom:` fact gate.
40
41use std::io::Read;
42use std::path::Path;
43use std::process::{Command as StdCommand, Stdio};
44use std::time::{Duration, Instant};
45
46use alint_core::template::{PathTokens, render_path};
47use alint_core::{Context, Error, FactValue, Level, Result, Rule, RuleSpec, Scope, Violation};
48use serde::Deserialize;
49
/// Default per-file timeout, in seconds, used when a rule does not
/// set `timeout:`. Generous for slow tools (kubeconform pulling
/// schemas, slow shellcheck on large files) but bounded enough to
/// not stall a CI run on a hung child indefinitely.
const DEFAULT_TIMEOUT_SECS: u64 = 30;

/// Cap, in bytes, on each of stdout / stderr captured into a
/// violation message. The cap applies to each stream separately,
/// so a message can carry up to twice this much output. Tools like
/// cargo can emit tens of MB on a single failed file; bound it to
/// keep reports legible and memory low.
const OUTPUT_CAP_BYTES: usize = 16 * 1024;

/// Granularity of the wait-loop: how long to sleep between checks
/// of whether the child has exited. 10ms is short enough that fast
/// tools (10–50ms typical for shellcheck per file) don't see
/// noticeable polling overhead, and long enough to keep CPU
/// idle while the child runs.
const POLL_INTERVAL: Duration = Duration::from_millis(10);
65
/// Rule-specific options for `kind: command`, obtained in `build`
/// through `RuleSpec::deserialize_options`. Unknown keys are
/// rejected (`deny_unknown_fields`) so typos surface as config
/// errors instead of being silently ignored.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// Argv to spawn for each matched file: the first element is
    /// the program, the remaining elements are its arguments. Each
    /// token is passed through path-template rendering before the
    /// spawn. Must not be empty (checked in `build`).
    command: Vec<String>,
    /// Per-file timeout in seconds. Default
    /// [`DEFAULT_TIMEOUT_SECS`].
    #[serde(default)]
    timeout: Option<u64>,
}
75
/// A built `kind: command` rule: runs an external program once per
/// in-scope file and turns every non-passing run into a violation.
#[derive(Debug)]
pub struct CommandRule {
    /// The rule's `id:`; also exported to the child as `ALINT_RULE_ID`.
    id: String,
    /// Severity of the rule; also exported to the child as `ALINT_LEVEL`.
    level: Level,
    /// Copied from the spec's `policy_url`; not read in this file
    /// outside of construction (presumably consumed by
    /// `rule_common_impl!` — confirm against the macro).
    policy_url: Option<String>,
    /// Optional fixed violation message. When set it replaces the
    /// message derived from the child's exit status and output.
    message: Option<String>,
    /// Which files of the index this rule runs against.
    scope: Scope,
    /// Unrendered argv template: program followed by its arguments.
    argv: Vec<String>,
    /// Maximum wall-clock time a single invocation may run.
    timeout: Duration,
}
86
87impl Rule for CommandRule {
88    alint_core::rule_common_impl!();
89
90    fn path_scope(&self) -> Option<&Scope> {
91        Some(&self.scope)
92    }
93    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
94        let mut violations = Vec::new();
95        for entry in ctx.index.files() {
96            if !self.scope.matches(&entry.path, ctx.index) {
97                continue;
98            }
99            let tokens = PathTokens::from_path(&entry.path);
100            let rendered: Vec<String> = self.argv.iter().map(|s| render_path(s, &tokens)).collect();
101            if let Outcome::Fail(msg) = run_one(
102                &rendered,
103                ctx.root,
104                &entry.path,
105                &self.id,
106                self.level,
107                ctx,
108                self.timeout,
109            ) {
110                let final_msg = self.message.clone().unwrap_or(msg);
111                violations.push(Violation::new(final_msg).with_path(entry.path.clone()));
112            }
113        }
114        Ok(violations)
115    }
116}
117
/// Outcome of one per-file invocation. `Pass` produces no
/// violation; `Fail(message)` becomes a single violation
/// anchored on the file path.
enum Outcome {
    /// The child exited with a success status.
    Pass,
    /// The run did not pass: non-zero / signal exit, spawn failure,
    /// wait error, or timeout. The payload is the human-readable
    /// message describing what went wrong.
    Fail(String),
}
125
126#[allow(clippy::too_many_arguments)] // Fewer args = more state-keeping; this is the natural shape.
127fn run_one(
128    argv: &[String],
129    root: &Path,
130    rel_path: &Path,
131    rule_id: &str,
132    level: Level,
133    ctx: &Context<'_>,
134    timeout: Duration,
135) -> Outcome {
136    let Some((program, rest)) = argv.split_first() else {
137        return Outcome::Fail("command rule's argv is empty".to_string());
138    };
139
140    let mut cmd = StdCommand::new(program);
141    cmd.args(rest)
142        .current_dir(root)
143        .stdin(Stdio::null())
144        .stdout(Stdio::piped())
145        .stderr(Stdio::piped())
146        .env("ALINT_PATH", rel_path.to_string_lossy().as_ref())
147        .env("ALINT_ROOT", root.to_string_lossy().as_ref())
148        .env("ALINT_RULE_ID", rule_id)
149        .env("ALINT_LEVEL", level.as_str());
150
151    if let Some(vars) = ctx.vars {
152        for (k, v) in vars {
153            cmd.env(format!("ALINT_VAR_{}", k.to_uppercase()), v);
154        }
155    }
156    if let Some(facts) = ctx.facts {
157        for (k, v) in facts.as_map() {
158            cmd.env(format!("ALINT_FACT_{}", k.to_uppercase()), fact_to_env(v));
159        }
160    }
161
162    let mut child = match cmd.spawn() {
163        Ok(c) => c,
164        Err(e) => {
165            return Outcome::Fail(format!(
166                "could not spawn `{}`: {} \
167                 (is it on PATH? working dir: {})",
168                program,
169                e,
170                root.display()
171            ));
172        }
173    };
174
175    let start = Instant::now();
176    loop {
177        match child.try_wait() {
178            Ok(Some(status)) => {
179                let stdout_bytes = drain(child.stdout.take());
180                let stderr_bytes = drain(child.stderr.take());
181                if status.success() {
182                    return Outcome::Pass;
183                }
184                return Outcome::Fail(format_failure(
185                    program,
186                    status.code(),
187                    &stdout_bytes,
188                    &stderr_bytes,
189                ));
190            }
191            Ok(None) => {
192                if start.elapsed() >= timeout {
193                    let _ = child.kill();
194                    let _ = child.wait();
195                    return Outcome::Fail(format!(
196                        "`{}` did not exit within {}s (raise `timeout:` on the rule to extend)",
197                        program,
198                        timeout.as_secs()
199                    ));
200                }
201                std::thread::sleep(POLL_INTERVAL);
202            }
203            Err(e) => {
204                let _ = child.kill();
205                let _ = child.wait();
206                return Outcome::Fail(format!("`{program}` wait error: {e}"));
207            }
208        }
209    }
210}
211
212/// Read up to [`OUTPUT_CAP_BYTES`] from a captured pipe. Errors
213/// drain to an empty buffer so the failure-message render still
214/// produces something useful for the user.
215fn drain(pipe: Option<impl Read>) -> Vec<u8> {
216    let Some(mut p) = pipe else {
217        return Vec::new();
218    };
219    let mut buf = Vec::with_capacity(1024);
220    let _ = p
221        .by_ref()
222        .take(OUTPUT_CAP_BYTES as u64)
223        .read_to_end(&mut buf);
224    buf
225}
226
/// Render the violation message for a child that exited unsuccessfully.
///
/// The first line names the program and how it ended (`exit N`, or
/// `killed by signal` when no exit code is available). Any non-empty
/// captured output follows on subsequent lines, stdout before stderr;
/// when both streams are empty the message says so instead.
fn format_failure(program: &str, code: Option<i32>, stdout: &[u8], stderr: &[u8]) -> String {
    let exit = match code {
        Some(c) => format!("exit {c}"),
        None => "killed by signal".to_string(),
    };
    let sections: Vec<String> = [stdout, stderr]
        .iter()
        .map(|bytes| lossy_trim(bytes))
        .filter(|text| !text.is_empty())
        .collect();
    if sections.is_empty() {
        format!("`{program}` failed ({exit}); no output")
    } else {
        format!("`{program}` failed ({exit}):\n{}", sections.join("\n"))
    }
}

/// Decode `bytes` as UTF-8 (invalid sequences become U+FFFD) and
/// strip trailing whitespace.
fn lossy_trim(bytes: &[u8]) -> String {
    let decoded = String::from_utf8_lossy(bytes);
    decoded.trim_end().to_owned()
}
242
243fn fact_to_env(v: &FactValue) -> String {
244    match v {
245        FactValue::Bool(b) => b.to_string(),
246        FactValue::Int(i) => i.to_string(),
247        FactValue::String(s) => s.clone(),
248    }
249}
250
251pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
252    let Some(_paths) = &spec.paths else {
253        return Err(Error::rule_config(
254            &spec.id,
255            "command requires a `paths` field",
256        ));
257    };
258    let opts: Options = spec
259        .deserialize_options()
260        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
261    if opts.command.is_empty() {
262        return Err(Error::rule_config(
263            &spec.id,
264            "command rule's `command:` argv must not be empty",
265        ));
266    }
267    if spec.fix.is_some() {
268        return Err(Error::rule_config(
269            &spec.id,
270            "command rules do not support `fix:` blocks in v0.5.x — \
271             wire a paired fix-on-save tool via a separate `command` \
272             rule (or another rule kind) for now",
273        ));
274    }
275    let timeout = Duration::from_secs(opts.timeout.unwrap_or(DEFAULT_TIMEOUT_SECS));
276    Ok(Box::new(CommandRule {
277        id: spec.id.clone(),
278        level: spec.level,
279        policy_url: spec.policy_url.clone(),
280        message: spec.message.clone(),
281        scope: Scope::from_spec(spec)?,
282        argv: opts.command,
283        timeout,
284    }))
285}
286
287// Tests below shell out to `/bin/sh` and `/bin/true` to
288// exercise the spawn / argv-template / timeout paths without
289// pulling in a per-OS test fixture. That doesn't translate to
290// Windows (`/bin/sh` doesn't exist), so the whole module is
291// gated to Unix targets — Cross-Platform / windows-latest skips
292// it cleanly while Linux + macOS continue to exercise it.
#[cfg(all(test, unix))]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex};

    /// Build an in-memory index holding one non-directory entry of
    /// size 1 for each of `paths`.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex::from_entries(
            paths
                .iter()
                .map(|p| FileEntry {
                    path: std::path::Path::new(p).into(),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Construct an error-level rule with id `t` directly, bypassing
    /// `build`, so tests can use sub-second timeouts.
    fn rule(argv: Vec<&str>, scope: &str, timeout: Duration) -> CommandRule {
        CommandRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            scope: Scope::from_patterns(&[scope.to_string()]).unwrap(),
            argv: argv.into_iter().map(String::from).collect(),
            timeout,
        }
    }

    /// Minimal evaluation context: only `root` and `index` are set,
    /// every optional input is `None`.
    fn ctx<'a>(root: &'a Path, index: &'a FileIndex) -> Context<'a> {
        Context {
            root,
            index,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        }
    }

    /// A child exiting 0 produces no violation.
    #[test]
    fn pass_on_zero_exit() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"hello").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["/bin/sh", "-c", "exit 0"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "unexpected violations: {v:?}");
    }

    /// A non-zero exit yields one violation anchored on the file,
    /// whose message carries both the exit code and the stderr text.
    #[test]
    fn fail_on_nonzero_exit_carries_stderr() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["/bin/sh", "-c", "echo problem >&2; exit 7"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("a.txt")));
        assert!(v[0].message.contains("exit 7"), "msg: {}", v[0].message);
        assert!(v[0].message.contains("problem"), "msg: {}", v[0].message);
    }

    /// `{path}` in an argv token is replaced by the matched file's path.
    #[test]
    fn path_template_substitutes_in_argv() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"hi").unwrap();
        let index = idx(&["a.txt"]);
        // The rendered `{path}` reaches the script as `$1` (the `_`
        // placeholder fills `$0`). The script exits 1 unless `$1`
        // equals `a.txt`, so an empty violation list proves the
        // substitution happened.
        let r = rule(
            vec![
                "/bin/sh",
                "-c",
                "[ \"$1\" = a.txt ] || exit 1",
                "_",
                "{path}",
            ],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "argv substitution failed: {v:?}");
    }

    /// A child that outlives the timeout is reported as a violation
    /// with the "did not exit" cause line.
    #[test]
    fn timeout_emits_violation() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["/bin/sh", "-c", "sleep 5"],
            "*.txt",
            Duration::from_millis(150),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(
            v[0].message.contains("did not exit"),
            "msg: {}",
            v[0].message
        );
    }

    /// A program that cannot be spawned becomes a violation rather
    /// than an `Err` from `evaluate`.
    #[test]
    fn unknown_program_produces_spawn_error_violation() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        let r = rule(
            vec!["alint-no-such-program-xyzzy"],
            "*.txt",
            Duration::from_secs(2),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("could not spawn"));
    }

    /// The child sees the matched file's relative path in `ALINT_PATH`.
    #[test]
    fn alint_path_env_set_for_child() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.txt"), b"x").unwrap();
        let index = idx(&["a.txt"]);
        // Child fails unless ALINT_PATH matches.
        let r = rule(
            vec!["/bin/sh", "-c", "[ \"$ALINT_PATH\" = a.txt ] || exit 1"],
            "*.txt",
            Duration::from_secs(5),
        );
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert!(v.is_empty(), "ALINT_PATH not set: {v:?}");
    }

    /// `command: []` is a config error at build time.
    #[test]
    fn empty_argv_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
paths: "*.txt"
command: []
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("empty argv must error");
        assert!(format!("{err}").contains("argv must not be empty"));
    }

    /// A spec without `paths` is a config error at build time.
    #[test]
    fn missing_paths_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
command: ["/bin/true"]
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("missing paths must error");
        assert!(format!("{err}").contains("requires a `paths` field"));
    }

    /// A `fix:` block on a command rule is a config error at build time.
    #[test]
    fn fix_block_rejected_at_build_time() {
        let yaml = r#"
id: t
kind: command
level: error
paths: "*.txt"
command: ["/bin/true"]
fix:
  file_remove: {}
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let err = build(&spec).expect_err("fix on command rule must error");
        assert!(format!("{err}").contains("do not support `fix:`"));
    }

    /// `scope_filter:` restricts which matched files the command runs on.
    #[test]
    fn scope_filter_narrows() {
        // Two failing files; only the one inside a directory
        // with `marker.lock` as ancestor should fire. Build via
        // YAML so the `scope_filter:` is bundled into the rule's
        // Scope by `Scope::from_spec` — same path real configs take.
        let tmp = tempfile::tempdir().unwrap();
        std::fs::create_dir_all(tmp.path().join("pkg")).unwrap();
        std::fs::create_dir_all(tmp.path().join("other")).unwrap();
        std::fs::write(tmp.path().join("pkg/marker.lock"), b"").unwrap();
        std::fs::write(tmp.path().join("pkg/a.txt"), b"x").unwrap();
        std::fs::write(tmp.path().join("other/a.txt"), b"x").unwrap();
        let index = idx(&["pkg/marker.lock", "pkg/a.txt", "other/a.txt"]);
        let yaml = r#"
id: t
kind: command
level: error
paths: "**/a.txt"
scope_filter:
  has_ancestor: marker.lock
command: ["/bin/sh", "-c", "exit 1"]
"#;
        let spec: RuleSpec = serde_yaml_ng::from_str(yaml).unwrap();
        let r = build(&spec).unwrap();
        let v = r.evaluate(&ctx(tmp.path(), &index)).unwrap();
        assert_eq!(v.len(), 1, "only in-scope file should fire: {v:?}");
        assert_eq!(v[0].path.as_deref(), Some(Path::new("pkg/a.txt")));
    }
}
alint_rules/command.rs

alint_rules/
command.rs