Skip to main content

alint_rules/
generated_file_fresh.rs

1//! `generated_file_fresh` — a committed file must match a
2//! declared generator's stdout. A **non-mutating freshness
3//! check**: alint does NOT run codegen as a build step; this
4//! only *verifies* that the committed artefact equals what the
5//! user-declared, maintainer-trusted generator produces, by
6//! capturing its stdout — it never writes the working tree.
7//!
8//! It spawns a user-supplied process, so it is trust-gated at
9//! config load by `alint_dsl::reject_command_rules_in` (same tier
10//! as the `command` / `command_idempotent` rules): only the
11//! user's own top-level config may declare it; an `extends:`'d
12//! ruleset (local / HTTPS / `alint://bundled/`) declaring it is
13//! refused — adopting a ruleset must never imply arbitrary code
14//! execution. Single-shot (one spawn, one declared file), not
15//! per-file. Design + open-question resolutions:
16//! `docs/design/v0.10/generated_file_fresh.md`.
17//!
18//! ```yaml
19//! - id: bindings-fresh
20//!   kind: generated_file_fresh
21//!   file: crates/ffi/include/core.h
22//!   command: ["cbindgen", "--config", "cbindgen.toml", "crates/core"]
23//!   workdir: "."                 # generator cwd (default: lint root)
24//!   normalize: final-newline     # none (default) | trim | final-newline
25//!   level: error
26//! ```
27
28use std::path::Path;
29use std::time::Duration;
30
31use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Violation};
32use serde::Deserialize;
33
34#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
35#[serde(rename_all = "kebab-case")]
36enum Normalize {
37    /// Exact byte equality.
38    #[default]
39    None,
40    /// Trim leading/trailing whitespace of the whole output.
41    Trim,
42    /// Normalise only a single trailing newline (the most common
43    /// generator/editor diff).
44    FinalNewline,
45}
46
47impl Normalize {
48    fn apply(self, s: &str) -> String {
49        match self {
50            Self::None => s.to_string(),
51            Self::Trim => s.trim().to_string(),
52            Self::FinalNewline => s.strip_suffix('\n').unwrap_or(s).to_string(),
53        }
54    }
55}
56
57#[derive(Debug, Deserialize)]
58#[serde(deny_unknown_fields)]
59struct Options {
60    file: String,
61    command: Vec<String>,
62    #[serde(default)]
63    workdir: Option<String>,
64    #[serde(default)]
65    normalize: Normalize,
66    /// Child timeout in seconds. Default
67    /// [`crate::spawn::DEFAULT_SPAWN_TIMEOUT_SECS`].
68    #[serde(default)]
69    timeout: Option<u64>,
70}
71
72#[derive(Debug)]
73pub struct GeneratedFileFreshRule {
74    id: String,
75    level: Level,
76    policy_url: Option<String>,
77    message: Option<String>,
78    file: String,
79    command: Vec<String>,
80    workdir: String,
81    normalize: Normalize,
82    timeout: u64,
83}
84
85impl Rule for GeneratedFileFreshRule {
86    alint_core::rule_common_impl!();
87
88    fn requires_full_index(&self) -> bool {
89        // Single-shot: staleness is independent of which files
90        // changed, so it always evaluates (never `--changed`-
91        // filtered). `path_scope` stays `None` (default) so the
92        // engine doesn't skip-by-intersection. Same dispatch
93        // class as `pair`.
94        true
95    }
96
97    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
98        let file = Path::new(&self.file);
99        let env = [
100            ("ALINT_ROOT", ctx.root.to_string_lossy().into_owned()),
101            ("ALINT_RULE_ID", self.id.clone()),
102            ("ALINT_LEVEL", self.level.as_str().to_string()),
103        ];
104        let (status, stdout, stderr) = match crate::spawn::run_capturing(
105            &self.command,
106            &ctx.root.join(&self.workdir),
107            &env,
108            Duration::from_secs(self.timeout),
109        ) {
110            crate::spawn::SpawnOutcome::Exited {
111                status,
112                stdout,
113                stderr,
114            } => (status, stdout, stderr),
115            crate::spawn::SpawnOutcome::SpawnError(e) => {
116                let program = self.command.first().map_or("", String::as_str);
117                return Ok(vec![self.violation(
118                    file,
119                    &format!("generator `{program}` could not be spawned: {e}"),
120                )]);
121            }
122            crate::spawn::SpawnOutcome::TimedOut { secs } => {
123                return Ok(vec![self.violation(
124                    file,
125                    &format!(
126                        "generator did not exit within {secs}s \
127                         (raise `timeout:` on the rule to extend)"
128                    ),
129                )]);
130            }
131        };
132
133        if !status.success() {
134            let stderr = String::from_utf8_lossy(&stderr);
135            let snippet: String = stderr.trim().chars().take(400).collect();
136            let code = status
137                .code()
138                .map_or_else(|| "a signal".to_string(), |c| c.to_string());
139            return Ok(vec![self.violation(
140                file,
141                &format!("generator exited with {code}: {snippet}"),
142            )]);
143        }
144
145        let committed = match crate::io::read_capped(&ctx.root.join(file)) {
146            Ok(b) => b,
147            Err(crate::io::ReadCapError::TooLarge(n)) => {
148                return Ok(vec![self.violation(
149                    file,
150                    &format!("is too large to diff ({n} bytes; 256 MiB cap)"),
151                )]);
152            }
153            Err(crate::io::ReadCapError::Io(_)) => {
154                return Ok(vec![self.violation(
155                    file,
156                    "is not on disk, but the generator produced output for it",
157                )]);
158            }
159        };
160
161        let stale = if self.normalize == Normalize::None {
162            committed != stdout
163        } else {
164            let produced = self.normalize.apply(&String::from_utf8_lossy(&stdout));
165            let on_disk = self.normalize.apply(&String::from_utf8_lossy(&committed));
166            produced != on_disk
167        };
168        if stale {
169            return Ok(vec![self.violation(
170                file,
171                &format!(
172                    "is stale — its committed contents differ from `{}` output{}",
173                    self.command.join(" "),
174                    first_diff_hint(&stdout, &committed),
175                ),
176            )]);
177        }
178        Ok(Vec::new())
179    }
180}
181
182impl GeneratedFileFreshRule {
183    fn violation(&self, file: &Path, desc: &str) -> Violation {
184        let msg = self
185            .message
186            .clone()
187            .unwrap_or_else(|| format!("{}: {desc}", file.display()));
188        Violation::new(msg).with_path(file.to_path_buf())
189    }
190}
191
/// A short hint at where the generator output and the committed
/// file first diverge (line-based; lossy is fine for a hint).
///
/// The result is either empty (the inputs are byte-identical) or a
/// parenthesised suffix with a leading space, ready to append to a
/// message:
///
/// * the 1-based number of the first line whose text differs;
/// * otherwise the two line counts, when one side has extra lines;
/// * otherwise a note that the difference is invisible at line
///   level — line terminators (`\n` vs `\r\n`), the final newline,
///   or bytes that are not valid UTF-8.
fn first_diff_hint(produced: &[u8], committed: &[u8]) -> String {
    if produced == committed {
        return String::new();
    }
    let p = String::from_utf8_lossy(produced);
    let c = String::from_utf8_lossy(committed);
    if let Some(i) = p.lines().zip(c.lines()).position(|(lp, lc)| lp != lc) {
        return format!(" (first differs at line {})", i + 1);
    }
    let (np, nc) = (p.lines().count(), c.lines().count());
    if np != nc {
        return format!(" (generator produced {np} lines, file has {nc})");
    }
    // The bytes differ, yet `lines()` yields the same sequence on
    // both sides: `lines()` drops terminators and lossy decoding
    // collapses invalid sequences, so the difference must be there.
    " (differs only in line endings, the final newline, or non-UTF-8 bytes)".to_string()
}
209
210pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
211    let opts: Options = spec
212        .deserialize_options()
213        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
214    if opts.file.trim().is_empty() {
215        return Err(Error::rule_config(
216            &spec.id,
217            "generated_file_fresh `file` must not be empty",
218        ));
219    }
220    if opts.command.is_empty() {
221        return Err(Error::rule_config(
222            &spec.id,
223            "generated_file_fresh requires a non-empty `command` argv \
224             (the generator that produces `file` on stdout)",
225        ));
226    }
227    Ok(Box::new(GeneratedFileFreshRule {
228        id: spec.id.clone(),
229        level: spec.level,
230        policy_url: spec.policy_url.clone(),
231        message: spec.message.clone(),
232        file: opts.file,
233        command: opts.command,
234        workdir: opts.workdir.unwrap_or_else(|| ".".to_string()),
235        normalize: opts.normalize,
236        timeout: opts
237            .timeout
238            .unwrap_or(crate::spawn::DEFAULT_SPAWN_TIMEOUT_SECS),
239    }))
240}
241
#[cfg(test)]
mod tests {
    use super::*;

    /// Generator argv that prints the two-line fixture `alpha\nbravo\n`.
    const ALPHA_BRAVO: [&str; 3] = ["sh", "-c", "printf 'alpha\\nbravo\\n'"];

    /// Assembles a rule directly (bypassing `build`) with the default
    /// workdir, no custom message and a 60-second timeout.
    fn rule(file: &str, command: &[&str], normalize: Normalize) -> GeneratedFileFreshRule {
        let command = command.iter().map(|arg| (*arg).to_owned()).collect();
        GeneratedFileFreshRule {
            id: String::from("t"),
            level: Level::Error,
            policy_url: None,
            message: None,
            file: file.to_owned(),
            command,
            workdir: String::from("."),
            normalize,
            timeout: 60,
        }
    }

    /// Evaluates `r` against `root` with an empty file index and
    /// returns whatever violations it reports.
    fn eval(r: &GeneratedFileFreshRule, root: &Path) -> Vec<Violation> {
        let index = alint_core::FileIndex::from_entries(Vec::new());
        r.evaluate(&Context {
            root,
            index: &index,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        })
        .unwrap()
    }

    /// Creates a scratch lint root whose `out.txt` holds `contents`.
    fn root_with_out(contents: &str) -> tempfile::TempDir {
        let root = tempfile::tempdir().unwrap();
        std::fs::write(root.path().join("out.txt"), contents).unwrap();
        root
    }

    #[test]
    fn fresh_file_is_silent() {
        let root = root_with_out("alpha\nbravo\n");
        let fresh = rule("out.txt", &ALPHA_BRAVO, Normalize::None);
        assert!(eval(&fresh, root.path()).is_empty());
    }

    #[test]
    fn stale_file_fails_with_line_hint() {
        let root = root_with_out("alpha\nWRONG\n");
        let found = eval(&rule("out.txt", &ALPHA_BRAVO, Normalize::None), root.path());
        assert_eq!(found.len(), 1);
        let message = &found[0].message;
        assert!(message.contains("stale"));
        assert!(message.contains("line 2"), "{message:?}");
    }

    #[test]
    fn trim_normalize_absorbs_surrounding_whitespace() {
        // Covers the `trim` column of the none/trim/final-newline
        // normalize matrix.
        let root = root_with_out("  hello\n\n");
        let generator = ["sh", "-c", "printf hello"];
        let exact = rule("out.txt", &generator, Normalize::None);
        let trimmed = rule("out.txt", &generator, Normalize::Trim);
        assert_eq!(
            eval(&exact, root.path()).len(),
            1,
            "exact-byte compare sees the whitespace diff"
        );
        assert!(
            eval(&trimmed, root.path()).is_empty(),
            "trim normalize absorbs surrounding whitespace"
        );
    }

    #[test]
    fn final_newline_normalize_absorbs_trailing_newline() {
        // The committed file lacks the trailing newline that the
        // generator emits.
        let root = root_with_out("alpha\nbravo");
        let exact = rule("out.txt", &ALPHA_BRAVO, Normalize::None);
        let lenient = rule("out.txt", &ALPHA_BRAVO, Normalize::FinalNewline);
        assert_eq!(
            eval(&exact, root.path()).len(),
            1,
            "exact-byte compare sees the newline diff"
        );
        assert!(
            eval(&lenient, root.path()).is_empty(),
            "final-newline normalize absorbs it"
        );
    }

    #[test]
    fn missing_committed_file_is_a_violation() {
        let root = tempfile::tempdir().unwrap();
        let absent = rule("nope.txt", &["sh", "-c", "printf x"], Normalize::None);
        let found = eval(&absent, root.path());
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("not on disk"));
    }

    #[test]
    fn generator_nonzero_exit_is_a_violation() {
        let root = root_with_out("x");
        let failing = rule(
            "out.txt",
            &["sh", "-c", "echo boom >&2; exit 3"],
            Normalize::None,
        );
        let found = eval(&failing, root.path());
        assert_eq!(found.len(), 1);
        let message = &found[0].message;
        assert!(message.contains("exited with 3"));
        assert!(message.contains("boom"));
    }

    #[test]
    fn missing_generator_program_is_a_violation() {
        let root = root_with_out("x");
        let unspawnable = rule("out.txt", &["alint-no-such-generator-xyz"], Normalize::None);
        let found = eval(&unspawnable, root.path());
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("could not be spawned"));
    }

    #[cfg(unix)]
    #[test]
    fn hung_generator_times_out_with_one_violation() {
        let root = root_with_out("x");
        let hung = GeneratedFileFreshRule {
            timeout: 1,
            ..rule("out.txt", &["sh", "-c", "sleep 5"], Normalize::None)
        };
        let found = eval(&hung, root.path());
        assert_eq!(found.len(), 1, "a hung generator must yield one violation");
        let message = &found[0].message;
        assert!(message.contains("did not exit within 1s"), "{message:?}");
    }
}