Skip to main content

alint_rules/
file_content_matches.rs

1//! `file_content_matches` — every file in scope must match a regex.
2
3use std::path::Path;
4
5use alint_core::{
6    Context, Error, FixSpec, Fixer, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation,
7};
8use regex::Regex;
9use serde::Deserialize;
10
11use crate::fixers::FileAppendFixer;
12
13#[derive(Debug, Deserialize)]
14struct Options {
15    pattern: String,
16}
17
18#[derive(Debug)]
19pub struct FileContentMatchesRule {
20    id: String,
21    level: Level,
22    policy_url: Option<String>,
23    message: Option<String>,
24    scope: Scope,
25    pattern_src: String,
26    pattern: Regex,
27    fixer: Option<FileAppendFixer>,
28}
29
30impl Rule for FileContentMatchesRule {
31    fn id(&self) -> &str {
32        &self.id
33    }
34    fn level(&self) -> Level {
35        self.level
36    }
37    fn policy_url(&self) -> Option<&str> {
38        self.policy_url.as_deref()
39    }
40
41    fn fixer(&self) -> Option<&dyn Fixer> {
42        self.fixer.as_ref().map(|f| f as &dyn Fixer)
43    }
44
45    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
46        let mut violations = Vec::new();
47        for entry in ctx.index.files() {
48            if !self.scope.matches(&entry.path, ctx.index) {
49                continue;
50            }
51            let full = ctx.root.join(&entry.path);
52            let bytes = match std::fs::read(&full) {
53                Ok(b) => b,
54                Err(e) => {
55                    violations.push(
56                        Violation::new(format!("could not read file: {e}"))
57                            .with_path(entry.path.clone()),
58                    );
59                    continue;
60                }
61            };
62            violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
63        }
64        Ok(violations)
65    }
66
67    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
68        Some(self)
69    }
70}
71
72impl PerFileRule for FileContentMatchesRule {
73    fn path_scope(&self) -> &Scope {
74        &self.scope
75    }
76
77    fn evaluate_file(
78        &self,
79        _ctx: &Context<'_>,
80        path: &Path,
81        bytes: &[u8],
82    ) -> Result<Vec<Violation>> {
83        let Ok(text) = std::str::from_utf8(bytes) else {
84            return Ok(vec![
85                Violation::new("file is not valid UTF-8; cannot match regex")
86                    .with_path(std::sync::Arc::<Path>::from(path)),
87            ]);
88        };
89        if self.pattern.is_match(text) {
90            return Ok(Vec::new());
91        }
92        let msg = self.message.clone().unwrap_or_else(|| {
93            format!(
94                "content does not match required pattern /{}/",
95                self.pattern_src
96            )
97        });
98        Ok(vec![
99            Violation::new(msg).with_path(std::sync::Arc::<Path>::from(path)),
100        ])
101    }
102}
103
104pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
105    let Some(_paths) = &spec.paths else {
106        return Err(Error::rule_config(
107            &spec.id,
108            "file_content_matches requires a `paths` field",
109        ));
110    };
111    let opts: Options = spec
112        .deserialize_options()
113        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
114    let pattern = Regex::new(&opts.pattern)
115        .map_err(|e| Error::rule_config(&spec.id, format!("invalid pattern: {e}")))?;
116    let fixer = match &spec.fix {
117        Some(FixSpec::FileAppend { file_append }) => {
118            let source = alint_core::resolve_content_source(
119                &spec.id,
120                "file_append",
121                &file_append.content,
122                &file_append.content_from,
123            )?;
124            Some(FileAppendFixer::new(source))
125        }
126        Some(other) => {
127            return Err(Error::rule_config(
128                &spec.id,
129                format!(
130                    "fix.{} is not compatible with file_content_matches",
131                    other.op_name()
132                ),
133            ));
134        }
135        None => None,
136    };
137    Ok(Box::new(FileContentMatchesRule {
138        id: spec.id.clone(),
139        level: spec.level,
140        policy_url: spec.policy_url.clone(),
141        message: spec.message.clone(),
142        scope: Scope::from_spec(spec)?,
143        pattern_src: opts.pattern,
144        pattern,
145        fixer,
146    }))
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, spec_yaml, tempdir_with_files};

    /// Parses `yaml` into a spec and builds the rule, panicking if `build` fails.
    fn rule_from(yaml: &str) -> Box<dyn Rule> {
        let spec = spec_yaml(yaml);
        build(&spec).unwrap()
    }

    /// A spec without `paths` must be rejected at build time.
    #[test]
    fn build_rejects_missing_paths_field() {
        let spec = spec_yaml(
            "id: t\n\
             kind: file_content_matches\n\
             pattern: \".*\"\n\
             level: error\n",
        );
        let outcome = build(&spec);
        assert!(outcome.is_err());
    }

    /// A pattern that does not compile must be rejected at build time.
    #[test]
    fn build_rejects_invalid_regex() {
        let spec = spec_yaml(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"**/*\"\n\
             pattern: \"[unterminated\"\n\
             level: error\n",
        );
        let outcome = build(&spec);
        assert!(outcome.is_err());
    }

    /// Matching content yields no violations.
    #[test]
    fn evaluate_passes_when_pattern_matches() {
        let rule = rule_from(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"LICENSE\"\n\
             pattern: \"Apache License\"\n\
             level: error\n",
        );
        let (tmp, idx) =
            tempdir_with_files(&[("LICENSE", b"Apache License Version 2.0, January 2004\n")]);
        let violations = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(
            violations.is_empty(),
            "pattern should match: {:?}",
            violations
        );
    }

    /// Content lacking the pattern yields exactly one violation.
    #[test]
    fn evaluate_fires_when_pattern_missing() {
        let rule = rule_from(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"LICENSE\"\n\
             pattern: \"Apache License\"\n\
             level: error\n",
        );
        let (tmp, idx) = tempdir_with_files(&[("LICENSE", b"MIT License\n\nCopyright ...\n")]);
        let violations = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(violations.len(), 1);
    }

    /// Files that `paths` does not select are ignored even if they lack the pattern.
    #[test]
    fn evaluate_skips_files_outside_scope() {
        let rule = rule_from(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"LICENSE\"\n\
             pattern: \"Apache\"\n\
             level: error\n",
        );
        let (tmp, idx) = tempdir_with_files(&[("README.md", b"no apache here")]);
        let violations = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(
            violations.is_empty(),
            "out-of-scope shouldn't fire: {:?}",
            violations
        );
    }

    /// The rule has to decode the file as text before applying the regex,
    /// so undecodable input is reported as an explicit violation instead of
    /// being skipped; otherwise a binary commit could mask a missing-pattern
    /// policy.
    #[test]
    fn evaluate_fires_with_clear_message_on_non_utf8() {
        let rule = rule_from(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"img.bin\"\n\
             pattern: \"never matches\"\n\
             level: error\n",
        );
        let (tmp, idx) = tempdir_with_files(&[("img.bin", &[0xff, 0xfe, 0xfd])]);
        let violations = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(
            violations.len(),
            1,
            "non-UTF-8 should report one violation"
        );
        let reported = &violations[0].message;
        assert!(
            reported.contains("UTF-8"),
            "message should mention UTF-8: {}",
            reported
        );
    }
}