Skip to main content

alint_rules/
file_content_matches.rs

1//! `file_content_matches` — every file in scope must match a regex.
2
3use std::path::Path;
4
5use alint_core::{
6    Context, Error, FixSpec, Fixer, Level, PerFileRule, Result, Rule, RuleSpec, Scope, ScopeFilter,
7    Violation,
8};
9use regex::Regex;
10use serde::Deserialize;
11
12use crate::fixers::FileAppendFixer;
13
14#[derive(Debug, Deserialize)]
15struct Options {
16    pattern: String,
17}
18
19#[derive(Debug)]
20pub struct FileContentMatchesRule {
21    id: String,
22    level: Level,
23    policy_url: Option<String>,
24    message: Option<String>,
25    scope: Scope,
26    scope_filter: Option<ScopeFilter>,
27    pattern_src: String,
28    pattern: Regex,
29    fixer: Option<FileAppendFixer>,
30}
31
32impl Rule for FileContentMatchesRule {
33    fn id(&self) -> &str {
34        &self.id
35    }
36    fn level(&self) -> Level {
37        self.level
38    }
39    fn policy_url(&self) -> Option<&str> {
40        self.policy_url.as_deref()
41    }
42
43    fn fixer(&self) -> Option<&dyn Fixer> {
44        self.fixer.as_ref().map(|f| f as &dyn Fixer)
45    }
46
47    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
48        let mut violations = Vec::new();
49        for entry in ctx.index.files() {
50            if !self.scope.matches(&entry.path) {
51                continue;
52            }
53            if let Some(filter) = &self.scope_filter
54                && !filter.matches(&entry.path, ctx.index)
55            {
56                continue;
57            }
58            let full = ctx.root.join(&entry.path);
59            let bytes = match std::fs::read(&full) {
60                Ok(b) => b,
61                Err(e) => {
62                    violations.push(
63                        Violation::new(format!("could not read file: {e}"))
64                            .with_path(entry.path.clone()),
65                    );
66                    continue;
67                }
68            };
69            violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
70        }
71        Ok(violations)
72    }
73
74    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
75        Some(self)
76    }
77
78    fn scope_filter(&self) -> Option<&ScopeFilter> {
79        self.scope_filter.as_ref()
80    }
81}
82
83impl PerFileRule for FileContentMatchesRule {
84    fn path_scope(&self) -> &Scope {
85        &self.scope
86    }
87
88    fn evaluate_file(
89        &self,
90        _ctx: &Context<'_>,
91        path: &Path,
92        bytes: &[u8],
93    ) -> Result<Vec<Violation>> {
94        let Ok(text) = std::str::from_utf8(bytes) else {
95            return Ok(vec![
96                Violation::new("file is not valid UTF-8; cannot match regex")
97                    .with_path(std::sync::Arc::<Path>::from(path)),
98            ]);
99        };
100        if self.pattern.is_match(text) {
101            return Ok(Vec::new());
102        }
103        let msg = self.message.clone().unwrap_or_else(|| {
104            format!(
105                "content does not match required pattern /{}/",
106                self.pattern_src
107            )
108        });
109        Ok(vec![
110            Violation::new(msg).with_path(std::sync::Arc::<Path>::from(path)),
111        ])
112    }
113}
114
115pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
116    let Some(paths) = &spec.paths else {
117        return Err(Error::rule_config(
118            &spec.id,
119            "file_content_matches requires a `paths` field",
120        ));
121    };
122    let opts: Options = spec
123        .deserialize_options()
124        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
125    let pattern = Regex::new(&opts.pattern)
126        .map_err(|e| Error::rule_config(&spec.id, format!("invalid pattern: {e}")))?;
127    let fixer = match &spec.fix {
128        Some(FixSpec::FileAppend { file_append }) => {
129            let source = alint_core::resolve_content_source(
130                &spec.id,
131                "file_append",
132                &file_append.content,
133                &file_append.content_from,
134            )?;
135            Some(FileAppendFixer::new(source))
136        }
137        Some(other) => {
138            return Err(Error::rule_config(
139                &spec.id,
140                format!(
141                    "fix.{} is not compatible with file_content_matches",
142                    other.op_name()
143                ),
144            ));
145        }
146        None => None,
147    };
148    Ok(Box::new(FileContentMatchesRule {
149        id: spec.id.clone(),
150        level: spec.level,
151        policy_url: spec.policy_url.clone(),
152        message: spec.message.clone(),
153        scope: Scope::from_paths_spec(paths)?,
154        scope_filter: spec.parse_scope_filter()?,
155        pattern_src: opts.pattern,
156        pattern,
157        fixer,
158    }))
159}
160
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, spec_yaml, tempdir_with_files};

    /// Spec shared by the match / no-match tests: `LICENSE` must contain
    /// "Apache License".
    const APACHE_LICENSE_SPEC: &str = "id: t\n\
         kind: file_content_matches\n\
         paths: \"LICENSE\"\n\
         pattern: \"Apache License\"\n\
         level: error\n";

    #[test]
    fn build_rejects_missing_paths_field() {
        let without_paths = spec_yaml(
            "id: t\n\
             kind: file_content_matches\n\
             pattern: \".*\"\n\
             level: error\n",
        );
        let outcome = build(&without_paths);
        assert!(outcome.is_err());
    }

    #[test]
    fn build_rejects_invalid_regex() {
        let bad_regex = spec_yaml(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"**/*\"\n\
             pattern: \"[unterminated\"\n\
             level: error\n",
        );
        let outcome = build(&bad_regex);
        assert!(outcome.is_err());
    }

    #[test]
    fn evaluate_passes_when_pattern_matches() {
        let rule = build(&spec_yaml(APACHE_LICENSE_SPEC)).unwrap();
        let (dir, index) =
            tempdir_with_files(&[("LICENSE", b"Apache License Version 2.0, January 2004\n")]);
        let violations = rule.evaluate(&ctx(dir.path(), &index)).unwrap();
        assert!(
            violations.is_empty(),
            "pattern should match: {violations:?}"
        );
    }

    #[test]
    fn evaluate_fires_when_pattern_missing() {
        let rule = build(&spec_yaml(APACHE_LICENSE_SPEC)).unwrap();
        let (dir, index) = tempdir_with_files(&[("LICENSE", b"MIT License\n\nCopyright ...\n")]);
        let violations = rule.evaluate(&ctx(dir.path(), &index)).unwrap();
        assert_eq!(violations.len(), 1);
    }

    #[test]
    fn evaluate_skips_files_outside_scope() {
        let license_only = spec_yaml(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"LICENSE\"\n\
             pattern: \"Apache\"\n\
             level: error\n",
        );
        let rule = build(&license_only).unwrap();
        let (dir, index) = tempdir_with_files(&[("README.md", b"no apache here")]);
        let violations = rule.evaluate(&ctx(dir.path(), &index)).unwrap();
        assert!(
            violations.is_empty(),
            "out-of-scope shouldn't fire: {violations:?}"
        );
    }

    #[test]
    fn evaluate_fires_with_clear_message_on_non_utf8() {
        // The regex is applied to text, so bytes that are not valid UTF-8
        // must surface as an explicit violation instead of being skipped;
        // otherwise a binary file could hide a missing-pattern failure.
        let binary_spec = spec_yaml(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"img.bin\"\n\
             pattern: \"never matches\"\n\
             level: error\n",
        );
        let rule = build(&binary_spec).unwrap();
        let (dir, index) = tempdir_with_files(&[("img.bin", &[0xff, 0xfe, 0xfd])]);
        let violations = rule.evaluate(&ctx(dir.path(), &index)).unwrap();
        assert_eq!(
            violations.len(),
            1,
            "non-UTF-8 should report one violation"
        );
        let reported = &violations[0].message;
        assert!(
            reported.contains("UTF-8"),
            "message should mention UTF-8: {}",
            reported
        );
    }
}