Skip to main content

alint_rules/
file_content_matches.rs

//! `file_content_matches` — every file in scope must match a regex.
2
3use std::path::Path;
4
5use alint_core::{
6    Context, Error, FixSpec, Fixer, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation,
7};
8use regex::Regex;
9use serde::Deserialize;
10
11use crate::fixers::FileAppendFixer;
12
13#[derive(Debug, Deserialize)]
14#[serde(deny_unknown_fields)]
15struct Options {
16    pattern: String,
17}
18
19#[derive(Debug)]
20pub struct FileContentMatchesRule {
21    id: String,
22    level: Level,
23    policy_url: Option<String>,
24    message: Option<String>,
25    scope: Scope,
26    pattern_src: String,
27    pattern: Regex,
28    fixer: Option<FileAppendFixer>,
29}
30
31impl Rule for FileContentMatchesRule {
32    alint_core::rule_common_impl!();
33
34    fn fixer(&self) -> Option<&dyn Fixer> {
35        self.fixer.as_ref().map(|f| f as &dyn Fixer)
36    }
37
38    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
39        let mut violations = Vec::new();
40        for entry in ctx.index.files() {
41            if !self.scope.matches(&entry.path, ctx.index) {
42                continue;
43            }
44            let full = ctx.root.join(&entry.path);
45            let bytes = match std::fs::read(&full) {
46                Ok(b) => b,
47                Err(e) => {
48                    violations.push(
49                        Violation::new(format!("could not read file: {e}"))
50                            .with_path(entry.path.clone()),
51                    );
52                    continue;
53                }
54            };
55            violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
56        }
57        Ok(violations)
58    }
59
60    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
61        Some(self)
62    }
63}
64
65impl PerFileRule for FileContentMatchesRule {
66    fn path_scope(&self) -> &Scope {
67        &self.scope
68    }
69
70    fn evaluate_file(
71        &self,
72        _ctx: &Context<'_>,
73        path: &Path,
74        bytes: &[u8],
75    ) -> Result<Vec<Violation>> {
76        let Ok(text) = std::str::from_utf8(bytes) else {
77            return Ok(vec![
78                Violation::new("file is not valid UTF-8; cannot match regex")
79                    .with_path(std::sync::Arc::<Path>::from(path)),
80            ]);
81        };
82        if self.pattern.is_match(text) {
83            return Ok(Vec::new());
84        }
85        let msg = self.message.clone().unwrap_or_else(|| {
86            format!(
87                "content does not match required pattern /{}/",
88                self.pattern_src
89            )
90        });
91        Ok(vec![
92            Violation::new(msg).with_path(std::sync::Arc::<Path>::from(path)),
93        ])
94    }
95}
96
97pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
98    let Some(_paths) = &spec.paths else {
99        return Err(Error::rule_config(
100            &spec.id,
101            "file_content_matches requires a `paths` field",
102        ));
103    };
104    let opts: Options = spec
105        .deserialize_options()
106        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
107    let pattern = Regex::new(&opts.pattern)
108        .map_err(|e| Error::rule_config(&spec.id, format!("invalid pattern: {e}")))?;
109    let fixer = match &spec.fix {
110        Some(FixSpec::FileAppend { file_append }) => {
111            let source = alint_core::resolve_content_source(
112                &spec.id,
113                "file_append",
114                &file_append.content,
115                &file_append.content_from,
116            )?;
117            Some(FileAppendFixer::new(source))
118        }
119        Some(other) => {
120            return Err(Error::rule_config(
121                &spec.id,
122                format!(
123                    "fix.{} is not compatible with file_content_matches",
124                    other.op_name()
125                ),
126            ));
127        }
128        None => None,
129    };
130    Ok(Box::new(FileContentMatchesRule {
131        id: spec.id.clone(),
132        level: spec.level,
133        policy_url: spec.policy_url.clone(),
134        message: spec.message.clone(),
135        scope: Scope::from_spec(spec)?,
136        pattern_src: opts.pattern,
137        pattern,
138        fixer,
139    }))
140}
141
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, spec_yaml, tempdir_with_files};

    /// Builds the rule described by `yaml` and evaluates it against a
    /// temporary tree containing `files`, returning the violations.
    fn violations_for(yaml: &str, files: &[(&str, &[u8])]) -> Vec<Violation> {
        let rule = build(&spec_yaml(yaml)).unwrap();
        let (tmp, idx) = tempdir_with_files(files);
        rule.evaluate(&ctx(tmp.path(), &idx)).unwrap()
    }

    #[test]
    fn build_rejects_missing_paths_field() {
        let spec = spec_yaml(
            "id: t\n\
             kind: file_content_matches\n\
             pattern: \".*\"\n\
             level: error\n",
        );
        assert!(build(&spec).is_err());
    }

    #[test]
    fn build_rejects_invalid_regex() {
        let spec = spec_yaml(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"**/*\"\n\
             pattern: \"[unterminated\"\n\
             level: error\n",
        );
        assert!(build(&spec).is_err());
    }

    #[test]
    fn evaluate_passes_when_pattern_matches() {
        let v = violations_for(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"LICENSE\"\n\
             pattern: \"Apache License\"\n\
             level: error\n",
            &[("LICENSE", b"Apache License Version 2.0, January 2004\n")],
        );
        assert!(v.is_empty(), "pattern should match: {v:?}");
    }

    #[test]
    fn evaluate_fires_when_pattern_missing() {
        let v = violations_for(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"LICENSE\"\n\
             pattern: \"Apache License\"\n\
             level: error\n",
            &[("LICENSE", b"MIT License\n\nCopyright ...\n")],
        );
        assert_eq!(v.len(), 1);
    }

    #[test]
    fn evaluate_skips_files_outside_scope() {
        let v = violations_for(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"LICENSE\"\n\
             pattern: \"Apache\"\n\
             level: error\n",
            &[("README.md", b"no apache here")],
        );
        assert!(v.is_empty(), "out-of-scope shouldn't fire: {v:?}");
    }

    #[test]
    fn evaluate_fires_with_clear_message_on_non_utf8() {
        // The regex is applied to text, so bytes that are not valid UTF-8
        // must produce an explicit violation instead of being skipped;
        // otherwise a binary file could hide a missing-pattern policy.
        let v = violations_for(
            "id: t\n\
             kind: file_content_matches\n\
             paths: \"img.bin\"\n\
             pattern: \"never matches\"\n\
             level: error\n",
            &[("img.bin", &[0xff, 0xfe, 0xfd])],
        );
        assert_eq!(v.len(), 1, "non-UTF-8 should report one violation");
        assert!(
            v[0].message.contains("UTF-8"),
            "message should mention UTF-8: {}",
            v[0].message
        );
    }
}