Skip to main content

alint_rules/
file_is_text.rs

1//! `file_is_text` — every file in scope must be detected as text (not binary).
2//!
3//! Detection uses `content_inspector` on the first 8 KiB of each file
4//! (magic-byte + heuristic analysis). UTF-8, UTF-16 (with BOM), and plain
5//! 7-bit ASCII are treated as text.
6
7use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
8
9use crate::io::{Classification, classify_bytes, read_prefix};
10
/// Rule that reports every in-scope, non-empty file whose leading bytes are
/// classified as binary.
///
/// Instances are created by [`build`] from a `RuleSpec`.
#[derive(Debug)]
pub struct FileIsTextRule {
    /// Rule identifier, returned from `Rule::id`.
    id: String,
    /// Level copied from the spec, returned from `Rule::level`.
    level: Level,
    /// Optional policy link, exposed through `Rule::policy_url`.
    policy_url: Option<String>,
    /// Optional custom violation text; when `None`, a built-in
    /// "file is detected as binary" message is used instead.
    message: Option<String>,
    /// Path filter: only index entries whose path matches are inspected.
    scope: Scope,
}
19
20impl Rule for FileIsTextRule {
21    fn id(&self) -> &str {
22        &self.id
23    }
24    fn level(&self) -> Level {
25        self.level
26    }
27    fn policy_url(&self) -> Option<&str> {
28        self.policy_url.as_deref()
29    }
30
31    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
32        let mut violations = Vec::new();
33        for entry in ctx.index.files() {
34            if !self.scope.matches(&entry.path) {
35                continue;
36            }
37            if entry.size == 0 {
38                // Empty files are text by convention.
39                continue;
40            }
41            let full = ctx.root.join(&entry.path);
42            let bytes = match read_prefix(&full) {
43                Ok(b) => b,
44                Err(e) => {
45                    violations.push(
46                        Violation::new(format!("could not read file: {e}"))
47                            .with_path(entry.path.clone()),
48                    );
49                    continue;
50                }
51            };
52            if classify_bytes(&bytes) == Classification::Binary {
53                let msg = self.message.clone().unwrap_or_else(|| {
54                    "file is detected as binary; text is required here".to_string()
55                });
56                violations.push(Violation::new(msg).with_path(entry.path.clone()));
57            }
58        }
59        Ok(violations)
60    }
61}
62
63pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
64    let Some(paths) = &spec.paths else {
65        return Err(Error::rule_config(
66            &spec.id,
67            "file_is_text requires a `paths` field",
68        ));
69    };
70    Ok(Box::new(FileIsTextRule {
71        id: spec.id.clone(),
72        level: spec.level,
73        policy_url: spec.policy_url.clone(),
74        message: spec.message.clone(),
75        scope: Scope::from_paths_spec(paths)?,
76    }))
77}
78
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, spec_yaml, tempdir_with_files};

    /// Builds a warning-level `file_is_text` rule with id `t` scoped to `glob`.
    fn rule_for(glob: &str) -> Box<dyn Rule> {
        let yaml = format!("id: t\nkind: file_is_text\npaths: \"{glob}\"\nlevel: warning\n");
        build(&spec_yaml(&yaml)).unwrap()
    }

    #[test]
    fn build_rejects_missing_paths_field() {
        // A spec without `paths` must be refused at build time.
        let no_paths = spec_yaml("id: t\nkind: file_is_text\nlevel: warning\n");
        assert!(build(&no_paths).is_err());
    }

    #[test]
    fn evaluate_passes_on_utf8_text() {
        let rule = rule_for("**/*.rs");
        let (tmp, idx) = tempdir_with_files(&[("a.rs", b"// hello\nfn main() {}\n")]);
        let context = ctx(tmp.path(), &idx);
        let v = rule.evaluate(&context).unwrap();
        assert!(v.is_empty(), "utf-8 text should pass: {v:?}");
    }

    #[test]
    fn evaluate_fires_on_binary_content() {
        let rule = rule_for("**/*");
        // Sixteen NUL bytes followed by a high-byte tail; the classifier
        // is expected to report this as binary.
        let mut payload: Vec<u8> = Vec::with_capacity(20);
        payload.resize(16, 0u8);
        payload.extend_from_slice(&[0xff, 0xfe, 0xfd, 0xfc]);
        let (tmp, idx) = tempdir_with_files(&[("img.bin", &payload)]);
        let context = ctx(tmp.path(), &idx);
        let v = rule.evaluate(&context).unwrap();
        assert_eq!(v.len(), 1, "binary should fire: {v:?}");
    }

    #[test]
    fn evaluate_silent_on_zero_byte_file() {
        // Zero-length files count as text by convention, so the rule
        // neither reads them nor reports them.
        let rule = rule_for("**/*");
        let (tmp, idx) = tempdir_with_files(&[("empty", b"")]);
        let context = ctx(tmp.path(), &idx);
        let v = rule.evaluate(&context).unwrap();
        assert!(v.is_empty());
    }

    #[test]
    fn evaluate_skips_out_of_scope_files() {
        // The only file lives outside `src/`, so the scope filter drops it
        // even though its content is all NUL bytes.
        let rule = rule_for("src/**/*.rs");
        let (tmp, idx) = tempdir_with_files(&[("img.bin", &[0u8; 64])]);
        let context = ctx(tmp.path(), &idx);
        let v = rule.evaluate(&context).unwrap();
        assert!(v.is_empty(), "out-of-scope shouldn't fire: {v:?}");
    }
}