Skip to main content

alint_rules/
file_is_text.rs

1//! `file_is_text` — every file in scope must be detected as text (not binary).
2//!
3//! Detection uses `content_inspector` on the first 8 KiB of each file
4//! (magic-byte + heuristic analysis). UTF-8, UTF-16 (with BOM), and plain
5//! 7-bit ASCII are treated as text.
6
7use std::path::Path;
8
9use alint_core::{Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation};
10
11use crate::io::{Classification, TEXT_INSPECT_LEN, classify_bytes, read_prefix};
12
13#[derive(Debug)]
14pub struct FileIsTextRule {
15    id: String,
16    level: Level,
17    policy_url: Option<String>,
18    message: Option<String>,
19    scope: Scope,
20}
21
22impl Rule for FileIsTextRule {
23    alint_core::rule_common_impl!();
24
25    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
26        let mut violations = Vec::new();
27        for entry in ctx.index.files() {
28            if !self.scope.matches(&entry.path, ctx.index) {
29                continue;
30            }
31            if entry.size == 0 {
32                // Empty files are text by convention.
33                continue;
34            }
35            // Bounded read: only the first TEXT_INSPECT_LEN
36            // bytes feed `content_inspector`. Solo runs read
37            // just that prefix; the dispatch-flip path receives
38            // the whole file from the engine and inspects only
39            // the prefix.
40            let full = ctx.root.join(&entry.path);
41            let bytes = match read_prefix(&full) {
42                Ok(b) => b,
43                Err(e) => {
44                    violations.push(
45                        Violation::new(format!("could not read file: {e}"))
46                            .with_path(entry.path.clone()),
47                    );
48                    continue;
49                }
50            };
51            violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
52        }
53        Ok(violations)
54    }
55
56    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
57        Some(self)
58    }
59}
60
61impl PerFileRule for FileIsTextRule {
62    fn path_scope(&self) -> &Scope {
63        &self.scope
64    }
65
66    fn evaluate_file(
67        &self,
68        _ctx: &Context<'_>,
69        path: &Path,
70        bytes: &[u8],
71    ) -> Result<Vec<Violation>> {
72        if bytes.is_empty() {
73            return Ok(Vec::new());
74        }
75        // Inspect only the first TEXT_INSPECT_LEN bytes; the
76        // engine handed us the full file but the classifier
77        // only needs the prefix.
78        let sample = &bytes[..bytes.len().min(TEXT_INSPECT_LEN)];
79        if classify_bytes(sample) != Classification::Binary {
80            return Ok(Vec::new());
81        }
82        let msg = self
83            .message
84            .clone()
85            .unwrap_or_else(|| "file is detected as binary; text is required here".to_string());
86        Ok(vec![
87            Violation::new(msg).with_path(std::sync::Arc::<Path>::from(path)),
88        ])
89    }
90
91    fn max_bytes_needed(&self) -> Option<usize> {
92        Some(TEXT_INSPECT_LEN)
93    }
94}
95
96pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
97    let Some(_paths) = &spec.paths else {
98        return Err(Error::rule_config(
99            &spec.id,
100            "file_is_text requires a `paths` field",
101        ));
102    };
103    Ok(Box::new(FileIsTextRule {
104        id: spec.id.clone(),
105        level: spec.level,
106        policy_url: spec.policy_url.clone(),
107        message: spec.message.clone(),
108        scope: Scope::from_spec(spec)?,
109    }))
110}
111
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, spec_yaml, tempdir_with_files};

    /// Builds a warning-level `file_is_text` rule whose `paths` is `glob`.
    fn rule_for(glob: &str) -> Box<dyn Rule> {
        let yaml = format!(
            "id: t\n\
             kind: file_is_text\n\
             paths: \"{glob}\"\n\
             level: warning\n"
        );
        build(&spec_yaml(&yaml)).unwrap()
    }

    #[test]
    fn build_rejects_missing_paths_field() {
        // No `paths` key at all: construction must fail.
        let spec = spec_yaml(
            "id: t\n\
             kind: file_is_text\n\
             level: warning\n",
        );
        assert!(build(&spec).is_err());
    }

    #[test]
    fn evaluate_passes_on_utf8_text() {
        let rule = rule_for("**/*.rs");
        let (tmp, idx) = tempdir_with_files(&[("a.rs", b"// hello\nfn main() {}\n")]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "utf-8 text should pass: {v:?}");
    }

    #[test]
    fn evaluate_fires_on_binary_content() {
        let rule = rule_for("**/*");
        // Sixteen NUL bytes followed by a high-byte tail; the classifier is
        // expected to report this as Binary.
        let binary: Vec<u8> = [0u8; 16]
            .into_iter()
            .chain([0xff, 0xfe, 0xfd, 0xfc])
            .collect();
        let (tmp, idx) = tempdir_with_files(&[("img.bin", &binary)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1, "binary should fire: {v:?}");
    }

    #[test]
    fn evaluate_silent_on_zero_byte_file() {
        // Zero-length files count as text by convention, so the rule
        // neither reads them nor reports them.
        let rule = rule_for("**/*");
        let (tmp, idx) = tempdir_with_files(&[("empty", b"")]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty());
    }

    #[test]
    fn evaluate_skips_out_of_scope_files() {
        // The only file is binary but lives outside `src/`, so the scope
        // filter must keep the rule from looking at it.
        let rule = rule_for("src/**/*.rs");
        let (tmp, idx) = tempdir_with_files(&[("img.bin", &[0u8; 64])]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "out-of-scope shouldn't fire: {v:?}");
    }
}