Skip to main content

alint_rules/
file_is_text.rs

1//! `file_is_text` — every file in scope must be detected as text (not binary).
2//!
3//! Detection uses `content_inspector` on the first 8 KiB of each file
4//! (magic-byte + heuristic analysis). UTF-8, UTF-16 (with BOM), and plain
5//! 7-bit ASCII are treated as text.
6
7use std::path::Path;
8
9use alint_core::{Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation};
10
11use crate::io::{Classification, TEXT_INSPECT_LEN, classify_bytes, read_prefix};
12
13#[derive(Debug)]
14pub struct FileIsTextRule {
15    id: String,
16    level: Level,
17    policy_url: Option<String>,
18    message: Option<String>,
19    scope: Scope,
20}
21
22impl Rule for FileIsTextRule {
23    fn id(&self) -> &str {
24        &self.id
25    }
26    fn level(&self) -> Level {
27        self.level
28    }
29    fn policy_url(&self) -> Option<&str> {
30        self.policy_url.as_deref()
31    }
32
33    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
34        let mut violations = Vec::new();
35        for entry in ctx.index.files() {
36            if !self.scope.matches(&entry.path, ctx.index) {
37                continue;
38            }
39            if entry.size == 0 {
40                // Empty files are text by convention.
41                continue;
42            }
43            // Bounded read: only the first TEXT_INSPECT_LEN
44            // bytes feed `content_inspector`. Solo runs read
45            // just that prefix; the dispatch-flip path receives
46            // the whole file from the engine and inspects only
47            // the prefix.
48            let full = ctx.root.join(&entry.path);
49            let bytes = match read_prefix(&full) {
50                Ok(b) => b,
51                Err(e) => {
52                    violations.push(
53                        Violation::new(format!("could not read file: {e}"))
54                            .with_path(entry.path.clone()),
55                    );
56                    continue;
57                }
58            };
59            violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
60        }
61        Ok(violations)
62    }
63
64    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
65        Some(self)
66    }
67}
68
69impl PerFileRule for FileIsTextRule {
70    fn path_scope(&self) -> &Scope {
71        &self.scope
72    }
73
74    fn evaluate_file(
75        &self,
76        _ctx: &Context<'_>,
77        path: &Path,
78        bytes: &[u8],
79    ) -> Result<Vec<Violation>> {
80        if bytes.is_empty() {
81            return Ok(Vec::new());
82        }
83        // Inspect only the first TEXT_INSPECT_LEN bytes; the
84        // engine handed us the full file but the classifier
85        // only needs the prefix.
86        let sample = &bytes[..bytes.len().min(TEXT_INSPECT_LEN)];
87        if classify_bytes(sample) != Classification::Binary {
88            return Ok(Vec::new());
89        }
90        let msg = self
91            .message
92            .clone()
93            .unwrap_or_else(|| "file is detected as binary; text is required here".to_string());
94        Ok(vec![
95            Violation::new(msg).with_path(std::sync::Arc::<Path>::from(path)),
96        ])
97    }
98
99    fn max_bytes_needed(&self) -> Option<usize> {
100        Some(TEXT_INSPECT_LEN)
101    }
102}
103
104pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
105    let Some(_paths) = &spec.paths else {
106        return Err(Error::rule_config(
107            &spec.id,
108            "file_is_text requires a `paths` field",
109        ));
110    };
111    Ok(Box::new(FileIsTextRule {
112        id: spec.id.clone(),
113        level: spec.level,
114        policy_url: spec.policy_url.clone(),
115        message: spec.message.clone(),
116        scope: Scope::from_spec(spec)?,
117    }))
118}
119
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, spec_yaml, tempdir_with_files};

    /// Parses `yaml` into a spec and builds the rule, panicking if `build` fails.
    fn rule_from(yaml: &str) -> Box<dyn Rule> {
        build(&spec_yaml(yaml)).unwrap()
    }

    #[test]
    fn build_rejects_missing_paths_field() {
        // No `paths` key at all: `build` must refuse the spec.
        let no_paths = spec_yaml(
            "id: t\n\
             kind: file_is_text\n\
             level: warning\n",
        );
        assert!(build(&no_paths).is_err());
    }

    #[test]
    fn evaluate_passes_on_utf8_text() {
        let rule = rule_from(
            "id: t\n\
             kind: file_is_text\n\
             paths: \"**/*.rs\"\n\
             level: warning\n",
        );
        let (dir, index) = tempdir_with_files(&[("a.rs", b"// hello\nfn main() {}\n")]);
        let found = rule.evaluate(&ctx(dir.path(), &index)).unwrap();
        assert!(found.is_empty(), "utf-8 text should pass: {found:?}");
    }

    #[test]
    fn evaluate_fires_on_binary_content() {
        let rule = rule_from(
            "id: t\n\
             kind: file_is_text\n\
             paths: \"**/*\"\n\
             level: warning\n",
        );
        // Sixteen NUL bytes followed by a high-byte tail; the classifier
        // is expected to report this as Binary.
        let payload = [vec![0u8; 16], vec![0xff, 0xfe, 0xfd, 0xfc]].concat();
        let (dir, index) = tempdir_with_files(&[("img.bin", &payload)]);
        let found = rule.evaluate(&ctx(dir.path(), &index)).unwrap();
        assert_eq!(found.len(), 1, "binary should fire: {found:?}");
    }

    #[test]
    fn evaluate_silent_on_zero_byte_file() {
        // A zero-length file is text by convention, so the rule
        // must stay quiet.
        let rule = rule_from(
            "id: t\n\
             kind: file_is_text\n\
             paths: \"**/*\"\n\
             level: warning\n",
        );
        let (dir, index) = tempdir_with_files(&[("empty", b"")]);
        let found = rule.evaluate(&ctx(dir.path(), &index)).unwrap();
        assert!(found.is_empty());
    }

    #[test]
    fn evaluate_skips_out_of_scope_files() {
        // The only file sits outside `src/`, so the scope filter
        // must exclude it even though its content is all NUL bytes.
        let rule = rule_from(
            "id: t\n\
             kind: file_is_text\n\
             paths: \"src/**/*.rs\"\n\
             level: warning\n",
        );
        let (dir, index) = tempdir_with_files(&[("img.bin", &[0u8; 64])]);
        let found = rule.evaluate(&ctx(dir.path(), &index)).unwrap();
        assert!(found.is_empty(), "out-of-scope shouldn't fire: {found:?}");
    }
}