Skip to main content

alint_rules/
file_is_text.rs

//! `file_is_text` — every file in scope must be detected as text (not binary).
//!
//! Detection runs `content_inspector` on the first 8 KiB of each file
//! (magic-byte + heuristic analysis). UTF-8, UTF-16 (with BOM), and plain
//! 7-bit ASCII are treated as text.
6
7use std::path::Path;
8
9use alint_core::{
10    Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, ScopeFilter, Violation,
11};
12
13use crate::io::{Classification, TEXT_INSPECT_LEN, classify_bytes, read_prefix};
14
15#[derive(Debug)]
16pub struct FileIsTextRule {
17    id: String,
18    level: Level,
19    policy_url: Option<String>,
20    message: Option<String>,
21    scope: Scope,
22    scope_filter: Option<ScopeFilter>,
23}
24
25impl Rule for FileIsTextRule {
26    fn id(&self) -> &str {
27        &self.id
28    }
29    fn level(&self) -> Level {
30        self.level
31    }
32    fn policy_url(&self) -> Option<&str> {
33        self.policy_url.as_deref()
34    }
35
36    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
37        let mut violations = Vec::new();
38        for entry in ctx.index.files() {
39            if !self.scope.matches(&entry.path) {
40                continue;
41            }
42            if let Some(filter) = &self.scope_filter
43                && !filter.matches(&entry.path, ctx.index)
44            {
45                continue;
46            }
47            if entry.size == 0 {
48                // Empty files are text by convention.
49                continue;
50            }
51            // Bounded read: only the first TEXT_INSPECT_LEN
52            // bytes feed `content_inspector`. Solo runs read
53            // just that prefix; the dispatch-flip path receives
54            // the whole file from the engine and inspects only
55            // the prefix.
56            let full = ctx.root.join(&entry.path);
57            let bytes = match read_prefix(&full) {
58                Ok(b) => b,
59                Err(e) => {
60                    violations.push(
61                        Violation::new(format!("could not read file: {e}"))
62                            .with_path(entry.path.clone()),
63                    );
64                    continue;
65                }
66            };
67            violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
68        }
69        Ok(violations)
70    }
71
72    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
73        Some(self)
74    }
75
76    fn scope_filter(&self) -> Option<&ScopeFilter> {
77        self.scope_filter.as_ref()
78    }
79}
80
81impl PerFileRule for FileIsTextRule {
82    fn path_scope(&self) -> &Scope {
83        &self.scope
84    }
85
86    fn evaluate_file(
87        &self,
88        _ctx: &Context<'_>,
89        path: &Path,
90        bytes: &[u8],
91    ) -> Result<Vec<Violation>> {
92        if bytes.is_empty() {
93            return Ok(Vec::new());
94        }
95        // Inspect only the first TEXT_INSPECT_LEN bytes; the
96        // engine handed us the full file but the classifier
97        // only needs the prefix.
98        let sample = &bytes[..bytes.len().min(TEXT_INSPECT_LEN)];
99        if classify_bytes(sample) != Classification::Binary {
100            return Ok(Vec::new());
101        }
102        let msg = self
103            .message
104            .clone()
105            .unwrap_or_else(|| "file is detected as binary; text is required here".to_string());
106        Ok(vec![
107            Violation::new(msg).with_path(std::sync::Arc::<Path>::from(path)),
108        ])
109    }
110
111    fn max_bytes_needed(&self) -> Option<usize> {
112        Some(TEXT_INSPECT_LEN)
113    }
114}
115
116pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
117    let Some(paths) = &spec.paths else {
118        return Err(Error::rule_config(
119            &spec.id,
120            "file_is_text requires a `paths` field",
121        ));
122    };
123    Ok(Box::new(FileIsTextRule {
124        id: spec.id.clone(),
125        level: spec.level,
126        policy_url: spec.policy_url.clone(),
127        message: spec.message.clone(),
128        scope: Scope::from_paths_spec(paths)?,
129        scope_filter: spec.parse_scope_filter()?,
130    }))
131}
132
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, spec_yaml, tempdir_with_files};

    /// Spec with no `paths` field; `build` must reject it.
    const SPEC_WITHOUT_PATHS: &str = "id: t\n\
             kind: file_is_text\n\
             level: warning\n";

    /// Spec scoped to Rust sources anywhere in the tree.
    const SPEC_RUST_FILES: &str = "id: t\n\
             kind: file_is_text\n\
             paths: \"**/*.rs\"\n\
             level: warning\n";

    /// Spec scoped to every file in the tree.
    const SPEC_ALL_FILES: &str = "id: t\n\
             kind: file_is_text\n\
             paths: \"**/*\"\n\
             level: warning\n";

    /// Spec scoped to Rust sources under `src/` only.
    const SPEC_SRC_RUST_FILES: &str = "id: t\n\
             kind: file_is_text\n\
             paths: \"src/**/*.rs\"\n\
             level: warning\n";

    #[test]
    fn build_rejects_missing_paths_field() {
        let spec = spec_yaml(SPEC_WITHOUT_PATHS);
        assert!(build(&spec).is_err());
    }

    #[test]
    fn evaluate_passes_on_utf8_text() {
        let rule = build(&spec_yaml(SPEC_RUST_FILES)).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("a.rs", b"// hello\nfn main() {}\n")]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "utf-8 text should pass: {v:?}");
    }

    #[test]
    fn evaluate_fires_on_binary_content() {
        let rule = build(&spec_yaml(SPEC_ALL_FILES)).unwrap();
        // Sixteen NUL bytes followed by a non-text tail; content_inspector
        // should classify this as Binary.
        let binary: Vec<u8> = [vec![0u8; 16], vec![0xff, 0xfe, 0xfd, 0xfc]].concat();
        let (tmp, idx) = tempdir_with_files(&[("img.bin", &binary)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1, "binary should fire: {v:?}");
    }

    #[test]
    fn evaluate_silent_on_zero_byte_file() {
        // A zero-byte file is text by convention: nothing is read and
        // nothing is reported.
        let rule = build(&spec_yaml(SPEC_ALL_FILES)).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("empty", b"")]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty());
    }

    #[test]
    fn evaluate_skips_out_of_scope_files() {
        // The only file is binary but sits outside `src/**/*.rs`.
        let rule = build(&spec_yaml(SPEC_SRC_RUST_FILES)).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("img.bin", &[0u8; 64])]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "out-of-scope shouldn't fire: {v:?}");
    }
}