Skip to main content

alint_rules/
io.rs

1//! Shared I/O helpers for content-reading rules.
2
3use std::io::{Read as _, Seek, SeekFrom};
4use std::path::Path;
5
/// How much of a file to sample when classifying text vs. binary:
/// 8 KiB. This is the upper bound on the number of bytes
/// [`read_prefix`] returns.
pub const TEXT_INSPECT_LEN: usize = 8 * 1024;
8
/// Read up to [`TEXT_INSPECT_LEN`] bytes from the start of the file at
/// `path`. Thin wrapper over [`read_prefix_n`].
///
/// An empty file yields `Ok` with an empty `Vec` (the return type has
/// no `Option` layer), and a file shorter than the limit yields all of
/// its bytes.
///
/// # Errors
///
/// Any I/O error from opening or reading the file is propagated
/// unchanged.
pub fn read_prefix(path: &Path) -> std::io::Result<Vec<u8>> {
    read_prefix_n(path, TEXT_INSPECT_LEN)
}
14
/// Read up to `n` bytes from the start of `path`. Used by rules that
/// only need to inspect a leading window — `executable_has_shebang`
/// (2 bytes for `#!`), `file_starts_with` (`pattern.len()` bytes).
///
/// The returned `Vec` holds exactly `min(n, file length)` bytes: it is
/// shorter than `n` only when the file itself is shorter. The read
/// loops until either `n` bytes or end-of-file is reached, so a short
/// read from the OS (or an `Interrupted` error, which is retried) can
/// not silently truncate the prefix.
///
/// The buffer grows with the data actually read rather than being
/// allocated at `n` bytes up front, so a very large `n` against a small
/// file is cheap.
///
/// # Errors
///
/// Any I/O error from opening or reading the file is propagated.
pub fn read_prefix_n(path: &Path, n: usize) -> std::io::Result<Vec<u8>> {
    // Upper bound on the up-front allocation; larger prefixes grow the
    // buffer on demand inside `read_to_end`.
    const PREALLOC_CAP: usize = 64 * 1024;

    let file = std::fs::File::open(path)?;
    // `usize` fits in `u64` on every supported target; saturate rather
    // than truncate if that ever stops being true.
    let limit = u64::try_from(n).unwrap_or(u64::MAX);
    let mut buf = Vec::with_capacity(n.min(PREALLOC_CAP));
    // `take` caps the total bytes yielded; `read_to_end` keeps reading
    // until that cap or EOF, handling short reads and `Interrupted`.
    file.take(limit).read_to_end(&mut buf)?;
    Ok(buf)
}
27
/// Read the trailing window of `path`: at most `n` bytes taken from
/// the END of the file. Used by tail-inspecting rules such as
/// `file_ends_with` (which asks for `pattern.len()` bytes).
///
/// The returned `Vec`'s length is the number of bytes obtained. A file
/// shorter than `n` is returned in full, so the result can be shorter
/// than `n`.
///
/// # Errors
///
/// Propagates any I/O error from opening, seeking or reading the file.
pub fn read_suffix_n(path: &Path, n: usize) -> std::io::Result<Vec<u8>> {
    let mut handle = std::fs::File::open(path)?;
    // Seeking to the end reports the file length as the new position.
    let file_len = handle.seek(SeekFrom::End(0))?;
    // On 32-bit targets a > 4 GiB length does not fit in `usize`; such
    // a file is necessarily longer than `n`, so the window is just `n`
    // (callers keep `n` to a sane size).
    let window = match usize::try_from(file_len) {
        Ok(len) if len < n => len,
        _ => n,
    };
    let start = file_len - window as u64;
    handle.seek(SeekFrom::Start(start))?;
    let mut tail = vec![0u8; window];
    handle.read_exact(&mut tail)?;
    Ok(tail)
}
46
/// Classification of a file's contents. Computed lazily — callers check the
/// subset they care about.
///
/// Produced by [`classify_bytes`] from a sample of the file's bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Classification {
    /// Anything `content_inspector` does not report as binary.
    Text,
    /// `content_inspector` reported the sample as `ContentType::BINARY`.
    Binary,
}
54
55pub fn classify_bytes(bytes: &[u8]) -> Classification {
56    match content_inspector::inspect(bytes) {
57        content_inspector::ContentType::BINARY => Classification::Binary,
58        _ => Classification::Text,
59    }
60}
61
/// Hard cap (256 MiB) on a single whole-file read by the cross-file /
/// structured rule kinds (`registry_paths_resolve`,
/// `cross_file_value_equals`, `pair_hash`, `generated_file_fresh`).
/// Generous — every realistic manifest / generated file is orders
/// of magnitude smaller — yet bounded so a hostile or accidental
/// multi-GB file in a linted repo yields a clear violation
/// instead of OOM-ing the run.
///
/// This is the limit [`read_capped`] passes to `read_capped_with`.
pub const MAX_ANALYZE_BYTES: u64 = 256 * 1024 * 1024;
70
/// Failure of [`read_capped`]: the file exceeds
/// [`MAX_ANALYZE_BYTES`] (carrying its size), or an ordinary I/O
/// error (kept distinct so callers turn "too large" into a clear
/// violation rather than reusing their not-found / skip path).
#[derive(Debug)]
pub enum ReadCapError {
    /// The file is larger than the cap; the payload is its size in
    /// bytes.
    TooLarge(u64),
    /// The underlying I/O error (e.g. the path does not exist).
    Io(std::io::Error),
}
80
81/// Read a whole file, refusing (via a cheap `metadata` stat, so
82/// the oversized bytes are never read) anything larger than
83/// `max`. `pub(crate)` so rule-level tests can inject a tiny
84/// `max` to exercise the over-cap violation path without
85/// materialising a >256 MiB fixture.
86pub(crate) fn read_capped_with(path: &Path, max: u64) -> Result<Vec<u8>, ReadCapError> {
87    match std::fs::metadata(path) {
88        Ok(m) if m.len() > max => Err(ReadCapError::TooLarge(m.len())),
89        Ok(_) => std::fs::read(path).map_err(ReadCapError::Io),
90        Err(e) => Err(ReadCapError::Io(e)),
91    }
92}
93
/// Whole-file read bounded by [`MAX_ANALYZE_BYTES`]. Used by the
/// cross-file / structured rules for the manifest / source /
/// target / committed-file reads they do themselves.
///
/// # Errors
///
/// Returns [`ReadCapError::TooLarge`] when the file is larger than
/// [`MAX_ANALYZE_BYTES`], and [`ReadCapError::Io`] for any other
/// failure (for example a missing path).
pub fn read_capped(path: &Path) -> Result<Vec<u8>, ReadCapError> {
    read_capped_with(path, MAX_ANALYZE_BYTES)
}
100
#[cfg(test)]
mod tests {
    use super::*;

    /// A small file is returned verbatim by the default-capped reader.
    #[test]
    fn read_capped_returns_bytes_under_cap() {
        let scratch = tempfile::tempdir().unwrap();
        let path = scratch.path().join("f");
        std::fs::write(&path, b"hello").unwrap();

        let bytes = read_capped(&path).unwrap_or_else(|_| panic!("expected Bytes under the cap"));
        assert_eq!(bytes, b"hello");
    }

    /// With an injected 4-byte cap, a 10-byte file is rejected and the
    /// error carries the file's size.
    #[test]
    fn read_capped_with_rejects_over_cap_without_reading() {
        let scratch = tempfile::tempdir().unwrap();
        let path = scratch.path().join("big");
        std::fs::write(&path, b"0123456789").unwrap();

        if let Err(ReadCapError::TooLarge(size)) = read_capped_with(&path, 4) {
            assert_eq!(size, 10);
        } else {
            panic!("a 10-byte file must exceed a 4-byte cap");
        }
    }

    /// A path that does not exist surfaces as the `Io` variant, not as
    /// `TooLarge`.
    #[test]
    fn read_capped_missing_path_is_io_error() {
        let scratch = tempfile::tempdir().unwrap();
        let missing = scratch.path().join("nope");

        assert!(
            matches!(read_capped(&missing), Err(ReadCapError::Io(_))),
            "a missing path must be an Io error"
        );
    }
}