Skip to main content

flat/
filters.rs

1use std::fs::File;
2use std::io::Read;
3use std::path::Path;
4
/// Exact file names that are always excluded as secrets.
///
/// Entries must be lowercase: `is_secret_file` lowercases the file name
/// before comparing it for equality against this list.
const SECRET_PATTERNS: &[&str] = &[
    // Dotenv file; `.env*` variants are also caught by the prefix check in `is_secret_file`.
    ".env",
    // JSON credential files (NOTE(review): presumably cloud/service-account keys; confirm).
    "credentials.json",
    "serviceaccount.json",
    // Default OpenSSH private-key file names, one per key algorithm.
    "id_rsa",
    "id_dsa",
    "id_ecdsa",
    "id_ed25519",
];
15
/// Substrings that mark a file as secret when they occur anywhere in its name.
///
/// Matching is case-insensitive only because `is_secret_file` lowercases the
/// file name before searching, so every entry here must itself be lowercase.
const SECRET_SUBSTRINGS: &[&str] = &["secret", "password", "credential"];
18
/// File extensions (lowercase, without the leading dot) that indicate binary/non-text files.
///
/// Each row holds one category; the trailing comment names the category of the
/// row it sits on. `is_binary_extension` lowercases the extension before the
/// lookup, so entries must stay lowercase.
const BINARY_EXTENSIONS: &[&str] = &[
    "png", "jpg", "jpeg", "gif", "bmp", "ico", "svg", "webp", // images
    "mp4", "mp3", "wav", "avi", "mov", "flac", "ogg", // media (audio/video)
    "zip", "tar", "gz", "7z", "rar", "bz2", "xz", // archives
    "exe", "dll", "so", "dylib", "bin", // binaries (executables, shared libraries)
    "wasm", "class", "pyc", "o", "a", "lib", // compiled artifacts
    "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", // other (documents)
];
29
/// Why a file was skipped.
///
/// The enum is a plain tag with no payload, so it is `Copy` and can be used as
/// a `HashMap`/`HashSet` key (for example to count skipped files per reason).
/// Its `Display` form is a short lowercase label.
///
/// NOTE(review): the per-variant descriptions are inferred from the variant
/// names and the filter functions in this file; confirm against the call sites.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SkipReason {
    /// The file looks like a secret. Displayed as `secret`.
    Secret,
    /// The file looks like a binary file. Displayed as `binary`.
    Binary,
    /// The file is over the size limit. Displayed as `too large`.
    TooLarge,
    /// The file was rejected because of its extension. Displayed as `extension`.
    Extension,
    /// The file did **not** match; note the label is `no match`, not `match`.
    Match,
    /// The file is excluded by gitignore rules. Displayed as `gitignore`.
    Gitignore,
    /// The file could not be read. Displayed as `read error`.
    ReadError,
}

impl std::fmt::Display for SkipReason {
    /// Writes the short lowercase label for this reason.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant must force a label decision here.
        let label = match self {
            SkipReason::Secret => "secret",
            SkipReason::Binary => "binary",
            SkipReason::TooLarge => "too large",
            SkipReason::Extension => "extension",
            SkipReason::Match => "no match",
            SkipReason::Gitignore => "gitignore",
            SkipReason::ReadError => "read error",
        };
        f.write_str(label)
    }
}
54
55/// Check if a filename matches secret patterns
56pub fn is_secret_file(path: &Path) -> bool {
57    let file_name = match path.file_name() {
58        Some(name) => name.to_string_lossy().to_lowercase(),
59        None => return false,
60    };
61
62    // Check exact patterns
63    if SECRET_PATTERNS.iter().any(|p| file_name == *p) {
64        return true;
65    }
66
67    // Check .env variants
68    if file_name.starts_with(".env") {
69        return true;
70    }
71
72    // Check extensions
73    if let Some(ext) = path.extension() {
74        let ext = ext.to_string_lossy().to_lowercase();
75        if matches!(ext.as_str(), "key" | "pem" | "p12" | "pfx") {
76            return true;
77        }
78    }
79
80    // Check substrings
81    SECRET_SUBSTRINGS.iter().any(|s| file_name.contains(s))
82}
83
84/// Check if a file extension indicates a binary file
85pub fn is_binary_extension(path: &Path) -> bool {
86    if let Some(ext) = path.extension() {
87        let ext = ext.to_string_lossy().to_lowercase();
88        return BINARY_EXTENSIONS.contains(&ext.as_str());
89    }
90    false
91}
92
/// Check if a file is binary by sniffing its content.
///
/// Reads up to the first 8 KiB of the file and returns `true` if that prefix
/// contains a NUL byte. Bytes beyond the first 8 KiB are never inspected.
///
/// Returns `false` when the file cannot be opened or read: I/O failures are
/// deliberately treated as "not binary" here rather than reported.
pub fn is_binary_content(path: &Path) -> bool {
    /// Number of leading bytes inspected for NUL bytes.
    const SNIFF_LEN: usize = 8192;

    let file = match File::open(path) {
        Ok(f) => f,
        Err(_) => return false,
    };

    // A single `read` call may return fewer bytes than are available and may
    // fail with `ErrorKind::Interrupted`; `read_to_end` on a `take` adapter
    // retries interrupts and keeps reading until the limit or EOF, so the
    // whole prefix is always examined.
    let mut head = Vec::with_capacity(SNIFF_LEN);
    match file.take(SNIFF_LEN as u64).read_to_end(&mut head) {
        Ok(_) => head.contains(&0),
        Err(_) => false,
    }
}
110
/// Reports whether the file at `path` is strictly larger than `max_size` bytes.
///
/// A file whose length equals `max_size` is within the limit. If the file's
/// metadata cannot be read (for example, the path does not exist), the result
/// is `false`.
pub fn exceeds_size_limit(path: &Path, max_size: u64) -> bool {
    std::fs::metadata(path)
        .map(|meta| meta.len() > max_size)
        .unwrap_or(false)
}
118
#[cfg(test)]
mod tests {
    use super::*;

    // Names that must be flagged as secret, followed by names that must not be.
    #[test]
    fn test_secret_file_detection() {
        let flagged = [
            ".env",
            ".env.local",
            ".env.production",
            "credentials.json",
            "id_rsa",
            "my.key",
            "cert.pem",
            "my-secret-file.txt",
            "passwords.txt",
        ];
        for name in flagged.iter().copied() {
            assert!(
                is_secret_file(Path::new(name)),
                "expected `{}` to be treated as secret",
                name
            );
        }

        let ordinary = ["main.rs", "config.toml"];
        for name in ordinary.iter().copied() {
            assert!(
                !is_secret_file(Path::new(name)),
                "expected `{}` not to be treated as secret",
                name
            );
        }
    }

    // Extensions that must be recognised as binary, followed by text-like ones.
    #[test]
    fn test_binary_extension_detection() {
        let binary = [
            "image.png",
            "logo.jpg",
            "output.wasm",
            "archive.zip",
            "binary.exe",
        ];
        for name in binary.iter().copied() {
            assert!(
                is_binary_extension(Path::new(name)),
                "expected `{}` to have a binary extension",
                name
            );
        }

        let textual = ["main.rs", "config.toml", "README.md"];
        for name in textual.iter().copied() {
            assert!(
                !is_binary_extension(Path::new(name)),
                "expected `{}` not to have a binary extension",
                name
            );
        }
    }
}