infiniloom_engine/scanner/
common.rs1use std::path::Path;
7
/// File extensions (lowercase, without the leading dot) that [`is_binary_extension`]
/// treats as binary.
///
/// Entries are laid out one category per row purely for readability; lookups scan
/// the whole slice, so the grouping carries no meaning at runtime.
pub const BINARY_EXTENSIONS: &[&str] = &[
    // Executables, shared/static libraries and object files
    "exe", "dll", "so", "dylib", "a", "o", "obj", "lib",
    // Compiled bytecode and Java archives
    "pyc", "pyo", "class", "jar", "war", "ear",
    // Archives and compressed files
    "zip", "tar", "gz", "bz2", "xz", "7z", "rar", "tgz",
    // Images
    // NOTE(review): `svg` is XML text; presumably listed here on purpose so it is
    // skipped like other image assets - confirm.
    "png", "jpg", "jpeg", "gif", "bmp", "ico", "webp", "svg", "tiff", "psd",
    // Audio and video
    "mp3", "mp4", "avi", "mov", "wav", "flac", "ogg", "webm", "mkv",
    // Office and document formats
    "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt",
    // Fonts
    "woff", "woff2", "ttf", "eot", "otf",
    // Databases
    "db", "sqlite", "sqlite3",
    // Generic binary / data blobs
    "bin", "dat", "cache",
];
25
26pub fn is_binary_extension(path: &Path) -> bool {
44 let ext = match path.extension().and_then(|e| e.to_str()) {
45 Some(e) => e.to_lowercase(),
46 None => return false,
47 };
48
49 BINARY_EXTENSIONS.contains(&ext.as_str())
50}
51
/// Heuristically decides whether `content` looks like binary data.
///
/// Only the first 8192 bytes are inspected. A byte counts as "binary" when it is
/// below 32 (which includes NUL) and is not a newline, carriage return or tab.
/// The content is reported as binary when the number of such bytes is strictly
/// greater than one tenth of the inspected length (integer division).
///
/// Empty input is never binary.
pub fn is_binary_content(content: &[u8]) -> bool {
    /// Maximum number of leading bytes that are inspected.
    const SAMPLE_LIMIT: usize = 8192;

    // Take the leading window, or the whole slice when it is shorter than the limit.
    let window = content.get(..SAMPLE_LIMIT).unwrap_or(content);

    let suspicious = window
        .iter()
        .filter(|&&byte| !matches!(byte, b'\n' | b'\r' | b'\t' | 32..=255))
        .count();

    // For an empty window this is `0 > 0`, i.e. `false`, so no separate
    // empty-input guard is required.
    suspicious > window.len() / 10
}
95
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every name in `names` is classified as binary by extension.
    fn assert_all_binary(names: &[&str]) {
        for name in names {
            assert!(
                is_binary_extension(Path::new(name)),
                "expected binary extension: {}",
                name
            );
        }
    }

    /// Asserts that no name in `names` is classified as binary by extension.
    fn assert_none_binary(names: &[&str]) {
        for name in names {
            assert!(
                !is_binary_extension(Path::new(name)),
                "expected non-binary extension: {}",
                name
            );
        }
    }

    #[test]
    fn test_binary_extension_executables() {
        assert_all_binary(&["program.exe", "lib.dll", "library.so", "framework.dylib"]);
    }

    #[test]
    fn test_binary_extension_archives() {
        assert_all_binary(&["archive.zip", "backup.tar", "compressed.gz", "package.7z"]);
    }

    #[test]
    fn test_binary_extension_images() {
        assert_all_binary(&["photo.jpg", "logo.png", "icon.gif", "image.webp"]);
    }

    #[test]
    fn test_binary_extension_media() {
        assert_all_binary(&["song.mp3", "video.mp4", "movie.mkv"]);
    }

    #[test]
    fn test_binary_extension_documents() {
        assert_all_binary(&["doc.pdf", "spreadsheet.xlsx", "presentation.pptx"]);
    }

    #[test]
    fn test_binary_extension_fonts() {
        assert_all_binary(&["font.woff", "font.woff2", "font.ttf"]);
    }

    #[test]
    fn test_binary_extension_database() {
        assert_all_binary(&["data.db", "store.sqlite", "cache.sqlite3"]);
    }

    #[test]
    fn test_non_binary_extensions() {
        assert_none_binary(&[
            "code.rs",
            "script.py",
            "module.ts",
            "style.css",
            "data.json",
            "config.yaml",
            "readme.md",
        ]);
    }

    #[test]
    fn test_no_extension() {
        // `.gitignore` is a dotfile: the leading dot does not start an extension.
        assert_none_binary(&["Makefile", "Dockerfile", ".gitignore"]);
    }

    #[test]
    fn test_case_insensitive() {
        assert_all_binary(&["FILE.PNG", "Archive.ZIP", "Video.MP4"]);
    }

    #[test]
    fn test_binary_content_text() {
        let samples: [&[u8]; 3] = [
            b"fn main() {\n println!(\"hello\");\n}",
            b"Hello, World!\n",
            b"def foo():\n return 42\n",
        ];
        for sample in samples.iter() {
            assert!(!is_binary_content(sample));
        }
    }

    #[test]
    fn test_binary_content_with_nulls() {
        // Nothing but NUL bytes.
        let all_nulls = [0u8; 100];
        assert!(is_binary_content(&all_nulls));

        // A short text prefix followed by a long run of NUL bytes.
        let text_then_nulls: Vec<u8> = b"some text"
            .iter()
            .copied()
            .chain(std::iter::repeat(0u8).take(100))
            .collect();
        assert!(is_binary_content(&text_then_nulls));
    }

    #[test]
    fn test_binary_content_control_chars() {
        // Every byte below 32 except the whitespace bytes the detector tolerates.
        let control_bytes: Vec<u8> = (0u8..32)
            .filter(|b| !matches!(*b, b'\n' | b'\r' | b'\t'))
            .collect();

        let mut payload = control_bytes.repeat(10);
        payload.extend_from_slice(b"some text");
        assert!(is_binary_content(&payload));
    }

    #[test]
    fn test_binary_content_empty() {
        let nothing: &[u8] = b"";
        assert!(!is_binary_content(nothing));
    }

    #[test]
    fn test_binary_content_whitespace_ok() {
        // Newlines, carriage returns and tabs must not count towards the threshold.
        let text = b"line1\nline2\r\nline3\ttabbed";
        assert!(!is_binary_content(text));
    }
}