use std::fs::File;
use std::io::Read;
use std::path::Path;
/// Reports whether `path` should be treated as a binary file.
///
/// The extension lookup is tried first because it needs no I/O; the file's
/// contents are only inspected when the extension is not on the known list.
pub fn is_binary_file(path: &Path) -> bool {
    is_binary_by_extension(path) || is_binary_by_content(path)
}
/// Returns `true` when the file name alone marks `path` as a file to treat as
/// binary.
///
/// Matching is case-insensitive. Besides true binary formats (executables,
/// archives, media, office documents, compiled artifacts) the list also
/// contains a few text-based entries such as `svg`, `lock`, and the compound
/// suffixes `.min.js` / `.bundle.js`.
///
/// Paths without an extension, or whose name is not valid UTF-8, yield
/// `false`.
fn is_binary_by_extension(path: &Path) -> bool {
    // `Path::extension` only yields the text after the final dot ("js" for
    // "app.min.js"), so compound suffixes must be matched against the whole
    // file name instead.
    let has_compound_suffix = path
        .file_name()
        .and_then(|name| name.to_str())
        .map_or(false, |name| {
            let name = name.to_lowercase();
            name.ends_with(".min.js") || name.ends_with(".bundle.js")
        });
    if has_compound_suffix {
        return true;
    }

    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => matches!(
            ext.to_lowercase().as_str(),
            // Executables, libraries and object code
            "exe" | "dll" | "so" | "dylib" | "a" | "o" | "lib" | "bin"
            // Archives
            | "zip" | "tar" | "gz" | "bz2" | "xz" | "7z" | "rar" | "tgz"
            // Images
            | "png" | "jpg" | "jpeg" | "gif" | "bmp" | "ico" | "svg" | "webp"
            // Video
            | "mp4" | "avi" | "mov" | "wmv" | "flv" | "mkv" | "webm"
            // Audio
            | "mp3" | "wav" | "ogg" | "flac" | "aac" | "wma"
            // Documents
            | "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx"
            // Compiled / packaged artifacts
            | "wasm" | "pyc" | "class" | "jar" | "war"
            // Lock files
            | "lock"
        ),
        None => false,
    }
}
/// Decides from a file's leading bytes whether it looks like binary data.
///
/// At most the first 8192 bytes are sampled. The verdict is:
///
/// * `false` if the file cannot be opened or read, or is empty;
/// * `true` if the sample contains a NUL byte;
/// * for a sample that is valid UTF-8: `true` only when more than 80% of its
///   characters are control characters other than tab, line feed and
///   carriage return;
/// * for any other sample: `true` when more than 30% of its bytes are outside
///   printable ASCII and common whitespace.
fn is_binary_by_content(path: &Path) -> bool {
    // Number of leading bytes inspected.
    const SAMPLE_SIZE: usize = 8192;
    // Share of non-printable bytes above which non-UTF-8 data counts as binary.
    const NON_UTF8_THRESHOLD: f64 = 0.30;
    // Share of control characters above which valid UTF-8 counts as binary.
    const CONTROL_CHAR_THRESHOLD: f64 = 0.80;

    let file = match File::open(path) {
        Ok(file) => file,
        Err(_) => return false,
    };

    // `take` + `read_to_end` keeps reading until the limit or EOF, so short
    // reads and `Interrupted` errors do not shrink the sample.
    let mut sample = Vec::with_capacity(SAMPLE_SIZE);
    if file
        .take(SAMPLE_SIZE as u64) // usize -> u64 is lossless here
        .read_to_end(&mut sample)
        .is_err()
    {
        return false;
    }

    if sample.is_empty() {
        return false;
    }
    if sample.contains(&0) {
        return true;
    }

    let text = match std::str::from_utf8(&sample) {
        Ok(text) => Some(text),
        // `error_len() == None` means the input ended in the middle of a
        // multi-byte character. When the sample is full, that is just the
        // sampling cut-off, so judge the valid prefix instead.
        Err(err) if err.error_len().is_none() && sample.len() == SAMPLE_SIZE => {
            std::str::from_utf8(&sample[..err.valid_up_to()]).ok()
        }
        Err(_) => None,
    };

    match text {
        Some(text) => {
            // Multi-byte characters are ordinary text here; only control
            // characters count against the file.
            let mut total = 0usize;
            let mut control = 0usize;
            for c in text.chars() {
                total += 1;
                if c.is_control() && !matches!(c, '\t' | '\n' | '\r') {
                    control += 1;
                }
            }
            total > 0 && control as f64 / total as f64 > CONTROL_CHAR_THRESHOLD
        }
        None => {
            let non_printable = sample
                .iter()
                .filter(|&&byte| !is_printable_or_whitespace(byte))
                .count();
            non_printable as f64 / sample.len() as f64 > NON_UTF8_THRESHOLD
        }
    }
}

/// Returns `true` for tab, line feed, carriage return and the printable ASCII
/// range `0x20..=0x7E`; every other byte value yields `false`.
#[inline]
fn is_printable_or_whitespace(byte: u8) -> bool {
    matches!(byte, 0x09 | 0x0A | 0x0D | 0x20..=0x7E)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Creates `name` inside `dir` holding `contents` and returns its path.
    fn write_fixture(dir: &TempDir, name: &str, contents: &[u8]) -> PathBuf {
        let path = dir.path().join(name);
        fs::write(&path, contents).unwrap();
        path
    }

    #[test]
    fn test_binary_by_extension() {
        for name in &["test.exe", "libfoo.so", "image.png", "archive.zip", "video.mp4"] {
            assert!(is_binary_by_extension(Path::new(name)), "{}", name);
        }
        for name in &["main.rs", "README.md"] {
            assert!(!is_binary_by_extension(Path::new(name)), "{}", name);
        }
    }

    #[test]
    fn test_extension_edge_cases() {
        // Matching ignores case.
        assert!(is_binary_by_extension(Path::new("PHOTO.PNG")));
        // A name without any extension is never matched.
        assert!(!is_binary_by_extension(Path::new("Makefile")));
    }

    #[test]
    fn test_text_file_detection() {
        let dir = TempDir::new().unwrap();
        let path = write_fixture(
            &dir,
            "test.txt",
            b"This is a text file\nwith multiple lines\n",
        );
        assert!(!is_binary_by_content(&path));
    }

    #[test]
    fn test_binary_file_detection() {
        let dir = TempDir::new().unwrap();
        // The NUL byte alone is enough to flag the file.
        let path = write_fixture(&dir, "test.bin", &[0x00, 0x01, 0x02, 0x03, 0xFF]);
        assert!(is_binary_by_content(&path));
    }

    #[test]
    fn test_non_printable_ratio() {
        let dir = TempDir::new().unwrap();
        // No NUL bytes, but every byte is a control character.
        let data: Vec<u8> = (0x01..=0x08).cycle().take(1000).collect();
        let path = write_fixture(&dir, "test.dat", &data);
        assert!(is_binary_by_content(&path));
    }

    #[test]
    fn test_utf8_validity() {
        let dir = TempDir::new().unwrap();

        // "Hello, <two CJK characters>!" -- valid multi-byte UTF-8 text.
        let valid_path = write_fixture(
            &dir,
            "valid.txt",
            "Hello, \u{4e16}\u{754c}!".as_bytes(),
        );
        assert!(!is_binary_by_content(&valid_path));

        // 0xFF / 0xFE / 0xFD can never appear in well-formed UTF-8.
        let invalid_path = write_fixture(&dir, "invalid.txt", &[0xFF, 0xFE, 0xFD]);
        assert!(is_binary_by_content(&invalid_path));
    }

    #[test]
    fn test_empty_and_missing_files_are_not_binary() {
        let dir = TempDir::new().unwrap();
        let empty_path = write_fixture(&dir, "empty.txt", b"");
        assert!(!is_binary_by_content(&empty_path));

        let missing_path = dir.path().join("does_not_exist.txt");
        assert!(!is_binary_by_content(&missing_path));
    }

    #[test]
    fn test_printable_or_whitespace() {
        for &byte in &[b' ', b'\t', b'\n', b'\r', b'A', b'z', b'0'] {
            assert!(is_printable_or_whitespace(byte), "byte {:#04x}", byte);
        }
        for &byte in &[0x00u8, 0x01, 0xFF] {
            assert!(!is_printable_or_whitespace(byte), "byte {:#04x}", byte);
        }
    }
}