use std::fs::File;
use std::io::Read;
use std::path::Path;
/// Leading byte sequences ("magic numbers") that mark a file as binary.
///
/// A file whose first bytes start with any entry is classified as binary
/// without further inspection. Entries are matched as plain prefixes, so the
/// short two-byte ones (`BM`, `MZ`, gzip, MPEG sync) are the most permissive.
const BINARY_SIGNATURES: &[&[u8]] = &[
    // Images
    &[0x89, 0x50, 0x4E, 0x47], // "\x89PNG"  PNG
    &[0xFF, 0xD8, 0xFF],       // JPEG
    &[0x47, 0x49, 0x46, 0x38], // "GIF8"     GIF
    &[0x42, 0x4D],             // "BM"       BMP
    &[0x00, 0x00, 0x01, 0x00], // ICO
    &[0x52, 0x49, 0x46, 0x46], // "RIFF"     RIFF containers (WebP, WAV, AVI)
    // Archives and compressed streams
    &[0x50, 0x4B, 0x03, 0x04], // "PK\x03\x04" ZIP (also JAR and OOXML documents)
    &[0x1F, 0x8B],             // gzip
    &[0x42, 0x5A, 0x68],       // "BZh"      bzip2
    &[0xFD, 0x37, 0x7A, 0x58], // "\xFD7zX"  xz
    &[0x52, 0x61, 0x72, 0x21], // "Rar!"     RAR
    &[0x37, 0x7A, 0xBC, 0xAF], // "7z\xBC\xAF" 7-Zip
    // Executables and object code
    &[0x7F, 0x45, 0x4C, 0x46], // "\x7FELF"  ELF
    &[0x4D, 0x5A],             // "MZ"       DOS/PE executables
    &[0xCF, 0xFA, 0xED, 0xFE], // Mach-O, 64-bit
    &[0xCE, 0xFA, 0xED, 0xFE], // Mach-O, 32-bit
    &[0xCA, 0xFE, 0xBA, 0xBE], // Mach-O universal binary / Java class file
    // Documents
    &[0x25, 0x50, 0x44, 0x46], // "%PDF"     PDF
    &[0xD0, 0xCF, 0x11, 0xE0], // OLE2 compound file (legacy MS Office)
    // Audio
    &[0x49, 0x44, 0x33],       // "ID3"      MP3 with an ID3v2 tag
    &[0xFF, 0xFB],             // MPEG audio frame sync (MP3 without a tag)
    &[0x4F, 0x67, 0x67, 0x53], // "OggS"     Ogg
    // Fonts
    &[0x00, 0x01, 0x00, 0x00], // TrueType
    &[0x4F, 0x54, 0x54, 0x4F], // "OTTO"     OpenType (CFF)
    // Databases
    &[0x53, 0x51, 0x4C, 0x69], // "SQLi"     SQLite database header
];
/// File extensions (lowercase, without the leading dot) that are treated as
/// binary purely by name, before any file content is inspected.
const BINARY_EXTENSIONS: &[&str] = &[
    // Images.
    // NOTE(review): "svg" is XML text; presumably listed on purpose so that
    // vector images are skipped like other image assets — confirm.
    "png", "jpg", "jpeg", "gif", "bmp", "ico", "webp", "tiff", "tif", "psd", "svg",
    // Archives and compressed files
    "zip", "tar", "gz", "bz2", "xz", "7z", "rar", "jar", "war", "ear",
    // Executables, libraries and compiled artefacts
    "exe", "dll", "so", "dylib", "bin", "o", "a", "lib", "pyc", "pyo", "class",
    // Office and document formats
    "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "odt", "ods", "odp",
    // Audio and video
    "mp3", "mp4", "avi", "mkv", "mov", "wmv", "flv", "wav", "flac", "ogg", "m4a",
    // Fonts
    "ttf", "otf", "woff", "woff2", "eot",
    // Databases and serialized data
    "db", "sqlite", "sqlite3", "pickle", "npy", "npz",
];
/// Decides whether the file at `path` should be treated as binary.
///
/// The check is two-staged:
/// 1. If the path has an extension whose lowercased form appears in
///    `BINARY_EXTENSIONS`, the file is reported as binary without being opened.
/// 2. Otherwise the verdict is delegated to [`is_binary_content`], which
///    inspects the beginning of the file.
pub fn is_binary(path: &Path) -> bool {
    let listed_extension = match path.extension() {
        Some(raw_ext) => {
            // Extensions are stored lowercase, so normalise before comparing.
            let normalized = raw_ext.to_string_lossy().to_lowercase();
            BINARY_EXTENSIONS.contains(&normalized.as_str())
        }
        None => false,
    };

    // `||` short-circuits: the file is only opened when the name was inconclusive.
    listed_extension || is_binary_content(path)
}
/// Guesses whether the file at `path` is binary by sniffing its first bytes.
///
/// At most 8 KiB are read from the start of the file (one successful `read`
/// call) and classified by `sample_looks_binary`.
///
/// Returns `false` when the file cannot be opened or read, and for empty
/// files; otherwise returns the heuristic's verdict.
pub fn is_binary_content(path: &Path) -> bool {
    let Ok(mut file) = File::open(path) else {
        return false;
    };

    let mut buffer = [0u8; 8192];
    let bytes_read = loop {
        match file.read(&mut buffer) {
            Ok(n) => break n,
            // An interrupted read is transient and documented as retryable;
            // giving up here would misreport a binary file as text.
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(_) => return false,
        }
    };

    sample_looks_binary(&buffer[..bytes_read])
}

/// Classifies a sample taken from the start of a file as binary or text.
///
/// The sample is binary when it starts with an entry of `BINARY_SIGNATURES`,
/// contains a NUL byte, or consists of more than 10% control bytes. An empty
/// sample is never binary.
fn sample_looks_binary(sample: &[u8]) -> bool {
    if sample.is_empty() {
        return false;
    }

    if BINARY_SIGNATURES.iter().any(|sig| sample.starts_with(sig)) {
        return true;
    }

    if sample.contains(&0) {
        return true;
    }

    // Control bytes other than tab, LF, VT, FF and CR (0x09..=0x0D), plus DEL.
    let control_bytes = sample
        .iter()
        .filter(|&&byte| matches!(byte, 0x00..=0x08 | 0x0E..=0x1F | 0x7F))
        .count();

    // Strictly more than one control byte in ten marks the sample as binary.
    control_bytes * 10 > sample.len()
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file holding exactly `contents`.
    ///
    /// The handle is returned so the file stays on disk for the duration of
    /// the calling test.
    fn temp_file_with(contents: &[u8]) -> NamedTempFile {
        let mut tmp = NamedTempFile::new().unwrap();
        tmp.write_all(contents).unwrap();
        tmp
    }

    /// Plain ASCII lines must not be flagged as binary.
    #[test]
    fn test_text_file() {
        let tmp = temp_file_with(b"Hello, world!\nThis is a text file.\n");
        assert!(!is_binary(tmp.path()));
    }

    /// Content that contains NUL bytes is binary.
    #[test]
    fn test_binary_with_nulls() {
        let tmp = temp_file_with(b"\x00\x01\x02\x03");
        assert!(is_binary(tmp.path()));
    }

    /// A file starting with the PNG magic number is binary even without a
    /// telling extension.
    #[test]
    fn test_png_signature() {
        let tmp = temp_file_with(b"\x89PNG\r\n\x1a\n");
        assert!(is_binary(tmp.path()));
    }

    /// An empty file is treated as text.
    #[test]
    fn test_empty_file_is_text() {
        let tmp = temp_file_with(b"");
        assert!(!is_binary(tmp.path()));
    }

    /// A known binary extension wins regardless of the actual content.
    #[test]
    fn test_binary_extension() {
        let scratch = tempfile::tempdir().unwrap();
        let fake_png = scratch.path().join("test.png");
        std::fs::write(&fake_png, "not actually png data").unwrap();
        assert!(is_binary(&fake_png));
    }
}