use std::collections::HashSet;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;
use std::sync::OnceLock;
use crate::probe::file_type::BINARY_MAGIC_BYTES;
/// Lowercase file extensions (without the leading dot) that are treated as
/// binary without inspecting the file contents.
const BINARY_EXTENSIONS_RAW: &[&str] = &[
    // Native executables, libraries and object code.
    "exe", "dll", "so", "dylib", "a", "lib", "o", "obj",
    // JVM artifacts and WebAssembly modules.
    "class", "jar", "war", "ear", "wasm",
    // Archives and compressed streams.
    "zip", "tar", "gz", "bz2", "xz", "zst", "7z", "rar",
    // Raster images and icons.
    "png", "jpg", "jpeg", "gif", "bmp", "ico", "webp",
    // Audio and video containers.
    "mp3", "mp4", "avi", "mkv", "mov", "flac", "wav", "ogg",
    // Office-style documents.
    "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
    // Fonts.
    "ttf", "otf", "woff", "woff2", "eot",
    // Database files.
    "db", "sqlite", "sqlite3",
    // Compiled Python bytecode.
    "pyc", "pyo",
];

/// Returns the process-wide lookup set built from [`BINARY_EXTENSIONS_RAW`].
///
/// The set is constructed on first use and the same instance is returned by
/// every later call.
fn binary_extensions() -> &'static HashSet<&'static str> {
    static LOOKUP: OnceLock<HashSet<&'static str>> = OnceLock::new();
    LOOKUP.get_or_init(|| {
        let mut known = HashSet::with_capacity(BINARY_EXTENSIONS_RAW.len());
        known.extend(BINARY_EXTENSIONS_RAW.iter().copied());
        known
    })
}
pub fn is_binary(path: &Path) -> std::io::Result<bool> {
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
let lower = ext.to_ascii_lowercase();
if binary_extensions().contains(lower.as_str()) {
return Ok(true);
}
if let Some(stem) = path.file_stem().and_then(|s| s.to_str())
&& stem.to_ascii_lowercase().ends_with(".min")
&& (lower == "js" || lower == "css")
{
return Ok(true);
}
}
let mut file = File::open(path)?;
is_binary_file(path, &mut file)
}
/// Sniffs the leading bytes of `file` to decide whether it holds binary data.
///
/// The file is rewound to offset 0 and up to 512 bytes are sampled. The sample
/// is classified as binary when any of the following holds:
///
/// * it starts with one of the signatures in `BINARY_MAGIC_BYTES`;
/// * it carries a tar header magic at offset 257, either the POSIX form
///   `ustar\0` or the GNU form `ustar  \0`;
/// * more than 30% of its bytes are control characters outside the range
///   7..=14 and other than ESC (27).
///
/// An empty file is reported as not binary. `_path` is accepted for signature
/// compatibility and is not used. The file cursor is left just past the
/// sampled bytes.
///
/// # Errors
/// Returns any I/O error raised while seeking or reading `file`.
pub(crate) fn is_binary_file(_path: &Path, file: &mut File) -> std::io::Result<bool> {
    const SNIFF_LEN: usize = 512;
    const TAR_MAGIC_OFFSET: usize = 257;
    const NON_TEXT_THRESHOLD: f64 = 0.30;

    file.seek(SeekFrom::Start(0))?;

    // A single `read` may legally return fewer bytes than are available (or
    // fail with `Interrupted`); `take` + `read_to_end` keeps reading until the
    // sample is full or EOF is reached, so the checks below see a stable prefix.
    let mut buf = Vec::with_capacity(SNIFF_LEN);
    file.by_ref()
        .take(SNIFF_LEN as u64)
        .read_to_end(&mut buf)?;
    if buf.is_empty() {
        return Ok(false);
    }

    // Known binary signatures at the very start of the file.
    for magic in BINARY_MAGIC_BYTES {
        if buf
            .get(..magic.len())
            .is_some_and(|head| *head == **magic)
        {
            return Ok(true);
        }
    }

    // Tar archives have no leading signature; the magic lives inside the first
    // header block. Accept both the POSIX and the GNU spelling.
    let has_tar_magic = buf
        .get(TAR_MAGIC_OFFSET..)
        .is_some_and(|tail| tail.starts_with(b"ustar\0") || tail.starts_with(b"ustar  \0"));
    if has_tar_magic {
        return Ok(true);
    }

    // Control bytes that are not counted as text: everything below 32 except
    // 7..=14 and ESC (27).
    let non_text = buf
        .iter()
        .filter(|&&b| matches!(b, 0..=6 | 15..=26 | 28..=31))
        .count();

    // Both counts are at most SNIFF_LEN, so the conversions to f64 are exact.
    #[allow(clippy::cast_precision_loss)]
    let ratio = non_text as f64 / buf.len() as f64;
    Ok(ratio > NON_TEXT_THRESHOLD)
}