use content_inspector::ContentType;
use std::{fs::File, io::Read, path::Path, str};
/// Size, in bytes, of the buffer used to sample the beginning of a file when
/// deciding whether it looks like text.
const READ_BUFFER_SIZE: usize = 1024;
/// Heuristically decides whether the file at `path` contains UTF-8 text.
///
/// Up to `READ_BUFFER_SIZE` bytes are sampled from the start of the file and
/// classified with `content_inspector::inspect`:
///
/// * a sample classified as `UTF_8_BOM` is accepted as text;
/// * a sample classified as `UTF_8` is accepted only if it is valid UTF-8.
///   When the sample fills the whole buffer, a multi-byte character that is
///   cut off by the end of the sample is tolerated, because the remainder of
///   that character may simply lie beyond the sampled region;
/// * every other classification (including `BINARY`) is rejected.
///
/// An empty file yields an empty sample, which is handled like any other.
///
/// # Errors
///
/// Returns the underlying [`std::io::Error`] if the file cannot be opened or
/// read. Reads that fail with `ErrorKind::Interrupted` are retried.
pub(crate) fn is_likely_text(path: &Path) -> std::io::Result<bool> {
    let mut file = File::open(path)?;
    let mut buffer = [0u8; READ_BUFFER_SIZE];

    // A single `read` call may return fewer bytes than are available, so keep
    // reading until the buffer is full or end-of-file is reached.
    let mut filled = 0;
    while filled < buffer.len() {
        match file.read(&mut buffer[filled..]) {
            Ok(0) => break,
            Ok(count) => filled += count,
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        }
    }

    let sample = &buffer[..filled];
    // A completely filled buffer means the file may continue past the sample.
    let sample_may_be_truncated = filled == buffer.len();

    Ok(match content_inspector::inspect(sample) {
        ContentType::UTF_8_BOM => true,
        ContentType::UTF_8 => match str::from_utf8(sample) {
            Ok(_) => true,
            // `error_len() == None` signals that the input ended in the middle
            // of a multi-byte sequence. That is only an artifact of sampling
            // when the buffer was filled; in a fully read file it is a genuine
            // encoding error.
            Err(err) => sample_may_be_truncated && err.error_len().is_none(),
        },
        // `BINARY` and every remaining content type are treated as non-text.
        _ => false,
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::{fs, io::Write};
    use tempfile::tempdir;

    /// Writes `contents` to a file called `name` inside a fresh temporary
    /// directory, asserts that `is_likely_text` returns `expect_text` for it,
    /// and then removes the directory.
    fn check_classification(
        name: &str,
        contents: &[u8],
        expect_text: bool,
    ) -> std::io::Result<()> {
        let scratch = tempdir()?;
        let target = scratch.path().join(name);
        fs::write(&target, contents)?;
        let verdict = is_likely_text(&target)?;
        assert_eq!(verdict, expect_text);
        scratch.close()?;
        Ok(())
    }

    /// Plain UTF-8 content is reported as text.
    #[test]
    fn test_detect_utf8_text() -> std::io::Result<()> {
        check_classification("utf8.txt", b"This is plain UTF-8 text.", true)
    }

    /// Content that starts with the UTF-8 byte-order mark is reported as text.
    #[test]
    fn test_detect_utf8_bom_text() -> std::io::Result<()> {
        let scratch = tempdir()?;
        let target = scratch.path().join("utf8_bom.txt");
        {
            // Scoped so the handle is closed before the file is inspected.
            let mut out = fs::File::create(&target)?;
            out.write_all(&[0xEF, 0xBB, 0xBF])?;
            out.write_all(b"Text with UTF-8 BOM.")?;
        }
        assert!(is_likely_text(&target)?);
        scratch.close()?;
        Ok(())
    }

    /// An embedded NUL byte marks the content as non-text.
    #[test]
    fn test_detect_binary_null_byte() -> std::io::Result<()> {
        check_classification(
            "binary_null.bin",
            b"Binary data with a \0 null byte.",
            false,
        )
    }

    /// Bytes that do not form valid UTF-8 are reported as non-text.
    #[test]
    fn test_detect_binary_high_bytes() -> std::io::Result<()> {
        check_classification(
            "binary_high.bin",
            &[0x01, 0x02, 0x03, 0xFF, 0xFE, 0xFD],
            false,
        )
    }

    /// A zero-length file is reported as text.
    #[test]
    fn test_detect_empty_file() -> std::io::Result<()> {
        check_classification("empty.txt", b"", true)
    }

    /// The eight-byte PNG signature is reported as non-text.
    #[test]
    fn test_detect_png_file() -> std::io::Result<()> {
        check_classification(
            "image.png",
            &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A],
            false,
        )
    }

    /// A path that does not exist surfaces as an I/O error rather than a verdict.
    #[test]
    fn test_detect_non_existent_file() {
        let missing = Path::new("non_existent_file_for_text_detection.txt");
        assert!(is_likely_text(missing).is_err());
    }

    /// Pure ASCII content is reported as text.
    #[test]
    fn test_detect_ascii_text() -> std::io::Result<()> {
        check_classification("ascii.txt", b"Just plain ASCII.", true)
    }

    /// A lone continuation byte (0x80) in otherwise ASCII content is invalid
    /// UTF-8 and is reported as non-text.
    #[test]
    fn test_detect_invalid_utf8_sequence() -> std::io::Result<()> {
        check_classification(
            "invalid_utf8.txt",
            &[0x48, 0x65, 0x6c, 0x6c, 0x80, 0x6f],
            false,
        )
    }
}