1use std::fs::File;
8use std::io::{Read, Result as IoResult};
9use std::path::Path;
10
11use crate::config::buffers::DEFAULT_READ_BUFFER;
12
/// Size, in bytes, of the buffer [`is_binary_file`] reads a file's leading
/// bytes into; it mirrors the crate-wide default read buffer size.
const SAMPLE_SIZE: usize = DEFAULT_READ_BUFFER;

/// Percentage of bytes rejected by `is_printable_ascii` that a NUL-free sample
/// must strictly exceed before [`is_binary_bytes`] reports it as binary.
const NON_PRINTABLE_THRESHOLD_PERCENT: usize = 30;
20
21#[must_use]
28pub fn is_binary_bytes(data: &[u8]) -> bool {
29 if data.is_empty() {
30 return false;
31 }
32
33 if data.contains(&0) {
34 return true;
35 }
36
37 let non_printable = data
38 .iter()
39 .filter(|&&byte| !is_printable_ascii(byte))
40 .count();
41
42 non_printable.saturating_mul(100) > data.len().saturating_mul(NON_PRINTABLE_THRESHOLD_PERCENT)
43}
44
45pub fn is_binary_file(path: &Path) -> IoResult<bool> {
51 let mut file = File::open(path)?;
52 let mut buffer = vec![0u8; SAMPLE_SIZE];
53 let read = file.read(&mut buffer)?;
54 Ok(is_binary_bytes(&buffer[..read]))
55}
56
/// Reports whether `byte` is treated as ordinary text by the binary heuristic.
///
/// Accepted bytes are the ASCII whitespace controls `0x09..=0x0D` (tab, line
/// feed, vertical tab, form feed, carriage return) and the printable range
/// `0x20..=0x7E` (space through `~`). Everything else, including all bytes at
/// or above `0x80`, is rejected.
fn is_printable_ascii(byte: u8) -> bool {
    match byte {
        // Tab, LF, VT, FF and CR form one contiguous run.
        b'\t'..=b'\r' => true,
        // Space through tilde.
        b' '..=b'~' => true,
        _ => false,
    }
}
60
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file containing `contents`, flushed so that a
    /// separate open of the same path observes the data.
    fn temp_file_with(contents: &[u8]) -> NamedTempFile {
        let mut temp = NamedTempFile::new().unwrap();
        temp.write_all(contents).unwrap();
        temp.flush().unwrap();
        temp
    }

    #[test]
    fn detects_ascii_text_as_non_binary() {
        let data = b"hello world\nthis is text";
        assert!(!is_binary_bytes(data));
    }

    #[test]
    fn detects_null_byte_as_binary() {
        let data = b"hello\0world";
        assert!(is_binary_bytes(data));
    }

    #[test]
    fn detects_high_ratio_non_printable_as_binary() {
        let data = [0x01u8; 100];
        assert!(is_binary_bytes(&data));
    }

    #[test]
    fn empty_input_is_not_binary() {
        assert!(!is_binary_bytes(b""));
    }

    #[test]
    fn whitespace_controls_count_as_text() {
        // Tab, LF, VT, FF and CR are all accepted by `is_printable_ascii`.
        assert!(!is_binary_bytes(b"\t\n\x0B\x0C\r"));
    }

    #[test]
    fn threshold_is_strictly_greater_than() {
        // Exactly the threshold percentage of non-printable bytes is still text.
        let mut data = [b'a'; 100];
        for byte in &mut data[..NON_PRINTABLE_THRESHOLD_PERCENT] {
            *byte = 0x01;
        }
        assert!(!is_binary_bytes(&data));

        // One more non-printable byte crosses the threshold.
        data[NON_PRINTABLE_THRESHOLD_PERCENT] = 0x01;
        assert!(is_binary_bytes(&data));
    }

    #[test]
    fn file_detection_respects_null_bytes() {
        let temp = temp_file_with(b"text before\0text after");
        assert!(is_binary_file(temp.path()).unwrap());
    }

    #[test]
    fn file_detection_handles_text() {
        let temp = temp_file_with(b"plain ascii text\n");
        assert!(!is_binary_file(temp.path()).unwrap());
    }

    #[test]
    fn file_detection_treats_empty_file_as_text() {
        let temp = temp_file_with(b"");
        assert!(!is_binary_file(temp.path()).unwrap());
    }

    #[test]
    fn file_detection_reports_missing_file() {
        // Dropping the handle deletes the file, leaving a path that no longer exists.
        let temp = NamedTempFile::new().unwrap();
        let path = temp.path().to_path_buf();
        drop(temp);
        assert!(is_binary_file(&path).is_err());
    }
}