1use std::io::{Read as _, Seek, SeekFrom};
4use std::path::Path;
5
/// Default prefix length in bytes (8 KiB) that [`read_prefix`] asks
/// [`read_prefix_n`] for.
pub const TEXT_INSPECT_LEN: usize = 8 << 10;
8
9pub fn read_prefix(path: &Path) -> std::io::Result<Vec<u8>> {
12 read_prefix_n(path, TEXT_INSPECT_LEN)
13}
14
/// Reads at most the first `n` bytes of the file at `path`.
///
/// The returned vector is shorter than `n` only when the file itself holds
/// fewer than `n` bytes. Passing `n == 0` yields an empty vector.
///
/// # Errors
///
/// Returns the underlying [`std::io::Error`] if the file cannot be opened or
/// read.
pub fn read_prefix_n(path: &Path, n: usize) -> std::io::Result<Vec<u8>> {
    // Pre-size the buffer for small requests only, so a huge `n` against a
    // tiny file does not allocate (and zero) `n` bytes up front.
    const MAX_PREALLOC: usize = 64 * 1024;

    let file = std::fs::File::open(path)?;
    // Saturate instead of truncating should `usize` ever be wider than `u64`.
    let limit = u64::try_from(n).unwrap_or(u64::MAX);
    let mut buf = Vec::with_capacity(n.min(MAX_PREALLOC));
    // A single `read` call is allowed to return fewer bytes than requested
    // even when more are available. `take` + `read_to_end` keeps reading
    // until either `n` bytes have arrived or end-of-file is reached.
    file.take(limit).read_to_end(&mut buf)?;
    Ok(buf)
}
27
/// Reads at most the last `n` bytes of the file at `path`.
///
/// The file length is measured by seeking to the end; the tail that is read
/// is the smaller of that length and `n`, so a file shorter than `n` is
/// returned whole.
///
/// # Errors
///
/// Returns the underlying [`std::io::Error`] if opening, seeking, or reading
/// fails. The tail is fetched with `read_exact`, so coming up short of the
/// measured length is reported as an error rather than a partial result.
pub fn read_suffix_n(path: &Path, n: usize) -> std::io::Result<Vec<u8>> {
    let mut handle = std::fs::File::open(path)?;
    let total = handle.seek(SeekFrom::End(0))?;

    // A length that does not fit in `usize` is certainly larger than `n`.
    let tail_len = match usize::try_from(total) {
        Ok(size) if size < n => size,
        _ => n,
    };

    // `tail_len` never exceeds `total`, so the subtraction cannot underflow.
    let start = total - tail_len as u64;
    handle.seek(SeekFrom::Start(start))?;

    let mut tail = vec![0u8; tail_len];
    handle.read_exact(&mut tail)?;
    Ok(tail)
}
46
/// Coarse content category produced by [`classify_bytes`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// Anything the inspector did not report as binary.
    Text,
    /// Content the inspector reported as binary.
    Binary,
}
54
55pub fn classify_bytes(bytes: &[u8]) -> Classification {
56 match content_inspector::inspect(bytes) {
57 content_inspector::ContentType::BINARY => Classification::Binary,
58 _ => Classification::Text,
59 }
60}
61
/// Size cap in bytes (256 MiB) that [`read_capped`] applies before reading a
/// file into memory.
pub const MAX_ANALYZE_BYTES: u64 = 256 << 20;
70
/// Failure modes of the size-capped read performed by [`read_capped`].
#[derive(Debug)]
pub enum ReadCapError {
    /// The file exceeds the cap; carries the byte length that was observed.
    TooLarge(u64),
    /// Inspecting or reading the file failed with an I/O error.
    Io(std::io::Error),
}

impl std::fmt::Display for ReadCapError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::TooLarge(len) => write!(f, "file is too large to read ({len} bytes)"),
            Self::Io(err) => write!(f, "I/O error: {err}"),
        }
    }
}

// Implementing `Error` lets callers use `?` into `Box<dyn Error>` and walk
// the cause chain down to the original `std::io::Error`.
impl std::error::Error for ReadCapError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::TooLarge(_) => None,
            Self::Io(err) => Some(err),
        }
    }
}
80
81fn read_capped_with(path: &Path, max: u64) -> Result<Vec<u8>, ReadCapError> {
85 match std::fs::metadata(path) {
86 Ok(m) if m.len() > max => Err(ReadCapError::TooLarge(m.len())),
87 Ok(_) => std::fs::read(path).map_err(ReadCapError::Io),
88 Err(e) => Err(ReadCapError::Io(e)),
89 }
90}
91
92pub fn read_capped(path: &Path) -> Result<Vec<u8>, ReadCapError> {
96 read_capped_with(path, MAX_ANALYZE_BYTES)
97}
98
#[cfg(test)]
mod tests {
    use super::*;

    /// A small file below the default cap comes back byte-for-byte.
    #[test]
    fn read_capped_returns_bytes_under_cap() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("f");
        std::fs::write(&file, b"hello").unwrap();

        if let Ok(bytes) = read_capped(&file) {
            assert_eq!(bytes, b"hello");
        } else {
            panic!("expected Bytes under the cap");
        }
    }

    /// A file longer than the cap is reported as `TooLarge` with its length.
    #[test]
    fn read_capped_with_rejects_over_cap_without_reading() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("big");
        std::fs::write(&file, b"0123456789").unwrap();

        let outcome = read_capped_with(&file, 4);
        if let Err(ReadCapError::TooLarge(reported)) = outcome {
            assert_eq!(reported, 10);
        } else {
            panic!("a 10-byte file must exceed a 4-byte cap");
        }
    }

    /// A path that does not exist surfaces as the `Io` variant.
    #[test]
    fn read_capped_missing_path_is_io_error() {
        let scratch = tempfile::tempdir().unwrap();
        let absent = scratch.path().join("nope");

        let outcome = read_capped(&absent);
        assert!(
            matches!(outcome, Err(ReadCapError::Io(_))),
            "a missing path must be an Io error"
        );
    }
}