use std::{
fs,
io::ErrorKind::*,
path::{Path, PathBuf},
};
use rustc_hash::FxHashSet;
use walkdir::WalkDir;
/// Read the full contents of `lhs_path` and `rhs_path`.
///
/// When exactly one side cannot be read, that side is treated as an
/// empty file if either:
///
/// * `missing_as_empty` is set and the failure was `NotFound`, or
/// * the unreadable path is literally `/dev/null`.
///
/// In every other failure case (including both sides failing), an
/// error is printed to stderr for each unreadable path and the process
/// exits with status 1.
pub fn read_files_or_die(
    lhs_path: &Path,
    rhs_path: &Path,
    missing_as_empty: bool,
) -> (Vec<u8>, Vec<u8>) {
    let dev_null = Path::new("/dev/null");

    // Decides whether a failed read of `path` may stand in for an
    // empty file instead of being fatal.
    let counts_as_empty = |path: &Path, err: &std::io::Error| {
        (missing_as_empty && err.kind() == NotFound) || path == dev_null
    };

    match (fs::read(lhs_path), fs::read(rhs_path)) {
        (Ok(lhs_src), Ok(rhs_src)) => (lhs_src, rhs_src),
        (Ok(lhs_src), Err(err)) if counts_as_empty(rhs_path, &err) => (lhs_src, Vec::new()),
        (Err(err), Ok(rhs_src)) if counts_as_empty(lhs_path, &err) => (Vec::new(), rhs_src),
        (lhs_outcome, rhs_outcome) => {
            // Report every side that failed before giving up, so the
            // user sees both problems at once.
            if let Some(err) = lhs_outcome.as_ref().err() {
                eprint_read_error(lhs_path, err);
            }
            if let Some(err) = rhs_outcome.as_ref().err() {
                eprint_read_error(rhs_path, err);
            }
            std::process::exit(1)
        }
    }
}
/// Print a one-line description of a failed read of `path` to stderr.
///
/// `NotFound` and `PermissionDenied` get dedicated messages; any other
/// error is reported together with the debug form of its kind.
fn eprint_read_error(path: &Path, e: &std::io::Error) {
    use std::io::ErrorKind;

    let shown = path.display();
    let kind = e.kind();
    let message = match kind {
        ErrorKind::NotFound => format!("No such file: {}", shown),
        ErrorKind::PermissionDenied => {
            format!("Permission denied when reading file: {}", shown)
        }
        _ => format!("Could not read file: {} (error {:?})", shown, kind),
    };
    eprintln!("{}", message);
}
/// Read the full contents of `path`.
///
/// If the read fails, an error is printed to stderr and the process
/// exits with status 1.
pub fn read_or_die(path: &Path) -> Vec<u8> {
    fs::read(path).unwrap_or_else(|err| {
        eprint_read_error(path, &err);
        std::process::exit(1)
    })
}
/// Decode `bytes` as UTF-16, replacing anything invalid with U+FFFD.
///
/// Byte order is big endian only when `bytes` starts with the
/// big-endian byte order mark (`FE FF`); a little-endian mark or no
/// mark at all both decode as little endian. The mark itself is not
/// stripped, so it appears in the result as U+FEFF.
///
/// Unpaired surrogates become U+FFFD. If `bytes` has an odd length,
/// the dangling final byte cannot form a code unit, so it is also
/// represented by a single trailing U+FFFD rather than being silently
/// discarded.
fn utf16_from_bytes_lossy(bytes: &[u8]) -> String {
    // Assume little endian unless a big-endian BOM says otherwise.
    let is_big_endian = matches!(bytes, [0xfe, 0xff, ..]);

    let pairs = bytes.chunks_exact(2);
    // Must be checked before `pairs` is consumed by the adaptor below.
    let has_dangling_byte = !pairs.remainder().is_empty();

    let code_units = pairs.map(|pair| {
        let unit = [pair[0], pair[1]];
        if is_big_endian {
            u16::from_be_bytes(unit)
        } else {
            u16::from_le_bytes(unit)
        }
    });

    let mut text: String = std::char::decode_utf16(code_units)
        .map(|decoded| decoded.unwrap_or(std::char::REPLACEMENT_CHARACTER))
        .collect();

    if has_dangling_byte {
        text.push(std::char::REPLACEMENT_CHARACTER);
    }
    text
}
/// The result of guessing whether some bytes are text or binary.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProbableFileKind {
    /// The bytes look like text; holds the decoded string.
    Text(String),
    /// The bytes do not look like text.
    Binary,
}
/// Guess whether `bytes` is text, and decode it if so.
///
/// The steps, in order:
///
/// 1. Sniff the MIME type from at most the first 1000 bytes. PDF, zip,
///    and any image/audio/video/font type is reported as binary.
/// 2. Decode the whole input as lossy UTF-8. If at most 10 of the
///    first 1000 characters are U+FFFD or NUL, return that text.
/// 3. Decode the whole input as lossy UTF-16. If at most 5 of the
///    first 1000 characters are U+FFFD or NUL, return that text.
/// 4. Otherwise report binary.
pub fn guess_content(bytes: &[u8]) -> ProbableFileKind {
    // Only the leading bytes are handed to the magic-number sniffer.
    let sniff_len = bytes.len().min(1000);
    let mime = tree_magic_mini::from_u8(&bytes[..sniff_len]);

    let is_known_binary_mime = matches!(mime, "application/pdf" | "application/zip")
        || ["image/", "audio/", "video/", "font/"]
            .iter()
            .any(|prefix| mime.starts_with(*prefix));
    if is_known_binary_mime {
        return ProbableFileKind::Binary;
    }

    // Number of characters near the start of `text` that suggest the
    // decoding was wrong: replacement characters and NULs.
    let count_suspicious = |text: &str| {
        text.chars()
            .take(1000)
            .filter(|&c| c == std::char::REPLACEMENT_CHARACTER || c == '\0')
            .count()
    };

    let utf8_string = String::from_utf8_lossy(bytes).into_owned();
    if count_suspicious(&utf8_string) <= 10 {
        return ProbableFileKind::Text(utf8_string);
    }

    let utf16_string = utf16_from_bytes_lossy(bytes);
    if count_suspicious(&utf16_string) <= 5 {
        ProbableFileKind::Text(utf16_string)
    } else {
        ProbableFileKind::Binary
    }
}
/// Walk `dir` recursively and collect every non-directory path found,
/// expressed relative to `dir`.
///
/// Entries that the walk reports as errors are skipped.
fn relative_file_paths_in_dir(dir: &Path) -> Vec<PathBuf> {
    let mut relative_paths = Vec::new();

    for walked in WalkDir::new(dir) {
        let entry = match walked {
            Ok(entry) => entry,
            // Best effort: ignore entries the walk could not read.
            Err(_) => continue,
        };

        let full_path = entry.into_path();
        if full_path.is_dir() {
            continue;
        }

        // Every path yielded by the walk is expected to start with
        // `dir`, so stripping it should not fail.
        let relative = full_path.strip_prefix(dir).unwrap();
        relative_paths.push(relative.to_path_buf());
    }

    relative_paths
}
pub fn relative_paths_in_either(lhs_dir: &Path, rhs_dir: &Path) -> Vec<PathBuf> {
let lhs_paths = relative_file_paths_in_dir(lhs_dir);
let rhs_paths = relative_file_paths_in_dir(rhs_dir);
let mut seen = FxHashSet::default();
let mut res: Vec<PathBuf> = vec![];
let mut i = 0;
let mut j = 0;
loop {
match (lhs_paths.get(i), rhs_paths.get(j)) {
(Some(lhs_path), Some(rhs_path)) if lhs_path == rhs_path => {
if !seen.contains(lhs_path) {
res.push(lhs_path.clone());
seen.insert(lhs_path);
}
i += 1;
j += 1;
}
(Some(lhs_path), Some(rhs_path)) => {
if seen.contains(lhs_path) {
i += 1;
} else if seen.contains(rhs_path) {
j += 1;
} else {
res.push(lhs_path.clone());
res.push(rhs_path.clone());
seen.insert(lhs_path);
seen.insert(rhs_path);
i += 1;
j += 1;
}
}
_ => break,
}
}
res.extend(lhs_paths.into_iter().skip(i));
res.extend(rhs_paths.into_iter().skip(j));
res
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Short ASCII input should be classified as text.
    #[test]
    fn test_plaintext_is_text() {
        let kind = guess_content(b"hello world");
        assert!(matches!(kind, ProbableFileKind::Text(_)));
    }

    /// Input consisting solely of NUL bytes should be classified as
    /// binary.
    #[test]
    fn test_null_bytes_are_binary() {
        let zeros = vec![0u8; 1000];
        let kind = guess_content(&zeros);
        assert!(matches!(kind, ProbableFileKind::Binary));
    }
}