use std::{
fs::{self, File},
io::{self, Read},
path::{Path, PathBuf},
};
use ec4rs::property::{self, Charset};
use encoding_rs_io::DecodeReaderBytesBuilder;
use ignore::WalkBuilder;
use itertools::Itertools;
use crate::cli::IgnoreArgs;
/// Collects the files that should be processed for the given target paths.
///
/// Every path in `targets` is walked with [`ignore::WalkBuilder`]. Only entries that are
/// files are yielded, and each file is yielded at most once even if it is reachable through
/// several targets.
///
/// The walk is configured from `ignore_args`:
/// - `hidden`: hidden entries are visited as well. If `git_settings` is also set,
///   directories named `.git` are still skipped.
/// - `git_settings`: when unset, `.gitignore` files, git exclude files and the global git
///   ignore file are not consulted.
/// - `ignore_file`: an optional file name that is registered as a custom ignore file.
///
/// `.ignore` files are never consulted.
///
/// # Errors
///
/// Returns an error if `targets` is empty. Errors that occur while walking are not returned
/// here; they are yielded as `Err` items of the iterator.
pub fn get_target_files(
    targets: &[PathBuf],
    ignore_args: &IgnoreArgs,
) -> anyhow::Result<impl Iterator<Item = Result<PathBuf, ignore::Error>>> {
    // `WalkBuilder` needs one path up front; the remaining ones are added afterwards.
    let (first_target, other_targets) = targets
        .split_first()
        .ok_or_else(|| anyhow::anyhow!("at least one target necessary"))?;

    let mut builder = WalkBuilder::new(first_target);
    for target in other_targets {
        builder.add(target);
    }

    if ignore_args.hidden {
        builder.hidden(false);
        if ignore_args.git_settings {
            // With hidden entries enabled the walk would descend into `.git`; prune it.
            builder.filter_entry(|entry| !(entry.path().is_dir() && entry.file_name() == ".git"));
        }
    }

    if !ignore_args.git_settings {
        builder
            .git_ignore(false)
            .git_exclude(false)
            .git_global(false);
    }

    builder.ignore(false);
    if let Some(file_name) = ignore_args.ignore_file.as_ref() {
        builder.add_custom_ignore_filename(file_name);
    }

    Ok(builder
        .build()
        .map(|entry| entry.map(|f| f.into_path()))
        // Walk errors are passed through; successful entries are kept only if they are files.
        .filter(|path| path.as_ref().map_or(true, |p| p.is_file()))
        .unique_by(|entry| match entry {
            // Deduplicate on the canonical path so the same file reached via different
            // targets is reported once. The key is a `PathBuf` rather than a `String` so
            // that paths which are not valid UTF-8 stay distinct, and a file that cannot be
            // canonicalized is keyed by its own path instead of by the error message.
            Ok(path) => Ok(path.canonicalize().unwrap_or_else(|_| path.clone())),
            Err(err) => Err(err.to_string()),
        }))
}
/// Reads the file at `file_path` and decodes its bytes from `charset` into a `String`.
///
/// # Errors
///
/// Returns the I/O error if the file cannot be opened or read.
pub fn read_file(file_path: &Path, charset: &property::Charset) -> io::Result<String> {
    let file = File::open(file_path)?;
    let encoding = charset_as_encoding(charset);

    // The decoding reader transcodes the raw bytes while they are being read.
    let mut decoder = DecodeReaderBytesBuilder::new()
        .encoding(Some(encoding))
        .build(file);

    let mut text = String::new();
    decoder.read_to_string(&mut text)?;
    Ok(text)
}
/// Encodes `content` in `charset` and writes it to `file_path`, replacing whatever the file
/// contained before.
///
/// For charsets that carry a byte order mark, the mark is prepended via [`add_bom`].
///
/// # Errors
///
/// - [`io::ErrorKind::InvalidData`] if `charset` is `Latin1` and `content` contains a
///   character that the target encoding cannot represent. The file is not touched in that
///   case.
/// - Any error returned by [`fs::write`].
pub fn overwrite_file(
    file_path: &Path,
    charset: &property::Charset,
    content: &str,
) -> io::Result<()> {
    let mut output: Vec<u8> = match charset {
        // A `&str` is already UTF-8.
        Charset::Utf8 | Charset::Utf8Bom => content.as_bytes().to_vec(),
        Charset::Latin1 => {
            let target_encoding = charset_as_encoding(charset);
            let (encoded, actual_encoding, had_unmappable) = target_encoding.encode(content);
            debug_assert_eq!(
                target_encoding, actual_encoding,
                "If the encoding in encoding_rs cannot be produced with encode(), \
                the bytes have to be created differently."
            );
            // `encode` substitutes unmappable characters with HTML numeric character
            // references. Writing those would silently corrupt the file, so fail instead.
            if had_unmappable {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!(
                        "content for {} contains characters that cannot be represented in {}",
                        file_path.display(),
                        target_encoding.name()
                    ),
                ));
            }
            encoded.into_owned()
        }
        // UTF-16 is serialized by hand from the UTF-16 code units, in the requested byte order.
        Charset::Utf16Le => content
            .encode_utf16()
            .flat_map(u16::to_le_bytes)
            .collect(),
        Charset::Utf16Be => content
            .encode_utf16()
            .flat_map(u16::to_be_bytes)
            .collect(),
    };
    add_bom(charset, &mut output);
    fs::write(file_path, output)
}
/// Inserts the byte order mark belonging to `charset` at the start of `file_content`.
///
/// `Utf8` and `Latin1` have no byte order mark; for them `file_content` is left unchanged.
pub fn add_bom(charset: &property::Charset, file_content: &mut Vec<u8>) {
    const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
    const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
    const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];

    let marker = match charset {
        Charset::Utf8 | Charset::Latin1 => return,
        Charset::Utf8Bom => UTF8_BOM,
        Charset::Utf16Le => UTF16_LE_BOM,
        Charset::Utf16Be => UTF16_BE_BOM,
    };

    // An empty range at index 0 removes nothing and inserts the marker in front.
    file_content.splice(0..0, marker.iter().copied());
}
fn charset_as_encoding(charset: &property::Charset) -> &'static encoding_rs::Encoding {
match charset {
Charset::Utf8 | Charset::Utf8Bom => encoding_rs::UTF_8,
Charset::Latin1 => encoding_rs::WINDOWS_1252,
Charset::Utf16Le => encoding_rs::UTF_16LE,
Charset::Utf16Be => encoding_rs::UTF_16BE,
}
}
// Unit tests live in the out-of-line `tests` module, which is only compiled for test builds.
#[cfg(test)]
mod tests;