use std::{
fs::{self, File},
io::{self, Read},
path::Path,
};
use anyhow::Result;
use charset_normalizer_rs::{entity, utils};
use ec4rs::property::{self, Charset};
use encoding::EncoderTrap;
use log::warn;
use snafu::ensure;
use super::{PropertyHandler, errors};
use crate::files;
/// Returns the charset configured in `properties`.
///
/// Falls back to [`property::Charset::Utf8`] when the charset property cannot
/// be read from `properties`.
pub fn get_charset(properties: &ec4rs::Properties) -> property::Charset {
    properties
        .get::<property::Charset>()
        .unwrap_or(property::Charset::Utf8)
}
/// Handler that checks files against a configured charset and converts them to it.
pub struct CharsetHandler {
    /// Charset that files are compared against and re-encoded to.
    charset: property::Charset,
}
impl PropertyHandler for CharsetHandler {
    /// Verifies that the charset detected in `file_path` equals the configured one.
    ///
    /// When no charset can be detected at all, a warning is logged and the
    /// check passes.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read, if the detected charset
    /// differs from the configured one, or if the detected encoding is not one
    /// of the charsets this handler knows how to map.
    fn check(&self, file_path: &Path) -> Result<()> {
        let expected_charset = self.charset;
        match Self::get_charset_from_file(file_path)? {
            // The detected encoding maps to a known charset: it must match.
            Some(Ok(actual_charset)) => {
                ensure!(
                    actual_charset == expected_charset,
                    errors::CharsetSnafu {
                        actual_charset: actual_charset.to_string(),
                        expected_charset,
                    }
                );
            }
            // The detected encoding has no known charset counterpart, so it can
            // never equal the expected one; report it by its raw name.
            Some(Err(actual_charset)) => {
                errors::CharsetSnafu {
                    actual_charset,
                    expected_charset,
                }
                .fail()?
            }
            None => {
                warn!(
                    "Charset in file '{}' could not be determined",
                    file_path.display()
                );
            }
        }
        Ok(())
    }

    /// Converts `file_path` to the configured charset if it is detected to differ.
    ///
    /// When no charset can be detected, the file is left untouched and a
    /// warning is logged.
    ///
    /// # Errors
    ///
    /// Returns any error reported while reading, converting or writing the file.
    fn fix(&self, file_path: &Path) -> Result<()> {
        if !self.set_charset_of_file(file_path)? {
            warn!(
                "Charset in file '{}' could not be determined",
                file_path.display()
            );
        }
        Ok(())
    }
}
impl CharsetHandler {
    /// Creates a handler for the charset configured in `properties`.
    ///
    /// Returns `None` when the charset property cannot be read from
    /// `properties`.
    pub fn build(properties: &ec4rs::Properties) -> Option<CharsetHandler> {
        properties
            .get::<property::Charset>()
            .ok()
            .map(|charset| CharsetHandler { charset })
    }

    /// Detects the charset of the file at `file_path`.
    ///
    /// Returns `Ok(None)` when no charset match was found, `Ok(Some(Ok(_)))`
    /// when the detected encoding maps to a known charset, and
    /// `Ok(Some(Err(name)))` with the raw encoding name otherwise.
    ///
    /// # Errors
    ///
    /// Returns the I/O error raised while reading the file.
    fn get_charset_from_file(
        file_path: &Path,
    ) -> io::Result<Option<Result<property::Charset, String>>> {
        let charset_match = Self::get_charset_match_from_path(file_path)?;
        Ok(charset_match.map(|found| Self::get_charset_from_charset_match(&found)))
    }

    /// Re-encodes the file at `file_path` to the configured charset if needed.
    ///
    /// Returns `Ok(false)` when no charset could be detected (the file is left
    /// untouched) and `Ok(true)` otherwise, whether or not a rewrite was
    /// necessary.
    ///
    /// # Errors
    ///
    /// Returns the I/O error raised while reading or writing the file, or an
    /// [`io::ErrorKind::InvalidData`] error when the detected match carries no
    /// decoded text or the text cannot be encoded in the target charset.
    fn set_charset_of_file(&self, file_path: &Path) -> io::Result<bool> {
        let Some(charset_match) = Self::get_charset_match_from_path(file_path)? else {
            return Ok(false);
        };

        // Nothing to do when the file already uses the configured charset.
        if Self::get_charset_from_charset_match(&charset_match) == Ok(self.charset) {
            return Ok(true);
        }

        // Both failures below depend on the file's content, so they are
        // reported as errors rather than aborting the whole process.
        let input = charset_match.decoded_payload().ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!(
                    "content of file '{}' could not be decoded",
                    file_path.display()
                ),
            )
        })?;
        let target = self.charset_name();
        let mut output = utils::encode(input, &target, EncoderTrap::Strict).map_err(|err| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!(
                    "content of file '{}' cannot be encoded as {}: {}",
                    file_path.display(),
                    target,
                    err
                ),
            )
        })?;

        files::add_bom(&self.charset, &mut output);
        fs::write(file_path, output)?;
        Ok(true)
    }

    /// Reads the file at `path` and returns the best charset match for its bytes.
    ///
    /// Detection runs with the `ascii` and `macintosh` encodings excluded and
    /// with fallback disabled. Returns `Ok(None)` when the detector yields no
    /// best match.
    ///
    /// # Errors
    ///
    /// Returns the I/O error raised while opening or reading the file.
    ///
    /// # Panics
    ///
    /// Panics if the detector rejects the fixed settings built here.
    fn get_charset_match_from_path(path: &Path) -> io::Result<Option<entity::CharsetMatch>> {
        let settings = entity::NormalizerSettings {
            exclude_encodings: vec![String::from("ascii"), String::from("macintosh")],
            enable_fallback: false,
            ..Default::default()
        };

        let mut file = File::open(path)?;
        // The file size is only a capacity hint; `read_to_end` grows the
        // buffer as required.
        let file_size = file.metadata()?.len();
        let mut bytes = Vec::with_capacity(file_size as usize);
        file.read_to_end(&mut bytes)?;

        let matches =
            charset_normalizer_rs::from_bytes(&bytes, Some(settings)).expect("settings are valid");
        Ok(matches.get_best().cloned())
    }

    /// Maps a detected charset match to the corresponding charset.
    ///
    /// `utf-8` maps to [`Charset::Utf8Bom`] or [`Charset::Utf8`] depending on
    /// whether the match reports a BOM. Any encoding other than `utf-8`,
    /// `iso-8859-1`, `utf-16le` and `utf-16be` is returned as `Err` holding
    /// the encoding name.
    fn get_charset_from_charset_match(
        charset_match: &entity::CharsetMatch,
    ) -> Result<property::Charset, String> {
        match charset_match.encoding() {
            "utf-8" if charset_match.bom() => Ok(Charset::Utf8Bom),
            "utf-8" => Ok(Charset::Utf8),
            "iso-8859-1" => Ok(Charset::Latin1),
            "utf-16le" => Ok(Charset::Utf16Le),
            "utf-16be" => Ok(Charset::Utf16Be),
            unsupported => Err(String::from(unsupported)),
        }
    }

    /// Returns the name under which the configured charset is passed to the encoder.
    ///
    /// [`Charset::Utf8Bom`] is reported as plain UTF-8; every other charset
    /// uses its own name.
    fn charset_name(&self) -> String {
        let encoding = match self.charset {
            Charset::Utf8Bom => Charset::Utf8,
            other => other,
        };
        encoding.to_string()
    }
}
// Unit tests are declared in the separate `tests` module file and are only
// compiled for test builds.
#[cfg(test)]
mod tests;