use super::super::bom::detect_bom;
use super::EncodingInfo;
use alloc::string::ToString;
use core::str;
/// Guesses the text encoding of `bytes`.
///
/// Detection proceeds in order:
///
/// 1. If [`detect_bom`] reports a byte-order mark, the result is built with
///    `EncodingInfo::with_bom`, using the BOM's encoding name and a confidence
///    of `1.0`. The second element of the tuple returned by `detect_bom` is
///    ignored here.
/// 2. Otherwise, if `bytes` is valid UTF-8, the result is `"UTF-8"` with a
///    confidence of `0.95` when [`is_likely_ass_content`] matches the decoded
///    text, or `0.8` when it does not.
/// 3. Otherwise the decision is delegated to `detect_non_utf8_encoding`.
#[must_use]
pub fn detect_encoding(bytes: &[u8]) -> EncodingInfo {
    // A BOM is treated as conclusive, so it short-circuits every heuristic below.
    if let Some((bom, _)) = detect_bom(bytes) {
        let encoding_name = bom.encoding_name().to_string();
        return EncodingInfo::with_bom(encoding_name, 1.0, bom);
    }

    match str::from_utf8(bytes) {
        Ok(text) => {
            // Recognisable script markers raise the confidence in the UTF-8 guess.
            let confidence = if is_likely_ass_content(text) {
                0.95
            } else {
                0.8
            };
            EncodingInfo::new("UTF-8".to_string(), confidence)
        }
        // Not valid UTF-8: fall back to the byte-level heuristic.
        Err(_) => detect_non_utf8_encoding(bytes),
    }
}
/// Reports whether `text` looks like ASS script content (presumably the
/// Advanced SubStation Alpha subtitle format, judging by the markers checked).
///
/// Returns `true` when `text` contains at least one of the known section
/// headers (such as `[Script Info]` or `[Events]`) or field markers (such as
/// `Dialogue:` or `PlayRes`). Matching is a plain, case-sensitive substring
/// search anywhere in `text`; no parsing or line anchoring is performed.
#[must_use]
pub fn is_likely_ass_content(text: &str) -> bool {
    /// Bracketed section headers.
    const SECTION_HEADERS: [&str; 5] = [
        "[Script Info]",
        "[V4+ Styles]",
        "[Events]",
        "[Fonts]",
        "[Graphics]",
    ];
    /// Field or line prefixes.
    const FIELD_MARKERS: [&str; 5] = [
        "Dialogue:",
        "Comment:",
        "ScriptType:",
        "PlayRes",
        "Style:",
    ];

    SECTION_HEADERS
        .iter()
        .chain(FIELD_MARKERS.iter())
        .any(|&marker| text.contains(marker))
}
/// Fallback heuristic for input that is not being treated as UTF-8.
///
/// If any byte in `bytes` is `0x80` or above, the guess is `"Windows-1252"`
/// with confidence `0.6`; if every byte is below `0x80` (including the empty
/// slice), the guess is `"ASCII"` with confidence `0.9`.
fn detect_non_utf8_encoding(bytes: &[u8]) -> EncodingInfo {
    // `is_ascii` is true exactly when no byte has its high bit set.
    let (label, confidence) = if bytes.is_ascii() {
        ("ASCII", 0.9)
    } else {
        ("Windows-1252", 0.6)
    };
    EncodingInfo::new(label.to_string(), confidence)
}