#![deny(missing_docs)]
/// A Unicode byte order mark (BOM) signature that can appear at the start of a byte stream.
///
/// Each variant names the encoding form, and for the multi-byte forms the byte order, that
/// the signature announces.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Bom {
    /// UTF-8 signature: the bytes `EF BB BF`.
    Utf8,
    /// UTF-16 big-endian signature: the bytes `FE FF`.
    Utf16Be,
    /// UTF-16 little-endian signature: the bytes `FF FE`.
    Utf16Le,
    /// UTF-32 big-endian signature: the bytes `00 00 FE FF`.
    Utf32Be,
    /// UTF-32 little-endian signature: the bytes `FF FE 00 00`.
    Utf32Le,
}
// No `is_empty` companion is provided: every byte order mark is at least two bytes long,
// so such a method could only ever return `false`.
#[allow(clippy::len_without_is_empty)]
impl Bom {
    /// Returns the number of bytes this byte order mark occupies.
    ///
    /// The UTF-8 mark is 3 bytes long, both UTF-16 marks are 2 bytes long and both UTF-32
    /// marks are 4 bytes long.
    #[must_use]
    pub const fn len(self) -> usize {
        match self {
            Bom::Utf16Be | Bom::Utf16Le => 2,
            Bom::Utf8 => 3,
            Bom::Utf32Be | Bom::Utf32Le => 4,
        }
    }
}
/// Identifies the byte order mark, if any, at the very start of `b`.
///
/// Signatures are tried in a fixed order: UTF-8, UTF-32 LE, UTF-32 BE, UTF-16 BE, UTF-16 LE.
/// The four-byte UTF-32 LE signature is tested before UTF-16 LE because its first two bytes
/// (`FF FE`) are exactly the UTF-16 LE signature.
///
/// Returns `None` when `b` begins with none of these signatures, which includes any input
/// shorter than two bytes.
pub fn detect_bom(b: &[u8]) -> Option<Bom> {
    // Order matters: a longer signature must come before the shorter one it starts with.
    const SIGNATURES: [(&[u8], Bom); 5] = [
        (&[0xEF, 0xBB, 0xBF], Bom::Utf8),
        (&[0xFF, 0xFE, 0x00, 0x00], Bom::Utf32Le),
        (&[0x00, 0x00, 0xFE, 0xFF], Bom::Utf32Be),
        (&[0xFE, 0xFF], Bom::Utf16Be),
        (&[0xFF, 0xFE], Bom::Utf16Le),
    ];

    SIGNATURES
        .iter()
        .find(|&&(prefix, _)| b.starts_with(prefix))
        .map(|&(_, bom)| bom)
}
/// Returns `b` with a leading byte order mark removed.
///
/// When [`detect_bom`] recognizes a signature at the start of `b`, the returned slice begins
/// immediately after it; otherwise `b` is returned unchanged. At most one signature is
/// removed, and no data is copied.
pub fn strip_bytes(b: &[u8]) -> &[u8] {
    // The slice cannot go out of bounds: a detected mark means `b` starts with all of its bytes.
    detect_bom(b).map_or(b, |mark| &b[mark.len()..])
}
/// Returns `s` without a leading U+FEFF character (the code point used as a byte order mark).
///
/// Only a single U+FEFF at the very start is removed; occurrences elsewhere in the string,
/// including a second one directly after the first, are kept. A string that does not start
/// with U+FEFF is returned unchanged.
pub fn strip_str(s: &str) -> &str {
    match s.strip_prefix('\u{FEFF}') {
        Some(rest) => rest,
        None => s,
    }
}
/// Returns a copy of `s` with every U+FEFF character removed.
///
/// Unlike a prefix-only strip, this drops U+FEFF wherever it occurs — at the start, in the
/// middle or at the end. All other characters are kept in their original order.
pub fn strip_all(s: &str) -> String {
    s.replace('\u{FEFF}', "")
}