#[cfg(not(feature = "std"))]
extern crate alloc;
/// The byte order mark (BOM) flavours recognised by this module.
///
/// Each variant corresponds to one fixed byte signature; see
/// [`BomType::signature`] for the exact bytes.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum BomType {
    /// UTF-8 signature `EF BB BF`.
    Utf8,
    /// UTF-16 little-endian signature `FF FE`.
    Utf16Le,
    /// UTF-16 big-endian signature `FE FF`.
    Utf16Be,
    /// UTF-32 little-endian signature `FF FE 00 00`.
    Utf32Le,
    /// UTF-32 big-endian signature `00 00 FE FF`.
    Utf32Be,
}

impl BomType {
    /// Returns the exact byte sequence of this byte order mark.
    #[must_use]
    pub const fn signature(self) -> &'static [u8] {
        match self {
            Self::Utf8 => b"\xEF\xBB\xBF",
            Self::Utf16Le => b"\xFF\xFE",
            Self::Utf16Be => b"\xFE\xFF",
            Self::Utf32Le => b"\xFF\xFE\x00\x00",
            Self::Utf32Be => b"\x00\x00\xFE\xFF",
        }
    }

    /// Returns the length of the signature in bytes.
    #[must_use]
    pub const fn len(self) -> usize {
        let signature = self.signature();
        signature.len()
    }

    /// Returns whether the signature is zero bytes long.
    ///
    /// Every signature defined here has at least two bytes, so this is always
    /// `false`; the method accompanies [`BomType::len`].
    #[must_use]
    pub const fn is_empty(self) -> bool {
        self.signature().is_empty()
    }

    /// Returns the name of the encoding this byte order mark announces,
    /// e.g. `"UTF-16LE"`.
    #[must_use]
    pub const fn encoding_name(self) -> &'static str {
        match self {
            Self::Utf8 => "UTF-8",
            Self::Utf16Le => "UTF-16LE",
            Self::Utf16Be => "UTF-16BE",
            Self::Utf32Le => "UTF-32LE",
            Self::Utf32Be => "UTF-32BE",
        }
    }
}
/// Removes a leading UTF-8 byte order mark from `text`, if present.
///
/// Returns the remaining text together with a flag: `true` when a BOM was
/// found and removed, `false` when `text` is returned unchanged. At most one
/// BOM is removed.
///
/// Only the UTF-8 BOM (U+FEFF, encoded as `EF BB BF`) is considered. The
/// UTF-16 and UTF-32 signatures contain the bytes `0xFE` and `0xFF`, which
/// never occur in valid UTF-8, so a `&str` cannot start with them and no
/// check for them is needed here.
#[must_use]
pub fn strip_bom(text: &str) -> (&str, bool) {
    // `strip_prefix` slices on a character boundary for us, avoiding a
    // hand-computed byte offset.
    match text.strip_prefix('\u{FEFF}') {
        Some(rest) => (rest, true),
        None => (text, false),
    }
}
/// Identifies a byte order mark at the start of `bytes`.
///
/// Returns the matching [`BomType`] together with the number of bytes the
/// signature occupies, or `None` when `bytes` does not begin with any of the
/// known signatures.
///
/// The UTF-32LE signature (`FF FE 00 00`) begins with the UTF-16LE signature
/// (`FF FE`), so the four-byte patterns are tried first. As a consequence,
/// input starting with `FF FE 00 00` is always reported as UTF-32LE.
#[must_use]
pub fn detect_bom(bytes: &[u8]) -> Option<(BomType, usize)> {
    match bytes {
        [0xFF, 0xFE, 0x00, 0x00, ..] => Some((BomType::Utf32Le, 4)),
        [0x00, 0x00, 0xFE, 0xFF, ..] => Some((BomType::Utf32Be, 4)),
        [0xEF, 0xBB, 0xBF, ..] => Some((BomType::Utf8, 3)),
        [0xFF, 0xFE, ..] => Some((BomType::Utf16Le, 2)),
        [0xFE, 0xFF, ..] => Some((BomType::Utf16Be, 2)),
        _ => None,
    }
}
// Unit-test module, compiled only for test builds. It is currently empty.
#[cfg(test)]
mod tests {}