/// The encodings whose byte-order mark (BOM) this module knows how to recognise.
///
/// Declaration order matters: the derived `PartialOrd`/`Ord` compare variants
/// by their position here, so variants must not be reordered casually.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BomType {
/// UTF-8.
UTF8,
/// UTF-16, little-endian.
UTF16LE,
/// UTF-16, big-endian.
UTF16BE,
/// UTF-32, little-endian.
UTF32LE,
/// UTF-32, big-endian.
UTF32BE,
/// UTF-7.
UTF7,
/// UTF-1.
UTF1,
/// UTF-EBCDIC.
///
/// NOTE(review): the identifier drops the `C` of "EBCDIC"; it is left as-is
/// because renaming a public variant would break callers.
UTFEBDIC,
/// SCSU (Standard Compression Scheme for Unicode).
SCSU,
/// BOCU-1 (Binary Ordered Compression for Unicode).
BOCU1,
/// GB 18030.
///
/// NOTE(review): the identifier is missing the trailing `0` of "GB18030"; it
/// is left as-is because renaming a public variant would break callers.
GB1803,
}
impl BomType {
pub const fn bom_bytes(&self) -> &'static [u8] {
use BomType::*;
match self {
UTF8 => &[0xEF, 0xBB, 0xBF],
UTF16LE => &[0xFF, 0xFE],
UTF16BE => &[0xFE, 0xFF],
UTF32LE => &[0x00, 0x00, 0xFF, 0xFE],
UTF32BE => &[0xFF, 0xFE, 0x00, 0x00],
UTF7 => &[0x2B, 0x2F, 0x76],
UTF1 => &[0xF7, 0x64, 0x4C],
UTFEBDIC => &[0xDD, 0x73, 0x66, 0x73],
SCSU => &[0x0E, 0xFE, 0xFF],
BOCU1 => &[0xFB, 0xEE, 0x28],
GB1803 => &[0x84, 0x31, 0x95, 0x33],
}
}
pub const fn bom_length(&self) -> usize {
self.bom_bytes().len()
}
pub fn test_bytes(&self, tested_bytes: &[u8]) -> BomBytesTest {
if tested_bytes.len() < self.bom_length() {
if tested_bytes == &self.bom_bytes()[..tested_bytes.len()] {
BomBytesTest::Incomplete
}
else {
BomBytesTest::NotBom
}
} else {
if &tested_bytes[..self.bom_length()] == self.bom_bytes() {
BomBytesTest::StartsWithBom
} else {
BomBytesTest::NotBom
}
}
}
pub fn try_find_bytes_bom<'a>(tested_bytes: &'a [u8], bom_types_tested: &[BomType]) -> BomsBytesTest<'a> {
use BomType::*;
let mut result = BomsBytesTest::Complete { bom_type: None, additional_bytes: tested_bytes };
macro_rules! try_encoding {
($encoding:expr) => {
if bom_types_tested.contains(&$encoding) {
match $encoding.test_bytes(tested_bytes) {
BomBytesTest::Incomplete => result = BomsBytesTest::Incomplete,
BomBytesTest::NotBom => (),
BomBytesTest::StartsWithBom => return BomsBytesTest::Complete { bom_type: Some($encoding), additional_bytes: &tested_bytes[$encoding.bom_length()..] },
}
}
};
}
try_encoding!(UTF8);
try_encoding!(UTF32LE);
try_encoding!(UTF32BE);
try_encoding!(UTF16LE);
try_encoding!(UTF16BE);
try_encoding!(UTF7);
try_encoding!(UTF1);
try_encoding!(UTFEBDIC);
try_encoding!(SCSU);
try_encoding!(BOCU1);
try_encoding!(GB1803);
result
}
pub fn all() -> &'static [BomType] {
use BomType::*;
&[
UTF8,
UTF32LE,
UTF32BE,
UTF16LE,
UTF16BE,
UTF7,
UTF1,
UTFEBDIC,
SCSU,
BOCU1,
GB1803,
]
}
}
/// Outcome of checking a byte slice against the BOM of one specific
/// [`BomType`] (see `BomType::test_bytes`).
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BomBytesTest {
/// The input is shorter than the BOM but equals its leading bytes, so more
/// input is needed to decide.
Incomplete,
/// The input does not start with the BOM and cannot become it by appending
/// more bytes.
NotBom,
/// The input starts with the complete BOM.
StartsWithBom,
}
/// Outcome of checking a byte slice against the BOMs of several [`BomType`]s
/// at once (see `BomType::try_find_bytes_bom`).
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BomsBytesTest<'a> {
/// No tested BOM matched completely, but at least one is still a possible
/// match if more input bytes are supplied.
Incomplete,
/// Detection reached a definite answer.
Complete {
/// The BOM type that was found, or `None` if the input starts with none of
/// the tested BOMs.
bom_type: Option<BomType>,
/// The input following the BOM; the whole input when `bom_type` is `None`.
additional_bytes: &'a [u8],
},
}
/// Integer type in which [`MAX_BOM_LENGTH`] is expressed.
// NOTE(review): `BomType::bom_length` returns `usize`, not `BomSize`; presumably
// this alias is used by code outside this section — confirm.
pub(crate) type BomSize = u8;
/// Length in bytes of the longest BOM returned by `BomType::bom_bytes`
/// (the four-byte marks).
pub(crate) const MAX_BOM_LENGTH: BomSize = 4;