use std::borrow::Cow;
#[cfg(feature = "encoding")]
use encoding_rs::{Encoding, UTF_16BE, UTF_16LE, UTF_8};
#[cfg(feature = "encoding")]
use crate::Error;
use crate::Result;
/// Byte order mark that may prefix UTF-8 encoded input (`EF BB BF`).
pub(crate) const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";

/// Byte order mark that prefixes UTF-16 little-endian input (`FF FE`).
#[cfg(feature = "encoding")]
pub(crate) const UTF16_LE_BOM: &[u8] = b"\xFF\xFE";

/// Byte order mark that prefixes UTF-16 big-endian input (`FE FF`).
#[cfg(feature = "encoding")]
pub(crate) const UTF16_BE_BOM: &[u8] = b"\xFE\xFF";
/// Decoder of byte slices into strings.
///
/// When the `encoding` feature is enabled the decoder carries the encoding
/// that `decode` applies to its input. Without that feature the struct has no
/// fields and `decode` treats its input as UTF-8.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Decoder {
    /// Encoding used to turn bytes into text.
    #[cfg(feature = "encoding")]
    pub(crate) encoding: &'static Encoding,
}
impl Decoder {
pub(crate) fn utf8() -> Self {
Decoder {
#[cfg(feature = "encoding")]
encoding: UTF_8,
}
}
#[cfg(all(test, feature = "encoding", feature = "serialize"))]
pub(crate) fn utf16() -> Self {
Decoder { encoding: UTF_16LE }
}
}
impl Decoder {
    /// Returns the encoding this decoder uses to interpret bytes.
    ///
    /// Only available when the `encoding` feature is enabled.
    #[cfg(feature = "encoding")]
    pub fn encoding(&self) -> &'static Encoding {
        self.encoding
    }

    /// Decodes `bytes` into a string.
    ///
    /// * With the `encoding` feature, the bytes are handed to the free
    ///   function `decode` together with this decoder's encoding.
    /// * Without it, the bytes must be valid UTF-8 and the result borrows
    ///   from `bytes` without copying.
    ///
    /// # Errors
    ///
    /// Returns an error when `bytes` cannot be decoded: invalid UTF-8 without
    /// the `encoding` feature, or whatever `decode` reports with it.
    pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>> {
        #[cfg(feature = "encoding")]
        let text = decode(bytes, self.encoding);

        // Exactly one of the two `let`s survives conditional compilation.
        #[cfg(not(feature = "encoding"))]
        let text = std::str::from_utf8(bytes)
            .map(Cow::Borrowed)
            .map_err(Into::into);

        text
    }
}
/// Decodes `bytes` using the given `encoding`.
///
/// Decoding is performed without BOM handling and without replacement of
/// malformed sequences, so a leading BOM (if any) is treated as ordinary
/// input rather than being interpreted or removed.
///
/// # Errors
///
/// Returns [`Error::NonDecodable`] with `None` as payload when the encoding
/// reports that the bytes could not be decoded.
#[cfg(feature = "encoding")]
pub fn decode<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> Result<Cow<'b, str>> {
    match encoding.decode_without_bom_handling_and_without_replacement(bytes) {
        Some(text) => Ok(text),
        None => Err(Error::NonDecodable(None)),
    }
}
/// Guesses the encoding of a document from its first bytes.
///
/// The checks run in this order and the first match wins:
///
/// | Leading bytes  | Encoding   | Second tuple element |
/// |----------------|------------|----------------------|
/// | `FE FF` (BOM)  | UTF-16 BE  | 2                    |
/// | `FF FE` (BOM)  | UTF-16 LE  | 2                    |
/// | `EF BB BF` (BOM) | UTF-8    | 3                    |
/// | `00 3C 00 3F`  | UTF-16 BE  | 0                    |
/// | `3C 00 3F 00`  | UTF-16 LE  | 0                    |
/// | `3C 3F 78 6D`  | UTF-8      | 0                    |
///
/// The second element of the returned tuple equals the length of the matched
/// BOM, and is `0` when the match was made on the `<?` / `<?xm` characters
/// instead (presumably the number of bytes a caller should skip; confirm
/// against the callers).
///
/// Returns `None` when none of the signatures match.
#[cfg(feature = "encoding")]
pub fn detect_encoding(bytes: &[u8]) -> Option<(&'static Encoding, usize)> {
    // Input that starts with a byte order mark.
    if bytes.starts_with(UTF16_BE_BOM) {
        Some((UTF_16BE, 2))
    } else if bytes.starts_with(UTF16_LE_BOM) {
        Some((UTF_16LE, 2))
    } else if bytes.starts_with(UTF8_BOM) {
        Some((UTF_8, 3))
    // No BOM: look at how the leading `<?` / `<?xm` characters are laid out.
    } else if bytes.starts_with(b"\x00<\x00?") {
        // Two-byte units, most significant byte first.
        Some((UTF_16BE, 0))
    } else if bytes.starts_with(b"<\x00?\x00") {
        // Two-byte units, least significant byte first.
        Some((UTF_16LE, 0))
    } else if bytes.starts_with(b"<?xm") {
        // One byte per character.
        Some((UTF_8, 0))
    } else {
        None
    }
}