use std::borrow::Cow;
#[cfg(feature = "encoding")]
use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8};
#[cfg(feature = "encoding")]
use crate::Error;
use crate::Result;
/// Byte order mark that may prefix a UTF-8 encoded input: `EF BB BF`.
pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
/// Byte order mark that prefixes a little-endian UTF-16 input: `FF FE`.
///
/// Only available when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
/// Byte order mark that prefixes a big-endian UTF-16 input: `FE FF`.
///
/// Only available when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
/// Converts raw bytes into text.
///
/// With the `encoding` feature enabled the decoder carries the
/// [`Encoding`] that is used to interpret the bytes. Without that feature
/// the struct has no fields and its `decode*` methods only accept valid
/// UTF-8.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Decoder {
/// Encoding used by the `decode*` methods to interpret input bytes.
#[cfg(feature = "encoding")]
pub(crate) encoding: &'static Encoding,
}
impl Decoder {
pub(crate) fn utf8() -> Self {
Decoder {
#[cfg(feature = "encoding")]
encoding: UTF_8,
}
}
#[cfg(all(test, feature = "encoding", feature = "serialize"))]
pub(crate) fn utf16() -> Self {
Decoder { encoding: UTF_16LE }
}
}
impl Decoder {
    /// Returns the encoding this decoder uses to interpret bytes.
    #[cfg(feature = "encoding")]
    pub const fn encoding(&self) -> &'static Encoding {
        self.encoding
    }

    /// Decodes `bytes` into a string.
    ///
    /// * With the `encoding` feature, the work is delegated to the free
    ///   function [`decode`] using this decoder's encoding.
    /// * Without it, `bytes` must be valid UTF-8 and the result borrows
    ///   from the input.
    ///
    /// # Errors
    ///
    /// Returns an error if `bytes` cannot be decoded.
    pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>> {
        // Exactly one of the two bindings survives conditional compilation.
        #[cfg(feature = "encoding")]
        let text = decode(bytes, self.encoding);

        #[cfg(not(feature = "encoding"))]
        let text = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?));

        text
    }

    /// Decodes `bytes` and appends the resulting text to `buf`.
    ///
    /// * With the `encoding` feature, the work is delegated to the free
    ///   function [`decode_into`] using this decoder's encoding.
    /// * Without it, `bytes` must be valid UTF-8.
    ///
    /// # Errors
    ///
    /// Returns an error if `bytes` cannot be decoded.
    pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<()> {
        #[cfg(feature = "encoding")]
        decode_into(bytes, self.encoding, buf)?;

        #[cfg(not(feature = "encoding"))]
        {
            let text = std::str::from_utf8(bytes)?;
            buf.push_str(text);
        }

        Ok(())
    }

    /// Decodes the content of a [`Cow`] while keeping the `'b` lifetime of
    /// the borrowed variant.
    ///
    /// # Errors
    ///
    /// Returns an error if the bytes cannot be decoded.
    pub(crate) fn decode_cow<'b>(&self, bytes: &Cow<'b, [u8]>) -> Result<Cow<'b, str>> {
        match bytes {
            // The decoded text would borrow from the local reference to the
            // owned buffer, not from `'b`, so it has to be copied out.
            Cow::Owned(owned) => {
                let text = self.decode(owned)?;
                Ok(Cow::Owned(text.into_owned()))
            }
            Cow::Borrowed(borrowed) => self.decode(borrowed),
        }
    }
}
/// Decodes `bytes` using the given `encoding`.
///
/// No BOM handling is performed and malformed sequences are never replaced:
/// the underlying `encoding_rs` call is
/// `decode_without_bom_handling_and_without_replacement`.
///
/// # Errors
///
/// Returns [`Error::NonDecodable`] with no inner cause when that call yields
/// `None`.
#[cfg(feature = "encoding")]
pub fn decode<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> Result<Cow<'b, str>> {
    match encoding.decode_without_bom_handling_and_without_replacement(bytes) {
        Some(text) => Ok(text),
        None => Err(Error::NonDecodable(None)),
    }
}
/// Decodes `bytes` using the given `encoding` and appends the text to `buf`.
///
/// No BOM handling is performed and malformed sequences are never replaced.
/// UTF-8 input takes a fast path that validates the bytes and copies them
/// directly.
///
/// On failure `buf` is left exactly as it was before the call, regardless of
/// the encoding.
///
/// # Errors
///
/// Returns an error if `bytes` is not valid in `encoding`.
///
/// # Panics
///
/// Panics if the worst-case size of the decoded output does not fit into
/// `usize`.
#[cfg(feature = "encoding")]
pub fn decode_into(bytes: &[u8], encoding: &'static Encoding, buf: &mut String) -> Result<()> {
    if encoding == UTF_8 {
        // Validation happens before the push, so `buf` is untouched on error.
        buf.push_str(std::str::from_utf8(bytes)?);
        return Ok(());
    }

    let mut decoder = encoding.new_decoder_without_bom_handling();
    buf.reserve(
        decoder
            .max_utf8_buffer_length_without_replacement(bytes.len())
            // `None` means the required size overflows `usize`; no buffer of
            // that size could be reserved anyway.
            .expect("worst-case decoded length overflows usize"),
    );

    // Remember where the new text starts so that a partially decoded prefix
    // can be rolled back; this matches the all-or-nothing UTF-8 path above.
    let original_len = buf.len();
    let (result, read) = decoder.decode_to_string_without_replacement(bytes, buf, true);
    match result {
        DecoderResult::InputEmpty => {
            debug_assert_eq!(read, bytes.len());
            Ok(())
        }
        DecoderResult::Malformed(_, _) => {
            // `original_len` was the length of a valid `String`, so it is a
            // char boundary and `truncate` cannot panic.
            buf.truncate(original_len);
            Err(Error::NonDecodable(None))
        }
        // Enough capacity for the worst case was reserved above.
        DecoderResult::OutputFull => unreachable!(),
    }
}
/// Guesses the encoding of `bytes` from its first few bytes.
///
/// Returns the detected encoding together with the length of the byte order
/// mark that was matched. That length is `0` when detection was based on the
/// leading `<?` characters instead of a BOM. Returns `None` when no known
/// prefix matches.
///
/// Prefixes are tried in this order: UTF-16BE BOM, UTF-16LE BOM, UTF-8 BOM,
/// `<?` in 16-bit big-endian units, `<?` in 16-bit little-endian units, and
/// finally the ASCII-compatible bytes `<?xm`.
#[cfg(feature = "encoding")]
pub fn detect_encoding(bytes: &[u8]) -> Option<(&'static Encoding, usize)> {
    // Inputs that start with a byte order mark.
    if bytes.starts_with(UTF16_BE_BOM) {
        Some((UTF_16BE, 2))
    } else if bytes.starts_with(UTF16_LE_BOM) {
        Some((UTF_16LE, 2))
    } else if bytes.starts_with(UTF8_BOM) {
        Some((UTF_8, 3))
    }
    // Inputs without a BOM: look at how `<?` is laid out in memory.
    else if bytes.starts_with(&[0x00, b'<', 0x00, b'?']) {
        Some((UTF_16BE, 0))
    } else if bytes.starts_with(&[b'<', 0x00, b'?', 0x00]) {
        Some((UTF_16LE, 0))
    } else if bytes.starts_with(&[b'<', b'?', b'x', b'm']) {
        Some((UTF_8, 0))
    } else {
        None
    }
}