use std::borrow::Cow;
use std::str::Utf8Error;
#[cfg(feature = "encoding")]
use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8};
/// Byte sequence of the UTF-8 byte order mark (3 bytes).
pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
/// Byte sequence of the UTF-16 little-endian byte order mark (2 bytes).
/// Only compiled when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
/// Byte sequence of the UTF-16 big-endian byte order mark (2 bytes).
/// Only compiled when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
/// Error returned when a byte sequence cannot be decoded into a string.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include
/// a wildcard arm when matching on it.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum EncodingError {
/// The input is not valid UTF-8; carries the standard library error.
Utf8(Utf8Error),
/// The input could not be decoded using the stored encoding.
/// Only exists when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
Other(&'static Encoding),
}
impl From<Utf8Error> for EncodingError {
#[inline]
fn from(e: Utf8Error) -> Self {
Self::Utf8(e)
}
}
impl std::error::Error for EncodingError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Self::Utf8(e) => Some(e),
#[cfg(feature = "encoding")]
Self::Other(_) => None,
}
}
}
impl std::fmt::Display for EncodingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Utf8(e) => write!(f, "cannot decode input using UTF-8: {}", e),
#[cfg(feature = "encoding")]
Self::Other(encoding) => write!(f, "cannot decode input using {}", encoding.name()),
}
}
}
/// Converts byte slices into strings.
///
/// When the `encoding` feature is disabled the struct has no fields; when it
/// is enabled the struct carries the encoding handed to the decoding routines.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Decoder {
/// Encoding used to interpret input bytes.
/// Only present when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
pub(crate) encoding: &'static Encoding,
}
impl Decoder {
pub(crate) fn utf8() -> Self {
Decoder {
#[cfg(feature = "encoding")]
encoding: UTF_8,
}
}
#[cfg(all(test, feature = "encoding", feature = "serialize"))]
pub(crate) fn utf16() -> Self {
Decoder { encoding: UTF_16LE }
}
}
impl Decoder {
    /// Returns the encoding this decoder was created with.
    ///
    /// Only available when the `encoding` feature is enabled.
    #[cfg(feature = "encoding")]
    pub const fn encoding(&self) -> &'static Encoding {
        self.encoding
    }

    /// Decodes `bytes` into a string slice, borrowing from the input when
    /// possible.
    ///
    /// Without the `encoding` feature the input is validated as UTF-8 and
    /// borrowed as-is. With the feature, decoding is delegated to the
    /// free-standing [`decode`] function using the stored encoding.
    ///
    /// # Errors
    ///
    /// Returns [`EncodingError`] if the bytes are not valid in the encoding
    /// being used.
    pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>, EncodingError> {
        #[cfg(not(feature = "encoding"))]
        {
            let text = std::str::from_utf8(bytes)?;
            Ok(Cow::Borrowed(text))
        }
        #[cfg(feature = "encoding")]
        {
            decode(bytes, self.encoding)
        }
    }

    /// Decodes `bytes` and appends the resulting text to `buf`.
    ///
    /// Without the `encoding` feature the input is validated as UTF-8 before
    /// anything is appended. With the feature, the work is delegated to the
    /// free-standing [`decode_into`] function using the stored encoding.
    ///
    /// # Errors
    ///
    /// Returns [`EncodingError`] if the bytes cannot be decoded.
    pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<(), EncodingError> {
        #[cfg(not(feature = "encoding"))]
        {
            let text = std::str::from_utf8(bytes)?;
            buf.push_str(text);
            Ok(())
        }
        #[cfg(feature = "encoding")]
        {
            decode_into(bytes, self.encoding, buf)
        }
    }

    /// Decodes the content of a `Cow` of bytes, keeping the `'b` lifetime for
    /// borrowed input.
    ///
    /// # Errors
    ///
    /// Returns [`EncodingError`] if the bytes cannot be decoded.
    pub(crate) fn decode_cow<'b>(
        &self,
        bytes: &Cow<'b, [u8]>,
    ) -> Result<Cow<'b, str>, EncodingError> {
        match *bytes {
            Cow::Borrowed(borrowed) => self.decode(borrowed),
            Cow::Owned(ref owned) => {
                // The decoded text would borrow from `owned`, which lives only
                // as long as the `&Cow` argument, so it is always converted to
                // an owned `String` to satisfy the `'b` lifetime.
                let text = self.decode(owned)?;
                Ok(Cow::Owned(text.into_owned()))
            }
        }
    }
}
/// Decodes `bytes` using the given `encoding`, without any BOM handling and
/// without substituting replacement characters for malformed input.
///
/// Only available when the `encoding` feature is enabled.
///
/// # Errors
///
/// Returns [`EncodingError::Other`] carrying `encoding` when the decoder
/// reports that the input cannot be decoded.
#[cfg(feature = "encoding")]
pub fn decode<'b>(
    bytes: &'b [u8],
    encoding: &'static Encoding,
) -> Result<Cow<'b, str>, EncodingError> {
    match encoding.decode_without_bom_handling_and_without_replacement(bytes) {
        Some(text) => Ok(text),
        None => Err(EncodingError::Other(encoding)),
    }
}
/// Decodes `bytes` using the given `encoding` and appends the text to `buf`,
/// without BOM handling and without replacement characters.
///
/// Only available when the `encoding` feature is enabled.
///
/// # Errors
///
/// * [`EncodingError::Utf8`] if `encoding` is `UTF_8` and the input is not
///   valid UTF-8 (nothing is appended in that case).
/// * [`EncodingError::Other`] if the decoder reports malformed input for any
///   other encoding.
///
/// # Panics
///
/// Panics if the decoder cannot report the required output buffer size
/// (the size query returns `None`).
#[cfg(feature = "encoding")]
pub fn decode_into(
    bytes: &[u8],
    encoding: &'static Encoding,
    buf: &mut String,
) -> Result<(), EncodingError> {
    // UTF-8 input only needs validation before being copied verbatim.
    if encoding == UTF_8 {
        let text = std::str::from_utf8(bytes)?;
        buf.push_str(text);
        return Ok(());
    }

    let mut decoder = encoding.new_decoder_without_bom_handling();

    // Reserve the worst-case output size up front so that the single decode
    // call below is never expected to run out of space.
    let required = decoder
        .max_utf8_buffer_length_without_replacement(bytes.len())
        .unwrap();
    buf.reserve(required);

    let (status, consumed) = decoder.decode_to_string_without_replacement(bytes, buf, true);
    match status {
        DecoderResult::Malformed(_, _) => Err(EncodingError::Other(encoding)),
        // Enough capacity was reserved above, so the output cannot fill up.
        DecoderResult::OutputFull => unreachable!(),
        DecoderResult::InputEmpty => {
            debug_assert_eq!(consumed, bytes.len());
            Ok(())
        }
    }
}
/// Guesses the encoding of a document from its first bytes.
///
/// Checks are performed in this order:
///
/// 1. Byte order marks: UTF-16 BE, UTF-16 LE, then UTF-8.
/// 2. The characters `<?` stored as 16-bit units, big-endian then
///    little-endian.
/// 3. The ASCII bytes `<?xm`.
///
/// Returns the matched encoding together with the length in bytes of the
/// matched BOM (`0` when the match was made on the `<?` prefix rather than on
/// a BOM), or `None` if no check matched.
///
/// Only available when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
pub fn detect_encoding(bytes: &[u8]) -> Option<(&'static Encoding, usize)> {
    // Input that starts with a BOM
    if bytes.starts_with(UTF16_BE_BOM) {
        Some((UTF_16BE, 2))
    } else if bytes.starts_with(UTF16_LE_BOM) {
        Some((UTF_16LE, 2))
    } else if bytes.starts_with(UTF8_BOM) {
        Some((UTF_8, 3))
    }
    // Input without a BOM: inspect how `<?` is laid out
    else if bytes.starts_with(&[0x00, b'<', 0x00, b'?']) {
        Some((UTF_16BE, 0))
    } else if bytes.starts_with(&[b'<', 0x00, b'?', 0x00]) {
        Some((UTF_16LE, 0))
    } else if bytes.starts_with(&[b'<', b'?', b'x', b'm']) {
        Some((UTF_8, 0))
    } else {
        None
    }
}