use std::borrow::Cow;
use std::str::Utf8Error;
#[cfg(feature = "encoding")]
use encoding_rs;
#[cfg(feature = "encoding")]
use std::io::{self, BufRead, Read};
/// Byte order mark that may prefix a UTF-8 encoded document (`EF BB BF`).
pub(crate) const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";
/// Byte order mark of a little-endian UTF-16 document (`FF FE`).
pub(crate) const UTF16_LE_BOM: &[u8] = b"\xFF\xFE";
/// Byte order mark of a big-endian UTF-16 document (`FE FF`).
pub(crate) const UTF16_BE_BOM: &[u8] = b"\xFE\xFF";
/// An error returned when input bytes cannot be decoded into a string.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum EncodingError {
/// The input is not valid UTF-8; carries the underlying [`Utf8Error`].
Utf8(Utf8Error),
/// The input could not be decoded using the contained encoding.
///
/// Only available when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
Other(&'static encoding_rs::Encoding),
}
impl From<Utf8Error> for EncodingError {
#[inline]
fn from(e: Utf8Error) -> Self {
Self::Utf8(e)
}
}
impl std::error::Error for EncodingError {
    /// Returns the wrapped [`Utf8Error`] for the `Utf8` variant; the `Other`
    /// variant has no underlying cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            Self::Utf8(utf8) => utf8,
            #[cfg(feature = "encoding")]
            Self::Other(_) => return None,
        };
        Some(cause)
    }
}
impl std::fmt::Display for EncodingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Utf8(e) => write!(f, "cannot decode input using UTF-8: {}", e),
#[cfg(feature = "encoding")]
Self::Other(encoding) => write!(f, "cannot decode input using {}", encoding.name()),
}
}
}
/// Decoder of byte slices into strings.
///
/// With the `encoding` feature enabled it carries the encoding used for
/// decoding; without the feature it has no fields.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Decoder {
/// Encoding used to decode the bytes passed to this decoder.
#[cfg(feature = "encoding")]
pub(crate) encoding: &'static encoding_rs::Encoding,
}
impl Decoder {
pub(crate) const fn utf8() -> Self {
Decoder {
#[cfg(feature = "encoding")]
encoding: encoding_rs::UTF_8,
}
}
#[cfg(all(test, feature = "encoding", feature = "serialize"))]
pub(crate) const fn utf16() -> Self {
Decoder {
encoding: encoding_rs::UTF_16LE,
}
}
}
impl Decoder {
#[cfg(feature = "encoding")]
pub const fn encoding(&self) -> &'static encoding_rs::Encoding {
self.encoding
}
pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>, EncodingError> {
#[cfg(not(feature = "encoding"))]
let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?));
#[cfg(feature = "encoding")]
let decoded = decode(bytes, self.encoding);
decoded
}
pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<(), EncodingError> {
#[cfg(not(feature = "encoding"))]
buf.push_str(std::str::from_utf8(bytes)?);
#[cfg(feature = "encoding")]
decode_into(bytes, self.encoding, buf)?;
Ok(())
}
pub(crate) fn decode_cow<'b>(
&self,
bytes: &Cow<'b, [u8]>,
) -> Result<Cow<'b, str>, EncodingError> {
match bytes {
Cow::Borrowed(bytes) => self.decode(bytes),
Cow::Owned(bytes) => Ok(self.decode(bytes)?.into_owned().into()),
}
}
pub(crate) fn content<'b>(
&self,
bytes: &Cow<'b, [u8]>,
normalize_eol: impl Fn(&str) -> Cow<str>,
) -> Result<Cow<'b, str>, EncodingError> {
match bytes {
Cow::Borrowed(bytes) => {
let text = self.decode(bytes)?;
match normalize_eol(&text) {
Cow::Borrowed(_) => Ok(text),
Cow::Owned(s) => Ok(Cow::Owned(s)),
}
}
Cow::Owned(bytes) => {
let text = self.decode(bytes)?;
let text = normalize_eol(&text);
Ok(text.into_owned().into())
}
}
}
}
/// Decodes `bytes` using `encoding`, without BOM handling and without
/// replacing malformed sequences.
///
/// # Errors
///
/// Returns [`EncodingError::Other`] carrying `encoding` if the input contains
/// data that cannot be decoded.
#[cfg(feature = "encoding")]
pub fn decode<'b>(
    bytes: &'b [u8],
    encoding: &'static encoding_rs::Encoding,
) -> Result<Cow<'b, str>, EncodingError> {
    match encoding.decode_without_bom_handling_and_without_replacement(bytes) {
        Some(text) => Ok(text),
        None => Err(EncodingError::Other(encoding)),
    }
}
/// Decodes `bytes` using `encoding` and appends the text to `buf`.
///
/// UTF-8 input is validated and appended directly; any other encoding goes
/// through an `encoding_rs` decoder without BOM handling.
///
/// # Errors
///
/// Returns [`EncodingError::Utf8`] for invalid UTF-8 input, or
/// [`EncodingError::Other`] if the input is malformed in another encoding.
///
/// # Panics
///
/// Panics if the worst-case output length cannot be computed for `bytes.len()`.
#[cfg(feature = "encoding")]
pub fn decode_into(
    bytes: &[u8],
    encoding: &'static encoding_rs::Encoding,
    buf: &mut String,
) -> Result<(), EncodingError> {
    if encoding == encoding_rs::UTF_8 {
        let text = std::str::from_utf8(bytes)?;
        buf.push_str(text);
        return Ok(());
    }

    let mut decoder = encoding.new_decoder_without_bom_handling();
    // Reserve the worst-case output size up front so that the decoder can
    // never report a full output buffer below.
    let worst_case = decoder
        .max_utf8_buffer_length_without_replacement(bytes.len())
        .unwrap();
    buf.reserve(worst_case);

    let (status, consumed) = decoder.decode_to_string_without_replacement(bytes, buf, true);
    match status {
        encoding_rs::DecoderResult::Malformed(_, _) => Err(EncodingError::Other(encoding)),
        encoding_rs::DecoderResult::OutputFull => unreachable!(),
        encoding_rs::DecoderResult::InputEmpty => {
            debug_assert_eq!(consumed, bytes.len());
            Ok(())
        }
    }
}
/// Guesses the encoding of a document from its first bytes.
///
/// Byte order marks are checked first (UTF-16 BE, UTF-16 LE, UTF-8), followed
/// by the byte patterns `<?` produces in UTF-16 without a BOM and finally by
/// the ASCII bytes `<?xm`. Returns `None` if nothing matches.
pub fn detect_encoding(bytes: &[u8]) -> Option<DetectedEncoding> {
    let detected = if bytes.starts_with(UTF16_BE_BOM) {
        DetectedEncoding::Utf16BeBom
    } else if bytes.starts_with(UTF16_LE_BOM) {
        DetectedEncoding::Utf16LeBom
    } else if bytes.starts_with(UTF8_BOM) {
        DetectedEncoding::Utf8Bom
    } else if bytes.starts_with(b"\x00<\x00?") {
        // `<?` as two big-endian UTF-16 code units
        DetectedEncoding::Utf16BeLike
    } else if bytes.starts_with(b"<\x00?\x00") {
        // `<?` as two little-endian UTF-16 code units
        DetectedEncoding::Utf16LeLike
    } else if bytes.starts_with(b"<?xm") {
        DetectedEncoding::AsciiCompatible
    } else {
        return None;
    };
    Some(detected)
}
/// The outcome of sniffing the first bytes of a document for its encoding.
///
/// The `*Bom` variants mean that a byte order mark was found; the other
/// variants mean that the bytes merely look like the start of an XML
/// declaration in the respective encoding family.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DetectedEncoding {
    /// The input starts with the ASCII bytes `<?xm` and no byte order mark.
    AsciiCompatible,
    /// The input starts with the UTF-8 byte order mark.
    Utf8Bom,
    /// The input starts with `<?` encoded as little-endian UTF-16, without a BOM.
    Utf16LeLike,
    /// The input starts with the little-endian UTF-16 byte order mark.
    Utf16LeBom,
    /// The input starts with `<?` encoded as big-endian UTF-16, without a BOM.
    Utf16BeLike,
    /// The input starts with the big-endian UTF-16 byte order mark.
    Utf16BeBom,
}
impl DetectedEncoding {
#[cfg(feature = "encoding")]
pub const fn encoding(&self) -> &'static encoding_rs::Encoding {
match self {
DetectedEncoding::AsciiCompatible | DetectedEncoding::Utf8Bom => encoding_rs::UTF_8,
DetectedEncoding::Utf16LeLike | DetectedEncoding::Utf16LeBom => encoding_rs::UTF_16LE,
DetectedEncoding::Utf16BeLike | DetectedEncoding::Utf16BeBom => encoding_rs::UTF_16BE,
}
}
pub const fn bom_len(&self) -> usize {
match self {
DetectedEncoding::Utf8Bom => 3,
DetectedEncoding::Utf16LeBom | DetectedEncoding::Utf16BeBom => 2,
DetectedEncoding::AsciiCompatible
| DetectedEncoding::Utf16LeLike
| DetectedEncoding::Utf16BeLike => 0,
}
}
}
/// Capacity in bytes of the [`Prefix`] buffer, i.e. the maximum number of raw
/// bytes read from the start of the input before encoding detection runs.
#[cfg(feature = "encoding")]
const PREFIX_CAP: usize = 64;
/// Raw (not yet decoded) bytes taken from the very beginning of the input,
/// kept by [`DecodingReader`] for encoding detection.
#[cfg(feature = "encoding")]
struct Prefix {
/// Storage for the raw prefix bytes; only `buf[..len]` is meaningful.
buf: [u8; PREFIX_CAP],
/// Number of valid bytes at the start of `buf` that still await decoding.
len: usize,
/// `true` once `fill_buf` no longer needs to run encoding detection on
/// this prefix.
detected: bool,
}
/// A reader adapter that decodes the bytes of an inner reader and exposes
/// them as UTF-8 through the `BufRead` / `Read` interfaces.
///
/// The decoder starts as UTF-8; the first bytes of the input are inspected
/// with [`detect_encoding`] and may switch it, and [`Self::set_encoding`]
/// allows switching it explicitly.
#[cfg(feature = "encoding")]
pub struct DecodingReader<R> {
/// The wrapped source of raw bytes.
inner: R,
/// Stateful decoder that converts raw bytes into UTF-8.
decoder: encoding_rs::Decoder,
/// Set once the decoder has been flushed at end of input; afterwards
/// `fill_buf` only reports end of stream.
decoder_finished: bool,
/// Buffer holding decoded UTF-8 output.
out_buf: Box<[u8]>,
/// Offset in `out_buf` of the first byte not yet consumed.
out_pos: usize,
/// Number of valid decoded bytes in `out_buf`.
out_len: usize,
/// Raw bytes from the start of the input used for encoding detection;
/// `None` once they have been fully decoded and the buffer was dropped.
prefix: Option<Box<Prefix>>,
/// Set when the inner reader reported end of input.
inner_eof: bool,
}
#[cfg(feature = "encoding")]
impl<R: std::fmt::Debug> std::fmt::Debug for DecodingReader<R> {
    /// Formats the reader state; the output buffer contents and the raw
    /// prefix bytes are deliberately summarized rather than dumped.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("DecodingReader");
        out.field("inner", &self.inner);
        out.field("encoding", &self.decoder.encoding());
        out.field("out_pos", &self.out_pos);
        out.field("out_len", &self.out_len);
        out.field("inner_eof", &self.inner_eof);
        out.field("prefix_active", &self.prefix.is_some());
        out.finish()
    }
}
#[cfg(feature = "encoding")]
impl<R> DecodingReader<R> {
    /// Wraps `inner` in a decoding reader.
    ///
    /// The reader starts with a UTF-8 decoder, an empty 8192-byte output
    /// buffer and an empty prefix buffer awaiting encoding detection.
    pub fn new(inner: R) -> Self {
        let prefix = Prefix {
            buf: [0; PREFIX_CAP],
            len: 0,
            detected: false,
        };
        let decoder = encoding_rs::UTF_8.new_decoder_without_bom_handling();
        Self {
            inner,
            decoder,
            decoder_finished: false,
            out_buf: vec![0u8; 8192].into_boxed_slice(),
            out_pos: 0,
            out_len: 0,
            prefix: Some(Box::new(prefix)),
            inner_eof: false,
        }
    }

    /// Returns a shared reference to the wrapped reader.
    pub const fn get_ref(&self) -> &R {
        &self.inner
    }

    /// Returns a mutable reference to the wrapped reader.
    pub const fn get_mut(&mut self) -> &mut R {
        &mut self.inner
    }

    /// Consumes the adapter and returns the wrapped reader.
    pub fn into_inner(self) -> R {
        self.inner
    }

    /// Returns the encoding of the decoder currently in use.
    pub fn encoding(&self) -> &'static encoding_rs::Encoding {
        self.decoder.encoding()
    }

    /// Switches the decoder to `encoding`.
    ///
    /// Requesting the encoding that is already in use is a no-op. Output that
    /// has already been decoded and buffered is left untouched.
    ///
    /// # Panics
    ///
    /// Panics if `encoding` differs from the current one and the prefix
    /// buffer has already been drained.
    pub fn set_encoding(&mut self, encoding: &'static encoding_rs::Encoding) {
        if self.decoder.encoding() != encoding {
            assert!(
                self.prefix.is_some(),
                "set_encoding() called after prefix buffer was drained; \
                 encoding can only be changed while the prefix is still active"
            );
            self.decoder = encoding.new_decoder_without_bom_handling();
            self.decoder_finished = false;
        }
    }
}
#[cfg(feature = "encoding")]
impl<R: BufRead> BufRead for DecodingReader<R> {
    /// Returns decoded UTF-8 bytes, decoding more input when the output
    /// buffer has been fully consumed. An empty slice signals end of stream.
    ///
    /// On the first call up to [`PREFIX_CAP`] raw bytes are read into the
    /// prefix buffer and inspected with [`detect_encoding`]: a byte order mark
    /// is stripped and a detected non-UTF-8 encoding replaces the decoder.
    /// The prefix is decoded first, then data from the inner reader.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors of the inner reader. Input that is malformed in
    /// the current encoding yields an [`io::ErrorKind::InvalidData`] error
    /// wrapping [`EncodingError::Other`].
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        // Hand out already decoded bytes before decoding anything new.
        if self.out_pos < self.out_len {
            return Ok(&self.out_buf[self.out_pos..self.out_len]);
        }
        self.out_pos = 0;
        self.out_len = 0;

        if let Some(prefix) = &mut self.prefix {
            if !prefix.detected {
                // Collect as many leading bytes as possible (until the buffer
                // is full or the input ends) so detection sees the whole BOM.
                while prefix.len < PREFIX_CAP {
                    match self.inner.read(&mut prefix.buf[prefix.len..]) {
                        Ok(0) => {
                            self.inner_eof = true;
                            break;
                        }
                        Ok(n) => prefix.len += n,
                        Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
                        // `detected` is still `false` here, so a later call
                        // resumes filling the prefix and still runs detection.
                        Err(e) => return Err(e),
                    }
                }
                let detection_bytes = &prefix.buf[..prefix.len];
                if let Some(detected) = detect_encoding(detection_bytes) {
                    let bom_len = detected.bom_len();
                    if bom_len > 0 {
                        // Drop the BOM so that it is never decoded as content.
                        prefix.buf.copy_within(bom_len..prefix.len, 0);
                        prefix.len -= bom_len;
                    }
                    let encoding = detected.encoding();
                    // A UTF-8 result keeps the current decoder, so an encoding
                    // chosen via `set_encoding` before the first read stays.
                    if encoding != encoding_rs::UTF_8 {
                        self.decoder = encoding.new_decoder_without_bom_handling();
                    }
                }
                // Mark detection as done only after it really completed.
                prefix.detected = true;
            }
            if self.decoder_finished {
                return Ok(&[]);
            }
            if prefix.len == 0 {
                self.prefix = None;
            } else {
                let src = &prefix.buf[..prefix.len];
                let (result, read, written) = self.decoder.decode_to_utf8_without_replacement(
                    src,
                    &mut self.out_buf[..],
                    false,
                );
                // Remove the bytes the decoder has taken from the prefix.
                prefix.buf.copy_within(read..prefix.len, 0);
                prefix.len -= read;
                self.out_len = written;
                match result {
                    // The (possibly empty) prefix is intentionally kept alive
                    // here so that `set_encoding` remains usable after the
                    // first chunk has been produced.
                    encoding_rs::DecoderResult::InputEmpty if written > 0 => {
                        return Ok(&self.out_buf[..self.out_len]);
                    }
                    // Nothing was produced (e.g. only a partial sequence):
                    // continue with data from the inner reader below.
                    encoding_rs::DecoderResult::InputEmpty => {}
                    encoding_rs::DecoderResult::OutputFull => {
                        return Ok(&self.out_buf[..self.out_len]);
                    }
                    encoding_rs::DecoderResult::Malformed(_, _) => {
                        return Err(io::Error::new(
                            io::ErrorKind::InvalidData,
                            EncodingError::Other(self.decoder.encoding()),
                        ));
                    }
                }
                if prefix.len == 0 {
                    self.prefix = None;
                }
            }
        }

        if self.decoder_finished {
            return Ok(&[]);
        }

        loop {
            if self.inner_eof {
                // Flush the decoder: an incomplete trailing sequence is
                // reported as malformed input.
                let (result, _, written) = self.decoder.decode_to_utf8_without_replacement(
                    b"",
                    &mut self.out_buf[..],
                    true,
                );
                self.out_len = written;
                match result {
                    encoding_rs::DecoderResult::InputEmpty => {
                        self.decoder_finished = true;
                        return Ok(&self.out_buf[..self.out_len]);
                    }
                    encoding_rs::DecoderResult::OutputFull => {
                        return Ok(&self.out_buf[..self.out_len]);
                    }
                    encoding_rs::DecoderResult::Malformed(_, _) => {
                        return Err(io::Error::new(
                            io::ErrorKind::InvalidData,
                            EncodingError::Other(self.decoder.encoding()),
                        ));
                    }
                }
            }

            let (result, read, written) = {
                let src = self.inner.fill_buf()?;
                if src.is_empty() {
                    self.inner_eof = true;
                    continue;
                }
                self.decoder
                    .decode_to_utf8_without_replacement(src, &mut self.out_buf[..], false)
            };
            self.inner.consume(read);
            self.out_len = written;
            match result {
                encoding_rs::DecoderResult::InputEmpty if written > 0 => {
                    return Ok(&self.out_buf[..self.out_len]);
                }
                // Input was consumed without producing output yet; read more.
                encoding_rs::DecoderResult::InputEmpty => {}
                encoding_rs::DecoderResult::OutputFull => {
                    return Ok(&self.out_buf[..self.out_len]);
                }
                encoding_rs::DecoderResult::Malformed(_, _) => {
                    return Err(io::Error::new(
                        io::ErrorKind::InvalidData,
                        EncodingError::Other(self.decoder.encoding()),
                    ));
                }
            }
        }
    }

    /// Marks `amt` decoded bytes as consumed.
    ///
    /// `amt` must not exceed the length of the slice last returned by
    /// `fill_buf`. Debug builds assert this; release builds clamp the
    /// position to the end of the decoded data.
    fn consume(&mut self, amt: usize) {
        debug_assert!(
            self.out_pos + amt <= self.out_len,
            "consume({amt}) out of range: out_pos={}, out_len={}",
            self.out_pos,
            self.out_len,
        );
        // Clamp so that the `out_pos <= out_len` invariant survives a
        // misbehaving caller and the addition cannot wrap around.
        self.out_pos = self.out_pos.saturating_add(amt).min(self.out_len);
    }
}
#[cfg(feature = "encoding")]
impl<R: BufRead> Read for DecodingReader<R> {
    /// Copies decoded UTF-8 bytes into `buf`, returning how many were copied.
    ///
    /// Returns `Ok(0)` when `buf` is empty or the decoded stream has ended.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        let decoded = self.fill_buf()?;
        if decoded.is_empty() {
            return Ok(0);
        }

        let count = std::cmp::min(decoded.len(), buf.len());
        let (target, _) = buf.split_at_mut(count);
        target.copy_from_slice(&decoded[..count]);
        self.consume(count);
        Ok(count)
    }
}
#[cfg(all(test, feature = "encoding"))]
mod decoding_reader {
use super::*;
use std::io::{BufReader, Read};
/// Test reader that hands out its data in pieces of at most `chunk_size`
/// bytes per `read` call.
struct ChunkedReader<'a> {
    /// The complete data to serve.
    data: &'a [u8],
    /// Offset of the next byte to serve.
    pos: usize,
    /// Upper bound on the number of bytes returned by one `read` call.
    chunk_size: usize,
}

impl<'a> ChunkedReader<'a> {
    /// Creates a reader positioned at the start of `data`.
    fn new(data: &'a [u8], chunk_size: usize) -> Self {
        ChunkedReader {
            pos: 0,
            chunk_size,
            data,
        }
    }
}

impl<'a> Read for ChunkedReader<'a> {
    /// Serves the next chunk, limited by the chunk size, the size of `buf`
    /// and the amount of data left; returns `Ok(0)` once all data was served.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let remaining = match self.data.get(self.pos..) {
            Some(rest) if !rest.is_empty() => rest,
            _ => return Ok(0),
        };
        let len = remaining.len().min(self.chunk_size).min(buf.len());
        buf[..len].copy_from_slice(&remaining[..len]);
        self.pos += len;
        Ok(len)
    }
}
/// Encodes `s` as little-endian UTF-16 preceded by the `FF FE` byte order mark.
fn utf16le_with_bom(s: &str) -> Vec<u8> {
    let mut encoded = vec![0xFF, 0xFE];
    encoded.extend(s.encode_utf16().flat_map(u16::to_le_bytes));
    encoded
}
/// Encodes `s` as big-endian UTF-16 preceded by the `FE FF` byte order mark.
fn utf16be_with_bom(s: &str) -> Vec<u8> {
    let mut encoded = vec![0xFE, 0xFF];
    encoded.extend(s.encode_utf16().flat_map(u16::to_be_bytes));
    encoded
}
/// Encodes `s` as little-endian UTF-16 without a byte order mark.
fn utf16le_no_bom(s: &str) -> Vec<u8> {
    s.encode_utf16().flat_map(u16::to_le_bytes).collect()
}
/// Encodes `s` as big-endian UTF-16 without a byte order mark.
fn utf16be_no_bom(s: &str) -> Vec<u8> {
    s.encode_utf16().flat_map(u16::to_be_bytes).collect()
}
/// Drains `reader` and returns everything it produced as a `String`.
///
/// Panics if the reader produced bytes that are not valid UTF-8.
fn read_all(reader: &mut DecodingReader<impl BufRead>) -> io::Result<String> {
    let mut decoded = Vec::new();
    reader.read_to_end(&mut decoded)?;
    let text = String::from_utf8(decoded).expect("DecodingReader should produce valid UTF-8");
    Ok(text)
}
mod edge_cases {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Reading from an empty input reports end of stream immediately.
    #[test]
    fn empty_input() {
        let data = b"";
        let mut reader = DecodingReader::new(&data[..]);
        let mut buf = [0u8; 10];
        let n = reader.read(&mut buf).unwrap();
        assert_eq!(n, 0);
    }

    /// An input consisting only of the UTF-8 BOM decodes to an empty string.
    #[test]
    fn utf8_bom_only() {
        let data = b"\xEF\xBB\xBF";
        let mut reader = DecodingReader::new(&data[..]);
        assert_eq!(read_all(&mut reader).unwrap(), "");
    }

    /// An input consisting only of the UTF-16 LE BOM decodes to an empty string.
    #[test]
    fn utf16le_bom_only() {
        let data = &[0xFF, 0xFE];
        let mut reader = DecodingReader::new(&data[..]);
        assert_eq!(read_all(&mut reader).unwrap(), "");
    }

    /// An input consisting only of the UTF-16 BE BOM decodes to an empty string.
    #[test]
    fn utf16be_bom_only() {
        let data = &[0xFE, 0xFF];
        let mut reader = DecodingReader::new(&data[..]);
        assert_eq!(read_all(&mut reader).unwrap(), "");
    }

    /// Bytes that are not valid UTF-8 are reported as `InvalidData`.
    #[test]
    fn invalid_utf8_is_rejected() {
        let data: &[u8] = &[0x48, 0x65, 0x6C, 0xFF, 0xFE];
        let mut reader = DecodingReader::new(&data[..]);
        let err = read_all(&mut reader).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }

    /// A UTF-16 input that ends in the middle of a code unit is an error.
    #[test]
    fn truncated_utf16_at_eof() {
        // BOM, 'H', then a single dangling byte
        let data: &[u8] = &[0xFF, 0xFE, 0x48, 0x00, 0x65];
        let mut reader = DecodingReader::new(&data[..]);
        let err = read_all(&mut reader).unwrap_err();
        assert_eq!(err.kind(), io::ErrorKind::InvalidData);
    }

    /// Reading one byte at a time must reassemble multi-byte characters.
    #[test]
    fn read_with_one_byte_buffer() {
        // The text contains real multi-byte characters (the literal used to
        // be mis-encoded, which hid the CJK characters it is meant to cover).
        let data = "Hello, 世界!".as_bytes();
        let mut reader = DecodingReader::new(&data[..]);
        let mut result = Vec::new();
        let mut buf = [0u8; 1];
        loop {
            let n = reader.read(&mut buf).unwrap();
            if n == 0 {
                break;
            }
            result.extend_from_slice(&buf[..n]);
        }
        assert_eq!(String::from_utf8(result).unwrap(), "Hello, 世界!");
    }
}
// Tests of the `BufRead` implementation (`fill_buf` / `consume`) of `DecodingReader`.
mod bufread_interface {
use super::*;
use pretty_assertions::assert_eq;
use std::io::BufRead;
/// Consumed bytes are skipped by the next `fill_buf` call.
#[test]
fn fill_buf_and_consume() {
let data = b"Hello, World!";
let mut reader = DecodingReader::new(&data[..]);
let buf = reader.fill_buf().unwrap();
assert!(!buf.is_empty());
assert_eq!(buf[0], b'H');
// Skip "Hello"; the next visible byte must be the comma
reader.consume(5);
let buf = reader.fill_buf().unwrap();
assert!(!buf.is_empty());
assert_eq!(buf[0], b',');
}
/// Repeated `fill_buf` + full `consume` yields the whole input, then EOF.
#[test]
fn partial_consume_then_read_more() {
let data = b"Hello, World!";
let mut reader = DecodingReader::new(&data[..]);
let mut result = Vec::new();
loop {
let buf = reader.fill_buf().unwrap();
if buf.is_empty() {
break;
}
result.extend_from_slice(buf);
let len = buf.len();
reader.consume(len);
}
assert_eq!(std::str::from_utf8(&result).unwrap(), "Hello, World!");
// Once drained, the reader keeps reporting end of stream
let buf = reader.fill_buf().unwrap();
assert!(buf.is_empty());
}
/// Calling `fill_buf` repeatedly after EOF keeps returning an empty slice.
#[test]
fn fill_buf_after_eof_is_idempotent() {
let data = b"Hello";
let mut reader = DecodingReader::new(&data[..]);
loop {
let buf = reader.fill_buf().unwrap();
if buf.is_empty() {
break;
}
let len = buf.len();
reader.consume(len);
}
for _ in 0..3 {
let buf = reader.fill_buf().unwrap();
assert!(buf.is_empty());
}
}
/// Consuming more than was buffered trips the assertion in `consume`
/// (a `debug_assert!`, so this holds for builds with debug assertions).
#[test]
#[should_panic(expected = "consume")]
fn consume_overflow_panics_in_debug() {
let data = b"Hi";
let mut reader = DecodingReader::new(&data[..]);
let _ = reader.fill_buf().unwrap();
reader.consume(100);
}
}
// Tests of the simple accessors of `DecodingReader`.
mod accessors {
use super::*;
use pretty_assertions::assert_eq;
use std::io::Cursor;
/// `get_ref` exposes the wrapped reader.
#[test]
fn get_ref() {
let data = b"Hello";
let cursor = Cursor::new(data.to_vec());
let reader = DecodingReader::new(cursor);
assert_eq!(reader.get_ref().get_ref(), data);
}
/// Changes made through `get_mut` are visible on the wrapped reader.
#[test]
fn get_mut() {
let data = b"Hello";
let cursor = Cursor::new(data.to_vec());
let mut reader = DecodingReader::new(cursor);
reader.get_mut().set_position(2);
assert_eq!(reader.get_ref().position(), 2);
}
/// `into_inner` gives the wrapped reader back.
#[test]
fn into_inner() {
let data = b"Hello";
let cursor = Cursor::new(data.to_vec());
let reader = DecodingReader::new(cursor);
let inner = reader.into_inner();
assert_eq!(inner.get_ref(), data);
}
/// A freshly created reader reports UTF-8 as its encoding.
#[test]
fn encoding_default_is_utf8() {
let reader = DecodingReader::new(&b"Hello"[..]);
assert_eq!(reader.encoding(), encoding_rs::UTF_8);
}
}
// Tests of encoding detection and of `DecodingReader::set_encoding`.
mod encoding_switching {
use super::*;
use pretty_assertions::assert_eq;
use std::io::BufRead;
/// After reading BOM-prefixed UTF-16 LE input, `encoding()` reports UTF-16 LE.
#[test]
fn encoding_reflects_detection() {
let data = utf16le_with_bom("Hello");
let mut reader = DecodingReader::new(&data[..]);
let _ = read_all(&mut reader).unwrap();
assert_eq!(reader.encoding(), encoding_rs::UTF_16LE);
}
/// `set_encoding` before any read changes the reported encoding.
#[test]
fn set_encoding_changes_encoding() {
let mut reader = DecodingReader::new(&b"Hello"[..]);
assert_eq!(reader.encoding(), encoding_rs::UTF_8);
reader.set_encoding(encoding_rs::UTF_16LE);
assert_eq!(reader.encoding(), encoding_rs::UTF_16LE);
}
/// Output that is already decoded and buffered survives an encoding switch.
#[test]
fn set_encoding_preserves_buffered_output() {
let data = b"Hello";
let mut reader = DecodingReader::new(&data[..]);
let buf = reader.fill_buf().unwrap();
assert_eq!(buf, b"Hello");
// Nothing was consumed, so the same bytes must still be served
reader.set_encoding(encoding_rs::WINDOWS_1252);
assert_eq!(reader.encoding(), encoding_rs::WINDOWS_1252);
let buf = reader.fill_buf().unwrap();
assert_eq!(buf, b"Hello");
}
/// Setting the encoding that is already in use changes nothing.
#[test]
fn set_encoding_same_as_detected_is_noop() {
let data = b"Hello, World!";
let mut reader = DecodingReader::new(&data[..]);
let first_chunk;
{
let buf = reader.fill_buf().unwrap();
assert!(buf.len() > 0);
first_chunk = std::str::from_utf8(buf).unwrap().to_string();
let n = buf.len();
reader.consume(n);
}
assert_eq!(reader.encoding(), encoding_rs::UTF_8);
reader.set_encoding(encoding_rs::UTF_8);
assert_eq!(reader.encoding(), encoding_rs::UTF_8);
let rest = read_all(&mut reader).unwrap();
assert_eq!(format!("{first_chunk}{rest}"), "Hello, World!");
}
/// Switching the encoding after part of the output was consumed still
/// yields the remaining text.
#[test]
fn set_encoding_mid_stream() {
let data = b"Hello, World!";
let mut reader = DecodingReader::new(&data[..]);
let buf = reader.fill_buf().unwrap();
let n = std::cmp::min(buf.len(), 5);
reader.consume(n);
assert_eq!(reader.encoding(), encoding_rs::UTF_8);
reader.set_encoding(encoding_rs::WINDOWS_1252);
assert_eq!(reader.encoding(), encoding_rs::WINDOWS_1252);
let rest = read_all(&mut reader).unwrap();
assert_eq!(rest, ", World!");
}
}
/// Runs every sample text through every applicable source encoding and
/// checks that `DecodingReader` reproduces the text.
mod matrix_decoding_tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A labelled sample text.
    struct TestCase {
        label: &'static str,
        text: &'static str,
    }

    // The non-ASCII samples below were restored from mis-encoded literals so
    // that they match their labels again: a single multi-byte character,
    // BMP characters outside ASCII, and characters outside the BMP (which
    // need surrogate pairs in UTF-16).
    const CASES: &[TestCase] = &[
        TestCase {
            label: "empty",
            text: "",
        },
        TestCase {
            label: "single_multibyte",
            text: "€",
        },
        TestCase {
            label: "ascii",
            text: "Hello",
        },
        TestCase {
            label: "multibyte",
            text: "Hello, 世界! 🎉",
        },
        TestCase {
            label: "surrogate_pairs",
            text: "Music: 𝄞🎵",
        },
        TestCase {
            label: "xml_declaration",
            text: "<?xml version=\"1.0\"?><root/>",
        },
    ];

    /// Sample texts that are larger than the 8192-byte output buffer of the
    /// reader.
    fn large_cases() -> Vec<(&'static str, String)> {
        vec![
            ("large_ascii", "abcdefghij".repeat(1000)),
            ("large_multibyte", "Hello, 世界! 🎉 ".repeat(500)),
        ]
    }

    /// Source encodings the sample texts are serialized to.
    enum Encoding {
        Utf8,
        Utf8Bom,
        Utf16Le,
        Utf16Be,
        Utf16LeNoBom,
        Utf16BeNoBom,
    }

    impl Encoding {
        /// Serializes `text` in this encoding (including the BOM, if any).
        fn encode(&self, text: &str) -> Vec<u8> {
            match self {
                Encoding::Utf8 => text.as_bytes().to_vec(),
                Encoding::Utf8Bom => {
                    let mut out = vec![0xEF, 0xBB, 0xBF];
                    out.extend_from_slice(text.as_bytes());
                    out
                }
                Encoding::Utf16Le => utf16le_with_bom(text),
                Encoding::Utf16Be => utf16be_with_bom(text),
                Encoding::Utf16LeNoBom => utf16le_no_bom(text),
                Encoding::Utf16BeNoBom => utf16be_no_bom(text),
            }
        }

        /// Name of the encoding used in assertion messages.
        fn label(&self) -> &'static str {
            match self {
                Encoding::Utf8 => "utf8",
                Encoding::Utf8Bom => "utf8_bom",
                Encoding::Utf16Le => "utf16le",
                Encoding::Utf16Be => "utf16be",
                Encoding::Utf16LeNoBom => "utf16le_no_bom",
                Encoding::Utf16BeNoBom => "utf16be_no_bom",
            }
        }

        /// Encodings that the reader can detect for `text`.
        ///
        /// UTF-16 without a BOM is only detectable when the text starts with
        /// an XML declaration, so those variants are added only in that case.
        fn all_for(text: &str) -> Vec<Encoding> {
            let mut encs = vec![
                Encoding::Utf8,
                Encoding::Utf8Bom,
                Encoding::Utf16Le,
                Encoding::Utf16Be,
            ];
            if text.starts_with("<?xml") {
                encs.push(Encoding::Utf16LeNoBom);
                encs.push(Encoding::Utf16BeNoBom);
            }
            encs
        }
    }

    /// The whole input is available to the reader at once.
    #[test]
    fn bulk_read() {
        for case in CASES {
            for enc in Encoding::all_for(case.text) {
                let data = enc.encode(case.text);
                let mut reader = DecodingReader::new(&data[..]);
                assert_eq!(
                    read_all(&mut reader).unwrap(),
                    case.text,
                    "bulk_read failed: case={}, encoding={}",
                    case.label,
                    enc.label(),
                );
            }
        }
        for (label, text) in large_cases() {
            for enc in Encoding::all_for(&text) {
                let data = enc.encode(&text);
                let mut reader = DecodingReader::new(&data[..]);
                assert_eq!(
                    read_all(&mut reader).unwrap(),
                    text,
                    "bulk_read failed: case={}, encoding={}",
                    label,
                    enc.label(),
                );
            }
        }
    }

    /// The input arrives in tiny chunks, so BOMs and multi-byte sequences are
    /// split across reads of the inner reader.
    #[test]
    fn chunked_read() {
        for case in CASES {
            for enc in Encoding::all_for(case.text) {
                for chunk_size in [1, 2, 3, 4, 5] {
                    let data = enc.encode(case.text);
                    let mut reader = DecodingReader::new(BufReader::new(ChunkedReader::new(
                        &data, chunk_size,
                    )));
                    assert_eq!(
                        read_all(&mut reader).unwrap(),
                        case.text,
                        "chunked_read failed: case={}, encoding={}, chunk_size={}",
                        case.label,
                        enc.label(),
                        chunk_size,
                    );
                }
            }
        }
    }

    /// Same as `chunked_read`, but with the large sample texts.
    #[test]
    fn large_chunked_read() {
        for (label, text) in large_cases() {
            for enc in Encoding::all_for(&text) {
                for chunk_size in [1, 2, 3, 4, 5] {
                    let data = enc.encode(&text);
                    let mut reader = DecodingReader::new(BufReader::new(ChunkedReader::new(
                        &data, chunk_size,
                    )));
                    assert_eq!(
                        read_all(&mut reader).unwrap(),
                        text,
                        "large_chunked_read failed: case={}, encoding={}, chunk_size={}",
                        label,
                        enc.label(),
                        chunk_size,
                    );
                }
            }
        }
    }
}
}