use crate::decoder::Decode;
use crate::decoder::Decoder;
/// Number of `char`s the `chunks` test expects each byte fixture below to decode to.
const LENGTH: usize = 56;
/// UTF-8 fixture without a byte-order mark; `check_utf8_no_bom` expects it to decode to `TEXT`.
const BYTES: &[u8] = &[
0xe1, 0xb8, 0xbc, 0xc6, 0xa1, 0xe1, 0xb6, 0x89, 0xc3, 0xab, 0xe1, 0xb6, 0x86, 0x20, 0xc8, 0x8b,
0xe1, 0xb9, 0x95, 0xc5, 0xa1, 0xe1, 0xb6, 0x99, 0xe1, 0xb9, 0x81, 0x20, 0xe1, 0xb8, 0x8d, 0xe1,
0xbb, 0xa1, 0xe1, 0xb8, 0xbd, 0xc7, 0xad, 0xe1, 0xb5, 0xb3, 0x20, 0xca, 0x82, 0xc7, 0x90, 0xc5,
0xa5, 0x20, 0xd3, 0x93, 0xe1, 0xb9, 0x81, 0xe1, 0xbb, 0x87, 0xe1, 0xba, 0x97, 0x2e, 0x0a, 0x4c,
0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72,
0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2e, 0x0a,
];
/// Same payload as `BYTES`, prefixed with the three-byte UTF-8 byte-order mark (EF BB BF).
const BYTES_BOM: &[u8] = &[
0xef, 0xbb, 0xbf, 0xe1, 0xb8, 0xbc, 0xc6, 0xa1, 0xe1, 0xb6, 0x89, 0xc3, 0xab, 0xe1, 0xb6, 0x86, 0x20, 0xc8, 0x8b,
0xe1, 0xb9, 0x95, 0xc5, 0xa1, 0xe1, 0xb6, 0x99, 0xe1, 0xb9, 0x81, 0x20, 0xe1, 0xb8, 0x8d, 0xe1,
0xbb, 0xa1, 0xe1, 0xb8, 0xbd, 0xc7, 0xad, 0xe1, 0xb5, 0xb3, 0x20, 0xca, 0x82, 0xc7, 0x90, 0xc5,
0xa5, 0x20, 0xd3, 0x93, 0xe1, 0xb9, 0x81, 0xe1, 0xbb, 0x87, 0xe1, 0xba, 0x97, 0x2e, 0x0a, 0x4c,
0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72,
0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2e, 0x0a,
];
/// UTF-16 fixture stored low byte first; it opens with the FF FE byte-order mark.
/// `check_utf16le` expects it to decode to `TEXT`.
const BYTES_UTF16LE: &[u8] = &[
0xff, 0xfe, 0x3c, 0x1e, 0xa1, 0x01, 0x89, 0x1d, 0xeb, 0x00, 0x86, 0x1d, 0x20, 0x00, 0x0b, 0x02,
0x55, 0x1e, 0x61, 0x01, 0x99, 0x1d, 0x41, 0x1e, 0x20, 0x00, 0x0d, 0x1e, 0xe1, 0x1e, 0x3d, 0x1e,
0xed, 0x01, 0x73, 0x1d, 0x20, 0x00, 0x82, 0x02, 0xd0, 0x01, 0x65, 0x01, 0x20, 0x00, 0xd3, 0x04,
0x41, 0x1e, 0xc7, 0x1e, 0x97, 0x1e, 0x2e, 0x00, 0x0a, 0x00, 0x4c, 0x00, 0x6f, 0x00, 0x72, 0x00,
0x65, 0x00, 0x6d, 0x00, 0x20, 0x00, 0x69, 0x00, 0x70, 0x00, 0x73, 0x00, 0x75, 0x00, 0x6d, 0x00,
0x20, 0x00, 0x64, 0x00, 0x6f, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x20, 0x00, 0x73, 0x00,
0x69, 0x00, 0x74, 0x00, 0x20, 0x00, 0x61, 0x00, 0x6d, 0x00, 0x65, 0x00, 0x74, 0x00, 0x2e, 0x00,
0x0a, 0x00,
];
/// UTF-16 fixture stored high byte first; it opens with the FE FF byte-order mark.
/// `check_utf16be` expects it to decode to `TEXT`.
const BYTES_UTF16BE: &[u8] = &[
0xfe, 0xff, 0x1e, 0x3c, 0x01, 0xa1, 0x1d, 0x89, 0x00, 0xeb, 0x1d, 0x86, 0x00, 0x20, 0x02, 0x0b,
0x1e, 0x55, 0x01, 0x61, 0x1d, 0x99, 0x1e, 0x41, 0x00, 0x20, 0x1e, 0x0d, 0x1e, 0xe1, 0x1e, 0x3d,
0x01, 0xed, 0x1d, 0x73, 0x00, 0x20, 0x02, 0x82, 0x01, 0xd0, 0x01, 0x65, 0x00, 0x20, 0x04, 0xd3,
0x1e, 0x41, 0x1e, 0xc7, 0x1e, 0x97, 0x00, 0x2e, 0x00, 0x0a, 0x00, 0x4c, 0x00, 0x6f, 0x00, 0x72,
0x00, 0x65, 0x00, 0x6d, 0x00, 0x20, 0x00, 0x69, 0x00, 0x70, 0x00, 0x73, 0x00, 0x75, 0x00, 0x6d,
0x00, 0x20, 0x00, 0x64, 0x00, 0x6f, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x20, 0x00, 0x73,
0x00, 0x69, 0x00, 0x74, 0x00, 0x20, 0x00, 0x61, 0x00, 0x6d, 0x00, 0x65, 0x00, 0x74, 0x00, 0x2e,
0x00, 0x0a,
];
/// Expected decoding of every byte fixture above: a line of non-ASCII letters
/// followed by a plain ASCII line, each terminated by `\n`.
const TEXT: &str = "Ḽơᶉëᶆ ȋṕšᶙṁ ḍỡḽǭᵳ ʂǐť ӓṁệẗ.\nLorem ipsum dolor sit amet.\n";
/// An empty input yields `None` on the first read and on every read after it.
#[test]
fn read_eof() {
    let mut empty = Decode::new(Vec::new());
    for _ in 0..2 {
        assert_eq!(empty.next(), None);
    }
}
/// After the four ASCII characters are consumed, the decoder keeps reporting
/// `None` instead of failing or restarting.
#[test]
fn read_eof_2() {
    let mut decoder = Decode::new(b"xxxx".to_vec());
    // Every input byte comes back as its own character.
    for _ in 0..4 {
        assert_eq!(decoder.next(), Some('x'));
    }
    // Reading past the end is repeatable.
    for _ in 0..2 {
        assert_eq!(decoder.next(), None);
    }
}
/// Iterating the BOM-less UTF-8 fixture to exhaustion reproduces `TEXT`.
#[test]
fn check_utf8_no_bom() {
    let decoded = String::from_iter(Decode::new(BYTES.to_vec()));
    assert_eq!(decoded, TEXT);
}
/// Iterating the UTF-8 fixture that starts with a byte-order mark reproduces
/// `TEXT`, so the mark itself must not show up in the output.
#[test]
fn check_utf8_bom() {
    let decoded = String::from_iter(Decode::new(BYTES_BOM.to_vec()));
    assert_eq!(decoded, TEXT);
}
/// Iterating the little-endian UTF-16 fixture to exhaustion reproduces `TEXT`.
#[test]
fn check_utf16le() {
    let decoded = String::from_iter(Decode::new(BYTES_UTF16LE.to_vec()));
    assert_eq!(decoded, TEXT);
}
/// Iterating the big-endian UTF-16 fixture to exhaustion reproduces `TEXT`.
#[test]
fn check_utf16be() {
    let decoded = String::from_iter(Decode::new(BYTES_UTF16BE.to_vec()));
    assert_eq!(decoded, TEXT);
}
/// Checks the first character decoded from well-formed and malformed UTF-8.
///
/// * Well-formed sequences (2, 3 and 4 bytes long) must decode to the same
///   character the standard library produces.
/// * Malformed sequences must yield either nothing or U+FFFD as their first
///   character, and the standard library must reject them too, which guards
///   the test data itself.
#[test]
fn check_invalid_utf8() {
    let well_formed: &[&[u8]] = &[b"\xc3\xb1", b"\xe2\x82\xa1", b"\xf0\x90\x8c\xbc"];
    let malformed: &[&[u8]] = &[
        b"\xc3\x28",
        b"\xa0\xa1",
        b"\xe2\x28\xa1",
        b"\xe2\x82\x28",
        b"\xf0\x28\x8c\xbc",
        b"\xf0\x90\x28\xbc",
        b"\xf0\x28\x8c\x28",
        b"\xf8\xa1\xa1\xa1\xa1",
        b"\xfc\xa1\xa1\xa1\xa1\xa1",
        b"\xf0\x90\x8c",
    ];

    for &sequence in well_formed {
        let mut decoder = Decode::new(sequence.to_vec());
        let first = match decoder.next() {
            Some(c) => c,
            None => panic!("Failed at {:?}", sequence),
        };
        let reference = std::str::from_utf8(sequence).unwrap();
        assert_eq!(reference, first.to_string());
    }

    for &sequence in malformed {
        let mut decoder = Decode::new(sequence.to_vec());
        // `None` is tolerated here; only a real character other than U+FFFD is a failure.
        match decoder.next() {
            Some(c) if c != char::REPLACEMENT_CHARACTER => {
                panic!("Failed at {:?}, got {:?}", sequence, c)
            }
            _ => {}
        }
        // Sanity check on the fixture: the standard library must reject it as well.
        if std::str::from_utf8(sequence).is_ok() {
            panic!("Rust incorrectly decoded invalid string {:?}", sequence);
        }
    }
}
/// Unpaired high surrogates in UTF-16 input decode to U+FFFD.
///
/// Each input is a byte-order mark, a space, and the code unit 0xD800. The
/// last two inputs append a 0x0000 unit, which is not a low surrogate. In
/// every case the expected output is a space, one replacement character and
/// then end of input.
#[test]
fn check_invalid_utf16() {
    let inputs: [&[u8]; 4] = [
        b"\xff\xfe\x20\x00\x00\xd8",
        b"\xfe\xff\x00\x20\xd8\x00",
        b"\xff\xfe\x20\x00\x00\xd8\x00\x00",
        b"\xfe\xff\x00\x20\xd8\x00\x00\x00",
    ];
    let expected = [Some(' '), Some(char::REPLACEMENT_CHARACTER), None];

    for input in inputs {
        let mut decoder = Decode::new(input.to_vec());
        for want in expected {
            assert_eq!(decoder.next(), want);
        }
    }

    // The bulk API must agree: the last input fills exactly two slots.
    let mut target = ['\0'; 10];
    let mut decoder = Decode::new(inputs[3].to_vec());
    assert_eq!(decoder.fill_n(10, &mut target), 2);
}
/// A well-formed surrogate pair (0xD800, 0xDC37) decodes to the single
/// character U+10037 in both byte orders, followed by end of input.
#[test]
fn valid_utf16_surrogate() {
    let inputs: [&[u8]; 2] = [
        b"\xff\xfe\x00\xd8\x37\xdc", // little-endian
        b"\xfe\xff\xd8\x00\xdc\x37", // big-endian
    ];
    for input in inputs {
        let mut decoder = Decode::new(input.to_vec());
        assert_eq!(decoder.next(), Some('\u{10037}'));
        assert_eq!(decoder.next(), None);
    }
}
/// Exercises the bulk-read calls (`next_n` and `fill_n`) on every fixture encoding.
///
/// For each encoding the test checks that:
/// * `next_n` asked for more characters than exist returns exactly `LENGTH`;
/// * `fill_n(8, ..)` reports 8, stores the first eight characters of `TEXT`
///   and leaves every later slot of the buffer untouched;
/// * a second `fill_n` on the same decoder continues where the first stopped
///   and reports only the remaining `LENGTH - 8` characters;
/// * `fill_n` on a fresh decoder with an oversized request reports `LENGTH`
///   and stores the whole text.
///
/// The untouched-slot check after the first `fill_n` starts at index 8, the
/// first slot past the reported count, so a stray write there is caught.
#[test]
fn chunks() {
    /// Size of the scratch buffer handed to `fill_n`.
    const CAPACITY: usize = 130;
    /// Sentinel the scratch buffer is pre-filled with, so writes past the
    /// reported count are detectable.
    const FILLER: char = '-';

    /// Asserts that `tail` still holds nothing but the sentinel.
    fn assert_untouched(tail: &[char], label: &str) {
        assert!(
            tail.iter().all(|&c| c == FILLER),
            "{}: fill_n wrote past the reported count: {:?}",
            label,
            tail
        );
    }

    let expected: Vec<char> = TEXT.chars().collect();
    let fixtures: [(&str, &[u8]); 4] = [
        ("utf-8", BYTES),
        ("utf-8 with BOM", BYTES_BOM),
        ("utf-16be", BYTES_UTF16BE),
        ("utf-16le", BYTES_UTF16LE),
    ];

    for (label, bytes) in fixtures {
        // Over-asking must return only what the input actually contains.
        let mut decoder = Decode::new(bytes.to_vec());
        assert_eq!(
            decoder.next_n(LENGTH * 2 + 7).len(),
            LENGTH,
            "{}: next_n length",
            label
        );

        // A short first read stores exactly the first eight characters.
        let mut array = [FILLER; CAPACITY];
        let mut decoder = Decode::new(bytes.to_vec());
        assert_eq!(decoder.fill_n(8, &mut array), 8, "{}: first fill_n", label);
        assert_eq!(array[..8], expected[..8], "{}: first fill_n content", label);
        assert_untouched(&array[8..], label);

        // The same decoder resumes after the eighth character; the buffer is
        // written from its start again.
        assert_eq!(
            decoder.fill_n(LENGTH, &mut array),
            LENGTH - 8,
            "{}: second fill_n",
            label
        );
        assert_eq!(
            array[..LENGTH - 8],
            expected[8..LENGTH],
            "{}: second fill_n content",
            label
        );
        assert_untouched(&array[LENGTH - 8..], label);

        // A fresh decoder with a request larger than the input (and the buffer).
        let mut decoder = Decode::new(bytes.to_vec());
        assert_eq!(
            decoder.fill_n(LENGTH * 6 + 7, &mut array),
            LENGTH,
            "{}: oversized fill_n",
            label
        );
        assert_eq!(
            array[..LENGTH],
            expected[..],
            "{}: oversized fill_n content",
            label
        );
        assert_untouched(&array[LENGTH..], label);
    }
}
/// A character outside the Basic Multilingual Plane, followed by ASCII,
/// decodes to the same three characters from UTF-8, UTF-16BE and UTF-16LE.
#[test]
fn high_surrogates() {
    let text: Vec<char> = "𐍊 7".chars().collect();
    let encodings: [&[u8]; 3] = [
        b"\xf0\x90\x8d\x8a\x20\x37",                 // UTF-8, no BOM
        b"\xfe\xff\xd8\x00\xdf\x4a\x00\x20\x00\x37", // UTF-16BE with BOM
        b"\xff\xfe\x00\xd8\x4a\xdf\x20\x00\x37\x00", // UTF-16LE with BOM
    ];
    encodings.iter().for_each(|bytes| {
        let mut decoder = Decode::new(bytes.to_vec());
        // Asking for more characters than exist returns just the three decoded ones.
        assert_eq!(decoder.next_n(10), text);
    });
}