use std::str;
use helpers;
pub(crate) fn encode(encoder: &super::Encoder, v: &[u8]) -> String {
let mut index = 0;
let len = v.len();
let mut out = String::with_capacity(len + len / 8);
while index < len {
let old_offset = index;
macro_rules! maybe_ascii {
($i: expr) => {{
let b = v[$i];
match b {
helpers::BSLASH => helpers::escape_u8(&mut out, encoder, b),
0x20..=0x7e => out.push(b as char), 0x00..=0x1F | 0x7f..=0xFF => helpers::escape_u8(&mut out, encoder, b),
}
}};
}
macro_rules! escape_them {
() => {{
for i in old_offset..(index + 1) {
maybe_ascii!(i);
}
index += 1;
continue;
}};
}
macro_rules! write_them {
() => {{
out.push_str(&str::from_utf8(&v[old_offset..(index + 1)]).unwrap());
}};
}
macro_rules! next {
() => {{
index += 1;
if index >= len {
index -= 1;
escape_them!();
}
v[index]
}};
}
let first = v[index];
if first >= 128 {
let w = UTF8_CHAR_WIDTH[first as usize];
match w {
2 => {
if next!() & !CONT_MASK != TAG_CONT_U8 {
escape_them!(); }
}
3 => {
match (first, next!()) {
(0xE0, 0xA0..=0xBF)
| (0xE1..=0xEC, 0x80..=0xBF)
| (0xED, 0x80..=0x9F)
| (0xEE..=0xEF, 0x80..=0xBF) => {}
_ => escape_them!(), }
if next!() & !CONT_MASK != TAG_CONT_U8 {
escape_them!(); }
}
4 => {
match (first, next!()) {
(0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
_ => escape_them!(), }
if next!() & !CONT_MASK != TAG_CONT_U8 {
escape_them!(); }
if next!() & !CONT_MASK != TAG_CONT_U8 {
escape_them!(); }
}
_ => escape_them!(), }
write_them!();
index += 1;
} else {
maybe_ascii!(index);
index += 1;
}
}
out
}
static UTF8_CHAR_WIDTH: [u8; 256] = [
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ];
const CONT_MASK: u8 = 0b0011_1111;
const TAG_CONT_U8: u8 = 0b1000_0000;
// Checks `encode` with the default encoder on well-formed UTF-8 input: text
// without backslashes or newlines must come back unchanged, while backslashes
// and newlines must come back in escaped form.
#[test]
fn sanity_encode() {
// Encodes `s` with `Encoder::new()` and additionally verifies that the
// produced bytes are valid UTF-8 (the `unwrap` panics otherwise).
fn enc(s: &str) -> String {
let out = encode(&super::Encoder::new(), s.as_bytes());
let _ = ::std::str::from_utf8(&out.as_bytes()).unwrap();
out
}
// Asserts that encoding leaves `s` exactly as it is.
fn assert_enc(s: &str) {
assert_eq!(enc(s), s);
}
// Plain ASCII followed by samples of two- and three-byte characters,
// including combining marks; none of them may be altered.
assert_enc("foo bar");
assert_enc("¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ");
assert_enc(" ʰ ʱ ʲ ʳ ʴ ʵ ʶ ʷ ʸ ʹ ʺ ʻ");
assert_enc("܀ ܁ ܂ ܃ ܄ ܅ ܆ ܇ ܈ ܉ ܊ ܋ ܌ ܍ ");
assert_enc("Ꭰ Ꭱ Ꭲ Ꭳ Ꭴ Ꭵ Ꭶ Ꭷ Ꭸ Ꭹ");
assert_enc("ἀ ἁ ἂ ἃ ἄ ἅ ἆ ἇ Ἀ Ἁ");
assert_enc(" ‐ ");
assert_enc("‑ ‒ – — ― ‖ ‗ ‘ ’ ‚ ‛ “");
assert_enc(" ⃐ ⃑ ⃒ ⃓ ⃔ ⃕ ⃖ ⃗ ⃘ ⃙ ⃚ ⃛ ⃜ ⃝ ⃞ ⃟ ⃠ ⃡ ⃢ ⃣ ⃤ ⃥ ⃦ ⃧ ⃨ ⃩ ⃪ ");
// A single backslash in the input (written `\\` in the ordinary literal) is
// expected as two backslashes in the output (raw literal below).
assert_eq!(
enc("¡ ¢ £ ¤ \\¥ ¦ § ¨ © ª « \\¬ "),
r"¡ ¢ £ ¤ \\¥ ¦ § ¨ © ª « \\¬ "
);
// A newline byte in the input is expected as the two characters `\` and `n`.
assert_eq!(
enc("Ā ā Ă \nă Ą ą Ć\n ć Ĉ ĉ\n"),
r"Ā ā Ă \nă Ą ą Ć\n ć Ĉ ĉ\n",
);
}
/// Bytes that are not printable text are escaped by the default encoder while
/// the well-formed characters around them stay readable.
#[test]
fn sanity_encode_binary() {
    // Well-formed text, three ASCII control bytes, then BEL, DEL and 0xFE
    // (a byte that is escaped on its own), then well-formed text again.
    let parts: [&[u8]; 4] = [
        "¡ ¢ £".as_bytes(),
        b"\t\n\r",
        b"\x07\x7f\xFE",
        "¤ ¥ ¦".as_bytes(),
    ];
    let input: Vec<u8> = parts.concat();

    let encoded = encode(&super::Encoder::new(), &input);

    assert_eq!(encoded, r"¡ ¢ £\t\n\r\x07\x7F\xFE¤ ¥ ¦");
}
/// With `Encoder::pretty()` newlines are expected to survive encoding as real
/// line breaks, so this input must come back unchanged.
#[test]
fn sanity_encode_pretty() {
    let text = "foo\nbar\n";
    let pretty = super::Encoder::pretty();

    let encoded = encode(&pretty, text.as_bytes());

    assert_eq!(text, encoded);
}