use super::hex::byte_to_two_digit_hex;
use super::hex::char_to_four_digit_hex;
use super::hex::char_to_six_digit_hex;
use super::hex::char_to_surrogate_pair;
use super::hex::dword_to_eight_digit_hex;
use super::EncodingMethod;
use super::EncodingStandard;
use super::EscapeType;
use super::C_ESCAPES;
use super::JSON_ESCAPES;
use super::PYTHON_ESCAPES;
use super::RUST_ESCAPES;
use super::TOML_ESCAPES;
use super::TRIVET_ESCAPES;
use crate::strings::StringStandard;
use std::collections::BTreeMap;
#[cfg(not(feature = "no_ucd"))]
use {crate::strings::UCD, std::rc::Rc};
/// Builds the reverse of the [`UCD`] table: a map from character to name.
///
/// Entries are inserted in the order `UCD` yields them, so when a character
/// appears more than once the name from its last entry is the one retained.
/// Every call constructs a fresh map and hands it back behind a new [`Rc`].
#[cfg(not(feature = "no_ucd"))]
pub fn get_ucd_reverse() -> Rc<BTreeMap<char, &'static str>> {
    let mut names_by_char = BTreeMap::new();
    // `extend` inserts pair by pair, so later duplicates overwrite earlier ones.
    names_by_char.extend(UCD.iter().map(|&(name, ch)| (ch, name)));
    Rc::new(names_by_char)
}
/// Configurable encoder that rewrites a string using escape sequences.
///
/// All fields are public, so an encoder can be tuned by assigning to them
/// directly as well as through the methods on the type.
pub struct StringEncoder {
/// Character written at the start of every escape sequence.
pub escape_char: char,
/// When `true`, a character in the ASCII range that needs a numeric escape
/// is written as `escape_char`, `ascii_escape`, and two hex digits.
pub use_ascii_escapes: bool,
/// Letter that introduces a two-hex-digit escape.
pub ascii_escape: char,
/// Letter that introduces a four-hex-digit escape.
pub low_unicode_escape: char,
/// Letter that introduces an eight-hex-digit escape.
pub high_unicode_escape: char,
/// Letter that introduces a braced hex escape (`{` digits `}`).
pub brace_unicode_escape: char,
/// Letter that introduces a braced name escape (`{` name `}`).
pub named_unicode_escape: char,
/// When `true` (and the `no_ucd` feature is off), a character that has an
/// entry in `ucd` is written as a named escape rather than a numeric one.
pub use_names: bool,
/// Selects which non-control characters are escaped when encoding a string.
pub encoding_standard: EncodingStandard,
/// Selects the numeric escape form used for characters that must be escaped.
pub encoding_method: EncodingMethod,
/// Characters with a dedicated escape, mapped to the text that is written
/// after `escape_char` for them.
pub escapes: BTreeMap<char, String>,
/// Lookup from character to name, used for named escapes.
#[cfg(not(feature = "no_ucd"))]
pub ucd: Rc<BTreeMap<char, &'static str>>,
}
impl StringEncoder {
pub fn new() -> Self {
let mut encoder = StringEncoder {
escape_char: '\\',
use_ascii_escapes: true,
ascii_escape: 'x',
low_unicode_escape: 'u',
high_unicode_escape: 'U',
brace_unicode_escape: 'u',
named_unicode_escape: 'N',
encoding_method: EncodingMethod::Braced8,
encoding_standard: EncodingStandard::OnlyControl,
use_names: true,
escapes: BTreeMap::new(),
#[cfg(not(feature = "no_ucd"))]
ucd: get_ucd_reverse(),
};
encoder.set(StringStandard::Trivet);
encoder
}
fn install(&mut self, table: &[(char, EscapeType)]) {
let mut tree = BTreeMap::new();
for (ch, escape_type) in table {
match escape_type {
EscapeType::BraceU18 => {
self.brace_unicode_escape = *ch;
self.encoding_method = EncodingMethod::Braced8;
}
EscapeType::BraceU16 => {
self.brace_unicode_escape = *ch;
self.encoding_method = EncodingMethod::Braced6;
}
EscapeType::BracketUNamed => {
self.named_unicode_escape = *ch;
}
EscapeType::Char(code) => {
tree.insert(*code, ch.to_string());
}
EscapeType::Discard | EscapeType::DiscardWS | EscapeType::Undefined => {
}
EscapeType::NakedASCII => {
self.ascii_escape = *ch;
self.use_ascii_escapes = true;
}
EscapeType::NakedByte => {
self.ascii_escape = *ch;
self.use_ascii_escapes = true;
}
EscapeType::NakedU4 => {
self.low_unicode_escape = *ch;
self.encoding_method = EncodingMethod::Naked4;
self.encoding_standard = EncodingStandard::EncodeAbove(0xffff);
}
EscapeType::NakedU8 => {
self.high_unicode_escape = *ch;
self.encoding_method = EncodingMethod::Naked48;
}
}
}
self.escapes = tree;
}
pub fn set(&mut self, std: StringStandard) {
self.use_ascii_escapes = false;
self.use_names = false;
self.encoding_standard = EncodingStandard::OnlyControl;
match std {
StringStandard::C => {
self.install(&C_ESCAPES);
}
StringStandard::JSON => {
self.install(&JSON_ESCAPES);
}
StringStandard::TOML => {
self.install(&TOML_ESCAPES);
}
StringStandard::Python => {
self.install(&PYTHON_ESCAPES);
}
StringStandard::Rust => {
self.install(&RUST_ESCAPES);
}
StringStandard::Trivet => {
self.install(&TRIVET_ESCAPES);
}
}
}
/// Appends an escape sequence for `ch` to `result`.
///
/// The form is chosen in this order:
///
/// 1. If `use_ascii_escapes` is set and `ch` is ASCII: the escape character,
///    `ascii_escape`, and two hex digits.
/// 2. If `use_names` is set, the `no_ucd` feature is off, and `ch` has an
///    entry in `ucd`: the escape character, `named_unicode_escape`, and the
///    name in braces.
/// 3. Otherwise the numeric form selected by `encoding_method`.
fn encode_character(&self, ch: char, result: &mut String) {
    let esc = self.escape_char;

    if self.use_ascii_escapes && ch.is_ascii() {
        result.push(esc);
        result.push(self.ascii_escape);
        result.extend(&byte_to_two_digit_hex(ch as u8)[..2]);
        return;
    }

    #[cfg(not(feature = "no_ucd"))]
    {
        // Only consult the name table when named escapes are enabled.
        let known_name = if self.use_names {
            self.ucd.get(&ch)
        } else {
            None
        };
        if let Some(name) = known_name {
            result.push(esc);
            result.push(self.named_unicode_escape);
            result.push('{');
            result.push_str(name);
            result.push('}');
            return;
        }
    }

    let beyond_bmp = ch > '\u{ffff}';
    match self.encoding_method {
        // Four-digit escapes; characters above U+FFFF become a surrogate
        // pair, i.e. two consecutive four-digit escapes.
        EncodingMethod::Naked4 if beyond_bmp => {
            let digits = char_to_surrogate_pair(ch);
            result.push(esc);
            result.push(self.low_unicode_escape);
            result.extend(&digits[..4]);
            result.push(esc);
            result.push(self.low_unicode_escape);
            result.extend(&digits[4..8]);
        }
        EncodingMethod::Naked4 => {
            result.push(esc);
            result.push(self.low_unicode_escape);
            result.extend(&char_to_four_digit_hex(ch)[..4]);
        }
        // Both braced methods write six hex digits between braces.
        EncodingMethod::Braced6 | EncodingMethod::Braced8 => {
            result.push(esc);
            result.push(self.brace_unicode_escape);
            result.push('{');
            result.extend(&char_to_six_digit_hex(ch)[..6]);
            result.push('}');
        }
        // One two-digit escape per byte of the code point value, least
        // significant byte first, stopping after the highest non-zero byte
        // (a value of zero still produces a single escape).
        EncodingMethod::Naked2 => {
            let bytes = (ch as u32).to_le_bytes();
            let count = bytes.iter().rposition(|&b| b != 0).map_or(1, |top| top + 1);
            for &byte in &bytes[..count] {
                result.push(esc);
                result.push(self.ascii_escape);
                result.extend(&byte_to_two_digit_hex(byte)[..2]);
            }
        }
        // Four digits up to U+FFFF, eight digits above it.
        EncodingMethod::Naked48 => {
            result.push(esc);
            if beyond_bmp {
                result.push(self.high_unicode_escape);
                result.extend(&dword_to_eight_digit_hex(ch as u32)[..8]);
            } else {
                result.push(self.low_unicode_escape);
                result.extend(&char_to_four_digit_hex(ch)[..4]);
            }
        }
    }
}
/// Returns `value` with characters replaced by escape sequences according to
/// the encoder's configuration.
///
/// For each character, in order:
///
/// * a character present in `escapes` is written as the escape character
///   followed by its mapped text;
/// * otherwise a control character is always written as an escape sequence;
/// * otherwise `encoding_standard` decides whether the character is escaped
///   or copied through unchanged.
pub fn encode(&self, value: &str) -> String {
    let mut result = String::new();
    for ch in value.chars() {
        // Dedicated escapes take priority over every other rule.
        if let Some(tail) = self.escapes.get(&ch) {
            result.push(self.escape_char);
            result.push_str(tail);
            continue;
        }

        let code = ch as u32;
        let must_escape = ch.is_control()
            || match self.encoding_standard {
                EncodingStandard::ASCII => !ch.is_ascii(),
                EncodingStandard::EncodeAbove(limit) => code > limit,
                EncodingStandard::EncodeRanges(ref ranges) => {
                    ranges.iter().any(|range| range.contains(&code))
                }
                EncodingStandard::OnlyControl => false,
            };

        if must_escape {
            self.encode_character(ch, &mut result);
        } else {
            result.push(ch);
        }
    }
    result
}
}
impl Default for StringEncoder {
fn default() -> Self {
Self::new()
}
}