#[cfg(not(feature = "no_ucd"))]
use super::ucd::UCD;
use super::C_ESCAPES;
use super::JSON_ESCAPES;
use super::PYTHON_ESCAPES;
use super::RUST_ESCAPES;
use super::TOML_ESCAPES;
use super::TRIVET_ESCAPES;
use crate::decoder::Decode;
use crate::strings::EscapeType;
use crate::strings::IllegalUnicodeProtocol;
use crate::strings::StringStandard;
use crate::strings::UnknownEscapeProtocol;
use crate::{
errors::{syntax_error, unexpected_character_error, ParseResult},
Loc, ParserCore,
};
use std::collections::BTreeMap;
#[cfg(not(feature = "no_ucd"))]
use std::rc::Rc;
const CAPACITY: usize = 64;
/// Build a fresh copy of the Unicode character-name database.
///
/// Every `(name, character)` entry of the `UCD` table is copied into a
/// `BTreeMap`, which is wrapped in an `Rc` (so parsers can share it) and then
/// boxed.
#[cfg(not(feature = "no_ucd"))]
pub fn get_ucd() -> Box<Rc<BTreeMap<&'static str, char>>> {
    let database: BTreeMap<&'static str, char> =
        UCD.iter().map(|(name, ch)| (*name, *ch)).collect();
    Box::new(Rc::new(database))
}
/// Configurable parser that converts raw text, optionally containing escape
/// sequences, into a `String`.
///
/// The public fields select the dialect; `set` fills them in for one of the
/// predefined `StringStandard` values.
#[derive(Clone)]
pub struct StringParser {
/// When true, `process` decodes escape sequences introduced by `escape_char`.
pub enable_escapes: bool,
/// The character that introduces an escape sequence.
pub escape_char: char,
/// When false, any character below `'\x20'` in the input is a syntax error.
pub permit_low_control_characters: bool,
/// What to do with an escape that is not in the escape table.
pub unknown_escape_protocol: UnknownEscapeProtocol,
/// When false, a surrogate escape followed by a second escape is rejected
/// with "Surrogate pairs are not permitted".
pub allow_surrogate_pairs: bool,
/// What to do when an escape yields a value that is not a valid `char`.
pub illegal_unicode_protocol: IllegalUnicodeProtocol,
/// When true, an otherwise undefined escape that starts with a digit in
/// `0..=7` is read as an octal escape of up to three digits.
pub allow_octal_escapes: bool,
/// When false, an octal escape with fewer than three digits is an error.
pub octal_escapes_are_flexible: bool,
// Maps the character that follows the escape character to its meaning.
escapes: BTreeMap<char, EscapeType>,
// Lookup table indexed by ASCII code; rebuilt from `escapes` by `fix_escapes`.
fast_escapes: [EscapeType; 128],
/// Shared map from (upper-case) Unicode character name to character, used
/// for named escapes.
#[cfg(not(feature = "no_ucd"))]
pub ucd: Rc<BTreeMap<&'static str, char>>,
}
impl StringParser {
#[cfg(not(feature = "no_ucd"))]
pub fn new() -> Self {
let mut parser = StringParser {
enable_escapes: true,
permit_low_control_characters: true,
escape_char: '\\',
allow_octal_escapes: true,
octal_escapes_are_flexible: true,
allow_surrogate_pairs: true,
illegal_unicode_protocol: IllegalUnicodeProtocol::ReplacementCharacter,
unknown_escape_protocol: UnknownEscapeProtocol::LiteralEscape,
escapes: BTreeMap::from(TRIVET_ESCAPES),
fast_escapes: [EscapeType::Undefined; 128],
ucd: *get_ucd(),
};
parser.fix_escapes();
parser
}
/// Create a parser with the default (Trivet) configuration.
///
/// This variant is compiled when the `no_ucd` feature is enabled, so the
/// parser carries no Unicode name database.
#[cfg(feature = "no_ucd")]
pub fn new() -> Self {
    let mut configured = Self {
        escape_char: '\\',
        enable_escapes: true,
        permit_low_control_characters: true,
        allow_surrogate_pairs: true,
        allow_octal_escapes: true,
        octal_escapes_are_flexible: true,
        unknown_escape_protocol: UnknownEscapeProtocol::LiteralEscape,
        illegal_unicode_protocol: IllegalUnicodeProtocol::ReplacementCharacter,
        escapes: BTreeMap::from(TRIVET_ESCAPES),
        fast_escapes: [EscapeType::Undefined; 128],
    };
    // Populate the ASCII lookup table from the escape map.
    configured.fix_escapes();
    configured
}
#[cfg(not(feature = "no_ucd"))]
pub fn new_from_db(ucd: &Rc<BTreeMap<&'static str, char>>) -> Self {
let mut parser = StringParser {
enable_escapes: true,
permit_low_control_characters: true,
escape_char: '\\',
allow_octal_escapes: true,
octal_escapes_are_flexible: true,
allow_surrogate_pairs: true,
illegal_unicode_protocol: IllegalUnicodeProtocol::ReplacementCharacter,
unknown_escape_protocol: UnknownEscapeProtocol::LiteralEscape,
escapes: BTreeMap::from(TRIVET_ESCAPES),
fast_escapes: [EscapeType::Undefined; 128],
ucd: ucd.clone(),
};
parser.fix_escapes();
parser
}
pub fn set(&mut self, std: StringStandard) {
match std {
StringStandard::Trivet => {
self.enable_escapes = true;
self.permit_low_control_characters = true;
self.escape_char = '\\';
self.allow_octal_escapes = true;
self.octal_escapes_are_flexible = true;
self.allow_surrogate_pairs = true;
self.illegal_unicode_protocol = IllegalUnicodeProtocol::ReplacementCharacter;
self.unknown_escape_protocol = UnknownEscapeProtocol::LiteralEscape;
self.escapes = BTreeMap::from(TRIVET_ESCAPES);
}
StringStandard::C => {
self.enable_escapes = true;
self.permit_low_control_characters = true;
self.escape_char = '\\';
self.allow_octal_escapes = true;
self.octal_escapes_are_flexible = true;
self.allow_surrogate_pairs = false;
self.illegal_unicode_protocol = IllegalUnicodeProtocol::ReplacementCharacter;
self.unknown_escape_protocol = UnknownEscapeProtocol::LiteralEscape;
self.escapes = BTreeMap::from(C_ESCAPES);
}
StringStandard::Rust => {
self.enable_escapes = true;
self.permit_low_control_characters = true;
self.escape_char = '\\';
self.allow_octal_escapes = false;
self.allow_surrogate_pairs = false;
self.illegal_unicode_protocol = IllegalUnicodeProtocol::Error;
self.unknown_escape_protocol = UnknownEscapeProtocol::Error;
self.escapes = BTreeMap::from(RUST_ESCAPES);
}
StringStandard::JSON => {
self.enable_escapes = true;
self.permit_low_control_characters = false;
self.escape_char = '\\';
self.allow_octal_escapes = false;
self.allow_surrogate_pairs = true;
self.illegal_unicode_protocol = IllegalUnicodeProtocol::ReplacementCharacter;
self.unknown_escape_protocol = UnknownEscapeProtocol::Error;
self.escapes = BTreeMap::from(JSON_ESCAPES);
}
StringStandard::TOML => {
self.enable_escapes = true;
self.permit_low_control_characters = false;
self.escape_char = '\\';
self.allow_octal_escapes = false;
self.allow_surrogate_pairs = false;
self.illegal_unicode_protocol = IllegalUnicodeProtocol::Error;
self.unknown_escape_protocol = UnknownEscapeProtocol::Error;
self.escapes = BTreeMap::from(TOML_ESCAPES);
}
StringStandard::Python => {
self.enable_escapes = true;
self.permit_low_control_characters = true;
self.escape_char = '\\';
self.allow_octal_escapes = true;
self.octal_escapes_are_flexible = true;
self.allow_surrogate_pairs = false;
self.illegal_unicode_protocol = IllegalUnicodeProtocol::ReplacementCharacter;
self.unknown_escape_protocol = UnknownEscapeProtocol::LiteralEscape;
self.escapes = BTreeMap::from(PYTHON_ESCAPES);
}
}
self.fix_escapes();
}
/// Replace the escape table with `escapes`.
///
/// Each key is the character that follows the escape character; the value
/// says how that escape is decoded. The ASCII lookup table is rebuilt so it
/// stays in step with the new map.
pub fn set_escapes(&mut self, escapes: BTreeMap<char, EscapeType>) {
self.escapes = escapes;
self.fix_escapes();
}
/// Rebuild the ASCII lookup table (`fast_escapes`) from the `escapes` map.
///
/// Every slot is first reset to `EscapeType::Undefined`; then each map entry
/// whose key has a code point inside the table is copied into its slot.
/// Entries for larger code points stay in the map only.
fn fix_escapes(&mut self) {
    self.fast_escapes = [EscapeType::Undefined; 128];
    for (&key, &value) in &self.escapes {
        // A checked lookup keeps U+0080 and above out of the 128-entry
        // table; indexing with such a key would panic.
        if let Some(slot) = self.fast_escapes.get_mut(key as usize) {
            *slot = value;
        }
    }
}
fn invalid_escape(&self, ch: char, loc: Loc, string: &mut String) -> ParseResult<()> {
match self.unknown_escape_protocol {
UnknownEscapeProtocol::Discard => Ok(()),
UnknownEscapeProtocol::DropEscape => {
string.push(ch);
Ok(())
}
UnknownEscapeProtocol::Error => Err(syntax_error(
loc,
format!("Invalid escape '{}{}'", self.escape_char, ch).as_str(),
)),
UnknownEscapeProtocol::LiteralEscape => {
string.push(self.escape_char);
string.push(ch);
Ok(())
}
UnknownEscapeProtocol::Replace(ch) => {
string.push(ch);
Ok(())
}
UnknownEscapeProtocol::ReplacementCharacter => {
string.push(char::REPLACEMENT_CHARACTER);
Ok(())
}
}
}
fn handle_illegal_unicode(&self, value: u32, loc: Loc, string: &mut String) -> ParseResult<()> {
match self.illegal_unicode_protocol {
IllegalUnicodeProtocol::Discard => Ok(()),
IllegalUnicodeProtocol::Error => Err(syntax_error(
loc,
format!("Value is not a valid Unicode code point: {:04x}", value).as_str(),
)),
IllegalUnicodeProtocol::Replace(ch) => {
string.push(ch);
Ok(())
}
IllegalUnicodeProtocol::ReplacementCharacter => {
string.push(char::REPLACEMENT_CHARACTER);
Ok(())
}
}
}
/// Finish decoding a surrogate pair whose first half, `first`, has already
/// been read.
///
/// If the next character is not the escape character, `first` is handed to
/// `handle_illegal_unicode`. Otherwise the following escape must be one of
/// the Unicode escape forms; its value becomes the second half. The halves
/// are combined into one code point and appended to `string`.
///
/// # Errors
///
/// Returns a syntax error at `loc` when the second escape is missing or
/// malformed, when surrogate pairs are disabled, or when the two values do
/// not form a high/low surrogate pair.
fn parse_surrogate_pair(
    &self,
    parser: &mut ParserCore,
    first: u32,
    loc: Loc,
    string: &mut String,
) -> ParseResult<()> {
    /// True when every character of `digits` is an ASCII hexadecimal digit.
    fn only_hex_digits(digits: &str) -> bool {
        digits.chars().all(|digit| digit.is_ascii_hexdigit())
    }

    if !parser.peek_and_consume(self.escape_char) {
        return self.handle_illegal_unicode(first, loc, string);
    }
    let ch = parser.peek();
    parser.consume();
    let second = match self.escapes.get(&ch) {
        Some(EscapeType::BraceU18) => self.parse_braced_hex(parser, 1, 8, true)?,
        Some(EscapeType::BraceU16) => self.parse_braced_hex(parser, 1, 6, false)?,
        Some(EscapeType::NakedU4) => {
            let digits = parser.peek_n(4);
            parser.consume_n(4);
            // `from_str_radix` accepts a leading '+', which is not valid in
            // an escape, so reject anything that is not a hex digit first.
            if !only_hex_digits(&digits) {
                return Err(syntax_error(
                    loc,
                    format!(
                        "Invalid hex value (ref:1) '{}': invalid digit found in string",
                        digits
                    )
                    .as_str(),
                ));
            }
            match u16::from_str_radix(&digits, 16) {
                Ok(value) => u32::from(value),
                Err(err) => {
                    return Err(syntax_error(
                        loc,
                        format!("Invalid hex value (ref:1) '{}': {}", digits, err).as_str(),
                    ))
                }
            }
        }
        Some(EscapeType::NakedU8) => {
            let digits = parser.peek_n(8);
            parser.consume_n(8);
            // Same guard as above: no sign characters inside the escape.
            if !only_hex_digits(&digits) {
                return Err(syntax_error(
                    loc,
                    format!(
                        "Invalid hex value (ref:2) '{}': invalid digit found in string",
                        digits
                    )
                    .as_str(),
                ));
            }
            match u32::from_str_radix(&digits, 16) {
                Ok(value) => value,
                Err(err) => {
                    return Err(syntax_error(
                        loc,
                        format!("Invalid hex value (ref:2) '{}': {}", digits, err).as_str(),
                    ))
                }
            }
        }
        _ => {
            return Err(syntax_error(loc,
                "Found what seems to be the first half of a surrogate pair, but no second half was found."
            ));
        }
    };
    if !self.allow_surrogate_pairs {
        return Err(syntax_error(loc, "Surrogate pairs are not permitted"));
    }
    // The first half must be a high surrogate and the second a low surrogate.
    if !(0xd800..0xdc00).contains(&first) || !(0xdc00..0xe000).contains(&second) {
        return Err(syntax_error(
            loc,
            format!("Invalid surrogate pair {:04x},{:04x}", first, second).as_str(),
        ));
    }
    // Standard UTF-16 decoding: ten bits from each half, offset by 0x10000.
    let value = (first - 0xD800) * 0x400 + (second - 0xDC00) + 0x10000;
    self.u32_to_char(value, loc, string)
}
/// Parse a hexadecimal number written between braces, such as `{1f600}`.
///
/// The number of digits must lie in `low..=high`. When `underscores` is
/// true the digits are gathered with `take_while_unless`, passing `'_'` as
/// the exception character; otherwise only hex digits are taken.
///
/// # Errors
///
/// Returns an error if either brace is missing or the digit count is out of
/// range.
fn parse_braced_hex(
    &self,
    parser: &mut ParserCore,
    low: usize,
    high: usize,
    underscores: bool,
) -> ParseResult<u32> {
    let start = parser.loc();
    if !parser.peek_and_consume('{') {
        return Err(unexpected_character_error(start, "{", parser.peek()));
    }
    let digits = match underscores {
        true => parser.take_while_unless(|ch| ch.is_ascii_hexdigit(), |ch| ch == '_'),
        false => parser.take_while(|ch| ch.is_ascii_hexdigit()),
    };
    if !parser.peek_and_consume('}') {
        return Err(unexpected_character_error(parser.loc(), "}", parser.peek()));
    }
    let count = digits.len();
    if count < low {
        return Err(syntax_error(start, "Too few digits given in escape"));
    }
    if count > high {
        return Err(syntax_error(start, "Too many digits given in escape"));
    }
    // Callers ask for between one and eight hex digits, which fits in a u32.
    Ok(u32::from_str_radix(&digits, 16).unwrap())
}
/// Append the character with code point `value` to `string`.
///
/// If `value` is not a valid `char`, the decision is delegated to
/// `handle_illegal_unicode`, which may substitute a character, skip it, or
/// return an error at `loc`.
fn u32_to_char(&self, value: u32, loc: Loc, string: &mut String) -> ParseResult<()> {
    if let Some(ch) = char::from_u32(value) {
        string.push(ch);
        return Ok(());
    }
    self.handle_illegal_unicode(value, loc, string)
}
fn parse_escape(&self, parser: &mut ParserCore, string: &mut String) -> ParseResult<()> {
let loc = parser.loc();
let mut ch = parser.peek();
parser.consume();
let esc_type = if ch.is_ascii() {
&self.fast_escapes[ch as usize]
} else if let Some(esc_type) = self.escapes.get(&ch) {
esc_type
} else {
&EscapeType::Undefined
};
match esc_type {
EscapeType::Char(rp) => {
string.push(*rp);
Ok(())
}
EscapeType::Undefined => {
if self.allow_octal_escapes && ('0'..='7').contains(&ch) {
let mut value = (ch as u32) - ('0' as u32);
for _ in 0..2 {
ch = parser.peek();
if ('0'..='7').contains(&ch) {
value *= 8;
value += (ch as u32) - ('0' as u32);
parser.consume();
} else {
if !self.octal_escapes_are_flexible {
return Err(syntax_error(
loc,
"Octal escape must have three digits",
));
}
break;
}
}
self.u32_to_char(value, loc, string)?;
return Ok(());
}
self.invalid_escape(ch, loc, string)?;
Ok(())
}
EscapeType::BraceU18 => {
let value = self.parse_braced_hex(parser, 1, 8, true)?;
if (0xd800..0xe000).contains(&value) {
self.parse_surrogate_pair(parser, value, loc, string)?
} else {
self.u32_to_char(value, loc, string)?
};
Ok(())
}
EscapeType::BraceU16 => {
let value = self.parse_braced_hex(parser, 1, 6, false)?;
if (0xd800..0xe000).contains(&value) {
self.parse_surrogate_pair(parser, value, loc, string)?
} else {
self.u32_to_char(value, loc, string)?
};
Ok(())
}
EscapeType::BracketUNamed => {
#[cfg(not(feature = "no_ucd"))]
{
if !parser.peek_and_consume('{') {
return Err(unexpected_character_error(loc, "{", parser.peek()));
}
let name = parser.take_while(|ch| ch != '}');
if !parser.peek_and_consume('}') {
return Err(unexpected_character_error(loc, "}", parser.peek()));
}
let name = name.to_uppercase();
match self.ucd.get(name.as_str()) {
Some(ch) => {
string.push(*ch);
Ok(())
}
None => Err(syntax_error(
loc,
format!("Unknown Unicode character name '{}'", name).as_str(),
)),
}
}
#[cfg(feature = "no_ucd")]
{
Err(syntax_error(loc, "Unicode name lookup is not enabled."))
}
}
EscapeType::Discard => Ok(()),
EscapeType::DiscardWS => {
parser.consume_ws_only();
Ok(())
}
EscapeType::NakedASCII => {
let digits = parser.peek_n(2);
parser.consume_n(2);
let value = match u8::from_str_radix(&digits, 16) {
Ok(value) => value,
Err(err) => {
return Err(syntax_error(
loc,
format!("Invalid ASCII hex value '{}': {}", digits, err).as_str(),
))
}
};
if value > 0x7f {
return Err(syntax_error(
loc,
format!("Invalid ASCII value (too high): '{}'", digits).as_str(),
));
}
string.push(unsafe { char::from_u32_unchecked(value as u32) });
Ok(())
}
EscapeType::NakedByte => {
let digits = parser.peek_n(2);
parser.consume_n(2);
let value = match u8::from_str_radix(&digits, 16) {
Ok(value) => value,
Err(err) => {
return Err(syntax_error(
loc,
format!("Invalid hex value (ref:3) '{}': {}", digits, err).as_str(),
))
}
} as u32;
string.push(char::from_u32(value).unwrap());
Ok(())
}
EscapeType::NakedU4 => {
let digits = parser.peek_n(4);
parser.consume_n(4);
let value = match u16::from_str_radix(&digits, 16) {
Ok(value) => value,
Err(err) => {
return Err(syntax_error(
loc,
format!("Invalid hex value (ref:4) '{}': {}", digits, err).as_str(),
))
}
} as u32;
if (0xd800..0xe000).contains(&value) {
return self.parse_surrogate_pair(parser, value, loc, string);
}
string.push(unsafe { char::from_u32_unchecked(value) });
Ok(())
}
EscapeType::NakedU8 => {
let digits = parser.peek_n(8);
parser.consume_n(8);
let value = match u32::from_str_radix(&digits, 16) {
Ok(value) => value,
Err(err) => {
return Err(syntax_error(
loc,
format!("Invalid hex value (ref:5) '{}': {}", digits, err).as_str(),
))
}
};
if (0xd800..0xe000).contains(&value) {
return self.parse_surrogate_pair(parser, value, loc, string);
}
match char::from_u32(value) {
Some(ch) => {
string.push(ch);
Ok(())
}
None => self.handle_illegal_unicode(value, loc, string),
}
}
}
}
/// Read up to `terminal`, decoding escapes and permitting control characters.
///
/// The terminal is consumed but not included in the result. Reaching the end
/// of input first is an error reported at the starting location.
fn parse_esc_con_ter(&self, parser: &mut ParserCore, terminal: char) -> ParseResult<String> {
    let mut text = String::with_capacity(CAPACITY);
    let start = parser.loc();
    while !parser.is_at_eof() {
        match parser.peek() {
            ch if ch == terminal => {
                parser.consume();
                return Ok(text);
            }
            ch if ch == self.escape_char => {
                parser.consume();
                self.parse_escape(parser, &mut text)?;
            }
            ch => {
                parser.consume();
                text.push(ch);
            }
        }
    }
    Err(syntax_error(start, "Found unterminated string."))
}
/// Read up to `terminal`, decoding escapes and rejecting control characters.
///
/// The terminal is consumed but not included in the result. A raw character
/// below `'\x20'` is an error at its own location; reaching the end of input
/// first is an error reported at the starting location.
fn parse_esc_ter(&self, parser: &mut ParserCore, terminal: char) -> ParseResult<String> {
    let mut text = String::with_capacity(CAPACITY);
    let start = parser.loc();
    while !parser.is_at_eof() {
        match parser.peek() {
            ch if ch == terminal => {
                parser.consume();
                return Ok(text);
            }
            // The control-character test comes before the escape test.
            ch if ch < '\x20' => {
                return Err(syntax_error(
                    parser.loc(),
                    &format!(
                        "Control characters are not permitted in strings: '{:?}'",
                        ch
                    ),
                ));
            }
            ch if ch == self.escape_char => {
                parser.consume();
                self.parse_escape(parser, &mut text)?;
            }
            ch => {
                parser.consume();
                text.push(ch);
            }
        }
    }
    Err(syntax_error(start, "Found unterminated string."))
}
/// Read up to `terminal` verbatim: no escape processing, control characters
/// permitted.
///
/// The terminal is consumed but not included in the result. Reaching the end
/// of input first is an error reported at the starting location.
fn parse_con_ter(&self, parser: &mut ParserCore, terminal: char) -> ParseResult<String> {
    let mut text = String::with_capacity(CAPACITY);
    let start = parser.loc();
    while !parser.is_at_eof() {
        let next = parser.peek();
        // Every character is consumed, terminal or not.
        parser.consume();
        if next == terminal {
            return Ok(text);
        }
        text.push(next);
    }
    Err(syntax_error(start, "Found unterminated string."))
}
/// Read up to `terminal` with no escape processing, rejecting control
/// characters.
///
/// The terminal is consumed but not included in the result. A character below
/// `'\x20'` is an error at its own location; reaching the end of input first
/// is an error reported at the starting location.
fn parse_ter(&self, parser: &mut ParserCore, terminal: char) -> ParseResult<String> {
    let mut text = String::with_capacity(CAPACITY);
    let start = parser.loc();
    while !parser.is_at_eof() {
        match parser.peek() {
            ch if ch == terminal => {
                parser.consume();
                return Ok(text);
            }
            ch if ch < '\x20' => {
                return Err(syntax_error(
                    parser.loc(),
                    &format!(
                        "Control characters are not permitted in strings: '{:?}'",
                        ch
                    ),
                ));
            }
            ch => {
                parser.consume();
                text.push(ch);
            }
        }
    }
    Err(syntax_error(start, "Found unterminated string."))
}
/// Read input verbatim: no terminal, no escape processing, control
/// characters permitted.
///
/// The predicate passed to `take_while` accepts every character, so nothing
/// is filtered out and this never returns an error itself.
fn read_c(&self, parser: &mut ParserCore) -> ParseResult<String> {
Ok(parser.take_while(|_| true))
}
/// Read to the end of input, decoding escapes and permitting control
/// characters.
fn read_ce(&self, parser: &mut ParserCore) -> ParseResult<String> {
    let mut text = String::with_capacity(CAPACITY);
    while !parser.is_at_eof() {
        let next = parser.peek();
        parser.consume();
        if next != self.escape_char {
            text.push(next);
            continue;
        }
        // The escape character is consumed; decode what follows it.
        self.parse_escape(parser, &mut text)?;
    }
    Ok(text)
}
/// Read to the end of input with no escape processing, rejecting control
/// characters.
///
/// Characters are taken while they are at or above `'\x20'`. If that stops
/// before the end of input, the character found there is reported as an
/// error.
fn read(&self, parser: &mut ParserCore) -> ParseResult<String> {
    let text = parser.take_while(|ch| ch >= '\x20');
    if !parser.is_at_eof() {
        let offender = parser.peek();
        return Err(syntax_error(
            parser.loc(),
            &format!(
                "Control characters are not permitted in strings: '{:?}'",
                offender
            ),
        ));
    }
    Ok(text)
}
/// Read to the end of input, decoding escapes and rejecting control
/// characters.
///
/// A raw character below `'\x20'` is an error at its own location. The escape
/// character is tested first.
fn read_e(&self, parser: &mut ParserCore) -> ParseResult<String> {
    let mut text = String::with_capacity(CAPACITY);
    while !parser.is_at_eof() {
        match parser.peek() {
            ch if ch == self.escape_char => {
                parser.consume();
                self.parse_escape(parser, &mut text)?;
            }
            ch if ch < '\x20' => {
                return Err(syntax_error(
                    parser.loc(),
                    &format!(
                        "Control characters are not permitted in strings: '{:?}'",
                        ch
                    ),
                ));
            }
            ch => {
                parser.consume();
                text.push(ch);
            }
        }
    }
    Ok(text)
}
/// Parse a string from `parser` using the current configuration.
///
/// With `terminal` set, input is read up to that character, which is consumed
/// but not returned; running out of input first is an error. With `None`,
/// input is read to the end. The `enable_escapes` and
/// `permit_low_control_characters` flags select which specialized reader
/// runs.
pub fn process(&self, parser: &mut ParserCore, terminal: Option<char>) -> ParseResult<String> {
    let escapes = self.enable_escapes;
    let controls = self.permit_low_control_characters;
    match (terminal, escapes, controls) {
        (None, true, true) => self.read_ce(parser),
        (None, true, false) => self.read_e(parser),
        (None, false, true) => self.read_c(parser),
        (None, false, false) => self.read(parser),
        (Some(terminal), true, true) => self.parse_esc_con_ter(parser, terminal),
        (Some(terminal), true, false) => self.parse_esc_ter(parser, terminal),
        (Some(terminal), false, true) => self.parse_con_ter(parser, terminal),
        (Some(terminal), false, false) => self.parse_ter(parser, terminal),
    }
}
/// Parse all of `value` as an unterminated string using the current
/// configuration.
///
/// The text is wrapped in a temporary parser named `<string>` and handed to
/// `process` with no terminal.
pub fn parse_string(&self, value: &str) -> ParseResult<String> {
    let bytes = value.bytes().collect();
    let mut core = ParserCore::new("<string>", Decode::new(bytes));
    self.process(&mut core, None)
}
}
impl Default for StringParser {
/// Equivalent to `StringParser::new()`.
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod test {
use std::collections::BTreeMap;
use super::StringParser;
use crate::parse_from_string;
use crate::strings::{EscapeType, IllegalUnicodeProtocol, UnknownEscapeProtocol};
/// Escapes disabled, control characters rejected.
#[test]
fn simple_test() {
    let mut sp = StringParser::new();
    sp.enable_escapes = false;
    sp.permit_low_control_characters = false;
    // (input, terminal, expected); an empty expectation means "must fail".
    let cases: &[(&str, Option<char>, &str)] = &[
        (
            r#"This is a simple string."#,
            None,
            "This is a simple string.",
        ),
        (r#"This is an escape\n."#, None, "This is an escape\\n."),
        ("This is a control code\x02.", None, ""),
        (
            r#"This is a simple string.""#,
            Some('"'),
            "This is a simple string.",
        ),
        (r#"This is a simple string."#, Some('"'), ""),
        (
            r#"This is an escape\n.""#,
            Some('"'),
            "This is an escape\\n.",
        ),
        ("This is a control code\x02.\"", Some('"'), ""),
    ];
    for &(input, terminal, expected) in cases {
        let mut parser = parse_from_string(input);
        let outcome = sp.process(parser.borrow_core(), terminal);
        match expected {
            "" => assert!(outcome.is_err()),
            _ => assert_eq!(outcome.unwrap(), expected),
        }
    }
}
/// Escapes disabled, control characters permitted.
#[test]
fn control_test() {
    let mut sp = StringParser::new();
    sp.enable_escapes = false;
    sp.permit_low_control_characters = true;
    // (input, terminal, expected); an empty expectation means "must fail".
    let cases: &[(&str, Option<char>, &str)] = &[
        (
            r#"This is a simple string."#,
            None,
            "This is a simple string.",
        ),
        (r#"This is an escape\n."#, None, "This is an escape\\n."),
        (
            "This is a control code\x02.",
            None,
            "This is a control code\x02.",
        ),
        (
            r#"This is a simple string.""#,
            Some('"'),
            "This is a simple string.",
        ),
        (r#"This is a simple string."#, Some('"'), ""),
        (
            r#"This is an escape\n.""#,
            Some('"'),
            "This is an escape\\n.",
        ),
        (
            "This is a control code\x02.\"",
            Some('"'),
            "This is a control code\x02.",
        ),
    ];
    for &(input, terminal, expected) in cases {
        let mut parser = parse_from_string(input);
        let outcome = sp.process(parser.borrow_core(), terminal);
        match expected {
            "" => assert!(outcome.is_err()),
            _ => assert_eq!(outcome.unwrap(), expected),
        }
    }
}
/// Escapes enabled, control characters rejected.
#[test]
fn escape_test() {
    let mut sp = StringParser::new();
    sp.enable_escapes = true;
    sp.permit_low_control_characters = false;
    // (input, terminal, expected); an empty expectation means "must fail".
    let cases: &[(&str, Option<char>, &str)] = &[
        (
            r#"This is a simple string."#,
            None,
            "This is a simple string.",
        ),
        (r#"This is an escape\n."#, None, "This is an escape\n."),
        ("This is a control code\x02.", None, ""),
        (
            r#"This is a simple string.""#,
            Some('"'),
            "This is a simple string.",
        ),
        (r#"This is a simple string."#, Some('"'), ""),
        (
            r#"This is an escape\n.""#,
            Some('"'),
            "This is an escape\n.",
        ),
        ("This is a control code\x02.\"", Some('"'), ""),
    ];
    for &(input, terminal, expected) in cases {
        let mut parser = parse_from_string(input);
        let outcome = sp.process(parser.borrow_core(), terminal);
        match expected {
            "" => assert!(outcome.is_err()),
            _ => assert_eq!(outcome.unwrap(), expected),
        }
    }
}
/// Custom escape table with strict error protocols: ordinary escapes, a
/// non-ASCII escape key, surrogate pairs and an unknown escape.
#[test]
fn odd_escapes_test() {
    let mut sp = StringParser::new();
    sp.enable_escapes = true;
    sp.permit_low_control_characters = true;
    sp.allow_surrogate_pairs = true;
    let escapes = BTreeMap::from([
        ('\n', EscapeType::Discard),
        ('\\', EscapeType::Char('\\')),
        ('\'', EscapeType::Char('\'')),
        ('\"', EscapeType::Char('\"')),
        ('a', EscapeType::Char('\x07')),
        ('b', EscapeType::Char('\x08')),
        ('f', EscapeType::Char('\x0c')),
        ('n', EscapeType::Char('\n')),
        ('r', EscapeType::Char('\r')),
        ('t', EscapeType::Char('\t')),
        ('v', EscapeType::Char('\x0b')),
        ('x', EscapeType::NakedByte),
        ('N', EscapeType::BracketUNamed),
        ('u', EscapeType::NakedU4),
        ('U', EscapeType::NakedU8),
        ('z', EscapeType::Char('0')),
        ('å', EscapeType::Discard),
    ]);
    sp.unknown_escape_protocol = UnknownEscapeProtocol::Error;
    sp.illegal_unicode_protocol = IllegalUnicodeProtocol::Error;
    sp.set_escapes(escapes);

    // Parse `input` to the end of the text with the parser configured above.
    let run = |input: &str| {
        let mut parser = parse_from_string(input);
        sp.process(parser.borrow_core(), None)
    };

    // Ordinary escapes, plus a non-ASCII escape key that is discarded.
    assert_eq!(
        run(r#"A very \\escaped\\ string. \'\"\a\b\f\n\r\t\v\z\å\z"#).unwrap(),
        "A very \\escaped\\ string. '\"\u{7}\u{8}\u{c}\n\r\t\u{b}00"
    );
    // A well-formed surrogate pair decodes to a single character.
    assert_eq!(run(r#"\ud801\udce0"#).unwrap(), "𐓠");
    // A high surrogate followed by a non-surrogate escape is rejected.
    assert!(run(r#"\ud801\u002e"#).is_err());
    // A high surrogate followed by no escape at all is rejected.
    let result = run(r#"\ud801*"#);
    println!("{:?}", result);
    assert!(result.is_err());
    // An escape that is not in the table is rejected.
    assert!(run(r#"\ß"#).is_err());
}
/// Escapes enabled, control characters permitted.
#[test]
fn control_escape_test() {
    let mut sp = StringParser::new();
    sp.enable_escapes = true;
    sp.permit_low_control_characters = true;
    // (input, terminal, expected); an empty expectation means "must fail".
    let cases: &[(&str, Option<char>, &str)] = &[
        (
            r#"This is a simple string."#,
            None,
            "This is a simple string.",
        ),
        (r#"This is an escape\n."#, None, "This is an escape\n."),
        (
            "This is a control code\x02.",
            None,
            "This is a control code\x02.",
        ),
        (
            r#"This is a simple string.""#,
            Some('"'),
            "This is a simple string.",
        ),
        (r#"This is a simple string."#, Some('"'), ""),
        (
            r#"This is an escape\n.""#,
            Some('"'),
            "This is an escape\n.",
        ),
        (
            "This is a control code\x02.\"",
            Some('"'),
            "This is a control code\x02.",
        ),
    ];
    for &(input, terminal, expected) in cases {
        let mut parser = parse_from_string(input);
        let outcome = sp.process(parser.borrow_core(), terminal);
        match expected {
            "" => assert!(outcome.is_err()),
            _ => assert_eq!(outcome.unwrap(), expected),
        }
    }
}
}