use crate::decoder::Decode;
#[cfg(not(feature = "no_ucd"))]
use crate::strings::get_ucd;
use crate::strings::EncodingMethod;
use crate::strings::EncodingStandard;
use crate::strings::IllegalUnicodeProtocol;
use crate::strings::StringEncoder;
use crate::strings::StringParser;
use crate::strings::StringStandard;
use crate::strings::UnknownEscapeProtocol;
use crate::ParserCore;
/// A plain string with no escapes, parsed without a terminator, must come
/// back unchanged under the C, JSON, TOML, and Rust standards.
#[test]
fn simple_1() {
    let mut strpar = StringParser::new();
    let plain = "this is a simple string";
    // The same parser instance is reconfigured for each standard in turn.
    for standard in [
        StringStandard::C,
        StringStandard::JSON,
        StringStandard::TOML,
        StringStandard::Rust,
    ] {
        strpar.set(standard);
        let decoder = Decode::new(plain.bytes().collect());
        let mut parser = ParserCore::new("<string>", decoder);
        let parsed = strpar.process(&mut parser, None).unwrap();
        assert_eq!(plain, parsed);
    }
}
/// Inputs that `StringParser::process` must reject.
#[test]
fn bad_strings() {
    let mut strpar = StringParser::new();

    // A '}' terminator is requested, but the text does not contain one.
    strpar.set(StringStandard::Rust);
    let unterminated = "this is a simple string";
    let decoder = Decode::new(unterminated.bytes().collect());
    let mut parser = ParserCore::new("<string>", decoder);
    assert!(strpar.process(&mut parser, Some('}')).is_err());

    // With low control characters disallowed, a raw backspace (0x08) in the
    // text is an error, both with and without a terminator.
    strpar.set(StringStandard::Rust);
    strpar.permit_low_control_characters = false;
    let with_backspace = "badchar->\x08}";
    let decoder = Decode::new(with_backspace.bytes().collect());
    let mut parser = ParserCore::new("<string>", decoder);
    assert!(strpar.process(&mut parser, Some('}')).is_err());

    let decoder = Decode::new(with_backspace.bytes().collect());
    let mut parser = ParserCore::new("<string>", decoder);
    assert!(strpar.process(&mut parser, None).is_err());

    // The same input is still rejected once escape processing is turned off.
    strpar.enable_escapes = false;
    let decoder = Decode::new(with_backspace.bytes().collect());
    let mut parser = ParserCore::new("<string>", decoder);
    assert!(strpar.process(&mut parser, None).is_err());
}
/// Plain strings under the Python and Trivet standards, the latter with an
/// explicit terminator character.
#[test]
fn simple_2() {
    let mut strpar = StringParser::new();

    // Python, no terminator: the whole input is returned.
    strpar.set(StringStandard::Python);
    let plain = "this is a simple string";
    let decoder = Decode::new(plain.bytes().collect());
    let mut parser = ParserCore::new("<string>", decoder);
    let parsed = strpar.process(&mut parser, None).unwrap();
    assert_eq!(plain, parsed);

    // Trivet with '>' as terminator: the terminator is not part of the result.
    strpar.set(StringStandard::Trivet);
    let terminated = "this is a simple string>";
    let decoder = Decode::new(terminated.bytes().collect());
    let mut parser = ParserCore::new("<string>", decoder);
    let parsed = strpar.process(&mut parser, Some('>')).unwrap();
    assert_eq!("this is a simple string", parsed);
}
/// Escape-sequence handling under the C, Rust, and Python standards.
#[test]
fn escapes() {
    let mut strpar = StringParser::new();

    // C: single-character escapes and octal escapes of two or three digits.
    strpar.set(StringStandard::C);
    let parsed = strpar
        .parse_string(r#"The\n"rain"\b\bin\020\r\afalls"#)
        .unwrap();
    assert_eq!(parsed, "The\n\"rain\"\x08\x08in\x10\r\x07falls");

    let short_octal = r"The\nrain\b\bin\20\r\afalls";
    let parsed = strpar.parse_string(short_octal).unwrap();
    assert_eq!(parsed, "The\nrain\x08\x08in\x10\r\x07falls");

    // With flexible octal escapes switched off, the two-digit form fails.
    strpar.octal_escapes_are_flexible = false;
    assert!(strpar.parse_string(short_octal).is_err());

    // Rust: `\x9c` and the malformed `\x2j` are both rejected.
    strpar.set(StringStandard::Rust);
    for rejected in [
        r"The rain in\x20Spain\x20\x9c.",
        r"The rain in\x2jSpain\x20\x9c.",
    ] {
        assert!(strpar.parse_string(rejected).is_err());
    }
    let parsed = strpar
        .parse_string(r"The rain in\x20Spain\x20\x2c.")
        .unwrap();
    assert_eq!(parsed, "The rain in Spain \x2c.");

    // C: `\xd8` yields U+00D8.
    strpar.set(StringStandard::C);
    let parsed = strpar.parse_string(r"\x20\xd8.").unwrap();
    assert_eq!(parsed, " \u{d8}.");

    // Python: `\uXXXX`, `\UXXXXXXXX`, and `\xXX` forms, with illegal Unicode
    // configured to raise an error.
    strpar.set(StringStandard::Python);
    strpar.illegal_unicode_protocol = IllegalUnicodeProtocol::Error;
    let parsed = strpar
        .parse_string(r"The rain in\u0020Spain\U00000020\x9c.")
        .unwrap();
    assert_eq!(parsed, "The rain in Spain \u{9c}.");
    for rejected in [
        r"The rain in\u0020Spain\U0000002g\x9c.",
        r"The rain in\u002gSpain\U00000020\x9c.",
        r"The rain in\u0020Spain\U1000dccc\x9c.",
    ] {
        assert!(strpar.parse_string(rejected).is_err());
    }
}
/// With escape processing disabled, backslash sequences pass through
/// untouched.
#[test]
fn no_escapes() {
    let mut strpar = StringParser::default();

    // Sanity check on the default configuration with escape-free text.
    let untouched = "no escapes here";
    assert_eq!(strpar.parse_string(untouched).unwrap(), untouched);

    // Once escapes are off, the output equals the input verbatim.
    strpar.enable_escapes = false;
    for literal in [r"fla\r\n\x09\u{2c} is >>", r"fla\r\n\x09\u{2c} is "] {
        assert_eq!(strpar.parse_string(literal).unwrap(), literal);
    }
}
/// `\xNN` escapes in the ASCII range under the Trivet standard, including
/// `\x00`.
#[test]
fn hex_ascii_escape_test() {
    let mut strpar = StringParser::new();
    strpar.set(StringStandard::Trivet);

    let escaped = r"the\x0arain\x20in\x20falls\x00 mainly on the plain\x2e";
    let mut parser = ParserCore::new("<string>", Decode::new(escaped.bytes().collect()));
    let decoded = strpar.process(&mut parser, None).unwrap();
    assert_eq!(decoded, "the\nrain in falls\0 mainly on the plain.");
}
/// `\xNN` escapes above the ASCII range under the Trivet standard: `\xff` and
/// `\x80` come out as U+00FF and U+0080.
#[test]
fn hex_byte_escape_test() {
    let mut strpar = StringParser::new();
    strpar.set(StringStandard::Trivet);

    let escaped = r"the\xff-\x80 is not ascii";
    let mut parser = ParserCore::new("<string>", Decode::new(escaped.bytes().collect()));
    let decoded = strpar.process(&mut parser, None).unwrap();

    // Compare the UTF-8 encodings byte for byte.
    let expected = "the\u{00ff}-\u{0080} is not ascii";
    assert_eq!(decoded.as_bytes(), expected.as_bytes());
}
/// Braced `\u{...}` escapes (Trivet, Rust) and named `\N{...}` escapes
/// (Python; only when the Unicode database is compiled in).
#[test]
fn hex_escapes() {
    let mut strpar = StringParser::new();

    // Trivet: an over-long value and an empty brace pair are both errors.
    strpar.set(StringStandard::Trivet);
    for rejected in [r"tom\u{1_0000_0000}swift", r"tom\u{}swift"] {
        assert!(strpar.parse_string(rejected).is_err());
    }

    // Rust: a seven-digit value and an empty brace pair are errors; a padded
    // four-digit value is accepted.
    strpar.set(StringStandard::Rust);
    assert!(strpar.parse_string(r"tom\u{1000000}swift").is_err());
    let parsed = strpar.parse_string(r"tom\u{002e}swift").unwrap();
    assert_eq!(parsed, "tom.swift");
    assert!(strpar.parse_string(r"tom\u{}swift").is_err());

    strpar.set(StringStandard::Python);
    #[cfg(not(feature = "no_ucd"))]
    {
        // Missing opening brace, missing closing brace, unknown name.
        for rejected in [
            r"tom\Nhyphen-minus}swift",
            r"tom\N{hyphen-minusswift",
            r"tom\N{xyzzy}swift",
        ] {
            assert!(strpar.parse_string(rejected).is_err());
        }
        let parsed = strpar.parse_string(r"tom\N{hyphen-minus}swift").unwrap();
        assert_eq!(parsed, "tom-swift");
    }
}
/// A backslash immediately followed by a newline does not appear in the
/// parsed output under the C, Python, and Rust standards.
#[test]
fn discard() {
    let mut strpar = StringParser::new();
    for standard in [
        StringStandard::C,
        StringStandard::Python,
        StringStandard::Rust,
    ] {
        strpar.set(standard);
        let joined = strpar.parse_string("this \\\n thing").unwrap();
        assert_eq!(joined, "this thing");
    }
}
/// Builds several parsers from one Unicode database handle and checks that a
/// named escape resolves through the last one.
#[cfg(not(feature = "no_ucd"))]
#[test]
fn with_db() {
    let ucd = get_ucd();

    // Construct and immediately discard three parsers sharing the database.
    for _ in 0..3 {
        let _ = StringParser::new_from_db(&ucd);
    }

    let mut strpar = StringParser::new_from_db(&ucd);
    strpar.set(StringStandard::Trivet);
    let resolved = strpar.parse_string(r"t\N{hyphen-minus}z").unwrap();
    assert_eq!(resolved, "t-z");
}
/// Malformed escapes are errors under Trivet, and each
/// `UnknownEscapeProtocol` setting treats the unknown escape `\q` differently.
#[test]
fn invalid_escapes() {
    let mut strpar = StringParser::new();

    // Malformed hex and Unicode escapes; the standard is re-applied before
    // each attempt.
    for malformed in [
        r"tom\xgeorge",
        r"tom\xfgeorge",
        r"tom\uF021george",
        r"tom\u{0020george",
        r"tom\u{002022121}george",
    ] {
        strpar.set(StringStandard::Trivet);
        assert!(strpar.parse_string(malformed).is_err());
    }

    // Protocol paired with the expected output; `None` means an error.
    let unknown = r"tom\qgeorge";
    for (protocol, expected) in [
        (UnknownEscapeProtocol::Discard, Some("tomgeorge")),
        (UnknownEscapeProtocol::DropEscape, Some("tomqgeorge")),
        (UnknownEscapeProtocol::Error, None),
        (UnknownEscapeProtocol::LiteralEscape, Some(r"tom\qgeorge")),
        (UnknownEscapeProtocol::Replace('-'), Some("tom-george")),
        (
            UnknownEscapeProtocol::ReplacementCharacter,
            Some("tom\u{fffd}george"),
        ),
    ] {
        strpar.unknown_escape_protocol = protocol;
        match expected {
            Some(text) => assert_eq!(strpar.parse_string(unknown).unwrap(), text),
            None => assert!(strpar.parse_string(unknown).is_err()),
        }
    }
}
/// Each `IllegalUnicodeProtocol` setting handles the escape `\u{ffffffff}`
/// differently under the Trivet standard.
#[test]
fn invalid_unicode() {
    let mut strpar = StringParser::new();
    strpar.set(StringStandard::Trivet);

    // Protocol paired with the expected output; `None` means an error.
    let out_of_range = r"tom\u{ffffffff}george";
    for (protocol, expected) in [
        (IllegalUnicodeProtocol::Error, None),
        (IllegalUnicodeProtocol::Discard, Some("tomgeorge")),
        (IllegalUnicodeProtocol::Replace('-'), Some("tom-george")),
        (
            IllegalUnicodeProtocol::ReplacementCharacter,
            Some("tom\u{fffd}george"),
        ),
    ] {
        strpar.illegal_unicode_protocol = protocol;
        match expected {
            Some(text) => assert_eq!(strpar.parse_string(out_of_range).unwrap(), text),
            None => assert!(strpar.parse_string(out_of_range).is_err()),
        }
    }
}
/// Surrogate-pair escapes: combined into U+10037 under JSON and Trivet,
/// rejected under TOML, Python, and Rust.
#[test]
fn surrogate_pairs() {
    let mut strpar = StringParser::new();

    // JSON accepts the pair and merges it into one code point.
    strpar.set(StringStandard::JSON);
    let merged = strpar.parse_string(r"tom\ud800\udc37swift").unwrap();
    assert_eq!(merged, "tom\u{10037}swift");

    // TOML rejects the same input.
    strpar.set(StringStandard::TOML);
    assert!(strpar.parse_string(r"tom\ud800\udc37swift").is_err());

    // Trivet accepts a braced pair, but not a high surrogate followed by
    // anything other than a low-surrogate escape.
    strpar.set(StringStandard::Trivet);
    let merged = strpar.parse_string(r"tom\u{d800}\u{dc37}swift").unwrap();
    assert_eq!(merged, "tom\u{10037}swift");
    for rejected in [r"tom\u{d800}\x37\xdcswift", r"tom\u{d800}\u{d837}swift"] {
        assert!(strpar.parse_string(rejected).is_err());
    }

    // Python rejects surrogates, well-formed or not.
    strpar.set(StringStandard::Python);
    for rejected in [
        r"tom\U0000d800\U0000dc37swift",
        r"tom\U0000d800\U00k0dc37swift",
        r"tom\U0000d800\udk37swift",
    ] {
        assert!(strpar.parse_string(rejected).is_err());
    }

    // Rust likewise.
    strpar.set(StringStandard::Rust);
    for rejected in [r"tom\u{d800}\u{dc37}swift", r"tom\u{d800}\u{dk37}swift"] {
        assert!(strpar.parse_string(rejected).is_err());
    }
}
/// Each `StringStandard` variant renders to its expected display name.
#[test]
fn names() {
    for (standard, label) in [
        (StringStandard::C, "C"),
        (StringStandard::JSON, "JSON"),
        (StringStandard::TOML, "TOML"),
        (StringStandard::Rust, "Rust"),
        (StringStandard::Python, "Python"),
        (StringStandard::Trivet, "Trivet"),
    ] {
        assert_eq!(standard.to_string(), label);
    }
}
/// Exercises `StringEncoder` across every standard, several encoding
/// thresholds, and the `uppercase_hex` / `no_ucd` feature combinations.
///
/// The encoder is stateful, so the order of the configuration changes below
/// matters.
#[test]
fn encoding() {
    // Inputs shared by most checks.
    let linear_b = "The\nfirst\u{08}thing\u{10037}";
    let diamond = "The\nfirst\u{08}thing\u{25c6}";
    let bmp_mix = "The\u{0103}first\u{1203}thing\u{fe0a}a man\u{2120}will do";

    // Expected ASCII-only `\uXXXX` rendering of `bmp_mix`; hex digit case
    // follows the `uppercase_hex` feature.
    #[cfg(not(feature = "uppercase_hex"))]
    let bmp_mix_escaped = r#"The\u0103first\u1203thing\ufe0aa man\u2120will do"#;
    #[cfg(feature = "uppercase_hex")]
    let bmp_mix_escaped = r#"The\u0103first\u1203thing\uFE0Aa man\u2120will do"#;

    let mut encoder = StringEncoder::new();

    // JSON: U+10037 is written as a surrogate pair.
    encoder.set(StringStandard::JSON);
    #[cfg(not(feature = "uppercase_hex"))]
    let expected = "The\\nfirst\\bthing\\ud800\\udc37";
    #[cfg(feature = "uppercase_hex")]
    let expected = "The\\nfirst\\bthing\\uD800\\uDC37";
    assert_eq!(expected, encoder.encode(linear_b));
    encoder.encoding_standard = EncodingStandard::ASCII;
    assert_eq!(bmp_mix_escaped, encoder.encode(bmp_mix));

    // TOML: U+10037 is written as `\U00010037`; the expected text has no hex
    // letters, so it is the same with or without `uppercase_hex`.
    encoder.set(StringStandard::TOML);
    assert_eq!(
        "The\\nfirst\\bthing\\U00010037",
        encoder.encode(linear_b)
    );
    encoder.encoding_standard = EncodingStandard::ASCII;
    assert_eq!(bmp_mix_escaped, encoder.encode(bmp_mix));

    // C.
    encoder.set(StringStandard::C);
    assert_eq!(
        "The\\nfirst\\bthing\\U00010037",
        encoder.encode(linear_b)
    );
    assert_eq!(
        "The\\nfirst\\bthing\\x00\\x01",
        encoder.encode("The\nfirst\u{08}thing\u{0}\u{1}")
    );

    // Rust: U+10037 passes through until the threshold is lowered.
    encoder.set(StringStandard::Rust);
    assert_eq!(
        "The\\nfirst\\x08thing\u{10037}",
        encoder.encode(linear_b)
    );
    encoder.encoding_standard = EncodingStandard::EncodeAbove(0xFFFF);
    assert_eq!(
        "The\\nfirst\\x08thing\\u{010037}",
        encoder.encode(linear_b)
    );

    // Python.
    encoder.set(StringStandard::Python);
    assert_eq!(
        "The\\nfirst\\bthing\\U00010037",
        encoder.encode(linear_b)
    );
    encoder.encoding_standard = EncodingStandard::EncodeAbove(0x7f);
    #[cfg(not(feature = "uppercase_hex"))]
    let expected = "The\\nfirst\\bthing\\u25c6";
    #[cfg(feature = "uppercase_hex")]
    let expected = "The\\nfirst\\bthing\\u25C6";
    assert_eq!(expected, encoder.encode(diamond));
    #[cfg(not(feature = "no_ucd"))]
    {
        encoder.use_names = true;
        assert_eq!(
            "The\\nfirst\\bthing\\N{LINEAR B SYLLABLE B054 WA}",
            encoder.encode(linear_b)
        );
    }

    // Trivet: default, then a threshold, then an explicit range with names.
    encoder.set(StringStandard::Trivet);
    assert_eq!(
        "The\\nfirst\\bthing\u{10037}",
        encoder.encode(linear_b)
    );
    encoder.encoding_standard = EncodingStandard::EncodeAbove(0xffff);
    assert_eq!(
        "The\\nfirst\\bthing\\u{010037}",
        encoder.encode(linear_b)
    );
    encoder.use_names = true;
    encoder.encoding_standard = EncodingStandard::EncodeRanges(vec![0x1_0000..0x3_FFFF]);
    #[cfg(not(feature = "no_ucd"))]
    assert_eq!(
        "The\\nfirst\\bthing\\N{LINEAR B SYLLABLE B054 WA}",
        encoder.encode(linear_b)
    );

    // Trivet again after re-applying the standard.
    encoder.set(StringStandard::Trivet);
    assert_eq!(
        "The\\nfirst\\bthing\u{10037}",
        encoder.encode(linear_b)
    );
    encoder.encoding_standard = EncodingStandard::EncodeAbove(0x7f);
    #[cfg(not(feature = "uppercase_hex"))]
    let expected = "The\\nfirst\\bthing\\u{0025c6}";
    #[cfg(feature = "uppercase_hex")]
    let expected = "The\\nfirst\\bthing\\u{0025C6}";
    assert_eq!(expected, encoder.encode(diamond));
    #[cfg(not(feature = "no_ucd"))]
    {
        encoder.use_names = true;
        assert_eq!(
            "The\\nfirst\\bthing\\N{BLACK DIAMOND}",
            encoder.encode(diamond)
        );
    }

    // A fresh encoder using the `Naked2` method with ASCII-only output.
    let mut encoder = StringEncoder::default();
    encoder.set(StringStandard::Trivet);
    encoder.encoding_standard = EncodingStandard::ASCII;
    encoder.encoding_method = EncodingMethod::Naked2;
    let naked = "The\\nfirst\\bthing\\x37\\x00\\x01";
    assert_eq!(naked, encoder.encode(linear_b));
    // The literal characters '7', NUL, and SOH must not encode to the same text.
    assert_ne!(naked, encoder.encode("The\nfirst\u{08}thing\x37\x00\x01"));

    // Rust with ASCII escapes off and names on.
    encoder.set(StringStandard::Rust);
    encoder.use_ascii_escapes = false;
    encoder.use_names = true;
    encoder.encoding_standard = EncodingStandard::ASCII;
    #[cfg(not(feature = "no_ucd"))]
    assert_eq!(
        encoder.encode("The\train inn\x08\x09Spain."),
        "The\\train inn\\N{BS}\\tSpain."
    );
}