1#[cfg(not(feature = "no_ucd"))]
11use super::ucd::UCD;
12use super::C_ESCAPES;
13use super::JSON_ESCAPES;
14use super::PYTHON_ESCAPES;
15use super::RUST_ESCAPES;
16use super::TOML_ESCAPES;
17use super::TRIVET_ESCAPES;
18use crate::decoder::Decode;
19use crate::strings::EscapeType;
20use crate::strings::IllegalUnicodeProtocol;
21use crate::strings::StringStandard;
22use crate::strings::UnknownEscapeProtocol;
23use crate::{
24 errors::{syntax_error, unexpected_character_error, ParseResult},
25 Loc, ParserCore,
26};
27use std::collections::BTreeMap;
28#[cfg(not(feature = "no_ucd"))]
29use std::rc::Rc;
30
31const CAPACITY: usize = 64;
33
34#[cfg(not(feature = "no_ucd"))]
43pub fn get_ucd() -> Box<Rc<BTreeMap<&'static str, char>>> {
44 let mut map = BTreeMap::new();
48 for (key, value) in UCD {
49 map.insert(*key, *value);
50 }
51 Box::new(Rc::new(map))
52}
53
#[derive(Clone)]
/// A configurable parser for string literals.
///
/// The public fields select how escapes, control characters, surrogate pairs,
/// and invalid code points are treated.  The escape table itself is private;
/// replace it with `set_escapes` or pick a predefined configuration with `set`.
pub struct StringParser {
    /// When true, `process` interprets escape sequences introduced by
    /// `escape_char`; when false the escape character is ordinary text.
    pub enable_escapes: bool,

    /// The character that introduces an escape sequence.
    pub escape_char: char,

    /// When false, any character below `'\x20'` in the input is a syntax error.
    pub permit_low_control_characters: bool,

    /// What to do with an escape that has no entry in the escape table.
    pub unknown_escape_protocol: UnknownEscapeProtocol,

    /// When false, a surrogate pair written as two escapes is rejected with
    /// an error instead of being combined into one character.
    pub allow_surrogate_pairs: bool,

    /// What to do when an escape yields a value that is not a valid Unicode
    /// code point.
    pub illegal_unicode_protocol: IllegalUnicodeProtocol,

    /// When true, an otherwise undefined escape starting with a digit `0`-`7`
    /// is read as an octal escape of up to three digits.
    pub allow_octal_escapes: bool,

    /// When false, an octal escape with fewer than three digits is an error.
    pub octal_escapes_are_flexible: bool,

    /// Maps the character following the escape character to its meaning.
    escapes: BTreeMap<char, EscapeType>,

    /// Lookup table indexed by ASCII code; rebuilt from `escapes` by
    /// `fix_escapes` and consulted first for ASCII escape characters.
    fast_escapes: [EscapeType; 128],

    #[cfg(not(feature = "no_ucd"))]
    /// Maps character names to characters; used by named (`BracketUNamed`) escapes.
    pub ucd: Rc<BTreeMap<&'static str, char>>,
}
181
182impl StringParser {
183 #[cfg(not(feature = "no_ucd"))]
186 pub fn new() -> Self {
187 let mut parser = StringParser {
188 enable_escapes: true,
189 permit_low_control_characters: true,
190 escape_char: '\\',
191 allow_octal_escapes: true,
192 octal_escapes_are_flexible: true,
193 allow_surrogate_pairs: true,
194 illegal_unicode_protocol: IllegalUnicodeProtocol::ReplacementCharacter,
195 unknown_escape_protocol: UnknownEscapeProtocol::LiteralEscape,
196 escapes: BTreeMap::from(TRIVET_ESCAPES),
197 fast_escapes: [EscapeType::Undefined; 128],
198 ucd: *get_ucd(),
199 };
200 parser.fix_escapes();
201 parser
202 }
203 #[cfg(feature = "no_ucd")]
204 pub fn new() -> Self {
205 let mut parser = StringParser {
206 enable_escapes: true,
207 permit_low_control_characters: true,
208 escape_char: '\\',
209 allow_octal_escapes: true,
210 octal_escapes_are_flexible: true,
211 allow_surrogate_pairs: true,
212 illegal_unicode_protocol: IllegalUnicodeProtocol::ReplacementCharacter,
213 unknown_escape_protocol: UnknownEscapeProtocol::LiteralEscape,
214 escapes: BTreeMap::from(TRIVET_ESCAPES),
215 fast_escapes: [EscapeType::Undefined; 128],
216 };
217 parser.fix_escapes();
218 parser
219 }
220
221 #[cfg(not(feature = "no_ucd"))]
223 pub fn new_from_db(ucd: &Rc<BTreeMap<&'static str, char>>) -> Self {
224 let mut parser = StringParser {
225 enable_escapes: true,
226 permit_low_control_characters: true,
227 escape_char: '\\',
228 allow_octal_escapes: true,
229 octal_escapes_are_flexible: true,
230 allow_surrogate_pairs: true,
231 illegal_unicode_protocol: IllegalUnicodeProtocol::ReplacementCharacter,
232 unknown_escape_protocol: UnknownEscapeProtocol::LiteralEscape,
233 escapes: BTreeMap::from(TRIVET_ESCAPES),
234 fast_escapes: [EscapeType::Undefined; 128],
235 ucd: ucd.clone(),
236 };
237 parser.fix_escapes();
238 parser
239 }
240
241 pub fn set(&mut self, std: StringStandard) {
244 match std {
245 StringStandard::Trivet => {
246 self.enable_escapes = true;
247 self.permit_low_control_characters = true;
248 self.escape_char = '\\';
249 self.allow_octal_escapes = true;
250 self.octal_escapes_are_flexible = true;
251 self.allow_surrogate_pairs = true;
252 self.illegal_unicode_protocol = IllegalUnicodeProtocol::ReplacementCharacter;
253 self.unknown_escape_protocol = UnknownEscapeProtocol::LiteralEscape;
254 self.escapes = BTreeMap::from(TRIVET_ESCAPES);
255 }
256 StringStandard::C => {
257 self.enable_escapes = true;
258 self.permit_low_control_characters = true;
259 self.escape_char = '\\';
260 self.allow_octal_escapes = true;
261 self.octal_escapes_are_flexible = true;
262 self.allow_surrogate_pairs = false;
263 self.illegal_unicode_protocol = IllegalUnicodeProtocol::ReplacementCharacter;
264 self.unknown_escape_protocol = UnknownEscapeProtocol::LiteralEscape;
265 self.escapes = BTreeMap::from(C_ESCAPES);
266 }
267 StringStandard::Rust => {
268 self.enable_escapes = true;
269 self.permit_low_control_characters = true;
270 self.escape_char = '\\';
271 self.allow_octal_escapes = false;
272 self.allow_surrogate_pairs = false;
273 self.illegal_unicode_protocol = IllegalUnicodeProtocol::Error;
274 self.unknown_escape_protocol = UnknownEscapeProtocol::Error;
275 self.escapes = BTreeMap::from(RUST_ESCAPES);
276 }
277 StringStandard::JSON => {
278 self.enable_escapes = true;
279 self.permit_low_control_characters = false;
280 self.escape_char = '\\';
281 self.allow_octal_escapes = false;
282 self.allow_surrogate_pairs = true;
283 self.illegal_unicode_protocol = IllegalUnicodeProtocol::ReplacementCharacter;
284 self.unknown_escape_protocol = UnknownEscapeProtocol::Error;
285 self.escapes = BTreeMap::from(JSON_ESCAPES);
286 }
287 StringStandard::TOML => {
288 self.enable_escapes = true;
289 self.permit_low_control_characters = false;
290 self.escape_char = '\\';
291 self.allow_octal_escapes = false;
292 self.allow_surrogate_pairs = false;
293 self.illegal_unicode_protocol = IllegalUnicodeProtocol::Error;
294 self.unknown_escape_protocol = UnknownEscapeProtocol::Error;
295 self.escapes = BTreeMap::from(TOML_ESCAPES);
296 }
297 StringStandard::Python => {
298 self.enable_escapes = true;
299 self.permit_low_control_characters = true;
300 self.escape_char = '\\';
301 self.allow_octal_escapes = true;
302 self.octal_escapes_are_flexible = true;
303 self.allow_surrogate_pairs = false;
304 self.illegal_unicode_protocol = IllegalUnicodeProtocol::ReplacementCharacter;
305 self.unknown_escape_protocol = UnknownEscapeProtocol::LiteralEscape;
306 self.escapes = BTreeMap::from(PYTHON_ESCAPES);
307 }
308 }
309 self.fix_escapes();
310 }
311
    /// Replace the escape table.
    ///
    /// `escapes` maps the character that follows the escape character to the
    /// action to take.  The ASCII fast-lookup table is rebuilt immediately so
    /// it never disagrees with the new map.
    pub fn set_escapes(&mut self, escapes: BTreeMap<char, EscapeType>) {
        self.escapes = escapes;
        self.fix_escapes();
    }
317
318 fn fix_escapes(&mut self) {
320 self.fast_escapes = [EscapeType::Undefined; 128];
321 for (key, value) in self.escapes.iter() {
322 if key <= &'\u{80}' {
323 self.fast_escapes[*key as usize] = *value
324 }
325 }
326 }
327
328 fn invalid_escape(&self, ch: char, loc: Loc, string: &mut String) -> ParseResult<()> {
330 match self.unknown_escape_protocol {
331 UnknownEscapeProtocol::Discard => Ok(()),
332 UnknownEscapeProtocol::DropEscape => {
333 string.push(ch);
334 Ok(())
335 }
336 UnknownEscapeProtocol::Error => Err(syntax_error(
337 loc,
338 format!("Invalid escape '{}{}'", self.escape_char, ch).as_str(),
339 )),
340 UnknownEscapeProtocol::LiteralEscape => {
341 string.push(self.escape_char);
342 string.push(ch);
343 Ok(())
344 }
345 UnknownEscapeProtocol::Replace(ch) => {
346 string.push(ch);
347 Ok(())
348 }
349 UnknownEscapeProtocol::ReplacementCharacter => {
350 string.push(char::REPLACEMENT_CHARACTER);
351 Ok(())
352 }
353 }
354 }
355
356 fn handle_illegal_unicode(&self, value: u32, loc: Loc, string: &mut String) -> ParseResult<()> {
358 match self.illegal_unicode_protocol {
359 IllegalUnicodeProtocol::Discard => Ok(()),
360 IllegalUnicodeProtocol::Error => Err(syntax_error(
361 loc,
362 format!("Value is not a valid Unicode code point: {:04x}", value).as_str(),
363 )),
364 IllegalUnicodeProtocol::Replace(ch) => {
365 string.push(ch);
366 Ok(())
367 }
368 IllegalUnicodeProtocol::ReplacementCharacter => {
369 string.push(char::REPLACEMENT_CHARACTER);
370 Ok(())
371 }
372 }
373 }
374
375 fn parse_surrogate_pair(
380 &self,
381 parser: &mut ParserCore,
382 first: u32,
383 loc: Loc,
384 string: &mut String,
385 ) -> ParseResult<()> {
386 if !parser.peek_and_consume(self.escape_char) {
391 return self.handle_illegal_unicode(first, loc, string);
393 }
394
395 let ch = parser.peek();
398 parser.consume();
399 let second = match self.escapes.get(&ch) {
400 Some(EscapeType::BraceU18) => {
401 self.parse_braced_hex(parser, 1, 8, true)?
403 }
404 Some(EscapeType::BraceU16) => {
405 self.parse_braced_hex(parser, 1, 6, false)?
407 }
408 Some(EscapeType::NakedU4) => {
409 let digits = parser.peek_n(4);
411 parser.consume_n(4);
412 (match u16::from_str_radix(&digits, 16) {
414 Ok(value) => value,
415 Err(err) => {
416 return Err(syntax_error(
417 loc,
418 format!("Invalid hex value (ref:1) '{}': {}", digits, err).as_str(),
419 ))
420 }
421 }) as u32
422 }
423 Some(EscapeType::NakedU8) => {
424 let digits = parser.peek_n(8);
426 parser.consume_n(8);
427 match u32::from_str_radix(&digits, 16) {
429 Ok(value) => value,
430 Err(err) => {
431 return Err(syntax_error(
432 loc,
433 format!("Invalid hex value (ref:2) '{}': {}", digits, err).as_str(),
434 ))
435 }
436 }
437 }
438 _ => {
439 return Err(syntax_error(loc,
441 "Found what seems to be the first half of a surrogate pair, but no second half was found."
442 ));
443 }
444 };
445
446 if !self.allow_surrogate_pairs {
448 return Err(syntax_error(loc, "Surrogate pairs are not permitted"));
450 }
451
452 if !(0xd800..0xdc00).contains(&first) || !(0xdc00..0xe000).contains(&second) {
454 return Err(syntax_error(
456 loc,
457 format!("Invalid surrogate pair {:04x},{:04x}", first, second).as_str(),
458 ));
459 }
460
461 let value = (first - 0xD800) * 0x400 + (second - 0xDC00) + 0x10000;
464 self.u32_to_char(value, loc, string)?;
465 Ok(())
466 }
467
468 fn parse_braced_hex(
474 &self,
475 parser: &mut ParserCore,
476 low: usize,
477 high: usize,
478 underscores: bool,
479 ) -> ParseResult<u32> {
480 let loc = parser.loc();
481 if !parser.peek_and_consume('{') {
483 return Err(unexpected_character_error(loc, "{", parser.peek()));
485 }
486 let digits = if underscores {
488 parser.take_while_unless(|ch| ch.is_ascii_hexdigit(), |ch| ch == '_')
489 } else {
490 parser.take_while(|ch| ch.is_ascii_hexdigit())
491 };
492 if !parser.peek_and_consume('}') {
494 return Err(unexpected_character_error(parser.loc(), "}", parser.peek()));
496 }
497 if !(low..=high).contains(&digits.len()) {
499 if digits.len() < low {
500 return Err(syntax_error(loc, "Too few digits given in escape"));
501 }
502 return Err(syntax_error(loc, "Too many digits given in escape"));
503 }
504 Ok(u32::from_str_radix(&digits, 16).unwrap())
505 }
506
507 fn u32_to_char(&self, value: u32, loc: Loc, string: &mut String) -> ParseResult<()> {
509 match char::from_u32(value) {
510 None => {
511 self.handle_illegal_unicode(value, loc, string)
513 }
514 Some(ch) => {
515 string.push(ch);
516 Ok(())
517 }
518 }
519 }
520
521 fn parse_escape(&self, parser: &mut ParserCore, string: &mut String) -> ParseResult<()> {
526 let loc = parser.loc();
527 let mut ch = parser.peek();
528 parser.consume();
529
530 let esc_type = if ch.is_ascii() {
531 &self.fast_escapes[ch as usize]
532 } else if let Some(esc_type) = self.escapes.get(&ch) {
533 esc_type
534 } else {
535 &EscapeType::Undefined
536 };
537
538 match esc_type {
540 EscapeType::Char(rp) => {
541 string.push(*rp);
542 Ok(())
543 }
544 EscapeType::Undefined => {
545 if self.allow_octal_escapes && ('0'..='7').contains(&ch) {
547 let mut value = (ch as u32) - ('0' as u32);
549 for _ in 0..2 {
550 ch = parser.peek();
551 if ('0'..='7').contains(&ch) {
552 value *= 8;
553 value += (ch as u32) - ('0' as u32);
554 parser.consume();
555 } else {
556 if !self.octal_escapes_are_flexible {
557 return Err(syntax_error(
558 loc,
559 "Octal escape must have three digits",
560 ));
561 }
562 break;
563 }
564 }
565 self.u32_to_char(value, loc, string)?;
566 return Ok(());
567 }
568 self.invalid_escape(ch, loc, string)?;
569 Ok(())
570 }
571 EscapeType::BraceU18 => {
572 let value = self.parse_braced_hex(parser, 1, 8, true)?;
573 if (0xd800..0xe000).contains(&value) {
574 self.parse_surrogate_pair(parser, value, loc, string)?
576 } else {
577 self.u32_to_char(value, loc, string)?
578 };
579 Ok(())
580 }
581 EscapeType::BraceU16 => {
582 let value = self.parse_braced_hex(parser, 1, 6, false)?;
583 if (0xd800..0xe000).contains(&value) {
584 self.parse_surrogate_pair(parser, value, loc, string)?
586 } else {
587 self.u32_to_char(value, loc, string)?
588 };
589 Ok(())
590 }
591 EscapeType::BracketUNamed => {
592 #[cfg(not(feature = "no_ucd"))]
593 {
594 if !parser.peek_and_consume('{') {
596 return Err(unexpected_character_error(loc, "{", parser.peek()));
598 }
599 let name = parser.take_while(|ch| ch != '}');
601 if !parser.peek_and_consume('}') {
603 return Err(unexpected_character_error(loc, "}", parser.peek()));
605 }
606 let name = name.to_uppercase();
608 match self.ucd.get(name.as_str()) {
609 Some(ch) => {
610 string.push(*ch);
611 Ok(())
612 }
613 None => Err(syntax_error(
614 loc,
615 format!("Unknown Unicode character name '{}'", name).as_str(),
616 )),
617 }
618 }
619 #[cfg(feature = "no_ucd")]
620 {
621 Err(syntax_error(loc, "Unicode name lookup is not enabled."))
622 }
623 }
624 EscapeType::Discard => Ok(()),
625 EscapeType::DiscardWS => {
626 parser.consume_ws_only();
627 Ok(())
628 }
629 EscapeType::NakedASCII => {
630 let digits = parser.peek_n(2);
631 parser.consume_n(2);
632 let value = match u8::from_str_radix(&digits, 16) {
634 Ok(value) => value,
635 Err(err) => {
636 return Err(syntax_error(
637 loc,
638 format!("Invalid ASCII hex value '{}': {}", digits, err).as_str(),
639 ))
640 }
641 };
642 if value > 0x7f {
643 return Err(syntax_error(
644 loc,
645 format!("Invalid ASCII value (too high): '{}'", digits).as_str(),
646 ));
647 }
648 string.push(unsafe { char::from_u32_unchecked(value as u32) });
649 Ok(())
650 }
651 EscapeType::NakedByte => {
652 let digits = parser.peek_n(2);
653 parser.consume_n(2);
654 let value = match u8::from_str_radix(&digits, 16) {
656 Ok(value) => value,
657 Err(err) => {
658 return Err(syntax_error(
659 loc,
660 format!("Invalid hex value (ref:3) '{}': {}", digits, err).as_str(),
661 ))
662 }
663 } as u32;
664 string.push(char::from_u32(value).unwrap());
668 Ok(())
669 }
670 EscapeType::NakedU4 => {
671 let digits = parser.peek_n(4);
672 parser.consume_n(4);
673 let value = match u16::from_str_radix(&digits, 16) {
675 Ok(value) => value,
676 Err(err) => {
677 return Err(syntax_error(
678 loc,
679 format!("Invalid hex value (ref:4) '{}': {}", digits, err).as_str(),
680 ))
681 }
682 } as u32;
683 if (0xd800..0xe000).contains(&value) {
684 return self.parse_surrogate_pair(parser, value, loc, string);
686 }
687 string.push(unsafe { char::from_u32_unchecked(value) });
690 Ok(())
691 }
692 EscapeType::NakedU8 => {
693 let digits = parser.peek_n(8);
694 parser.consume_n(8);
695 let value = match u32::from_str_radix(&digits, 16) {
697 Ok(value) => value,
698 Err(err) => {
699 return Err(syntax_error(
700 loc,
701 format!("Invalid hex value (ref:5) '{}': {}", digits, err).as_str(),
702 ))
703 }
704 };
705 if (0xd800..0xe000).contains(&value) {
706 return self.parse_surrogate_pair(parser, value, loc, string);
708 }
709 match char::from_u32(value) {
710 Some(ch) => {
711 string.push(ch);
712 Ok(())
713 }
714 None => self.handle_illegal_unicode(value, loc, string),
715 }
716 }
717 }
718 }
719
720 fn parse_esc_con_ter(&self, parser: &mut ParserCore, terminal: char) -> ParseResult<String> {
724 let mut result = String::with_capacity(CAPACITY);
726 let loc = parser.loc();
727 while !parser.is_at_eof() {
728 let ch = parser.peek();
729 if ch == terminal {
730 parser.consume();
731 return Ok(result);
732 } else if ch == self.escape_char {
733 parser.consume();
735 self.parse_escape(parser, &mut result)?;
736 } else {
737 parser.consume();
738 result.push(ch)
739 }
740 }
741 Err(syntax_error(loc, "Found unterminated string."))
742 }
743
744 fn parse_esc_ter(&self, parser: &mut ParserCore, terminal: char) -> ParseResult<String> {
746 let mut result = String::with_capacity(CAPACITY);
748 let loc = parser.loc();
749 while !parser.is_at_eof() {
750 let ch = parser.peek();
751 if ch == terminal {
752 parser.consume();
753 return Ok(result);
754 } else if ch < '\x20' {
755 return Err(syntax_error(
757 parser.loc(),
758 &format!(
759 "Control characters are not permitted in strings: '{:?}'",
760 ch
761 ),
762 ));
763 } else if ch == self.escape_char {
764 parser.consume();
766 self.parse_escape(parser, &mut result)?;
767 } else {
768 parser.consume();
769 result.push(ch)
770 }
771 }
772 Err(syntax_error(loc, "Found unterminated string."))
773 }
774
775 fn parse_con_ter(&self, parser: &mut ParserCore, terminal: char) -> ParseResult<String> {
777 let mut result = String::with_capacity(CAPACITY);
779 let loc = parser.loc();
780 while !parser.is_at_eof() {
781 let ch = parser.peek();
782 if ch == terminal {
783 parser.consume();
784 return Ok(result);
785 } else {
786 parser.consume();
787 result.push(ch)
788 }
789 }
790 Err(syntax_error(loc, "Found unterminated string."))
791 }
792
793 fn parse_ter(&self, parser: &mut ParserCore, terminal: char) -> ParseResult<String> {
795 let mut result = String::with_capacity(CAPACITY);
797 let loc = parser.loc();
798 while !parser.is_at_eof() {
799 let ch = parser.peek();
800 if ch == terminal {
801 parser.consume();
802 return Ok(result);
803 } else if ch < '\x20' {
804 return Err(syntax_error(
806 parser.loc(),
807 &format!(
808 "Control characters are not permitted in strings: '{:?}'",
809 ch
810 ),
811 ));
812 } else {
813 parser.consume();
814 result.push(ch)
815 }
816 }
817 Err(syntax_error(loc, "Found unterminated string."))
818 }
819
820 fn read_c(&self, parser: &mut ParserCore) -> ParseResult<String> {
823 Ok(parser.take_while(|_| true))
824 }
825
826 fn read_ce(&self, parser: &mut ParserCore) -> ParseResult<String> {
827 let mut result = String::with_capacity(CAPACITY);
828 while !parser.is_at_eof() {
829 let ch = parser.peek();
830 parser.consume();
831 if ch == self.escape_char {
832 self.parse_escape(parser, &mut result)?
833 } else {
834 result.push(ch)
835 }
836 }
837 Ok(result)
838 }
839
840 fn read(&self, parser: &mut ParserCore) -> ParseResult<String> {
841 let result = parser.take_while(|ch| ch >= '\x20');
842 if parser.is_at_eof() {
843 Ok(result)
844 } else {
845 let ch = parser.peek();
846 Err(syntax_error(
847 parser.loc(),
848 &format!(
849 "Control characters are not permitted in strings: '{:?}'",
850 ch
851 ),
852 ))
853 }
854 }
855
856 fn read_e(&self, parser: &mut ParserCore) -> ParseResult<String> {
857 let mut result = String::with_capacity(CAPACITY);
858 while !parser.is_at_eof() {
859 let ch = parser.peek();
860 if ch == self.escape_char {
861 parser.consume();
862 self.parse_escape(parser, &mut result)?
863 } else if ch < '\x20' {
864 return Err(syntax_error(
865 parser.loc(),
866 &format!(
867 "Control characters are not permitted in strings: '{:?}'",
868 ch
869 ),
870 ));
871 } else {
872 parser.consume();
873 result.push(ch)
874 }
875 }
876 Ok(result)
877 }
878
879 pub fn process(&self, parser: &mut ParserCore, terminal: Option<char>) -> ParseResult<String> {
886 match terminal {
887 None => {
888 if self.enable_escapes {
889 if self.permit_low_control_characters {
890 self.read_ce(parser)
891 } else {
892 self.read_e(parser)
893 }
894 } else if self.permit_low_control_characters {
895 self.read_c(parser)
896 } else {
897 self.read(parser)
898 }
899 }
900 Some(terminal) => {
901 if self.enable_escapes {
902 if self.permit_low_control_characters {
903 self.parse_esc_con_ter(parser, terminal)
904 } else {
905 self.parse_esc_ter(parser, terminal)
906 }
907 } else if self.permit_low_control_characters {
908 self.parse_con_ter(parser, terminal)
909 } else {
910 self.parse_ter(parser, terminal)
911 }
912 }
913 }
914 }
915
916 pub fn parse_string(&self, value: &str) -> ParseResult<String> {
918 let decoder = Decode::new(value.bytes().collect());
919 let mut parser = ParserCore::new("<string>", decoder);
920 self.process(&mut parser, None)
921 }
922}
923
924impl Default for StringParser {
925 fn default() -> Self {
927 Self::new()
928 }
929}
930
931#[cfg(test)]
932mod test {
933 use std::collections::BTreeMap;
934
935 use super::StringParser;
936 use crate::parse_from_string;
937 use crate::strings::{EscapeType, IllegalUnicodeProtocol, UnknownEscapeProtocol};
938
939 #[test]
942 fn simple_test() {
943 let mut sp = StringParser::new();
944 sp.enable_escapes = false;
945 sp.permit_low_control_characters = false;
946 let cases = &[
947 (
948 r#"This is a simple string."#,
949 None,
950 "This is a simple string.",
951 ),
952 (r#"This is an escape\n."#, None, "This is an escape\\n."),
953 ("This is a control code\x02.", None, ""),
954 (
955 r#"This is a simple string.""#,
956 Some('"'),
957 "This is a simple string.",
958 ),
959 (r#"This is a simple string."#, Some('"'), ""),
960 (
961 r#"This is an escape\n.""#,
962 Some('"'),
963 "This is an escape\\n.",
964 ),
965 ("This is a control code\x02.\"", Some('"'), ""),
966 ];
967 for (in_str, term, out_str) in cases {
968 let mut parser = parse_from_string(in_str);
969 let result = sp.process(parser.borrow_core(), *term);
970 if out_str.is_empty() {
971 assert!(result.is_err())
972 } else {
973 assert_eq!(&result.unwrap(), out_str)
974 }
975 }
976 }
977
978 #[test]
979 fn control_test() {
980 let mut sp = StringParser::new();
981 sp.enable_escapes = false;
982 sp.permit_low_control_characters = true;
983 let cases = &[
984 (
985 r#"This is a simple string."#,
986 None,
987 "This is a simple string.",
988 ),
989 (r#"This is an escape\n."#, None, "This is an escape\\n."),
990 (
991 "This is a control code\x02.",
992 None,
993 "This is a control code\x02.",
994 ),
995 (
996 r#"This is a simple string.""#,
997 Some('"'),
998 "This is a simple string.",
999 ),
1000 (r#"This is a simple string."#, Some('"'), ""),
1001 (
1002 r#"This is an escape\n.""#,
1003 Some('"'),
1004 "This is an escape\\n.",
1005 ),
1006 (
1007 "This is a control code\x02.\"",
1008 Some('"'),
1009 "This is a control code\x02.",
1010 ),
1011 ];
1012 for (in_str, term, out_str) in cases {
1013 let mut parser = parse_from_string(in_str);
1014 let result = sp.process(parser.borrow_core(), *term);
1015 if out_str.is_empty() {
1016 assert!(result.is_err())
1017 } else {
1018 assert_eq!(&result.unwrap(), out_str)
1019 }
1020 }
1021 }
1022
1023 #[test]
1024 fn escape_test() {
1025 let mut sp = StringParser::new();
1026 sp.enable_escapes = true;
1027 sp.permit_low_control_characters = false;
1028 let cases = &[
1029 (
1030 r#"This is a simple string."#,
1031 None,
1032 "This is a simple string.",
1033 ),
1034 (r#"This is an escape\n."#, None, "This is an escape\n."),
1035 ("This is a control code\x02.", None, ""),
1036 (
1037 r#"This is a simple string.""#,
1038 Some('"'),
1039 "This is a simple string.",
1040 ),
1041 (r#"This is a simple string."#, Some('"'), ""),
1042 (
1043 r#"This is an escape\n.""#,
1044 Some('"'),
1045 "This is an escape\n.",
1046 ),
1047 ("This is a control code\x02.\"", Some('"'), ""),
1048 ];
1049 for (in_str, term, out_str) in cases {
1050 let mut parser = parse_from_string(in_str);
1051 let result = sp.process(parser.borrow_core(), *term);
1052 if out_str.is_empty() {
1053 assert!(result.is_err())
1054 } else {
1055 assert_eq!(&result.unwrap(), out_str)
1056 }
1057 }
1058 }
1059
1060 #[test]
1061 fn odd_escapes_test() {
1062 let mut sp = StringParser::new();
1063 sp.enable_escapes = true;
1064 sp.permit_low_control_characters = true;
1065 sp.allow_surrogate_pairs = true;
1066 let escapes = BTreeMap::from([
1067 ('\n', EscapeType::Discard),
1068 ('\\', EscapeType::Char('\\')),
1069 ('\'', EscapeType::Char('\'')),
1070 ('\"', EscapeType::Char('\"')),
1071 ('a', EscapeType::Char('\x07')),
1072 ('b', EscapeType::Char('\x08')),
1073 ('f', EscapeType::Char('\x0c')),
1074 ('n', EscapeType::Char('\n')),
1075 ('r', EscapeType::Char('\r')),
1076 ('t', EscapeType::Char('\t')),
1077 ('v', EscapeType::Char('\x0b')),
1078 ('x', EscapeType::NakedByte),
1079 ('N', EscapeType::BracketUNamed),
1080 ('u', EscapeType::NakedU4),
1081 ('U', EscapeType::NakedU8),
1082 ('z', EscapeType::Char('0')),
1083 ('å', EscapeType::Discard),
1084 ]);
1085 sp.unknown_escape_protocol = UnknownEscapeProtocol::Error;
1086 sp.illegal_unicode_protocol = IllegalUnicodeProtocol::Error;
1087 sp.set_escapes(escapes);
1088 let mut parser =
1089 parse_from_string(r#"A very \\escaped\\ string. \'\"\a\b\f\n\r\t\v\z\å\z"#);
1090 let result = sp.process(parser.borrow_core(), None);
1091 assert_eq!(
1092 result.unwrap(),
1093 "A very \\escaped\\ string. '\"\u{7}\u{8}\u{c}\n\r\t\u{b}00"
1094 );
1095 let mut parser = parse_from_string(r#"\ud801\udce0"#);
1096 let result = sp.process(parser.borrow_core(), None);
1097 assert_eq!(result.unwrap(), "𐓠");
1098 let mut parser = parse_from_string(r#"\ud801\u002e"#);
1099 let result = sp.process(parser.borrow_core(), None);
1100 assert!(result.is_err());
1101 let mut parser = parse_from_string(r#"\ud801*"#);
1102 let result = sp.process(parser.borrow_core(), None);
1103 println!("{:?}", result);
1104 assert!(result.is_err());
1105 let mut parser = parse_from_string(r#"\ß"#);
1106 let result = sp.process(parser.borrow_core(), None);
1107 assert!(result.is_err());
1108 }
1109
1110 #[test]
1111 fn control_escape_test() {
1112 let mut sp = StringParser::new();
1113 sp.enable_escapes = true;
1114 sp.permit_low_control_characters = true;
1115 let cases = &[
1116 (
1117 r#"This is a simple string."#,
1118 None,
1119 "This is a simple string.",
1120 ),
1121 (r#"This is an escape\n."#, None, "This is an escape\n."),
1122 (
1123 "This is a control code\x02.",
1124 None,
1125 "This is a control code\x02.",
1126 ),
1127 (
1128 r#"This is a simple string.""#,
1129 Some('"'),
1130 "This is a simple string.",
1131 ),
1132 (r#"This is a simple string."#, Some('"'), ""),
1133 (
1134 r#"This is an escape\n.""#,
1135 Some('"'),
1136 "This is an escape\n.",
1137 ),
1138 (
1139 "This is a control code\x02.\"",
1140 Some('"'),
1141 "This is a control code\x02.",
1142 ),
1143 ];
1144 for (in_str, term, out_str) in cases {
1145 let mut parser = parse_from_string(in_str);
1146 let result = sp.process(parser.borrow_core(), *term);
1147 if out_str.is_empty() {
1148 assert!(result.is_err())
1149 } else {
1150 assert_eq!(&result.unwrap(), out_str)
1151 }
1152 }
1153 }
1154}