pub struct StringParser {
pub enable_escapes: bool,
pub escape_char: char,
pub permit_low_control_characters: bool,
pub unknown_escape_protocol: UnknownEscapeProtocol,
pub allow_surrogate_pairs: bool,
pub illegal_unicode_protocol: IllegalUnicodeProtocol,
pub allow_octal_escapes: bool,
pub octal_escapes_are_flexible: bool,
pub ucd: Rc<BTreeMap<&'static str, char>>,
/* private fields */
}
Expand description
Implement parsing of strings.
This is intended to be a very flexible parsing system, and implements some common string formats. Specific features can be enabled and disabled by setting the flags and providing a map for escape handling rules.
§Escape Handling
Specify escape handling rules by creating a BTreeMap
mapping characters
to escape handling rules. The character is the character following the
escape character. Escape handling rules are specified by EscapeType
.
Note: You cannot have both a \0
escape and support octal escapes, or
octal escapes with a leading zero will not work.
As an example, here are the escape handling rules for Python.
use std::collections::BTreeMap;
use trivet::strings::EscapeType;
let escapes = BTreeMap::from([
('\n', EscapeType::Discard),
('\\', EscapeType::Char('\\')),
('\'', EscapeType::Char('\'')),
('\"', EscapeType::Char('\"')),
('a', EscapeType::Char('\x07')),
('b', EscapeType::Char('\x08')),
('f', EscapeType::Char('\x0c')),
('n', EscapeType::Char('\n')),
('r', EscapeType::Char('\r')),
('t', EscapeType::Char('\t')),
('v', EscapeType::Char('\x0b')),
('x', EscapeType::NakedByte),
('N', EscapeType::BracketUNamed),
('u', EscapeType::NakedU4),
('U', EscapeType::NakedU8),
]);
§Unicode Database
Note: The feature no_ucd
will disable use of the Unicode database.
The parser is capable of looking up Unicode code points by their name or alias. This is provided by a map that encodes the entire space. This map must be provided to every new parser instance.
Creating a default instance (with Self::default
) does this for you.
If you only use this string parser instance from then on, then you do not
need to worry about this.
If you plan to create many string parser instances, then you should instead
get the UCD database yourself via get_ucd
, which returns a boxed,
reference-counted copy.
§Example
use trivet::strings::StringParser;
use trivet::parse_from_string;
use trivet::Parser;
// Make a new string parser.
let mut strpar = StringParser::new();
// Make a parser around a string.
let mut parser = parse_from_string(r#""This\nis\na\nstring.""#);
match parser.parse_string_match_delimiter() {
Ok(value) => println!("{}", value),
Err(err) => println!("ERROR: {}", err),
}
Fields§
§enable_escapes: bool
If true, parse escape sequences.
escape_char: char
Character used to introduce an escape. Usually \
.
permit_low_control_characters: bool
If true, permit “naked” control characters to be present in the stream. Otherwise
generate an error. This applies to all character values below \u0020
and only to
those characters (so delete and a few other control characters are not included).
unknown_escape_protocol: UnknownEscapeProtocol
How to handle unrecognized escape sequences.
allow_surrogate_pairs: bool
If true, and if the current result looks like the first half of a UTF-16 surrogate pair (a high surrogate, in the range U+D800 up to U+DBFF), then try to find and parse a second (low) surrogate and generate the corresponding character.
If false, treat this as an invalid escape. For instance, Rust does not permit surrogate pairs in this way.
illegal_unicode_protocol: IllegalUnicodeProtocol
How to handle invalid Unicode values that arise from parsing hexadecimal escapes. This includes surrogate pairs when those are not allowed.
allow_octal_escapes: bool
Permit octal escapes. These have the form [escape]
followed by (usually) one to
three octal digits (but see Self::octal_escapes_are_flexible
). Parsing of
octal escapes is performed before handling other escapes to permit [escape]0
to
be handled correctly, if present.
octal_escapes_are_flexible: bool
Allow flexible octal escapes. These consist of one to three octal digits. Python
uses this approach, so "\12k"
encodes the string "\nk"
. If this is disabled,
then octal escapes must have exactly three octal digits.
ucd: Rc<BTreeMap<&'static str, char>>
The Unicode database of names and aliases to code points.
Implementations§
Source§impl StringParser
impl StringParser
Sourcepub fn new() -> Self
pub fn new() -> Self
Make and return a new string parser. The initial parsing standard is set to
StringStandard::Trivet
.
Examples found in repository?
9pub fn main() {
10 // Make a string parser.
11 let mut strpar = StringParser::new();
12 let mut encoder = StringEncoder::new();
13 println!(
14 r#"
15Enter strings to process, one per line. You can switch the string
16standard used for parsing by entering any of the following.
17
18 +python +c +json +rust +trivet
19
20Toggle using Unicode names in output with +names. Toggle encoding
21of all non-ascii with +ascii.
22 "#
23 );
24
25 // Run until we can run no more. You have to break this with
26 // ctrl+c.
27 let mut standard = StringStandard::Trivet;
28 let mut ascii = false;
29 let mut names = false;
30 loop {
31 let mut line = String::new();
32 print!(
33 "{}{}{}> ",
34 standard,
35 if ascii { " (ascii)" } else { "" },
36 if names { " (names)" } else { "" },
37 );
38 let _ = std::io::stdout().flush();
39 std::io::stdin().read_line(&mut line).unwrap();
40 line.pop();
41 match line.as_str() {
42 "+python" => {
43 standard = StringStandard::Python;
44 strpar.set(standard);
45 }
46 "+rust" => {
47 standard = StringStandard::Rust;
48 strpar.set(standard);
49 }
50 "+c" => {
51 standard = StringStandard::C;
52 strpar.set(standard);
53 }
54 "+json" => {
55 standard = StringStandard::JSON;
56 strpar.set(standard);
57 }
58 "+trivet" => {
59 standard = StringStandard::Trivet;
60 strpar.set(standard);
61 }
62 "+names" => {
63 names = !names;
64 }
65 "+ascii" => {
66 ascii = !ascii;
67 }
68 _ => match strpar.parse_string(&line) {
69 Ok(value) => {
70 println!(" Debug: {:?}", value);
71 encoder.set(StringStandard::C);
72 if ascii {
73 encoder.encoding_standard = EncodingStandard::ASCII;
74 }
75 if names {
76 encoder.use_names = true;
77 }
78 println!(" C: \"{}\"", encoder.encode(&value));
79 encoder.set(StringStandard::JSON);
80 if ascii {
81 encoder.encoding_standard = EncodingStandard::ASCII;
82 }
83 if names {
84 encoder.use_names = true;
85 }
86 println!(" JSON: \"{}\"", encoder.encode(&value));
87 encoder.set(StringStandard::Rust);
88 if ascii {
89 encoder.encoding_standard = EncodingStandard::ASCII;
90 }
91 if names {
92 encoder.use_names = true;
93 }
94 println!(" Rust: \"{}\"", encoder.encode(&value));
95 encoder.set(StringStandard::Python);
96 if ascii {
97 encoder.encoding_standard = EncodingStandard::ASCII;
98 }
99 if names {
100 encoder.use_names = true;
101 }
102 println!(" Python: \"{}\"", encoder.encode(&value));
103 encoder.set(StringStandard::Trivet);
104 if ascii {
105 encoder.encoding_standard = EncodingStandard::ASCII;
106 }
107 if names {
108 encoder.use_names = true;
109 }
110 println!(" Trivet: \"{}\"", encoder.encode(&value));
111 }
112 Err(err) => {
113 println!("ERROR: {}", err)
114 }
115 },
116 }
117 }
118}
Sourcepub fn new_from_db(ucd: &Rc<BTreeMap<&'static str, char>>) -> Self
pub fn new_from_db(ucd: &Rc<BTreeMap<&'static str, char>>) -> Self
Make and return a new string parser. The initial parsing mode is set to Trivet.
Sourcepub fn set(&mut self, std: StringStandard)
pub fn set(&mut self, std: StringStandard)
Configure all settings to conform to a given standard. See
StringStandard
for the available standards.
Examples found in repository?
122pub fn main() {
123 let mut parser = trivet::parse_from_stdin();
124 parser.parse_comments = false;
125 let numpar = parser.borrow_number_parser();
126 numpar.settings.permit_binary = false;
127 numpar.settings.permit_hexadecimal = false;
128 numpar.settings.permit_octal = false;
129 numpar.settings.permit_underscores = false;
130 numpar.settings.decimal_only_floats = true;
131 numpar.settings.permit_plus = false;
132 numpar.settings.permit_leading_zero = false;
133 numpar.settings.permit_empty_whole = false;
134 numpar.settings.permit_empty_fraction = false;
135 let strpar = parser.borrow_string_parser();
136 strpar.set(trivet::strings::StringStandard::JSON);
137 let _ = parser
138 .borrow_core()
139 .replace_whitespace_test(Box::new(|ch| [' ', '\n', '\r', '\t'].contains(&ch)));
140 parser.consume_ws();
141 if let Err(error) = parse_value_ws(&mut parser) {
142 println!("ERROR: {}", error);
143 std::process::exit(1);
144 } else {
145 // If there is any trailing stuff that is not whitespace, then this is not a valid
146 // JSON file.
147 if parser.is_at_eof() {
148 std::process::exit(0)
149 } else {
150 println!("Found unexpected trailing characters after JSON value.");
151 std::process::exit(1);
152 }
153 };
154}
More examples
9pub fn main() {
10 // Make a string parser.
11 let mut strpar = StringParser::new();
12 let mut encoder = StringEncoder::new();
13 println!(
14 r#"
15Enter strings to process, one per line. You can switch the string
16standard used for parsing by entering any of the following.
17
18 +python +c +json +rust +trivet
19
20Toggle using Unicode names in output with +names. Toggle encoding
21of all non-ascii with +ascii.
22 "#
23 );
24
25 // Run until we can run no more. You have to break this with
26 // ctrl+c.
27 let mut standard = StringStandard::Trivet;
28 let mut ascii = false;
29 let mut names = false;
30 loop {
31 let mut line = String::new();
32 print!(
33 "{}{}{}> ",
34 standard,
35 if ascii { " (ascii)" } else { "" },
36 if names { " (names)" } else { "" },
37 );
38 let _ = std::io::stdout().flush();
39 std::io::stdin().read_line(&mut line).unwrap();
40 line.pop();
41 match line.as_str() {
42 "+python" => {
43 standard = StringStandard::Python;
44 strpar.set(standard);
45 }
46 "+rust" => {
47 standard = StringStandard::Rust;
48 strpar.set(standard);
49 }
50 "+c" => {
51 standard = StringStandard::C;
52 strpar.set(standard);
53 }
54 "+json" => {
55 standard = StringStandard::JSON;
56 strpar.set(standard);
57 }
58 "+trivet" => {
59 standard = StringStandard::Trivet;
60 strpar.set(standard);
61 }
62 "+names" => {
63 names = !names;
64 }
65 "+ascii" => {
66 ascii = !ascii;
67 }
68 _ => match strpar.parse_string(&line) {
69 Ok(value) => {
70 println!(" Debug: {:?}", value);
71 encoder.set(StringStandard::C);
72 if ascii {
73 encoder.encoding_standard = EncodingStandard::ASCII;
74 }
75 if names {
76 encoder.use_names = true;
77 }
78 println!(" C: \"{}\"", encoder.encode(&value));
79 encoder.set(StringStandard::JSON);
80 if ascii {
81 encoder.encoding_standard = EncodingStandard::ASCII;
82 }
83 if names {
84 encoder.use_names = true;
85 }
86 println!(" JSON: \"{}\"", encoder.encode(&value));
87 encoder.set(StringStandard::Rust);
88 if ascii {
89 encoder.encoding_standard = EncodingStandard::ASCII;
90 }
91 if names {
92 encoder.use_names = true;
93 }
94 println!(" Rust: \"{}\"", encoder.encode(&value));
95 encoder.set(StringStandard::Python);
96 if ascii {
97 encoder.encoding_standard = EncodingStandard::ASCII;
98 }
99 if names {
100 encoder.use_names = true;
101 }
102 println!(" Python: \"{}\"", encoder.encode(&value));
103 encoder.set(StringStandard::Trivet);
104 if ascii {
105 encoder.encoding_standard = EncodingStandard::ASCII;
106 }
107 if names {
108 encoder.use_names = true;
109 }
110 println!(" Trivet: \"{}\"", encoder.encode(&value));
111 }
112 Err(err) => {
113 println!("ERROR: {}", err)
114 }
115 },
116 }
117 }
118}
Sourcepub fn set_escapes(&mut self, escapes: BTreeMap<char, EscapeType>)
pub fn set_escapes(&mut self, escapes: BTreeMap<char, EscapeType>)
Set the escapes for this parser instance.
Sourcepub fn process(
&self,
parser: &mut ParserCore,
terminal: Option<char>,
) -> ParseResult<String>
pub fn process( &self, parser: &mut ParserCore, terminal: Option<char>, ) -> ParseResult<String>
Parse a string from the given parser. The terminal
specifies a terminal character
that ends the string. If the terminal is None
, then everything is parsed as part
of the string until the end of stream is reached.
If a terminal is specified (is not None
) but is not found, an error is generated.
Sourcepub fn parse_string(&self, value: &str) -> ParseResult<String>
pub fn parse_string(&self, value: &str) -> ParseResult<String>
Parse a string from the given value. The entire string is parsed.
Examples found in repository?
9pub fn main() {
10 // Make a string parser.
11 let mut strpar = StringParser::new();
12 let mut encoder = StringEncoder::new();
13 println!(
14 r#"
15Enter strings to process, one per line. You can switch the string
16standard used for parsing by entering any of the following.
17
18 +python +c +json +rust +trivet
19
20Toggle using Unicode names in output with +names. Toggle encoding
21of all non-ascii with +ascii.
22 "#
23 );
24
25 // Run until we can run no more. You have to break this with
26 // ctrl+c.
27 let mut standard = StringStandard::Trivet;
28 let mut ascii = false;
29 let mut names = false;
30 loop {
31 let mut line = String::new();
32 print!(
33 "{}{}{}> ",
34 standard,
35 if ascii { " (ascii)" } else { "" },
36 if names { " (names)" } else { "" },
37 );
38 let _ = std::io::stdout().flush();
39 std::io::stdin().read_line(&mut line).unwrap();
40 line.pop();
41 match line.as_str() {
42 "+python" => {
43 standard = StringStandard::Python;
44 strpar.set(standard);
45 }
46 "+rust" => {
47 standard = StringStandard::Rust;
48 strpar.set(standard);
49 }
50 "+c" => {
51 standard = StringStandard::C;
52 strpar.set(standard);
53 }
54 "+json" => {
55 standard = StringStandard::JSON;
56 strpar.set(standard);
57 }
58 "+trivet" => {
59 standard = StringStandard::Trivet;
60 strpar.set(standard);
61 }
62 "+names" => {
63 names = !names;
64 }
65 "+ascii" => {
66 ascii = !ascii;
67 }
68 _ => match strpar.parse_string(&line) {
69 Ok(value) => {
70 println!(" Debug: {:?}", value);
71 encoder.set(StringStandard::C);
72 if ascii {
73 encoder.encoding_standard = EncodingStandard::ASCII;
74 }
75 if names {
76 encoder.use_names = true;
77 }
78 println!(" C: \"{}\"", encoder.encode(&value));
79 encoder.set(StringStandard::JSON);
80 if ascii {
81 encoder.encoding_standard = EncodingStandard::ASCII;
82 }
83 if names {
84 encoder.use_names = true;
85 }
86 println!(" JSON: \"{}\"", encoder.encode(&value));
87 encoder.set(StringStandard::Rust);
88 if ascii {
89 encoder.encoding_standard = EncodingStandard::ASCII;
90 }
91 if names {
92 encoder.use_names = true;
93 }
94 println!(" Rust: \"{}\"", encoder.encode(&value));
95 encoder.set(StringStandard::Python);
96 if ascii {
97 encoder.encoding_standard = EncodingStandard::ASCII;
98 }
99 if names {
100 encoder.use_names = true;
101 }
102 println!(" Python: \"{}\"", encoder.encode(&value));
103 encoder.set(StringStandard::Trivet);
104 if ascii {
105 encoder.encoding_standard = EncodingStandard::ASCII;
106 }
107 if names {
108 encoder.use_names = true;
109 }
110 println!(" Trivet: \"{}\"", encoder.encode(&value));
111 }
112 Err(err) => {
113 println!("ERROR: {}", err)
114 }
115 },
116 }
117 }
118}
Trait Implementations§
Source§impl Clone for StringParser
impl Clone for StringParser
Source§fn clone(&self) -> StringParser
fn clone(&self) -> StringParser
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source
. Read more