1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
use regex::{Regex, Captures};
use std::char;
/// Module-local result alias: every fallible function here reports failure
/// as a static string message.
type Result<T> = ::std::result::Result<T, &'static str>;
/// Expands to the regex source (a string literal) for a unicode escape:
/// a backslash, `u`, and one to six hexadecimal digits wrapped in braces.
/// The hex digits are captured by the pattern's only capture group.
///
/// It is a macro rather than a `const` so that it can be spliced into other
/// patterns with `concat!`, which accepts only literals.
macro_rules! unicode_char_pattern {
() => { r#"\\u\{([[:xdigit:]]{1,6})\}"# };
}
lazy_static! {
    /// Compiled form of `unicode_char_pattern!()`; capture group 1 holds the
    /// hexadecimal digits found between the braces.
    pub static ref UNICODE_REGEX: Regex = Regex::new(unicode_char_pattern!()).unwrap();
}
/// Expands to the regex source (a string literal) matching one escape
/// sequence, written in verbose (`x`) mode so the inline `#` comments and
/// whitespace are ignored by the regex engine. The alternatives are:
///
/// * a backslash followed by one of `n`, `r`, `t`, `\`, `0`, `'`, `"`;
/// * `\x` followed by exactly two hexadecimal digits;
/// * a unicode escape as defined by `unicode_char_pattern!()`.
///
/// It is a macro rather than a `const` so that it can be spliced into other
/// patterns with `concat!`, which accepts only literals.
macro_rules! escape_pattern {
() => { concat!(
r#"(?xs)
\\ # opening backslash
[nrt\\0'"] # single-character escapes
| # or
(?:\\x[[:xdigit:]]{2}) # one-byte escapes
| # or
(?:"#, unicode_char_pattern!(), r")"
)};
}
lazy_static! {
    /// Compiled form of `escape_pattern!()`: matches a single escape sequence
    /// anywhere in the searched text (the pattern is not anchored).
    pub static ref ESCAPE_REGEX: Regex = Regex::new(escape_pattern!()).unwrap();
}
/// Regex source for a double-quoted string literal, written in verbose (`x`)
/// mode.
///
/// The named group `s` captures everything between the opening and closing
/// quotes. That content is any number of escape sequences (as defined by
/// `escape_pattern!()`) or characters other than a backslash or a double
/// quote. The pattern itself is not anchored.
pub const PTN_STRING: &str = concat!(
r#"(?xs)
" # opening quote
(?P<s> # main string capture group start
(?: # repeatable character (or escape sequence) group
(?:"#,
escape_pattern!(),
r#") # escape sequence non-capturing group end
| # or
[^"\\] # anything but a backslash or double quote
)* # any number of characters / escape sequences
) # main string capture group end
" # closing quote
"#
);
lazy_static! {
    /// Compiled form of `PTN_STRING`; finds a double-quoted string literal
    /// anywhere in the searched text.
    pub static ref STRING_REGEX: Regex = Regex::new(PTN_STRING).unwrap();
}
lazy_static! {
    /// `PTN_STRING` wrapped in `^(?: ... )`, so a match must begin at the very
    /// start of the searched text.
    pub static ref STRING_REGEX_ANC: Regex = {
        // The closing parenthesis lands on its own line because `PTN_STRING`
        // ends with a newline, so it is not swallowed by a verbose-mode comment.
        let anchored_pattern = ["^(?:", PTN_STRING, ")"].concat();
        Regex::new(&anchored_pattern).unwrap()
    };
}
/// Tries to match a double-quoted string literal that starts exactly at byte
/// offset `pos` of `input`.
///
/// On success returns the byte offset just past the closing quote (relative
/// to the start of `input`) together with the regex captures; the string
/// contents are available through the named group `s`.
///
/// Returns `None` when no string literal starts at `pos`, and also when `pos`
/// is past the end of `input` or does not fall on a UTF-8 character boundary
/// (plain slicing would panic in those cases).
pub fn match_str_ext(input: &str, pos: usize) -> Option<(usize, Captures)> {
    // Checked slicing: an invalid `pos` simply means "nothing matches here".
    let rest = input.get(pos..)?;
    let caps = STRING_REGEX_ANC.captures(rest)?;
    // Group 0 is the whole match and is always present. The regex is anchored
    // at the start of `rest`, so the match length is the number of bytes
    // consumed from `pos`.
    let consumed = caps[0].len();
    Some((pos + consumed, caps))
}
/// Builds the unescaped value of a matched string literal.
///
/// `captures` is expected to contain the named group `s` (the raw text
/// between the quotes, as produced by the `PTN_STRING` regexes). Every escape
/// sequence found in that text is replaced by the character it denotes; all
/// other text is copied through unchanged.
///
/// # Errors
///
/// Returns `"failed to parse string"` when group `s` did not participate in
/// the match, and propagates any error reported while decoding an individual
/// escape sequence.
pub fn convert_string(captures: Captures) -> Result<String> {
    let raw = captures
        .name("s")
        .ok_or("failed to parse string")?
        .as_str();

    // The decoded text is never longer than the raw text, so one allocation
    // of `raw.len()` bytes is always enough.
    let mut result = String::with_capacity(raw.len());
    let mut offset = 0;

    // Only the overall match span is needed, so `find_iter` is used instead
    // of `captures_iter`; both yield the same non-overlapping matches.
    for mat in ESCAPE_REGEX.find_iter(raw) {
        // Copy the literal text between the previous escape and this one.
        result.push_str(&raw[offset..mat.start()]);
        result.push_str(&escape(mat.as_str())?);
        offset = mat.end();
    }

    // Trailing literal text after the last escape sequence.
    result.push_str(&raw[offset..]);
    Ok(result)
}
/// Decodes one escape sequence (including its leading backslash) into the
/// text it stands for.
///
/// Only the first two bytes are inspected to pick the escape kind:
/// `\n`, `\r`, `\t`, `\\`, `\0`, `\'` and `\"` map to a single fixed
/// character, `\x` is followed by a two-digit hexadecimal byte code, and
/// `\u` is a braced unicode escape.
///
/// # Errors
///
/// * `"invalid empty escape"` if `code` is shorter than two bytes;
/// * `"unknown escape sequence"` if the two-byte prefix is not recognised
///   (this includes a multi-byte character directly after the backslash,
///   which previously caused a slicing panic);
/// * `"improperly formatted unicode character code"` if a `\u` escape does
///   not match `UNICODE_REGEX`;
/// * any error produced while decoding the `\x` or `\u` payload.
fn escape(code: &str) -> Result<String> {
    if code.len() < 2 {
        return Err("invalid empty escape");
    }

    // Checked slicing: byte 2 may sit inside a multi-byte character, in which
    // case `&code[..2]` would panic instead of reporting an error.
    let prefix = code.get(..2).ok_or("unknown escape sequence")?;

    let literal = match prefix {
        "\\n" => "\n",
        "\\r" => "\r",
        "\\t" => "\t",
        "\\\\" => "\\",
        "\\0" => "\0",
        "\\'" => "'",
        "\\\"" => "\"",
        // `prefix` is valid UTF-8 of length 2, so index 2 is a char boundary.
        "\\x" => return two_digit_escape(&code[2..]),
        "\\u" => {
            // Capture group 1 holds the hex digits between the braces.
            let digits = UNICODE_REGEX
                .captures(code)
                .and_then(|caps| caps.get(1))
                .ok_or("improperly formatted unicode character code")?;
            return unicode_escape(digits.as_str());
        }
        _ => return Err("unknown escape sequence"),
    };
    Ok(literal.to_string())
}
/// Decodes the payload of a `\xHH` escape: exactly two hexadecimal digits
/// naming a single byte.
///
/// Only values up to `0x7F` are accepted, because a lone byte above that
/// range is not valid UTF-8 and cannot be stored in a `String` on its own.
///
/// # Errors
///
/// * `"two-digit character code expected"` if `code` is not two bytes long;
/// * `"invalid two-digit escape"` if `code` contains a non-hexadecimal
///   character or names a byte above `0x7F`.
fn two_digit_escape(code: &str) -> Result<String> {
    if code.len() != 2 {
        return Err("two-digit character code expected");
    }
    // `from_str_radix` tolerates a leading `+`, so an input such as "+f"
    // would slip through; insist on hexadecimal digits only.
    if !code.bytes().all(|b| b.is_ascii_hexdigit()) {
        return Err("invalid two-digit escape");
    }
    let byte_code = u8::from_str_radix(code, 16).map_err(|_| "invalid two-digit escape")?;
    if byte_code > 0x7F {
        // Not representable as a one-byte UTF-8 string.
        return Err("invalid two-digit escape");
    }
    Ok(char::from(byte_code).to_string())
}
/// Decodes the hexadecimal payload of a unicode escape into a one-character
/// string.
///
/// # Errors
///
/// * `"invalid unicode escape"` if `code` is empty, contains a
///   non-hexadecimal character, or does not fit in a `u32`;
/// * `"no character found for unicode value"` if the value is not a valid
///   Unicode scalar value (a surrogate, or above `0x10FFFF`).
fn unicode_escape(code: &str) -> Result<String> {
    // `from_str_radix` tolerates a leading `+`, so an input such as "+41"
    // would slip through; insist on hexadecimal digits only. An empty string
    // passes this check but is rejected by the parse below.
    if !code.bytes().all(|b| b.is_ascii_hexdigit()) {
        return Err("invalid unicode escape");
    }
    let unicode_value = u32::from_str_radix(code, 16).map_err(|_| "invalid unicode escape")?;
    char::from_u32(unicode_value)
        .map(|c| c.to_string())
        .ok_or("no character found for unicode value")
}