rusty_promql_parser/lexer/
string.rs1use nom::{
51 IResult, Parser,
52 branch::alt,
53 bytes::complete::take_while_m_n,
54 character::complete::{anychar, char, none_of},
55 combinator::{map, map_opt, value, verify},
56 multi::many0,
57 sequence::{delimited, preceded},
58};
59
60pub fn string_literal(input: &str) -> IResult<&str, String> {
64 alt((double_quoted_string, single_quoted_string, raw_string)).parse(input)
65}
66
67pub fn double_quoted_string(input: &str) -> IResult<&str, String> {
69 delimited(
70 char('"'),
71 map(many0(double_quoted_char), |chars| {
72 chars.into_iter().collect()
73 }),
74 char('"'),
75 )
76 .parse(input)
77}
78
79pub fn single_quoted_string(input: &str) -> IResult<&str, String> {
81 delimited(
82 char('\''),
83 map(many0(single_quoted_char), |chars| {
84 chars.into_iter().collect()
85 }),
86 char('\''),
87 )
88 .parse(input)
89}
90
91pub fn raw_string(input: &str) -> IResult<&str, String> {
94 delimited(
95 char('`'),
96 map(many0(none_of("`")), |chars| chars.into_iter().collect()),
97 char('`'),
98 )
99 .parse(input)
100}
101
102fn double_quoted_char(input: &str) -> IResult<&str, char> {
104 alt((
105 preceded(char('\\'), escape_char('"')),
107 verify(anychar, |&c| c != '"' && c != '\\' && c != '\n'),
109 ))
110 .parse(input)
111}
112
113fn single_quoted_char(input: &str) -> IResult<&str, char> {
115 alt((
116 preceded(char('\\'), escape_char('\'')),
118 verify(anychar, |&c| c != '\'' && c != '\\' && c != '\n'),
120 ))
121 .parse(input)
122}
123
124fn escape_char(quote_char: char) -> impl FnMut(&str) -> IResult<&str, char> {
127 move |input: &str| {
128 alt((
129 value('\x07', char('a')), value('\x08', char('b')), value('\x0c', char('f')), value('\n', char('n')), value('\r', char('r')), value('\t', char('t')), value('\x0b', char('v')), value('\\', char('\\')), value(quote_char, char(quote_char)), value('"', char('"')),
141 value('\'', char('\'')),
142 hex_escape,
144 unicode_escape_short,
146 unicode_escape_long,
147 octal_escape,
149 ))
150 .parse(input)
151 }
152}
153
154fn hex_escape(input: &str) -> IResult<&str, char> {
156 preceded(
157 char('x'),
158 map_opt(
159 take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()),
160 |hex: &str| {
161 let val = u8::from_str_radix(hex, 16).ok()?;
162 Some(val as char)
163 },
164 ),
165 )
166 .parse(input)
167}
168
169fn unicode_escape_short(input: &str) -> IResult<&str, char> {
171 preceded(
172 char('u'),
173 map_opt(
174 take_while_m_n(4, 4, |c: char| c.is_ascii_hexdigit()),
175 |hex: &str| {
176 let val = u32::from_str_radix(hex, 16).ok()?;
177 if (0xD800..0xE000).contains(&val) {
179 return None;
180 }
181 char::from_u32(val)
182 },
183 ),
184 )
185 .parse(input)
186}
187
188fn unicode_escape_long(input: &str) -> IResult<&str, char> {
190 preceded(
191 char('U'),
192 map_opt(
193 take_while_m_n(8, 8, |c: char| c.is_ascii_hexdigit()),
194 |hex: &str| {
195 let val = u32::from_str_radix(hex, 16).ok()?;
196 if (0xD800..0xE000).contains(&val) {
198 return None;
199 }
200 char::from_u32(val)
201 },
202 ),
203 )
204 .parse(input)
205}
206
207fn octal_escape(input: &str) -> IResult<&str, char> {
210 map_opt(
211 take_while_m_n(3, 3, |c: char| c.is_ascii_digit() && c < '8'),
212 |oct: &str| {
213 let val = u8::from_str_radix(oct, 8).ok()?;
214 Some(val as char)
215 },
216 )
217 .parse(input)
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` is consumed entirely by `string_literal` and
    /// decodes to `expected`.
    fn assert_string(input: &str, expected: &str) {
        let (remaining, value) = string_literal(input)
            .unwrap_or_else(|e| panic!("Failed to parse '{}': {:?}", input, e));
        assert!(
            remaining.is_empty(),
            "Parser did not consume entire input '{}', remaining: '{}'",
            input,
            remaining
        );
        assert_eq!(
            value, expected,
            "For input '{}', expected {:?}, got {:?}",
            input, expected, value
        );
    }

    /// Asserts that `input` is either rejected by `string_literal` or only
    /// partially consumed.
    fn assert_string_fails(input: &str) {
        let fully_parsed = matches!(string_literal(input), Ok((rest, _)) if rest.is_empty());
        assert!(
            !fully_parsed,
            "Expected '{}' to fail or not fully parse",
            input
        );
    }

    // --- Double-quoted strings -------------------------------------------

    #[test]
    fn test_double_quoted_basic() {
        let cases = [
            (r#""hello""#, "hello"),
            (r#""world""#, "world"),
            (r#""test string""#, "test string"),
            (r#""""#, ""),
        ];
        for (input, expected) in cases {
            assert_string(input, expected);
        }
    }

    #[test]
    fn test_double_quoted_escaped_quote() {
        assert_string(r#""say \"hello\"""#, "say \"hello\"");
    }

    #[test]
    fn test_double_quoted_simple_escapes() {
        let cases = [
            (r#""\n""#, "\n"),
            (r#""\t""#, "\t"),
            (r#""\r""#, "\r"),
            (r#""\\""#, "\\"),
            (r#""\a""#, "\x07"),
            (r#""\b""#, "\x08"),
            (r#""\f""#, "\x0c"),
            (r#""\v""#, "\x0b"),
        ];
        for (input, expected) in cases {
            assert_string(input, expected);
        }
    }

    #[test]
    fn test_double_quoted_hex_escape() {
        let cases = [
            (r#""\xFF""#, "\u{ff}"),
            (r#""\x00""#, "\0"),
            (r#""\x41""#, "A"),
        ];
        for (input, expected) in cases {
            assert_string(input, expected);
        }
    }

    #[test]
    fn test_double_quoted_unicode_escape() {
        let cases = [
            (r#""\u0041""#, "A"),
            (r#""\u1234""#, "\u{1234}"),
            (r#""\U00010111""#, "\u{10111}"),
        ];
        for (input, expected) in cases {
            assert_string(input, expected);
        }
    }

    #[test]
    fn test_double_quoted_octal_escape() {
        let cases = [
            (r#""\377""#, "\u{ff}"),
            (r#""\000""#, "\0"),
            (r#""\101""#, "A"),
        ];
        for (input, expected) in cases {
            assert_string(input, expected);
        }
    }

    // --- Single-quoted strings -------------------------------------------

    #[test]
    fn test_single_quoted_basic() {
        let cases = [("'hello'", "hello"), ("'world'", "world"), ("''", "")];
        for (input, expected) in cases {
            assert_string(input, expected);
        }
    }

    #[test]
    fn test_single_quoted_escaped_quote() {
        assert_string(r"'say \'hello\''", "say 'hello'");
    }

    #[test]
    fn test_single_quoted_escapes() {
        let cases = [(r"'\n'", "\n"), (r"'\t'", "\t"), (r"'\\'", "\\")];
        for (input, expected) in cases {
            assert_string(input, expected);
        }
    }

    // --- Raw (backtick) strings ------------------------------------------

    #[test]
    fn test_raw_string_basic() {
        let cases = [
            ("`hello`", "hello"),
            ("`test string`", "test string"),
            ("``", ""),
        ];
        for (input, expected) in cases {
            assert_string(input, expected);
        }
    }

    #[test]
    fn test_raw_string_no_escapes() {
        // Backslashes inside a raw string must come through untouched.
        let cases = [
            (r"`\n\t\\`", r"\n\t\\"),
            (r"`test\.expression`", r"test\.expression"),
        ];
        for (input, expected) in cases {
            assert_string(input, expected);
        }
    }

    #[test]
    fn test_raw_string_can_contain_quotes() {
        let cases = [
            (r#"`"hello"`"#, "\"hello\""),
            (r"`'hello'`", "'hello'"),
        ];
        for (input, expected) in cases {
            assert_string(input, expected);
        }
    }

    // --- Mixed escapes ---------------------------------------------------

    #[test]
    fn test_complex_escape_sequence() {
        let input = r#""\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111""#;
        let expected = "\x07\x08\x0c\n\r\t\x0b\\\" - \u{ff}\u{ff}\u{1234}\u{10111}";
        assert_string(input, expected);
    }

    // --- Rejected inputs -------------------------------------------------

    #[test]
    fn test_unterminated_double_quoted() {
        for input in [r#"""#, r#""hello"#] {
            assert_string_fails(input);
        }
    }

    #[test]
    fn test_unterminated_single_quoted() {
        for input in ["'", "'hello"] {
            assert_string_fails(input);
        }
    }

    #[test]
    fn test_unterminated_raw_string() {
        for input in ["`", "`hello"] {
            assert_string_fails(input);
        }
    }

    #[test]
    fn test_newline_in_quoted_string() {
        // A literal newline is not allowed in either quoted form.
        for input in ["\"hello\nworld\"", "'hello\nworld'"] {
            assert_string_fails(input);
        }
    }

    #[test]
    fn test_raw_string_can_have_newlines() {
        assert_string("`hello\nworld`", "hello\nworld");
    }

    // --- Partial consumption ---------------------------------------------

    #[test]
    fn test_string_followed_by_other_content() {
        let (rest, decoded) = string_literal(r#""hello" world"#).unwrap();
        assert_eq!(rest, " world");
        assert_eq!(decoded, "hello");
    }
}