rusty_promql_parser/lexer/
string.rs

1//! String literal parser for PromQL.
2//!
3//! PromQL supports three string literal formats:
4//!
5//! - **Double-quoted**: `"hello \"world\""`
6//! - **Single-quoted**: `'hello \'world\''`
7//! - **Raw/backtick**: `` `no escapes here` ``
8//!
9//! # Escape Sequences
10//!
11//! Double and single-quoted strings support these escape sequences:
12//!
13//! | Escape | Description        |
14//! |--------|--------------------|
15//! | `\a`   | Bell               |
16//! | `\b`   | Backspace          |
17//! | `\f`   | Form feed          |
18//! | `\n`   | Newline            |
19//! | `\r`   | Carriage return    |
20//! | `\t`   | Tab                |
21//! | `\v`   | Vertical tab       |
22//! | `\\`   | Backslash          |
23//! | `\"`   | Double quote       |
24//! | `\'`   | Single quote       |
25//! | `\xNN` | Hex (2 digits)     |
26//! | `\NNN` | Octal (3 digits)   |
27//! | `\uNNNN` | Unicode (4 hex)  |
28//! | `\UNNNNNNNN` | Unicode (8 hex) |
29//!
30//! Raw strings (backtick) have no escape processing.
31//!
32//! # Examples
33//!
34//! ```rust
35//! use rusty_promql_parser::lexer::string::string_literal;
36//!
37//! // Double-quoted
38//! let (_, s) = string_literal(r#""hello""#).unwrap();
39//! assert_eq!(s, "hello");
40//!
41//! // Single-quoted with escape
42//! let (_, s) = string_literal(r"'line\nbreak'").unwrap();
43//! assert_eq!(s, "line\nbreak");
44//!
45//! // Raw string (no escapes)
46//! let (_, s) = string_literal(r"`\n is literal`").unwrap();
47//! assert_eq!(s, r"\n is literal");
48//! ```
49
50use nom::{
51    IResult, Parser,
52    branch::alt,
53    bytes::complete::take_while_m_n,
54    character::complete::{anychar, char, none_of},
55    combinator::{map, map_opt, value, verify},
56    multi::many0,
57    sequence::{delimited, preceded},
58};
59
60/// Parse a PromQL string literal and return the unescaped string value.
61///
62/// Accepts double-quoted, single-quoted, or backtick-quoted strings.
63pub fn string_literal(input: &str) -> IResult<&str, String> {
64    alt((double_quoted_string, single_quoted_string, raw_string)).parse(input)
65}
66
67/// Parse a double-quoted string: "hello \"world\""
68pub fn double_quoted_string(input: &str) -> IResult<&str, String> {
69    delimited(
70        char('"'),
71        map(many0(double_quoted_char), |chars| {
72            chars.into_iter().collect()
73        }),
74        char('"'),
75    )
76    .parse(input)
77}
78
79/// Parse a single-quoted string: 'hello \'world\''
80pub fn single_quoted_string(input: &str) -> IResult<&str, String> {
81    delimited(
82        char('\''),
83        map(many0(single_quoted_char), |chars| {
84            chars.into_iter().collect()
85        }),
86        char('\''),
87    )
88    .parse(input)
89}
90
91/// Parse a raw/backtick string: `no escapes`
92/// In raw strings, backslashes are literal - no escape processing.
93pub fn raw_string(input: &str) -> IResult<&str, String> {
94    delimited(
95        char('`'),
96        map(many0(none_of("`")), |chars| chars.into_iter().collect()),
97        char('`'),
98    )
99    .parse(input)
100}
101
102/// Parse a character inside a double-quoted string
103fn double_quoted_char(input: &str) -> IResult<&str, char> {
104    alt((
105        // Escape sequence
106        preceded(char('\\'), escape_char('"')),
107        // Any char except quote, backslash, or newline
108        verify(anychar, |&c| c != '"' && c != '\\' && c != '\n'),
109    ))
110    .parse(input)
111}
112
113/// Parse a character inside a single-quoted string
114fn single_quoted_char(input: &str) -> IResult<&str, char> {
115    alt((
116        // Escape sequence
117        preceded(char('\\'), escape_char('\'')),
118        // Any char except quote, backslash, or newline
119        verify(anychar, |&c| c != '\'' && c != '\\' && c != '\n'),
120    ))
121    .parse(input)
122}
123
124/// Parse an escape sequence (after the backslash)
125/// The `quote_char` parameter specifies which quote character can be escaped
126fn escape_char(quote_char: char) -> impl FnMut(&str) -> IResult<&str, char> {
127    move |input: &str| {
128        alt((
129            // Simple escape sequences
130            value('\x07', char('a')),            // Bell
131            value('\x08', char('b')),            // Backspace
132            value('\x0c', char('f')),            // Form feed
133            value('\n', char('n')),              // Newline
134            value('\r', char('r')),              // Carriage return
135            value('\t', char('t')),              // Tab
136            value('\x0b', char('v')),            // Vertical tab
137            value('\\', char('\\')),             // Backslash
138            value(quote_char, char(quote_char)), // Quote character
139            // Also allow escaping the other quote (for compatibility)
140            value('"', char('"')),
141            value('\'', char('\'')),
142            // Hex escape: \xNN
143            hex_escape,
144            // Unicode escapes: \uNNNN and \UNNNNNNNN
145            unicode_escape_short,
146            unicode_escape_long,
147            // Octal escape: \NNN (3 octal digits)
148            octal_escape,
149        ))
150        .parse(input)
151    }
152}
153
154/// Parse a hex escape sequence: \xNN (2 hex digits)
155fn hex_escape(input: &str) -> IResult<&str, char> {
156    preceded(
157        char('x'),
158        map_opt(
159            take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()),
160            |hex: &str| {
161                let val = u8::from_str_radix(hex, 16).ok()?;
162                Some(val as char)
163            },
164        ),
165    )
166    .parse(input)
167}
168
169/// Parse a short unicode escape sequence: \uNNNN (4 hex digits)
170fn unicode_escape_short(input: &str) -> IResult<&str, char> {
171    preceded(
172        char('u'),
173        map_opt(
174            take_while_m_n(4, 4, |c: char| c.is_ascii_hexdigit()),
175            |hex: &str| {
176                let val = u32::from_str_radix(hex, 16).ok()?;
177                // Check for surrogate range (invalid)
178                if (0xD800..0xE000).contains(&val) {
179                    return None;
180                }
181                char::from_u32(val)
182            },
183        ),
184    )
185    .parse(input)
186}
187
188/// Parse a long unicode escape sequence: \UNNNNNNNN (8 hex digits)
189fn unicode_escape_long(input: &str) -> IResult<&str, char> {
190    preceded(
191        char('U'),
192        map_opt(
193            take_while_m_n(8, 8, |c: char| c.is_ascii_hexdigit()),
194            |hex: &str| {
195                let val = u32::from_str_radix(hex, 16).ok()?;
196                // Check for surrogate range (invalid)
197                if (0xD800..0xE000).contains(&val) {
198                    return None;
199                }
200                char::from_u32(val)
201            },
202        ),
203    )
204    .parse(input)
205}
206
207/// Parse an octal escape sequence: \NNN (1-3 octal digits starting with 0-7)
208/// The Go implementation reads exactly 3 octal digits
209fn octal_escape(input: &str) -> IResult<&str, char> {
210    map_opt(
211        take_while_m_n(3, 3, |c: char| c.is_ascii_digit() && c < '8'),
212        |oct: &str| {
213            let val = u8::from_str_radix(oct, 8).ok()?;
214            Some(val as char)
215        },
216    )
217    .parse(input)
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` parses completely as a string literal whose
    /// unescaped value equals `expected`.
    fn assert_string(input: &str, expected: &str) {
        let (remaining, value) = match string_literal(input) {
            Ok(parsed) => parsed,
            Err(e) => panic!("Failed to parse '{}': {:?}", input, e),
        };
        assert!(
            remaining.is_empty(),
            "Parser did not consume entire input '{}', remaining: '{}'",
            input,
            remaining
        );
        assert_eq!(
            value, expected,
            "For input '{}', expected {:?}, got {:?}",
            input, expected, value
        );
    }

    /// Asserts that `input` either fails to parse or leaves input unconsumed.
    fn assert_string_fails(input: &str) {
        let fully_parsed = matches!(string_literal(input), Ok((rest, _)) if rest.is_empty());
        assert!(
            !fully_parsed,
            "Expected '{}' to fail or not fully parse",
            input
        );
    }

    /// Runs `assert_string` over a table of `(input, expected)` pairs.
    fn assert_all(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_string(input, expected);
        }
    }

    /// Runs `assert_string_fails` over a list of inputs.
    fn assert_all_fail(inputs: &[&str]) {
        for &input in inputs {
            assert_string_fails(input);
        }
    }

    // ---- Double-quoted strings ----

    #[test]
    fn test_double_quoted_basic() {
        assert_all(&[
            (r#""hello""#, "hello"),
            (r#""world""#, "world"),
            (r#""test string""#, "test string"),
            (r#""""#, ""), // Empty string
        ]);
    }

    #[test]
    fn test_double_quoted_escaped_quote() {
        assert_string(r#""say \"hello\"""#, "say \"hello\"");
    }

    #[test]
    fn test_double_quoted_simple_escapes() {
        assert_all(&[
            (r#""\n""#, "\n"),
            (r#""\t""#, "\t"),
            (r#""\r""#, "\r"),
            (r#""\\""#, "\\"),
            (r#""\a""#, "\x07"),
            (r#""\b""#, "\x08"),
            (r#""\f""#, "\x0c"),
            (r#""\v""#, "\x0b"),
        ]);
    }

    #[test]
    fn test_double_quoted_hex_escape() {
        assert_all(&[
            (r#""\xFF""#, "\u{ff}"),
            (r#""\x00""#, "\0"),
            (r#""\x41""#, "A"),
        ]);
    }

    #[test]
    fn test_double_quoted_unicode_escape() {
        assert_all(&[
            (r#""\u0041""#, "A"),
            (r#""\u1234""#, "\u{1234}"),
            (r#""\U00010111""#, "\u{10111}"),
        ]);
    }

    #[test]
    fn test_double_quoted_octal_escape() {
        assert_all(&[
            (r#""\377""#, "\u{ff}"),
            (r#""\000""#, "\0"),
            (r#""\101""#, "A"),
        ]);
    }

    // ---- Single-quoted strings ----

    #[test]
    fn test_single_quoted_basic() {
        assert_all(&[
            ("'hello'", "hello"),
            ("'world'", "world"),
            ("''", ""), // Empty string
        ]);
    }

    #[test]
    fn test_single_quoted_escaped_quote() {
        assert_string(r"'say \'hello\''", "say 'hello'");
    }

    #[test]
    fn test_single_quoted_escapes() {
        assert_all(&[(r"'\n'", "\n"), (r"'\t'", "\t"), (r"'\\'", "\\")]);
    }

    // ---- Raw/backtick strings ----

    #[test]
    fn test_raw_string_basic() {
        assert_all(&[
            ("`hello`", "hello"),
            ("`test string`", "test string"),
            ("``", ""), // Empty string
        ]);
    }

    #[test]
    fn test_raw_string_no_escapes() {
        // Backslashes stay literal inside raw strings.
        assert_all(&[
            (r"`\n\t\\`", r"\n\t\\"),
            (r"`test\.expression`", r"test\.expression"),
        ]);
    }

    #[test]
    fn test_raw_string_can_contain_quotes() {
        assert_all(&[(r#"`"hello"`"#, "\"hello\""), (r"`'hello'`", "'hello'")]);
    }

    // ---- Complex strings from test data ----

    #[test]
    fn test_complex_escape_sequence() {
        assert_string(
            r#""\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111""#,
            "\x07\x08\x0c\n\r\t\x0b\\\" - \u{ff}\u{ff}\u{1234}\u{10111}",
        );
    }

    // ---- Error cases ----

    #[test]
    fn test_unterminated_double_quoted() {
        assert_all_fail(&[r#"""#, r#""hello"#]);
    }

    #[test]
    fn test_unterminated_single_quoted() {
        assert_all_fail(&["'", "'hello"]);
    }

    #[test]
    fn test_unterminated_raw_string() {
        assert_all_fail(&["`", "`hello"]);
    }

    #[test]
    fn test_newline_in_quoted_string() {
        // A literal newline is rejected in double- and single-quoted strings.
        assert_all_fail(&["\"hello\nworld\"", "'hello\nworld'"]);
    }

    #[test]
    fn test_raw_string_can_have_newlines() {
        // Raw strings, by contrast, may span lines.
        assert_string("`hello\nworld`", "hello\nworld");
    }

    // ---- Partial parsing ----

    #[test]
    fn test_string_followed_by_other_content() {
        let (remaining, value) = string_literal(r#""hello" world"#).unwrap();
        assert_eq!(value, "hello");
        assert_eq!(remaining, " world");
    }
}