prqlc_parser/lexer/
mod.rs

1//! PRQL Lexer implementation
2
3use chumsky;
4
5use chumsky::extra;
6use chumsky::prelude::*;
7use chumsky::Parser;
8
9use self::lr::{Literal, Token, TokenKind, Tokens, ValueAndUnit};
10use crate::error::{Error, ErrorSource, Reason, WithErrorInfo};
11
12pub mod lr;
13#[cfg(test)]
14mod test;
15
/// Error type returned by this module's lexing entry points.
type E = Error;
/// Input consumed by every parser in this module: a borrowed string slice.
type ParserInput<'a> = &'a str;
/// chumsky `extra` configuration shared by every parser here; errors are `Simple<char>`.
type ParserError<'a> = extra::Err<Simple<'a, char>>;
19
20/// Convert a chumsky Simple error to our internal Error type
21fn convert_lexer_error(source: &str, error: &Simple<'_, char>, source_id: u16) -> E {
22    // Get span information from the Simple error
23    // NOTE: When parsing &str, SimpleSpan uses BYTE offsets, not character offsets!
24    // We need to convert byte offsets to character offsets for compatibility with our error reporting.
25    let byte_span = error.span();
26    let byte_start = byte_span.start();
27    let byte_end = byte_span.end();
28
29    // Convert byte offsets to character offsets
30    let char_start = source[..byte_start].chars().count();
31    let char_end = source[..byte_end].chars().count();
32
33    // Extract the "found" text using character-based slicing
34    let found: String = source
35        .chars()
36        .skip(char_start)
37        .take(char_end - char_start)
38        .collect();
39
40    // If found is empty, report as "end of input", otherwise wrap in quotes
41    let found_display = if found.is_empty() {
42        "end of input".to_string()
43    } else {
44        format!("'{}'", found)
45    };
46
47    // Create a new Error with the extracted information
48    let error_source = format!(
49        "Unexpected {} at position {}..{}",
50        found_display, char_start, char_end
51    );
52
53    Error::new(Reason::Unexpected {
54        found: found_display,
55    })
56    .with_span(Some(crate::span::Span {
57        start: char_start,
58        end: char_end,
59        source_id,
60    }))
61    .with_source(ErrorSource::Lexer(error_source))
62}
63
64/// Lex PRQL into LR, returning both the LR and any errors encountered
65pub fn lex_source_recovery(source: &str, source_id: u16) -> (Option<Vec<Token>>, Vec<E>) {
66    let result = lexer().parse(source).into_result();
67
68    match result {
69        Ok(tokens) => (Some(insert_start(tokens.to_vec())), vec![]),
70        Err(errors) => {
71            // Convert chumsky Simple errors to our Error type
72            let errors = errors
73                .into_iter()
74                .map(|error| convert_lexer_error(source, &error, source_id))
75                .collect();
76
77            (None, errors)
78        }
79    }
80}
81
82/// Lex PRQL into LR, returning either the LR or the errors encountered
83pub fn lex_source(source: &str) -> Result<Tokens, Vec<E>> {
84    let result = lexer().parse(source).into_result();
85
86    match result {
87        Ok(tokens) => Ok(Tokens(insert_start(tokens.to_vec()))),
88        Err(errors) => {
89            // Convert chumsky Simple errors to our Error type
90            let errors = errors
91                .into_iter()
92                .map(|error| convert_lexer_error(source, &error, 0))
93                .collect();
94
95            Err(errors)
96        }
97    }
98}
99
100/// Insert a start token so later stages can treat the start of a file like a newline
101fn insert_start(tokens: Vec<Token>) -> Vec<Token> {
102    std::iter::once(Token {
103        kind: TokenKind::Start,
104        span: 0..0,
105    })
106    .chain(tokens)
107    .collect()
108}
109
110/// Lex chars to tokens until the end of the input
111pub fn lexer<'a>() -> impl Parser<'a, ParserInput<'a>, Vec<Token>, ParserError<'a>> {
112    lex_token()
113        .repeated()
114        .collect()
115        .then_ignore(whitespace().or_not())
116}
117
118/// Lex chars to a single token
119fn lex_token<'a>() -> impl Parser<'a, ParserInput<'a>, Token, ParserError<'a>> {
120    // Handle range token with proper whitespace
121    // Ranges need special handling since the '..' token needs to know about whitespace
122    // for binding on left and right sides
123    let range = whitespace()
124        .or_not()
125        .then(just(".."))
126        .then(whitespace().or_not())
127        .map_with(|((left, _), right), extra| {
128            let span: chumsky::span::SimpleSpan = extra.span();
129            Token {
130                kind: TokenKind::Range {
131                    // Check if there was whitespace before/after to determine binding
132                    bind_left: left.is_none(),
133                    bind_right: right.is_none(),
134                },
135                span: span.start()..span.end(),
136            }
137        });
138
139    // Handle all other token types with proper whitespace
140    let other_tokens = whitespace()
141        .or_not()
142        .ignore_then(token().map_with(|kind, extra| {
143            let span: chumsky::span::SimpleSpan = extra.span();
144            Token {
145                kind,
146                span: span.start()..span.end(),
147            }
148        }));
149
150    // Try to match either a range or any other token
151    choice((range, other_tokens))
152}
153
154/// Parse individual token kinds
155fn token<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
156    // Main token parser for all tokens
157    // Strategic .boxed() calls reduce compile times for complex parsers with minimal runtime cost
158    choice((
159        line_wrap().boxed(), // Line continuation with backslash (complex recursive)
160        newline().to(TokenKind::NewLine), // Newline characters
161        multi_char_operators(), // Multi-character operators (==, !=, etc.)
162        interpolation().boxed(), // String interpolation (complex nested parsing)
163        param(),             // Parameters ($name)
164        // Date literals must come before @ handling for annotations
165        date_token().boxed(), // Date literals (complex with multiple branches)
166        // Special handling for @ annotations - must come after date_token
167        just('@').to(TokenKind::Annotate), // @ annotation marker
168        one_of("></%=+-*[]().,:|!{}").map(TokenKind::Control), // Single-character controls
169        literal().map(TokenKind::Literal).boxed(), // Literals (complex with many branches)
170        keyword(),                         // Keywords (let, func, etc.)
171        ident_part().map(TokenKind::Ident), // Identifiers
172        comment(),                         // Comments (# and #!)
173    ))
174}
175
176fn multi_char_operators<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
177    choice((
178        just("->").to(TokenKind::ArrowThin),
179        just("=>").to(TokenKind::ArrowFat),
180        just("==").to(TokenKind::Eq),
181        just("!=").to(TokenKind::Ne),
182        just(">=").to(TokenKind::Gte),
183        just("<=").to(TokenKind::Lte),
184        just("~=").to(TokenKind::RegexSearch),
185        just("&&").then_ignore(end_expr()).to(TokenKind::And),
186        just("||").then_ignore(end_expr()).to(TokenKind::Or),
187        just("??").to(TokenKind::Coalesce),
188        just("//").to(TokenKind::DivInt),
189        just("**").to(TokenKind::Pow),
190    ))
191}
192
193fn keyword<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
194    choice((
195        just("let"),
196        just("into"),
197        just("case"),
198        just("prql"),
199        just("type"),
200        just("module"),
201        just("internal"),
202        just("func"),
203        just("import"),
204        just("enum"),
205    ))
206    .to_slice()
207    .then_ignore(end_expr())
208    .map(|s: &str| TokenKind::Keyword(s.to_string()))
209}
210
211fn param<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
212    just('$')
213        .ignore_then(
214            any()
215                .filter(|c: &char| c.is_alphanumeric() || *c == '_' || *c == '.')
216                .repeated()
217                .to_slice()
218                .map(|s: &str| s.to_string()),
219        )
220        .map(TokenKind::Param)
221}
222
223fn interpolation<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
224    // For s-strings and f-strings, use the same multi-quote string parser
225    // Enable escaping so that `\"` in the source becomes a literal `"` in the string
226    //
227    // NOTE: Known limitation in error reporting for unclosed interpolated strings:
228    // When an f-string or s-string is unclosed (e.g., `f"{}`), the error is reported at the
229    // opening quote position (e.g., position 17) rather than at the end of input where the
230    // closing quote should be (e.g., position 20). This is because the `.then()` combinator
231    // modifies error spans during error recovery, and there's no way to prevent this from
232    // custom parsers.
233    one_of("sf")
234        .then(quoted_string(true))
235        .map(|(c, s)| TokenKind::Interpolation(c, s))
236}
237
238fn whitespace<'a>() -> impl Parser<'a, ParserInput<'a>, (), ParserError<'a>> {
239    text::inline_whitespace().at_least(1)
240}
241
242// Custom newline parser for Stream<char>
243fn newline<'a>() -> impl Parser<'a, ParserInput<'a>, (), ParserError<'a>> {
244    just('\n')
245        .or(just('\r').then_ignore(just('\n').or_not()))
246        .ignored()
247}
248
249fn line_wrap<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
250    newline()
251        .ignore_then(
252            whitespace()
253                .repeated()
254                .ignore_then(comment())
255                .then_ignore(newline())
256                .repeated()
257                .collect(),
258        )
259        .then_ignore(whitespace().repeated())
260        .then_ignore(just('\\'))
261        .map(TokenKind::LineWrap)
262}
263
264fn comment<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
265    // Extract the common comment text parser
266    let comment_text = none_of("\n\r").repeated().collect::<String>();
267
268    just('#').ignore_then(
269        // One option would be to check that doc comments have new lines in the
270        // lexer (we currently do in the parser); which would give better error
271        // messages?
272        just('!')
273            .ignore_then(comment_text.map(TokenKind::DocComment))
274            .or(comment_text.map(TokenKind::Comment)),
275    )
276}
277
278pub fn ident_part<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>> {
279    let plain = any()
280        .filter(|c: &char| c.is_alphabetic() || *c == '_')
281        .then(
282            // this could _almost_ just be, but we don't currently allow numbers
283            // (should we?)
284            //
285            // .then(text::ascii::ident())
286            any()
287                .filter(|c: &char| c.is_alphanumeric() || *c == '_')
288                .repeated(),
289        )
290        .to_slice()
291        .map(|s: &str| s.to_string());
292
293    let backtick = none_of('`')
294        .repeated()
295        .collect::<String>()
296        .delimited_by(just('`'), just('`'));
297
298    choice((plain, backtick))
299}
300
301// Date/time components
302fn digits<'a>(count: usize) -> impl Parser<'a, ParserInput<'a>, &'a str, ParserError<'a>> {
303    chumsky::text::digits(10).exactly(count).to_slice()
304}
305
306fn date_inner<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>> {
307    // Format: YYYY-MM-DD
308    text::digits(10)
309        .exactly(4)
310        .then(just('-'))
311        .then(text::digits(10).exactly(2))
312        .then(just('-'))
313        .then(text::digits(10).exactly(2))
314        .to_slice()
315        // TODO: Returning &str instead of String would require changing Literal::Date
316        // to use Cow<'a, str> or a similar approach, which is a larger refactoring
317        .map(|s: &str| s.to_owned())
318}
319
320fn time_inner<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>> {
321    // Helper function for parsing time components with separators
322    fn time_component<'p>(
323        separator: char,
324        component_parser: impl Parser<'p, ParserInput<'p>, &'p str, ParserError<'p>>,
325    ) -> impl Parser<'p, ParserInput<'p>, String, ParserError<'p>> {
326        just(separator)
327            .then(component_parser)
328            .map(move |(sep, comp): (char, &str)| format!("{}{}", sep, comp))
329            .or_not()
330            .map(|opt| opt.unwrap_or_default())
331    }
332
333    // Hours (required)
334    let hours = digits(2).map(|s: &str| s.to_string());
335
336    // Minutes and seconds (optional) - with colon separator
337    let minutes = time_component(':', digits(2));
338    let seconds = time_component(':', digits(2));
339
340    // Milliseconds (optional) - with dot separator
341    let milliseconds = time_component(
342        '.',
343        any()
344            .filter(|c: &char| c.is_ascii_digit())
345            .repeated()
346            .at_least(1)
347            .at_most(6)
348            .to_slice(),
349    );
350
351    // Timezone (optional): either 'Z' or '+/-HH:MM'
352    let timezone = choice((
353        just('Z').map(|c| c.to_string()),
354        one_of("-+")
355            .then(digits(2).then(just(':').or_not().then(digits(2))).map(
356                |(hrs, (_opt_colon, mins)): (&str, (Option<char>, &str))| {
357                    // Always format as -0800 without colon for SQL compatibility, regardless of input format
358                    // We need to handle both -08:00 and -0800 input formats but standardize the output
359                    format!("{}{}", hrs, mins)
360                },
361            ))
362            .map(|(sign, offset)| format!("{}{}", sign, offset)),
363    ))
364    .or_not()
365    .map(|opt| opt.unwrap_or_default());
366
367    // Combine all parts
368    hours
369        .then(minutes)
370        .then(seconds)
371        .then(milliseconds)
372        .then(timezone)
373        .map(|((((hours, mins), secs), ms), tz)| format!("{}{}{}{}{}", hours, mins, secs, ms, tz))
374}
375
376fn date_token<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
377    // Match digit after @ for date/time literals
378    just('@')
379        // The next character should be a digit
380        .then(any().filter(|c: &char| c.is_ascii_digit()).rewind())
381        .ignore_then(
382            // Once we know it's a date/time literal (@ followed by a digit),
383            // parse the three possible formats
384            choice((
385                // Datetime: @2022-01-01T12:00
386                date_inner()
387                    .then(just('T'))
388                    .then(time_inner())
389                    .then_ignore(end_expr())
390                    .map(|((date, t), time)| Literal::Timestamp(format!("{}{}{}", date, t, time))),
391                // Date: @2022-01-01
392                date_inner().then_ignore(end_expr()).map(Literal::Date),
393                // Time: @12:00
394                time_inner().then_ignore(end_expr()).map(Literal::Time),
395            )),
396        )
397        .map(TokenKind::Literal)
398}
399
400pub fn literal<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
401    choice((
402        binary_number(),
403        hexadecimal_number(),
404        octal_number(),
405        string(),
406        raw_string(),
407        value_and_unit(),
408        number(),
409        boolean(),
410        null(),
411    ))
412}
413
414// Helper to create number parsers with different bases
415fn parse_number_with_base<'a>(
416    prefix: &'static str,
417    base: u32,
418    max_digits: usize,
419    valid_digit: impl Fn(&char) -> bool + 'a,
420) -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
421    just(prefix)
422        .then_ignore(just("_").or_not()) // Optional underscore after prefix
423        .ignore_then(
424            any()
425                .filter(valid_digit)
426                .repeated()
427                .at_least(1)
428                .at_most(max_digits)
429                .to_slice()
430                .map(move |digits: &str| {
431                    i64::from_str_radix(digits, base)
432                        .map(Literal::Integer)
433                        .unwrap_or(Literal::Integer(0))
434                }),
435        )
436}
437
438fn binary_number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
439    parse_number_with_base("0b", 2, 32, |c| *c == '0' || *c == '1')
440}
441
442fn hexadecimal_number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
443    parse_number_with_base("0x", 16, 12, |c| c.is_ascii_hexdigit())
444}
445
446fn octal_number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
447    parse_number_with_base("0o", 8, 12, |c| ('0'..='7').contains(c))
448}
449
450fn number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
451    // Helper function to build a string from optional number components
452    fn optional_component<'p, T>(
453        parser: impl Parser<'p, ParserInput<'p>, T, ParserError<'p>>,
454        to_string: impl Fn(T) -> String + 'p,
455    ) -> impl Parser<'p, ParserInput<'p>, String, ParserError<'p>> {
456        parser
457            .map(to_string)
458            .or_not()
459            .map(|opt| opt.unwrap_or_default())
460    }
461
462    // Parse integer part
463    let integer = parse_integer();
464
465    // Parse fractional part
466    let fraction_digits = any()
467        .filter(|c: &char| c.is_ascii_digit())
468        .then(
469            any()
470                .filter(|c: &char| c.is_ascii_digit() || *c == '_')
471                .repeated(),
472        )
473        .to_slice();
474
475    let frac = just('.')
476        .then(fraction_digits)
477        .map(|(dot, digits): (char, &str)| format!("{}{}", dot, digits));
478
479    // Parse exponent
480    let exp_digits = one_of("+-")
481        .or_not()
482        .then(
483            any()
484                .filter(|c: &char| c.is_ascii_digit())
485                .repeated()
486                .at_least(1),
487        )
488        .to_slice();
489
490    let exp = one_of("eE")
491        .then(exp_digits)
492        .map(|(e, digits): (char, &str)| format!("{}{}", e, digits));
493
494    // Combine all parts into a number using the helper function
495    integer
496        .then(optional_component(frac, |f| f))
497        .then(optional_component(exp, |e| e))
498        .map(|((int_part, frac_part), exp_part)| {
499            // Construct the number string and remove underscores
500            let num_str = format!("{}{}{}", int_part, frac_part, exp_part)
501                .chars()
502                .filter(|&c| c != '_')
503                .collect::<String>();
504
505            // Try to parse as integer first, then as float
506            if let Ok(i) = num_str.parse::<i64>() {
507                Literal::Integer(i)
508            } else if let Ok(f) = num_str.parse::<f64>() {
509                Literal::Float(f)
510            } else {
511                Literal::Integer(0) // Fallback
512            }
513        })
514}
515
516fn parse_integer<'a>() -> impl Parser<'a, ParserInput<'a>, &'a str, ParserError<'a>> {
517    // Handle both multi-digit numbers (can't start with 0) and single digit 0
518    choice((
519        any()
520            .filter(|c: &char| c.is_ascii_digit() && *c != '0')
521            .then(
522                any()
523                    .filter(|c: &char| c.is_ascii_digit() || *c == '_')
524                    .repeated(),
525            )
526            .to_slice(),
527        just('0').to_slice(),
528    ))
529}
530
531fn string<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
532    quoted_string(true).map(Literal::String)
533}
534
535fn raw_string<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
536    just("r")
537        .then(choice((just('\''), just('"'))))
538        .then(
539            any()
540                .filter(move |c: &char| *c != '\'' && *c != '"' && *c != '\n' && *c != '\r')
541                .repeated()
542                .to_slice(),
543        )
544        .then(choice((just('\''), just('"'))))
545        .map(
546            |(((_, _open_quote), s), _close_quote): (((&str, char), &str), char)| {
547                Literal::RawString(s.to_string())
548            },
549        )
550}
551
552fn boolean<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
553    choice((just("true").to(true), just("false").to(false)))
554        .then_ignore(end_expr())
555        .map(Literal::Boolean)
556}
557
558fn null<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
559    just("null").to(Literal::Null).then_ignore(end_expr())
560}
561
562fn value_and_unit<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
563    // Supported time units
564    let unit = choice((
565        just("microseconds"),
566        just("milliseconds"),
567        just("seconds"),
568        just("minutes"),
569        just("hours"),
570        just("days"),
571        just("weeks"),
572        just("months"),
573        just("years"),
574    ));
575
576    // Parse the integer value followed by a unit
577    parse_integer().then(unit).then_ignore(end_expr()).map(
578        |(number_str, unit_str): (&str, &str)| {
579            // Parse the number (removing underscores), defaulting to 1 if parsing fails
580            let n = number_str.replace('_', "").parse::<i64>().unwrap_or(1);
581            Literal::ValueAndUnit(ValueAndUnit {
582                n,
583                unit: unit_str.to_string(),
584            })
585        },
586    )
587}
588
589pub fn quoted_string<'a>(
590    escaped: bool,
591) -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>> {
592    choice((
593        multi_quoted_string(&'"', escaped),
594        multi_quoted_string(&'\'', escaped),
595    ))
596    .map(|chars| chars.into_iter().collect())
597}
598
599// Helper function to parse escape sequences
600// Takes the input and the quote character, returns the escaped character
601fn parse_escape_sequence<'a>(
602    input: &mut chumsky::input::InputRef<'a, '_, ParserInput<'a>, ParserError<'a>>,
603    quote_char: char,
604) -> char {
605    match input.peek() {
606        Some(next_ch) => {
607            input.next();
608            match next_ch {
609                '\\' => '\\',
610                '/' => '/',
611                'b' => '\x08',
612                'f' => '\x0C',
613                'n' => '\n',
614                'r' => '\r',
615                't' => '\t',
616                'u' if input.peek() == Some('{') => {
617                    input.next(); // consume '{'
618                    let mut hex = String::new();
619                    while let Some(ch) = input.peek() {
620                        if ch == '}' {
621                            input.next();
622                            break;
623                        }
624                        if ch.is_ascii_hexdigit() && hex.len() < 6 {
625                            hex.push(ch);
626                            input.next();
627                        } else {
628                            break;
629                        }
630                    }
631                    char::from_u32(u32::from_str_radix(&hex, 16).unwrap_or(0)).unwrap_or('\u{FFFD}')
632                }
633                'x' => {
634                    let mut hex = String::new();
635                    for _ in 0..2 {
636                        if let Some(ch) = input.peek() {
637                            if ch.is_ascii_hexdigit() {
638                                hex.push(ch);
639                                input.next();
640                            }
641                        }
642                    }
643                    if hex.len() == 2 {
644                        char::from_u32(u32::from_str_radix(&hex, 16).unwrap_or(0))
645                            .unwrap_or('\u{FFFD}')
646                    } else {
647                        next_ch // Just use the character after backslash
648                    }
649                }
650                c if c == quote_char => quote_char, // Escaped quote
651                other => other,                     // Unknown escape, keep the character
652            }
653        }
654        None => {
655            // Backslash at end of input
656            '\\'
657        }
658    }
659}
660
661// Implementation of multi-level quoted strings using custom parser
662// Handles odd number of quotes (1, 3, 5, etc.) for strings with content
663// and even number of quotes (2, 4, 6, etc.) for empty strings
664//
665// This uses a single custom parser that dynamically handles arbitrary quote counts
666// All quoted strings allow newlines
667fn multi_quoted_string<'a>(
668    quote: &char,
669    escaping: bool,
670) -> impl Parser<'a, ParserInput<'a>, Vec<char>, ParserError<'a>> {
671    let quote_char = *quote;
672
673    custom(move |input| {
674        let start_cursor = input.save();
675
676        // Count opening quotes
677        let mut open_count = 0;
678        while let Some(ch) = input.peek() {
679            if ch == quote_char {
680                input.next();
681                open_count += 1;
682            } else {
683                break;
684            }
685        }
686
687        if open_count == 0 {
688            let span = input.span_since(start_cursor.cursor());
689            return Err(Simple::new(input.peek_maybe(), span));
690        }
691
692        // Even number of quotes -> empty string
693        if open_count % 2 == 0 {
694            return Ok(vec![]);
695        }
696
697        // Odd number of quotes -> parse content until we find the closing delimiter
698        let mut result = Vec::new();
699
700        loop {
701            // Save position to potentially rewind
702            let checkpoint = input.save();
703
704            // Try to match the closing delimiter (open_count quotes)
705            let mut close_count = 0;
706            while close_count < open_count {
707                match input.peek() {
708                    Some(ch) if ch == quote_char => {
709                        input.next();
710                        close_count += 1;
711                    }
712                    _ => break,
713                }
714            }
715
716            // If we matched the full delimiter, we're done
717            if close_count == open_count {
718                return Ok(result);
719            }
720
721            // Not the delimiter - rewind and consume one content character
722            input.rewind(checkpoint);
723
724            match input.next() {
725                Some(ch) => {
726                    // Handle escape sequences if escaping is enabled
727                    if escaping && ch == '\\' {
728                        let escaped = parse_escape_sequence(input, quote_char);
729                        result.push(escaped);
730                    } else {
731                        result.push(ch);
732                    }
733                }
734                None => {
735                    // Can't find closing delimiter - return error about unclosed string
736                    // Create a zero-width span at the current position (end of input)
737                    let current_cursor = input.save();
738                    let span = input.span_since(current_cursor.cursor());
739                    return Err(Simple::new(None, span));
740                }
741            }
742        }
743    })
744}
745
746fn end_expr<'a>() -> impl Parser<'a, ParserInput<'a>, (), ParserError<'a>> {
747    choice((
748        end(),
749        one_of(",)]}\t >").to(()),
750        newline(),
751        just("..").to(()),
752    ))
753    .rewind()
754}
prqlc_parser/lexer/mod.rs

prqlc_parser/lexer/
mod.rs