prqlc_parser/lexer/
mod.rs

1//! PRQL Lexer implementation
2
3use chumsky;
4
5use chumsky::extra;
6use chumsky::prelude::*;
7use chumsky::Parser;
8
9use self::lr::{Literal, Token, TokenKind, Tokens, ValueAndUnit};
10use crate::error::{Error, ErrorSource, Reason, WithErrorInfo};
11
12pub mod lr;
13#[cfg(test)]
14mod test;
15
/// Shorthand for the crate's error type, as returned by the lexing entry points.
16type E = Error;
/// The lexer reads its input directly from a borrowed string slice.
17type ParserInput<'a> = &'a str;
/// Chumsky `extra` configuration used by every parser in this module:
/// failures are reported as `Simple` errors over `char` tokens.
18type ParserError<'a> = extra::Err<Simple<'a, char>>;
19
20/// Convert a chumsky Simple error to our internal Error type
21fn convert_lexer_error(source: &str, error: &Simple<'_, char>, source_id: u16) -> E {
22    // Get span information from the Simple error
23    // NOTE: When parsing &str, SimpleSpan uses BYTE offsets, not character offsets!
24    // We need to convert byte offsets to character offsets for compatibility with our error reporting.
25    let byte_span = error.span();
26    let byte_start = byte_span.start();
27    let byte_end = byte_span.end();
28
29    // Convert byte offsets to character offsets
30    let char_start = source[..byte_start].chars().count();
31    let char_end = source[..byte_end].chars().count();
32
33    // Extract the "found" text using character-based slicing
34    let found: String = source
35        .chars()
36        .skip(char_start)
37        .take(char_end - char_start)
38        .collect();
39
40    // If found is empty, report as "end of input", otherwise wrap in quotes
41    let found_display = if found.is_empty() {
42        "end of input".to_string()
43    } else {
44        format!("'{}'", found)
45    };
46
47    // Create a new Error with the extracted information
48    let error_source = format!(
49        "Unexpected {} at position {}..{}",
50        found_display, char_start, char_end
51    );
52
53    WithErrorInfo::with_span(
54        Error::new(Reason::Unexpected {
55            found: found_display,
56        }),
57        Some(crate::span::Span {
58            start: char_start,
59            end: char_end,
60            source_id,
61        }),
62    )
63    .with_source(ErrorSource::Lexer(error_source))
64}
65
66/// Lex PRQL into LR, returning both the LR and any errors encountered
67pub fn lex_source_recovery(source: &str, source_id: u16) -> (Option<Vec<Token>>, Vec<E>) {
68    let result = lexer().parse(source).into_result();
69
70    match result {
71        Ok(tokens) => (Some(insert_start(tokens.to_vec())), vec![]),
72        Err(errors) => {
73            // Convert chumsky Simple errors to our Error type
74            let errors = errors
75                .into_iter()
76                .map(|error| convert_lexer_error(source, &error, source_id))
77                .collect();
78
79            (None, errors)
80        }
81    }
82}
83
84/// Lex PRQL into LR, returning either the LR or the errors encountered
85pub fn lex_source(source: &str) -> Result<Tokens, Vec<E>> {
86    let result = lexer().parse(source).into_result();
87
88    match result {
89        Ok(tokens) => Ok(Tokens(insert_start(tokens.to_vec()))),
90        Err(errors) => {
91            // Convert chumsky Simple errors to our Error type
92            let errors = errors
93                .into_iter()
94                .map(|error| convert_lexer_error(source, &error, 0))
95                .collect();
96
97            Err(errors)
98        }
99    }
100}
101
102/// Insert a start token so later stages can treat the start of a file like a newline
103fn insert_start(tokens: Vec<Token>) -> Vec<Token> {
104    std::iter::once(Token {
105        kind: TokenKind::Start,
106        span: 0..0,
107    })
108    .chain(tokens)
109    .collect()
110}
111
112/// Lex chars to tokens until the end of the input
113pub fn lexer<'a>() -> impl Parser<'a, ParserInput<'a>, Vec<Token>, ParserError<'a>> {
114    lex_token()
115        .repeated()
116        .collect()
117        .then_ignore(whitespace().or_not())
118}
119
120/// Lex chars to a single token
121fn lex_token<'a>() -> impl Parser<'a, ParserInput<'a>, Token, ParserError<'a>> {
122    // Handle range token with proper whitespace
123    // Ranges need special handling since the '..' token needs to know about whitespace
124    // for binding on left and right sides
125    let range = whitespace()
126        .or_not()
127        .then(just(".."))
128        .then(whitespace().or_not())
129        .map_with(|((left, _), right), extra| {
130            let span: chumsky::span::SimpleSpan = extra.span();
131            Token {
132                kind: TokenKind::Range {
133                    // Check if there was whitespace before/after to determine binding
134                    bind_left: left.is_none(),
135                    bind_right: right.is_none(),
136                },
137                span: span.start()..span.end(),
138            }
139        });
140
141    // Handle all other token types with proper whitespace
142    let other_tokens = whitespace()
143        .or_not()
144        .ignore_then(token().map_with(|kind, extra| {
145            let span: chumsky::span::SimpleSpan = extra.span();
146            Token {
147                kind,
148                span: span.start()..span.end(),
149            }
150        }));
151
152    // Try to match either a range or any other token
153    choice((range, other_tokens))
154}
155
156/// Parse individual token kinds
157fn token<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
158    // Main token parser for all tokens
159    // Strategic .boxed() calls reduce compile times for complex parsers with minimal runtime cost
160    choice((
161        line_wrap().boxed(), // Line continuation with backslash (complex recursive)
162        newline().to(TokenKind::NewLine), // Newline characters
163        multi_char_operators(), // Multi-character operators (==, !=, etc.)
164        interpolation().boxed(), // String interpolation (complex nested parsing)
165        param(),             // Parameters ($name)
166        // Date literals must come before @ handling for annotations
167        date_token().boxed(), // Date literals (complex with multiple branches)
168        // Special handling for @ annotations - must come after date_token
169        just('@').to(TokenKind::Annotate), // @ annotation marker
170        one_of("></%=+-*[]().,:|!{}").map(TokenKind::Control), // Single-character controls
171        literal().map(TokenKind::Literal).boxed(), // Literals (complex with many branches)
172        keyword(),                         // Keywords (let, func, etc.)
173        ident_part().map(TokenKind::Ident), // Identifiers
174        comment(),                         // Comments (# and #!)
175    ))
176}
177
178fn multi_char_operators<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
179    choice((
180        just("->").to(TokenKind::ArrowThin),
181        just("=>").to(TokenKind::ArrowFat),
182        just("==").to(TokenKind::Eq),
183        just("!=").to(TokenKind::Ne),
184        just(">=").to(TokenKind::Gte),
185        just("<=").to(TokenKind::Lte),
186        just("~=").to(TokenKind::RegexSearch),
187        just("&&").then_ignore(end_expr()).to(TokenKind::And),
188        just("||").then_ignore(end_expr()).to(TokenKind::Or),
189        just("??").to(TokenKind::Coalesce),
190        just("//").to(TokenKind::DivInt),
191        just("**").to(TokenKind::Pow),
192    ))
193}
194
195fn keyword<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
196    choice((
197        just("let"),
198        just("into"),
199        just("case"),
200        just("prql"),
201        just("type"),
202        just("module"),
203        just("internal"),
204        just("func"),
205        just("import"),
206        just("enum"),
207    ))
208    .to_slice()
209    .then_ignore(end_expr())
210    .map(|s: &str| TokenKind::Keyword(s.to_string()))
211}
212
213fn param<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
214    just('$')
215        .ignore_then(
216            any()
217                .filter(|c: &char| c.is_alphanumeric() || *c == '_' || *c == '.')
218                .repeated()
219                .to_slice()
220                .map(|s: &str| s.to_string()),
221        )
222        .map(TokenKind::Param)
223}
224
225fn interpolation<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
226    // For s-strings and f-strings, use the same multi-quote string parser
227    // Enable escaping so that `\"` in the source becomes a literal `"` in the string
228    //
229    // NOTE: Known limitation in error reporting for unclosed interpolated strings:
230    // When an f-string or s-string is unclosed (e.g., `f"{}`), the error is reported at the
231    // opening quote position (e.g., position 17) rather than at the end of input where the
232    // closing quote should be (e.g., position 20). This is because the `.then()` combinator
233    // modifies error spans during error recovery, and there's no way to prevent this from
234    // custom parsers.
235    one_of("sf")
236        .then(quoted_string(true))
237        .map(|(c, s)| TokenKind::Interpolation(c, s))
238}
239
240fn whitespace<'a>() -> impl Parser<'a, ParserInput<'a>, (), ParserError<'a>> {
241    text::inline_whitespace().at_least(1)
242}
243
244// Custom newline parser for Stream<char>
245fn newline<'a>() -> impl Parser<'a, ParserInput<'a>, (), ParserError<'a>> {
246    just('\n')
247        .or(just('\r').then_ignore(just('\n').or_not()))
248        .ignored()
249}
250
251fn line_wrap<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
252    newline()
253        .ignore_then(
254            whitespace()
255                .repeated()
256                .ignore_then(comment())
257                .then_ignore(newline())
258                .repeated()
259                .collect(),
260        )
261        .then_ignore(whitespace().repeated())
262        .then_ignore(just('\\'))
263        .map(TokenKind::LineWrap)
264}
265
266fn comment<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
267    // Extract the common comment text parser
268    let comment_text = none_of("\n\r").repeated().collect::<String>();
269
270    just('#').ignore_then(
271        // One option would be to check that doc comments have new lines in the
272        // lexer (we currently do in the parser); which would give better error
273        // messages?
274        just('!')
275            .ignore_then(comment_text.map(TokenKind::DocComment))
276            .or(comment_text.map(TokenKind::Comment)),
277    )
278}
279
280pub fn ident_part<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>> {
281    let plain = any()
282        .filter(|c: &char| c.is_alphabetic() || *c == '_')
283        .then(
284            // this could _almost_ just be, but we don't currently allow numbers
285            // (should we?)
286            //
287            // .then(text::ascii::ident())
288            any()
289                .filter(|c: &char| c.is_alphanumeric() || *c == '_')
290                .repeated(),
291        )
292        .to_slice()
293        .map(|s: &str| s.to_string());
294
295    let backtick = none_of('`')
296        .repeated()
297        .collect::<String>()
298        .delimited_by(just('`'), just('`'));
299
300    choice((plain, backtick))
301}
302
303// Date/time components
304fn digits<'a>(count: usize) -> impl Parser<'a, ParserInput<'a>, &'a str, ParserError<'a>> {
305    chumsky::text::digits(10).exactly(count).to_slice()
306}
307
308fn date_inner<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>> {
309    // Format: YYYY-MM-DD
310    text::digits(10)
311        .exactly(4)
312        .then(just('-'))
313        .then(text::digits(10).exactly(2))
314        .then(just('-'))
315        .then(text::digits(10).exactly(2))
316        .to_slice()
317        // TODO: Returning &str instead of String would require changing Literal::Date
318        // to use Cow<'a, str> or a similar approach, which is a larger refactoring
319        .map(|s: &str| s.to_owned())
320}
321
322fn time_inner<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>> {
323    // Helper function for parsing time components with separators
324    fn time_component<'p>(
325        separator: char,
326        component_parser: impl Parser<'p, ParserInput<'p>, &'p str, ParserError<'p>>,
327    ) -> impl Parser<'p, ParserInput<'p>, String, ParserError<'p>> {
328        just(separator)
329            .then(component_parser)
330            .map(move |(sep, comp): (char, &str)| format!("{}{}", sep, comp))
331            .or_not()
332            .map(|opt| opt.unwrap_or_default())
333    }
334
335    // Hours (required)
336    let hours = digits(2).map(|s: &str| s.to_string());
337
338    // Minutes and seconds (optional) - with colon separator
339    let minutes = time_component(':', digits(2));
340    let seconds = time_component(':', digits(2));
341
342    // Milliseconds (optional) - with dot separator
343    let milliseconds = time_component(
344        '.',
345        any()
346            .filter(|c: &char| c.is_ascii_digit())
347            .repeated()
348            .at_least(1)
349            .at_most(6)
350            .to_slice(),
351    );
352
353    // Timezone (optional): either 'Z' or '+/-HH:MM'
354    let timezone = choice((
355        just('Z').map(|c| c.to_string()),
356        one_of("-+")
357            .then(digits(2).then(just(':').or_not().then(digits(2))).map(
358                |(hrs, (_opt_colon, mins)): (&str, (Option<char>, &str))| {
359                    // Always format as -0800 without colon for SQL compatibility, regardless of input format
360                    // We need to handle both -08:00 and -0800 input formats but standardize the output
361                    format!("{}{}", hrs, mins)
362                },
363            ))
364            .map(|(sign, offset)| format!("{}{}", sign, offset)),
365    ))
366    .or_not()
367    .map(|opt| opt.unwrap_or_default());
368
369    // Combine all parts
370    hours
371        .then(minutes)
372        .then(seconds)
373        .then(milliseconds)
374        .then(timezone)
375        .map(|((((hours, mins), secs), ms), tz)| format!("{}{}{}{}{}", hours, mins, secs, ms, tz))
376}
377
378fn date_token<'a>() -> impl Parser<'a, ParserInput<'a>, TokenKind, ParserError<'a>> {
379    // Match digit after @ for date/time literals
380    just('@')
381        // The next character should be a digit
382        .then(any().filter(|c: &char| c.is_ascii_digit()).rewind())
383        .ignore_then(
384            // Once we know it's a date/time literal (@ followed by a digit),
385            // parse the three possible formats
386            choice((
387                // Datetime: @2022-01-01T12:00
388                date_inner()
389                    .then(just('T'))
390                    .then(time_inner())
391                    .then_ignore(end_expr())
392                    .map(|((date, t), time)| Literal::Timestamp(format!("{}{}{}", date, t, time))),
393                // Date: @2022-01-01
394                date_inner().then_ignore(end_expr()).map(Literal::Date),
395                // Time: @12:00
396                time_inner().then_ignore(end_expr()).map(Literal::Time),
397            )),
398        )
399        .map(TokenKind::Literal)
400}
401
402pub fn literal<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
403    choice((
404        binary_number(),
405        hexadecimal_number(),
406        octal_number(),
407        string(),
408        raw_string(),
409        value_and_unit(),
410        number(),
411        boolean(),
412        null(),
413    ))
414}
415
416// Helper to create number parsers with different bases
417fn parse_number_with_base<'a>(
418    prefix: &'static str,
419    base: u32,
420    max_digits: usize,
421    valid_digit: impl Fn(&char) -> bool + 'a,
422) -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
423    just(prefix)
424        .then_ignore(just("_").or_not()) // Optional underscore after prefix
425        .ignore_then(
426            any()
427                .filter(valid_digit)
428                .repeated()
429                .at_least(1)
430                .at_most(max_digits)
431                .to_slice()
432                .map(move |digits: &str| {
433                    i64::from_str_radix(digits, base)
434                        .map(Literal::Integer)
435                        .unwrap_or(Literal::Integer(0))
436                }),
437        )
438}
439
440fn binary_number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
441    parse_number_with_base("0b", 2, 32, |c| *c == '0' || *c == '1')
442}
443
444fn hexadecimal_number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
445    parse_number_with_base("0x", 16, 12, |c| c.is_ascii_hexdigit())
446}
447
448fn octal_number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
449    parse_number_with_base("0o", 8, 12, |c| ('0'..='7').contains(c))
450}
451
452fn number<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
453    // Helper function to build a string from optional number components
454    fn optional_component<'p, T>(
455        parser: impl Parser<'p, ParserInput<'p>, T, ParserError<'p>>,
456        to_string: impl Fn(T) -> String + 'p,
457    ) -> impl Parser<'p, ParserInput<'p>, String, ParserError<'p>> {
458        parser
459            .map(to_string)
460            .or_not()
461            .map(|opt| opt.unwrap_or_default())
462    }
463
464    // Parse integer part
465    let integer = parse_integer();
466
467    // Parse fractional part
468    let fraction_digits = any()
469        .filter(|c: &char| c.is_ascii_digit())
470        .then(
471            any()
472                .filter(|c: &char| c.is_ascii_digit() || *c == '_')
473                .repeated(),
474        )
475        .to_slice();
476
477    let frac = just('.')
478        .then(fraction_digits)
479        .map(|(dot, digits): (char, &str)| format!("{}{}", dot, digits));
480
481    // Parse exponent
482    let exp_digits = one_of("+-")
483        .or_not()
484        .then(
485            any()
486                .filter(|c: &char| c.is_ascii_digit())
487                .repeated()
488                .at_least(1),
489        )
490        .to_slice();
491
492    let exp = one_of("eE")
493        .then(exp_digits)
494        .map(|(e, digits): (char, &str)| format!("{}{}", e, digits));
495
496    // Combine all parts into a number using the helper function
497    integer
498        .then(optional_component(frac, |f| f))
499        .then(optional_component(exp, |e| e))
500        .map(|((int_part, frac_part), exp_part)| {
501            // Construct the number string and remove underscores
502            let num_str = format!("{}{}{}", int_part, frac_part, exp_part)
503                .chars()
504                .filter(|&c| c != '_')
505                .collect::<String>();
506
507            // Try to parse as integer first, then as float
508            if let Ok(i) = num_str.parse::<i64>() {
509                Literal::Integer(i)
510            } else if let Ok(f) = num_str.parse::<f64>() {
511                Literal::Float(f)
512            } else {
513                Literal::Integer(0) // Fallback
514            }
515        })
516}
517
518fn parse_integer<'a>() -> impl Parser<'a, ParserInput<'a>, &'a str, ParserError<'a>> {
519    // Handle both multi-digit numbers (can't start with 0) and single digit 0
520    choice((
521        any()
522            .filter(|c: &char| c.is_ascii_digit() && *c != '0')
523            .then(
524                any()
525                    .filter(|c: &char| c.is_ascii_digit() || *c == '_')
526                    .repeated(),
527            )
528            .to_slice(),
529        just('0').to_slice(),
530    ))
531}
532
533fn string<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
534    quoted_string(true).map(Literal::String)
535}
536
537fn raw_string<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
538    just("r")
539        .then(choice((just('\''), just('"'))))
540        .then(
541            any()
542                .filter(move |c: &char| *c != '\'' && *c != '"' && *c != '\n' && *c != '\r')
543                .repeated()
544                .to_slice(),
545        )
546        .then(choice((just('\''), just('"'))))
547        .map(
548            |(((_, _open_quote), s), _close_quote): (((&str, char), &str), char)| {
549                Literal::RawString(s.to_string())
550            },
551        )
552}
553
554fn boolean<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
555    choice((just("true").to(true), just("false").to(false)))
556        .then_ignore(end_expr())
557        .map(Literal::Boolean)
558}
559
560fn null<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
561    just("null").to(Literal::Null).then_ignore(end_expr())
562}
563
564fn value_and_unit<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserError<'a>> {
565    // Supported time units
566    let unit = choice((
567        just("microseconds"),
568        just("milliseconds"),
569        just("seconds"),
570        just("minutes"),
571        just("hours"),
572        just("days"),
573        just("weeks"),
574        just("months"),
575        just("years"),
576    ));
577
578    // Parse the integer value followed by a unit
579    parse_integer().then(unit).then_ignore(end_expr()).map(
580        |(number_str, unit_str): (&str, &str)| {
581            // Parse the number (removing underscores), defaulting to 1 if parsing fails
582            let n = number_str.replace('_', "").parse::<i64>().unwrap_or(1);
583            Literal::ValueAndUnit(ValueAndUnit {
584                n,
585                unit: unit_str.to_string(),
586            })
587        },
588    )
589}
590
591pub fn quoted_string<'a>(
592    escaped: bool,
593) -> impl Parser<'a, ParserInput<'a>, String, ParserError<'a>> {
594    choice((
595        multi_quoted_string(&'"', escaped),
596        multi_quoted_string(&'\'', escaped),
597    ))
598    .map(|chars| chars.into_iter().collect())
599}
600
601// Helper function to parse escape sequences
602// Takes the input and the quote character, returns the escaped character
603fn parse_escape_sequence<'a>(
604    input: &mut chumsky::input::InputRef<'a, '_, ParserInput<'a>, ParserError<'a>>,
605    quote_char: char,
606) -> char {
607    match input.peek() {
608        Some(next_ch) => {
609            input.next();
610            match next_ch {
611                '\\' => '\\',
612                '/' => '/',
613                'b' => '\x08',
614                'f' => '\x0C',
615                'n' => '\n',
616                'r' => '\r',
617                't' => '\t',
618                'u' if input.peek() == Some('{') => {
619                    input.next(); // consume '{'
620                    let mut hex = String::new();
621                    while let Some(ch) = input.peek() {
622                        if ch == '}' {
623                            input.next();
624                            break;
625                        }
626                        if ch.is_ascii_hexdigit() && hex.len() < 6 {
627                            hex.push(ch);
628                            input.next();
629                        } else {
630                            break;
631                        }
632                    }
633                    char::from_u32(u32::from_str_radix(&hex, 16).unwrap_or(0)).unwrap_or('\u{FFFD}')
634                }
635                'x' => {
636                    let mut hex = String::new();
637                    for _ in 0..2 {
638                        if let Some(ch) = input.peek() {
639                            if ch.is_ascii_hexdigit() {
640                                hex.push(ch);
641                                input.next();
642                            }
643                        }
644                    }
645                    if hex.len() == 2 {
646                        char::from_u32(u32::from_str_radix(&hex, 16).unwrap_or(0))
647                            .unwrap_or('\u{FFFD}')
648                    } else {
649                        next_ch // Just use the character after backslash
650                    }
651                }
652                c if c == quote_char => quote_char, // Escaped quote
653                other => other,                     // Unknown escape, keep the character
654            }
655        }
656        None => {
657            // Backslash at end of input
658            '\\'
659        }
660    }
661}
662
663// Implementation of multi-level quoted strings using custom parser
664// Handles odd number of quotes (1, 3, 5, etc.) for strings with content
665// and even number of quotes (2, 4, 6, etc.) for empty strings
666//
667// This uses a single custom parser that dynamically handles arbitrary quote counts
668// All quoted strings allow newlines
669fn multi_quoted_string<'a>(
670    quote: &char,
671    escaping: bool,
672) -> impl Parser<'a, ParserInput<'a>, Vec<char>, ParserError<'a>> {
673    let quote_char = *quote;
674
675    custom(move |input| {
676        let start_cursor = input.save();
677
678        // Count opening quotes
679        let mut open_count = 0;
680        while let Some(ch) = input.peek() {
681            if ch == quote_char {
682                input.next();
683                open_count += 1;
684            } else {
685                break;
686            }
687        }
688
689        if open_count == 0 {
690            let span = input.span_since(start_cursor.cursor());
691            return Err(Simple::new(input.peek_maybe(), span));
692        }
693
694        // Even number of quotes -> empty string
695        if open_count % 2 == 0 {
696            return Ok(vec![]);
697        }
698
699        // Odd number of quotes -> parse content until we find the closing delimiter
700        let mut result = Vec::new();
701
702        loop {
703            // Save position to potentially rewind
704            let checkpoint = input.save();
705
706            // Try to match the closing delimiter (open_count quotes)
707            let mut close_count = 0;
708            while close_count < open_count {
709                match input.peek() {
710                    Some(ch) if ch == quote_char => {
711                        input.next();
712                        close_count += 1;
713                    }
714                    _ => break,
715                }
716            }
717
718            // If we matched the full delimiter, we're done
719            if close_count == open_count {
720                return Ok(result);
721            }
722
723            // Not the delimiter - rewind and consume one content character
724            input.rewind(checkpoint);
725
726            match input.next() {
727                Some(ch) => {
728                    // Handle escape sequences if escaping is enabled
729                    if escaping && ch == '\\' {
730                        let escaped = parse_escape_sequence(input, quote_char);
731                        result.push(escaped);
732                    } else {
733                        result.push(ch);
734                    }
735                }
736                None => {
737                    // Can't find closing delimiter - return error about unclosed string
738                    // Create a zero-width span at the current position (end of input)
739                    let current_cursor = input.save();
740                    let span = input.span_since(current_cursor.cursor());
741                    return Err(Simple::new(None, span));
742                }
743            }
744        }
745    })
746}
747
748fn end_expr<'a>() -> impl Parser<'a, ParserInput<'a>, (), ParserError<'a>> {
749    choice((
750        end(),
751        one_of(",)]}\t >").to(()),
752        newline(),
753        just("..").to(()),
754    ))
755    .rewind()
756}
prqlc_parser/lexer/mod.rs

prqlc_parser/lexer/
mod.rs