Skip to main content

askama_parser/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![deny(elided_lifetimes_in_paths)]
3#![deny(unreachable_pub)]
4#![allow(clippy::vec_box)] // intentional, less copying
5#![doc = include_str!("../README.md")]
6
7mod ascii_str;
8pub mod expr;
9pub mod node;
10mod target;
11#[cfg(test)]
12mod tests;
13
14use std::borrow::{Borrow, BorrowMut, Cow};
15use std::cell::Cell;
16use std::env::current_dir;
17use std::ops::{Deref, DerefMut, Range};
18use std::path::Path;
19use std::sync::Arc;
20use std::{fmt, str};
21
22use rustc_hash::FxBuildHasher;
23use winnow::ascii::take_escaped;
24use winnow::combinator::{
25    alt, cond, cut_err, delimited, empty, fail, not, opt, peek, preceded, repeat, terminated,
26};
27use winnow::error::ErrMode;
28use winnow::stream::{AsChar, Location, Stream};
29use winnow::token::{any, none_of, one_of, take_while};
30use winnow::{LocatingSlice, ModalParser, ModalResult, Parser, Stateful};
31
32use crate::ascii_str::{AsciiChar, AsciiStr};
33pub use crate::expr::{AssociatedItem, Expr, Filter, PathComponent, TyGenerics};
34pub use crate::node::Node;
35pub use crate::target::{NamedTarget, Target};
36
37mod _parsed {
38    use std::path::Path;
39    use std::sync::Arc;
40    use std::{fmt, mem};
41
42    use super::node::Node;
43    use super::{Ast, ParseError, Syntax};
44
45    pub struct Parsed {
46        // `source` must outlive `ast`, so `ast` must be declared before `source`
47        ast: Ast<'static>,
48        #[allow(dead_code)]
49        source: Arc<str>,
50    }
51
52    impl Parsed {
53        /// If `file_path` is `None`, it means the `source` is an inline template. Therefore, if
54        /// a parsing error occurs, we won't display the path as it wouldn't be useful.
55        pub fn new(
56            source: Arc<str>,
57            file_path: Option<Arc<Path>>,
58            syntax: &Syntax<'_>,
59        ) -> Result<Self, ParseError> {
60            // Self-referential borrowing: `self` will keep the source alive as `String`,
61            // internally we will transmute it to `&'static str` to satisfy the compiler.
62            // However, we only expose the nodes with a lifetime limited to `self`.
63            let src = unsafe { mem::transmute::<&str, &'static str>(source.as_ref()) };
64            let ast = Ast::from_str(src, file_path, syntax)?;
65            Ok(Self { ast, source })
66        }
67
68        // The return value's lifetime must be limited to `self` to uphold the unsafe invariant.
69        #[must_use]
70        pub fn nodes(&self) -> &[Box<Node<'_>>] {
71            &self.ast.nodes
72        }
73
74        #[must_use]
75        pub fn source(&self) -> &str {
76            &self.source
77        }
78    }
79
80    impl fmt::Debug for Parsed {
81        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
82            f.debug_struct("Parsed")
83                .field("nodes", &self.ast.nodes)
84                .finish_non_exhaustive()
85        }
86    }
87
88    impl PartialEq for Parsed {
89        fn eq(&self, other: &Self) -> bool {
90            self.ast.nodes == other.ast.nodes
91        }
92    }
93
94    impl Default for Parsed {
95        fn default() -> Self {
96            Self {
97                ast: Ast::default(),
98                source: "".into(),
99            }
100        }
101    }
102}
103
104pub use _parsed::Parsed;
105
106type InputStream<'a, 'l> = Stateful<LocatingSlice<&'a str>, &'l State<'l>>;
107
108#[derive(Debug, Default)]
109pub struct Ast<'a> {
110    nodes: Vec<Box<Node<'a>>>,
111}
112
113impl<'a> Ast<'a> {
114    /// If `file_path` is `None`, it means the `source` is an inline template. Therefore, if
115    /// a parsing error occurs, we won't display the path as it wouldn't be useful.
116    pub fn from_str(
117        src: &'a str,
118        file_path: Option<Arc<Path>>,
119        syntax: &Syntax<'_>,
120    ) -> Result<Ast<'a>, ParseError> {
121        let state = State {
122            syntax: *syntax,
123            ..State::default()
124        };
125        let mut src = InputStream {
126            input: LocatingSlice::new(src),
127            state: &state,
128        };
129        match Node::parse_template(&mut src) {
130            Ok(nodes) if src.is_empty() => Ok(Self { nodes }),
131            Ok(_) | Err(ErrMode::Incomplete(_)) => unreachable!(),
132            Err(
133                ErrMode::Backtrack(ErrorContext { span, message, .. })
134                | ErrMode::Cut(ErrorContext { span, message, .. }),
135            ) => Err(ParseError {
136                message,
137                offset: span.start,
138                file_path,
139            }),
140        }
141    }
142
143    #[must_use]
144    pub fn nodes(&self) -> &[Box<Node<'a>>] {
145        &self.nodes
146    }
147}
148
149#[derive(Clone, Copy)]
150/// Struct used to wrap types with their associated "span" which is used when generating errors
151/// in the code generation.
152#[repr(C)] // rationale: `WithSpan<Box<T>` needs to have the same layout as `WithSpan<&T>`.
153pub struct WithSpan<T> {
154    inner: T,
155    span: Span,
156}
157
/// A byte range (`start..end`) inside a source `&'a str`.
///
/// The value `usize::MAX` in `start` is reserved to express "no span".
#[derive(Debug, Clone, Copy)]
pub struct Span {
    start: usize,
    end: usize,
}
164
165impl Default for Span {
166    #[inline]
167    fn default() -> Self {
168        Self::no_span()
169    }
170}
171
172impl From<&InputStream<'_, '_>> for Span {
173    #[inline]
174    fn from(i: &InputStream<'_, '_>) -> Self {
175        (*i).into()
176    }
177}
178
179impl From<InputStream<'_, '_>> for Span {
180    #[inline]
181    fn from(mut i: InputStream<'_, '_>) -> Self {
182        let start = i.current_token_start();
183        i.finish();
184        Self {
185            start,
186            end: i.current_token_start(),
187        }
188    }
189}
190
191impl From<Range<usize>> for Span {
192    #[inline]
193    #[track_caller]
194    fn from(range: Range<usize>) -> Self {
195        Span::new(range)
196    }
197}
198
199impl Span {
200    #[inline]
201    pub const fn no_span() -> Span {
202        Self {
203            start: usize::MAX,
204            end: usize::MAX,
205        }
206    }
207
208    #[inline]
209    #[track_caller]
210    pub fn new(range: Range<usize>) -> Self {
211        let Range { start, end } = range;
212        debug_assert!(start <= end);
213        Span { start, end }
214    }
215
216    #[inline]
217    pub fn byte_range(self) -> Option<Range<usize>> {
218        (self.start != usize::MAX).then_some(self.start..self.end)
219    }
220
221    /// Returns an empty [`Span`] that points to the start of `self`.
222    #[inline]
223    pub fn start(self) -> Self {
224        Self {
225            start: self.start,
226            end: self.start,
227        }
228    }
229
230    /// Returns an empty [`Span`] that points to the end of `self`.
231    #[inline]
232    pub fn end(self) -> Self {
233        Self {
234            start: self.end,
235            end: self.end,
236        }
237    }
238
239    /// Splits `self` at `mid` into two spanned strings.
240    #[track_caller]
241    pub fn split_at(self, mid: usize) -> (Self, Self) {
242        let Some(Range { start, end }) = self.byte_range() else {
243            return (self, self);
244        };
245
246        let mid = start.checked_add(mid).unwrap();
247        assert!(mid <= end);
248
249        let start = Self { start, end: mid };
250        let end = Self { start: mid, end };
251        (start, end)
252    }
253
254    /// The substring in `source` contained in [`self.byte_range()`][Self::byte_range].
255    #[inline]
256    pub fn as_infix_of<'a>(&self, source: &'a str) -> Option<&'a str> {
257        self.byte_range().and_then(|range| source.get(range))
258    }
259
260    /// The substring in `source` starting from `self.start`.
261    #[inline]
262    pub fn as_suffix_of<'a>(&self, source: &'a str) -> Option<&'a str> {
263        // No need to check if `self.start != usize::MAX`:
264        // `source` cannot be longer than `isize::MAX`, cf. [`std::alloc`].
265        source.get(self.start..)
266    }
267
268    pub fn is_overlapping(&self, other: Span) -> bool {
269        (self.start < other.end) & (other.start < self.end)
270    }
271}
272
273impl<T> WithSpan<T> {
274    #[inline]
275    #[track_caller]
276    pub fn new(inner: T, span: impl Into<Span>) -> Self {
277        Self {
278            inner,
279            span: span.into(),
280        }
281    }
282
283    #[inline]
284    pub const fn no_span(inner: T) -> Self {
285        Self {
286            inner,
287            span: Span::no_span(),
288        }
289    }
290
291    #[inline]
292    pub fn span(&self) -> Span {
293        self.span
294    }
295
296    #[inline]
297    pub fn deconstruct(self) -> (T, Span) {
298        let Self { inner, span } = self;
299        (inner, span)
300    }
301}
302
303impl WithSpan<&str> {
304    /// Returns an empty [`Span`] that points to the start of the contained string.
305    #[inline]
306    pub fn start(self) -> Self {
307        let (inner, span) = self.deconstruct();
308        Self::new(&inner[..0], span.start())
309    }
310
311    /// Returns an empty [`Span`] that points to the end of the contained string.
312    #[inline]
313    pub fn end(self) -> Self {
314        let (inner, span) = self.deconstruct();
315        Self::new(&inner[inner.len()..], span.end())
316    }
317
318    /// Splits `self` at `mid` into two spanned strings.
319    #[track_caller]
320    pub fn split_at(self, mid: usize) -> (Self, Self) {
321        let (inner, span) = self.deconstruct();
322        let (front, back) = inner.split_at(mid);
323        let (front_span, back_span) = span.split_at(mid);
324        (Self::new(front, front_span), Self::new(back, back_span))
325    }
326}
327
328impl<T> Deref for WithSpan<T> {
329    type Target = T;
330
331    #[inline]
332    fn deref(&self) -> &Self::Target {
333        &self.inner
334    }
335}
336
337impl<T> DerefMut for WithSpan<T> {
338    #[inline]
339    fn deref_mut(&mut self) -> &mut Self::Target {
340        &mut self.inner
341    }
342}
343
344impl<T: fmt::Debug> fmt::Debug for WithSpan<T> {
345    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
346        self.inner.fmt(f)
347    }
348}
349
350impl<T: PartialEq, R: AsRef<T>> PartialEq<R> for WithSpan<T> {
351    #[inline]
352    fn eq(&self, other: &R) -> bool {
353        // We never want to compare the span information.
354        self.inner == *other.as_ref()
355    }
356}
357
358impl<T: PartialOrd, R: AsRef<T>> PartialOrd<R> for WithSpan<T> {
359    #[inline]
360    fn partial_cmp(&self, other: &R) -> Option<std::cmp::Ordering> {
361        self.inner.partial_cmp(other.as_ref())
362    }
363}
364
365impl<T: Eq> Eq for WithSpan<T> {}
366
367impl<T: Ord> Ord for WithSpan<T> {
368    #[inline]
369    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
370        self.inner.cmp(&other.inner)
371    }
372}
373
374impl<T: std::hash::Hash> std::hash::Hash for WithSpan<T> {
375    #[inline]
376    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
377        self.inner.hash(state);
378    }
379}
380
381impl<T> AsRef<T> for WithSpan<T> {
382    #[inline]
383    fn as_ref(&self) -> &T {
384        &self.inner
385    }
386}
387
388impl<T> Borrow<T> for WithSpan<T> {
389    #[inline]
390    fn borrow(&self) -> &T {
391        &self.inner
392    }
393}
394
395impl<T> BorrowMut<T> for WithSpan<T> {
396    #[inline]
397    fn borrow_mut(&mut self) -> &mut T {
398        &mut self.inner
399    }
400}
401
/// The error returned when a template source could not be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// A description of the problem, if the parser provided one.
    pub message: Option<Cow<'static, str>>,
    /// The byte offset in the source at which the problem was detected.
    pub offset: usize,
    /// The path of the template file; `None` for inline templates.
    pub file_path: Option<Arc<Path>>,
}
408
409impl std::error::Error for ParseError {}
410
411impl fmt::Display for ParseError {
412    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
413        let ParseError {
414            message,
415            file_path,
416            offset,
417        } = self;
418
419        if let Some(message) = message {
420            writeln!(f, "{message}")?;
421        }
422
423        let path = file_path
424            .as_ref()
425            .and_then(|path| Some(strip_common(&current_dir().ok()?, path)));
426        match path {
427            Some(path) => write!(f, "failed to parse template source\n  --> {path}@{offset}"),
428            None => write!(f, "failed to parse template source near offset {offset}"),
429        }
430    }
431}
432
433pub(crate) type ParseErr<'a> = ErrMode<ErrorContext>;
434pub(crate) type ParseResult<'a, T = &'a str> = Result<T, ParseErr<'a>>;
435
436/// This type is used to handle `nom` errors and in particular to add custom error messages.
437/// It used to generate `ParserError`.
438///
439/// It cannot be used to replace `ParseError` because it expects a generic, which would make
440/// `askama`'s users experience less good (since this generic is only needed for `nom`).
441#[derive(Debug)]
442pub(crate) struct ErrorContext {
443    pub(crate) span: Span,
444    pub(crate) message: Option<Cow<'static, str>>,
445}
446
447impl ErrorContext {
448    #[cold]
449    fn unclosed(kind: &str, tag: &str, span: impl Into<Span>) -> Self {
450        Self {
451            span: span.into(),
452            message: Some(format!("unclosed {kind}, missing {tag:?}").into()),
453        }
454    }
455
456    #[cold]
457    #[inline]
458    fn new(message: impl Into<Cow<'static, str>>, span: impl Into<Span>) -> Self {
459        Self {
460            span: span.into(),
461            message: Some(message.into()),
462        }
463    }
464
465    #[inline]
466    fn backtrack(self) -> ErrMode<Self> {
467        ErrMode::Backtrack(self)
468    }
469
470    #[inline]
471    fn cut(self) -> ErrMode<Self> {
472        ErrMode::Cut(self)
473    }
474}
475
476impl<'a: 'l, 'l> winnow::error::ParserError<InputStream<'a, 'l>> for ErrorContext {
477    type Inner = Self;
478
479    #[inline]
480    fn from_input(input: &InputStream<'a, 'l>) -> Self {
481        Self {
482            span: input.into(),
483            message: None,
484        }
485    }
486
487    #[inline(always)]
488    fn into_inner(self) -> Result<Self::Inner, Self> {
489        Ok(self)
490    }
491}
492
493fn skip_ws0<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
494    take_while(0.., |c: char| c.is_ascii_whitespace())
495        .void()
496        .parse_next(i)
497}
498
499fn skip_ws1<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
500    take_while(1.., |c: char| c.is_ascii_whitespace())
501        .void()
502        .parse_next(i)
503}
504
505fn ws<'a: 'l, 'l, O>(
506    inner: impl ModalParser<InputStream<'a, 'l>, O, ErrorContext>,
507) -> impl ModalParser<InputStream<'a, 'l>, O, ErrorContext> {
508    delimited(skip_ws0, inner, skip_ws0)
509}
510
511fn keyword<'a: 'l, 'l>(k: &str) -> impl ModalParser<InputStream<'a, 'l>, &'a str, ErrorContext> {
512    identifier.verify(move |v: &str| v == k)
513}
514
515fn identifier<'a: 'l, 'l>(input: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
516    let head = any.verify(|&c| c == '_' || unicode_ident::is_xid_start(c));
517    let tail = take_while(.., unicode_ident::is_xid_continue);
518    (head, tail).take().parse_next(input)
519}
520
521fn bool_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
522    alt((keyword("false"), keyword("true"))).parse_next(i)
523}
524
525#[derive(Debug, Clone, Copy, PartialEq)]
526pub enum Num<'a> {
527    Int(&'a str, Option<IntKind>),
528    Float(&'a str, Option<FloatKind>),
529}
530
531fn check_base_digits<'a>(digits: &'a str, base: u32, span: Range<usize>) -> ParseResult<'a, ()> {
532    let allowed_digits: &[char] = match base {
533        2 => &['0', '1'],
534        8 => &['0', '1', '2', '3', '4', '5', '6', '7'],
535        16 => &[
536            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
537        ],
538        _ => panic!("unsupported base `{base}`"),
539    };
540
541    for digit in digits.chars() {
542        let lower = digit.to_ascii_lowercase();
543        if lower != '_' && !allowed_digits.iter().any(|c| *c == digit || *c == lower) {
544            let allowed = allowed_digits.iter().collect::<String>();
545            let base = match base {
546                2 => 'b',
547                8 => 'o',
548                16 => 'x',
549                _ => unreachable!(),
550            };
551            return cut_error!(
552                format!("only expected `{allowed}` digits for `0{base}` integers, found `{digit}`"),
553                span,
554            );
555        }
556    }
557    Ok(())
558}
559
560fn num_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, Num<'a>> {
561    fn num_lit_suffix<'a: 'l, 'l, T: Copy>(
562        kind: &'a str,
563        list: &[(&str, T)],
564        i: &mut InputStream<'a, 'l>,
565    ) -> ParseResult<'a, T> {
566        let (suffix, span) = identifier.with_span().parse_next(i)?;
567        if let Some(value) = list
568            .iter()
569            .copied()
570            .find_map(|(name, value)| (name == suffix).then_some(value))
571        {
572            Ok(value)
573        } else {
574            cut_error!(format!("unknown {kind} suffix `{suffix}`"), span)
575        }
576    }
577
578    // Equivalent to <https://github.com/rust-lang/rust/blob/e3f909b2bbd0b10db6f164d466db237c582d3045/compiler/rustc_lexer/src/lib.rs#L587-L620>.
579    let int_with_base = (opt('-'), |i: &mut _| {
580        let ((base, kind), span) = preceded('0', alt(('b'.value(2), 'o'.value(8), 'x'.value(16))))
581            .with_taken()
582            .with_span()
583            .parse_next(i)?;
584        match opt(separated_digits(if base == 16 { base } else { 10 }, false)).parse_next(i)? {
585            Some(digits) => check_base_digits(digits, base, span),
586            None => cut_error!(format!("expected digits after `{kind}`"), span),
587        }
588    });
589
590    // Equivalent to <https://github.com/rust-lang/rust/blob/e3f909b2bbd0b10db6f164d466db237c582d3045/compiler/rustc_lexer/src/lib.rs#L626-L653>:
591    // no `_` directly after the decimal point `.`, or between `e` and `+/-`.
592    let float = |i: &mut InputStream<'a, 'l>| -> ParseResult<'a, ()> {
593        let has_dot = opt(('.', separated_digits(10, true))).parse_next(i)?;
594        let has_exp = opt(|i: &mut _| {
595            let ((kind, op), span) = (one_of(['e', 'E']), opt(one_of(['+', '-'])))
596                .with_span()
597                .parse_next(i)?;
598            match opt(separated_digits(10, op.is_none())).parse_next(i)? {
599                Some(_) => Ok(()),
600                None => {
601                    cut_error!(
602                        format!("expected decimal digits, `+` or `-` after exponent `{kind}`"),
603                        span,
604                    )
605                }
606            }
607        })
608        .parse_next(i)?;
609        match (has_dot, has_exp) {
610            (Some(_), _) | (_, Some(())) => Ok(()),
611            _ => fail(i),
612        }
613    };
614
615    let num = if let Some(num) = opt(int_with_base.take()).parse_next(i)? {
616        let suffix = opt(|i: &mut _| num_lit_suffix("integer", INTEGER_TYPES, i)).parse_next(i)?;
617        Num::Int(num, suffix)
618    } else {
619        let (float, num) = preceded((opt('-'), separated_digits(10, true)), opt(float))
620            .with_taken()
621            .parse_next(i)?;
622        if float.is_some() {
623            let suffix = opt(|i: &mut _| num_lit_suffix("float", FLOAT_TYPES, i)).parse_next(i)?;
624            Num::Float(num, suffix)
625        } else {
626            let suffix = opt(|i: &mut _| num_lit_suffix("number", NUM_TYPES, i)).parse_next(i)?;
627            match suffix {
628                Some(NumKind::Int(kind)) => Num::Int(num, Some(kind)),
629                Some(NumKind::Float(kind)) => Num::Float(num, Some(kind)),
630                None => Num::Int(num, None),
631            }
632        }
633    };
634    Ok(num)
635}
636
637/// Underscore separated digits of the given base, unless `start` is true this may start
638/// with an underscore.
639fn separated_digits<'a: 'l, 'l>(
640    radix: u32,
641    start: bool,
642) -> impl ModalParser<InputStream<'a, 'l>, &'a str, ErrorContext> {
643    (
644        cond(!start, repeat(0.., '_').map(|()| ())),
645        one_of(move |ch: char| ch.is_digit(radix)),
646        repeat(0.., one_of(move |ch: char| ch == '_' || ch.is_digit(radix))).map(|()| ()),
647    )
648        .take()
649}
650
/// The prefix of a string literal.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum StrPrefix {
    /// `b"..."`
    Binary,
    /// `c"..."`
    CLike,
}

impl StrPrefix {
    /// The character that introduces the literal in the source.
    #[must_use]
    pub fn to_char(self) -> char {
        match self {
            StrPrefix::Binary => 'b',
            StrPrefix::CLike => 'c',
        }
    }
}

impl fmt::Display for StrPrefix {
    /// Writes the prefix character, see [`StrPrefix::to_char()`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Write::write_char(f, self.to_char())
    }
}
674
675#[derive(Clone, Debug, PartialEq)]
676pub struct StrLit<'a> {
677    /// the unparsed (but validated) content
678    pub content: &'a str,
679    /// whether the string literal is unprefixed, a cstring or binary slice
680    pub prefix: Option<StrPrefix>,
681    /// contains a NUL character, either escaped `'\0'` or the very characters;
682    /// not allowed in cstring literals
683    pub contains_null: bool,
684    /// contains a non-ASCII character, either as `\u{123456}` or as an unescaped character;
685    /// not allowed in binary slices
686    pub contains_unicode_character: bool,
687    /// contains unicode escape sequences like `\u{12}` (regardless of its range);
688    /// not allowed in binary slices
689    pub contains_unicode_escape: bool,
690    /// contains a non-ASCII range escape sequence like `\x80`;
691    /// not allowed in unprefix strings
692    pub contains_high_ascii: bool,
693}
694
695fn str_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, StrLit<'a>> {
696    // <https://doc.rust-lang.org/reference/tokens.html#r-lex.token.literal.str.syntax>
697
698    fn inner<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, StrLit<'a>> {
699        #[derive(Debug, Clone, PartialEq, Eq)]
700        enum Sequence<'a> {
701            Text(&'a str),
702            Close,
703            Escape,
704            CrLf,
705            Cr(Range<usize>),
706        }
707
708        let mut contains_null = false;
709        let mut contains_unicode_character = false;
710        let mut contains_unicode_escape = false;
711        let mut contains_high_ascii = false;
712
713        while !i.is_empty() {
714            let seq = alt((
715                repeat::<_, _, (), _, _>(1.., none_of(['\r', '\\', '"']))
716                    .take()
717                    .map(Sequence::Text),
718                ('\r'.span(), opt('\n')).map(|(span, has_lf)| match has_lf {
719                    Some(_) => Sequence::CrLf,
720                    None => Sequence::Cr(span),
721                }),
722                '\\'.value(Sequence::Escape),
723                peek('"').value(Sequence::Close),
724            ))
725            .parse_next(i)?;
726
727            match seq {
728                Sequence::Text(s) => {
729                    contains_unicode_character =
730                        contains_unicode_character || s.bytes().any(|c: u8| !c.is_ascii());
731                    contains_null = contains_null || s.bytes().any(|c: u8| c == 0);
732                    continue;
733                }
734                Sequence::CrLf => continue,
735                Sequence::Cr(span) => {
736                    return cut_error!(
737                        "a bare CR (Mac linebreak) is not allowed in string literals, \
738                        use NL (Unix linebreak) or CRNL (Windows linebreak) instead, \
739                        or type `\\r` explicitly",
740                        span,
741                    );
742                }
743                Sequence::Close => break,
744                Sequence::Escape => {}
745            }
746
747            match any.parse_next(i)? {
748                '\'' | '"' | 'n' | 'r' | 't' | '\\' => continue,
749                '0' => {
750                    contains_null = true;
751                    continue;
752                }
753                'x' => {
754                    let code = take_while(2, AsChar::is_hex_digit).parse_next(i)?;
755                    match u8::from_str_radix(code, 16).unwrap() {
756                        0 => contains_null = true,
757                        128.. => contains_high_ascii = true,
758                        _ => {}
759                    }
760                }
761                'u' => {
762                    contains_unicode_escape = true;
763                    let (code, span) = delimited('{', take_while(1..=6, AsChar::is_hex_digit), '}')
764                        .with_span()
765                        .parse_next(i)?;
766                    match u32::from_str_radix(code, 16).unwrap() {
767                        0 => contains_null = true,
768                        0xd800..0xe000 => {
769                            return cut_error!("unicode escape must not be a surrogate", span);
770                        }
771                        0x110000.. => {
772                            return cut_error!("unicode escape must be at most 10FFFF", span);
773                        }
774                        128.. => contains_unicode_character = true,
775                        _ => {}
776                    }
777                }
778                _ => return fail(i),
779            }
780        }
781
782        Ok(StrLit {
783            content: "",
784            prefix: None,
785            contains_null,
786            contains_unicode_character,
787            contains_unicode_escape,
788            contains_high_ascii,
789        })
790    }
791
792    let ((prefix, lit), span) = (
793        terminated(
794            opt(alt((
795                'b'.value(StrPrefix::Binary),
796                'c'.value(StrPrefix::CLike),
797            ))),
798            '"',
799        ),
800        opt(terminated(inner.with_taken(), '"')),
801    )
802        .with_span()
803        .parse_next(i)?;
804
805    let Some((mut lit, content)) = lit else {
806        return cut_error!("unclosed or broken string", span);
807    };
808    lit.content = content;
809    lit.prefix = prefix;
810
811    let msg = match prefix {
812        Some(StrPrefix::Binary) => {
813            if lit.contains_unicode_character {
814                Some("non-ASCII character in byte string literal")
815            } else if lit.contains_unicode_escape {
816                Some("unicode escape in byte string")
817            } else {
818                None
819            }
820        }
821        Some(StrPrefix::CLike) => lit
822            .contains_null
823            .then_some("null characters in C string literals are not supported"),
824        None => lit.contains_high_ascii.then_some("out of range hex escape"),
825    };
826    if let Some(msg) = msg {
827        return cut_error!(msg, span);
828    }
829
830    not_suffix_with_hash(i)?;
831    Ok(lit)
832}
833
834fn not_suffix_with_hash<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
835    if let Some(span) = opt(identifier.span()).parse_next(i)? {
836        return cut_error!(
837            "you are missing a space to separate two string literals",
838            span,
839        );
840    }
841    Ok(())
842}
843
844fn str_lit_without_prefix<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
845    let (lit, span) = str_lit.with_span().parse_next(i)?;
846
847    let kind = match lit.prefix {
848        Some(StrPrefix::Binary) => Some("binary slice"),
849        Some(StrPrefix::CLike) => Some("cstring"),
850        None => None,
851    };
852    if let Some(kind) = kind {
853        return cut_error!(
854            format!("expected an unprefixed normal string, not a {kind}"),
855            span,
856        );
857    }
858
859    Ok(lit.content)
860}
861
/// The prefix of a character literal.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum CharPrefix {
    /// `b'...'`
    Binary,
}
866
867#[derive(Clone, Debug, PartialEq)]
868pub struct CharLit<'a> {
869    pub prefix: Option<CharPrefix>,
870    pub content: &'a str,
871}
872
873// Information about allowed character escapes is available at:
874// <https://doc.rust-lang.org/reference/tokens.html#character-literals>.
875fn char_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, CharLit<'a>> {
876    let ((prefix, _, content, is_closed), span) = (
877        alt(('b'.value(Some(CharPrefix::Binary)), empty.value(None))),
878        '\'',
879        opt(take_escaped(none_of(['\\', '\'']), '\\', any)),
880        opt('\''),
881    )
882        .with_span()
883        .parse_next(i)?;
884
885    if is_closed.is_none() {
886        if let Some(prefix) = prefix {
887            return cut_error!(
888                match prefix {
889                    CharPrefix::Binary => "unterminated byte literal",
890                },
891                span,
892            );
893        } else {
894            return fail(i);
895        }
896    }
897
898    let content = match content.unwrap_or_default() {
899        "" => {
900            return cut_error!(
901                match prefix {
902                    Some(CharPrefix::Binary) => "empty byte literal",
903                    None => "empty character literal",
904                },
905                span,
906            );
907        }
908        content => content,
909    };
910
911    let mut content_i = content;
912    let Ok(c) = Char::parse(&mut content_i) else {
913        return cut_error!("invalid character", span);
914    };
915    if !content_i.is_empty() {
916        let (c, s) = match prefix {
917            Some(CharPrefix::Binary) => ("byte", "binary string"),
918            None => ("character", "string"),
919        };
920        return cut_error!(
921            format!(
922                "cannot have multiple characters in a {c} literal, use `{}\"...\"` to write a {s}",
923                match prefix {
924                    Some(CharPrefix::Binary) => "b",
925                    None => "",
926                }
927            ),
928            span,
929        );
930    }
931
932    let (nb, max_value, err1, err2) = match c {
933        Char::Literal | Char::Escaped => {
934            return Ok(CharLit { prefix, content });
935        }
936        Char::AsciiEscape(nb) => (
937            nb,
938            // `0x7F` is the maximum value for a `\x` escaped character.
939            0x7F,
940            "invalid character in ascii escape",
941            "must be a character in the range [\\x00-\\x7f]",
942        ),
943        Char::UnicodeEscape(nb) => {
944            match prefix {
945                Some(CharPrefix::Binary) => {
946                    return cut_error!(
947                        "cannot use unicode escape in byte string in byte literal",
948                        span,
949                    );
950                }
951                None => (
952                    nb,
953                    // `0x10FFFF` is the maximum value for a `\u` escaped character.
954                    0x0010_FFFF,
955                    "invalid character in unicode escape",
956                    "unicode escape must be at most 10FFFF",
957                ),
958            }
959        }
960    };
961
962    let Ok(nb) = u32::from_str_radix(nb, 16) else {
963        return cut_error!(err1, span);
964    };
965    if nb > max_value {
966        return cut_error!(err2, span);
967    }
968
969    Ok(CharLit { prefix, content })
970}
971
/// The syntactic category of the single character found inside a character literal.
///
/// Produced by [`Char::parse`]. The escape variants borrow their hexadecimal digits from the
/// input so that the caller can range-check the encoded value.
#[derive(Copy, Clone)]
enum Char<'a> {
    /// Any character that is not escaped.
    Literal,
    /// An escaped character (like `\n`) which doesn't require any extra check.
    Escaped,
    /// Ascii escape (like `\x12`). Holds the two hexadecimal digits following `\x`.
    AsciiEscape(&'a str),
    /// Unicode escape (like `\u{12}`). Holds the one to six hexadecimal digits found between
    /// the braces.
    UnicodeEscape(&'a str),
}
984
985impl<'a> Char<'a> {
986    fn parse(i: &mut &'a str) -> ModalResult<Self, ()> {
987        let unescaped = none_of(('\\', '\'')).value(Self::Literal);
988        let escaped = preceded(
989            '\\',
990            alt((
991                'n'.value(Self::Escaped),
992                'r'.value(Self::Escaped),
993                't'.value(Self::Escaped),
994                '\\'.value(Self::Escaped),
995                '0'.value(Self::Escaped),
996                '\''.value(Self::Escaped),
997                // Not useful but supported by rust.
998                '"'.value(Self::Escaped),
999                ('x', take_while(2, |c: char| c.is_ascii_hexdigit()))
1000                    .map(|(_, s)| Self::AsciiEscape(s)),
1001                (
1002                    "u{",
1003                    take_while(1..=6, |c: char| c.is_ascii_hexdigit()),
1004                    '}',
1005                )
1006                    .map(|(_, s, _)| Self::UnicodeEscape(s)),
1007            )),
1008        );
1009        alt((unescaped, escaped)).parse_next(i)
1010    }
1011}
1012
/// Result of parsing something that is either a plain variable name or a (type) path.
#[derive(Clone, Debug, PartialEq)]
pub enum PathOrIdentifier<'a> {
    /// A path made of one or more components. An absolute path (leading `::`) starts with a
    /// component whose name is the empty string.
    Path(Vec<PathComponent<'a>>),
    /// A single name without generics or `::` that starts with `_` or a lowercase character.
    Identifier(WithSpan<&'a str>),
}
1018
1019fn path_or_identifier<'a: 'l, 'l>(
1020    i: &mut InputStream<'a, 'l>,
1021) -> ParseResult<'a, PathOrIdentifier<'a>> {
1022    let mut p = |i: &mut _| {
1023        let root = ws(opt(terminated(empty.span(), "::")));
1024        let start = PathComponent::parse;
1025        let tail = opt(repeat(1.., preceded(ws("::"), PathComponent::parse)).map(|v: Vec<_>| v));
1026
1027        let (root, start, rest) = (root, start, tail).parse_next(i)?;
1028        Ok((root, start, rest.unwrap_or_default()))
1029    };
1030    let (root, start, rest) = p.parse_next(i)?;
1031
1032    // The returned identifier can be assumed to be path if:
1033    // - it is an absolute path (starts with `::`), or
1034    // - it has multiple components (at least one `::`), or
1035    // - the first letter is uppercase
1036    match (root, start, rest) {
1037        (None, arg, tail)
1038            if tail.is_empty()
1039                && arg.generics.is_none()
1040                && arg
1041                    .name
1042                    .chars()
1043                    .next()
1044                    .is_none_or(|c| c == '_' || c.is_lowercase()) =>
1045        {
1046            Ok(PathOrIdentifier::Identifier(arg.name))
1047        }
1048        (root, start, tail) => {
1049            let mut path = if let Some(root) = root {
1050                let mut path = Vec::with_capacity(2 + tail.len());
1051                path.push(PathComponent {
1052                    name: WithSpan::new("", root),
1053                    generics: None,
1054                });
1055                path
1056            } else {
1057                Vec::with_capacity(1 + tail.len())
1058            };
1059            path.push(start);
1060            path.extend(tail);
1061            Ok(PathOrIdentifier::Path(path))
1062        }
1063    }
1064}
1065
/// Parser state that is reachable from the input stream (`i.state`) while parsing.
#[derive(Debug, Clone, Default)]
struct State<'a> {
    /// The delimiters in effect, read by `block_start()`, `block_end()`, `expr_start()` and
    /// `expr_end()`.
    syntax: Syntax<'a>,
    /// Number of loops currently entered; see `enter_loop()` / `leave_loop()`.
    loop_depth: Cell<usize>,
    /// Remaining nesting budget for nodes and expressions.
    level: Level,
}
1072
1073fn block_start<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1074    i.state.syntax.block_start.void().parse_next(i)
1075}
1076
1077fn block_end<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1078    let (control, span) = alt((
1079        i.state.syntax.block_end.value(None),
1080        peek(delimited('%', alt(('-', '~', '+')).map(Some), '}')),
1081        fail, // rollback on partial matches in the previous line
1082    ))
1083    .with_span()
1084    .parse_next(i)?;
1085
1086    let Some(control) = control else {
1087        return Ok(());
1088    };
1089
1090    let err = ErrorContext::new(
1091        format!(
1092            "unclosed block, you likely meant to apply whitespace control: \"{}{}\"",
1093            control.escape_default(),
1094            i.state.syntax.block_end.escape_default(),
1095        ),
1096        span,
1097    );
1098    Err(err.backtrack())
1099}
1100
1101fn expr_start<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1102    i.state.syntax.expr_start.void().parse_next(i)
1103}
1104
1105fn expr_end<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1106    i.state.syntax.expr_end.void().parse_next(i)
1107}
1108
1109impl State<'_> {
1110    fn enter_loop(&self) {
1111        self.loop_depth.set(self.loop_depth.get() + 1);
1112    }
1113
1114    fn leave_loop(&self) {
1115        self.loop_depth.set(self.loop_depth.get() - 1);
1116    }
1117
1118    fn is_in_loop(&self) -> bool {
1119        self.loop_depth.get() > 0
1120    }
1121}
1122
/// The set of delimiters that mark blocks, expressions and comments in a template.
///
/// This is a wrapper around [`InnerSyntax`] with a private field: the delimiters can be read
/// through [`Deref`], and a validated instance is obtained from
/// [`SyntaxBuilder::to_syntax()`].
#[derive(Default, Hash, PartialEq, Clone, Copy)]
pub struct Syntax<'a>(InnerSyntax<'a>);
1125
// This abstraction ensures that the fields are readable, but not writable.
/// The delimiter strings wrapped by [`Syntax`].
#[derive(Hash, PartialEq, Clone, Copy)]
pub struct InnerSyntax<'a> {
    /// Opening delimiter of a block (default `{%`).
    pub block_start: &'a str,
    /// Closing delimiter of a block (default `%}`).
    pub block_end: &'a str,
    /// Opening delimiter of an expression (default `{{`).
    pub expr_start: &'a str,
    /// Closing delimiter of an expression (default `}}`).
    pub expr_end: &'a str,
    /// Opening delimiter of a comment (default `{#`).
    pub comment_start: &'a str,
    /// Closing delimiter of a comment (default `#}`).
    pub comment_end: &'a str,
}
1136
1137impl<'a> Deref for Syntax<'a> {
1138    type Target = InnerSyntax<'a>;
1139
1140    #[inline]
1141    fn deref(&self) -> &Self::Target {
1142        &self.0
1143    }
1144}
1145
impl Default for InnerSyntax<'static> {
    /// The default delimiters: `{% %}` for blocks, `{{ }}` for expressions and `{# #}` for
    /// comments.
    fn default() -> Self {
        Self {
            block_start: "{%",
            block_end: "%}",
            expr_start: "{{",
            expr_end: "}}",
            comment_start: "{#",
            comment_end: "#}",
        }
    }
}
1158
1159impl fmt::Debug for Syntax<'_> {
1160    #[inline]
1161    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1162        fmt_syntax("Syntax", self, f)
1163    }
1164}
1165
impl fmt::Debug for InnerSyntax<'_> {
    /// Formats as a struct named `InnerSyntax` listing the six delimiter fields.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt_syntax("InnerSyntax", self, f)
    }
}
1172
1173fn fmt_syntax(name: &str, inner: &InnerSyntax<'_>, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1174    f.debug_struct(name)
1175        .field("block_start", &inner.block_start)
1176        .field("block_end", &inner.block_end)
1177        .field("expr_start", &inner.expr_start)
1178        .field("expr_end", &inner.expr_end)
1179        .field("comment_start", &inner.comment_start)
1180        .field("comment_end", &inner.comment_end)
1181        .finish()
1182}
1183
/// Unvalidated description of a custom syntax.
///
/// Every delimiter is optional; [`SyntaxBuilder::to_syntax()`] substitutes the default
/// delimiter for each `None` and validates the result.
#[derive(Debug, Default, Clone, Copy, Hash, PartialEq)]
#[cfg_attr(feature = "config", derive(serde_derive::Deserialize))]
pub struct SyntaxBuilder<'a> {
    /// Name of the syntax.
    // NOTE(review): not read by `to_syntax()`; presumably used to select a syntax by name
    // elsewhere — confirm against the callers.
    pub name: &'a str,
    /// Override for the opening block delimiter.
    pub block_start: Option<&'a str>,
    /// Override for the closing block delimiter.
    pub block_end: Option<&'a str>,
    /// Override for the opening expression delimiter.
    pub expr_start: Option<&'a str>,
    /// Override for the closing expression delimiter.
    pub expr_end: Option<&'a str>,
    /// Override for the opening comment delimiter.
    pub comment_start: Option<&'a str>,
    /// Override for the closing comment delimiter.
    pub comment_end: Option<&'a str>,
}
1195
1196impl<'a> SyntaxBuilder<'a> {
1197    pub fn to_syntax(&self) -> Result<Syntax<'a>, String> {
1198        let default = InnerSyntax::default();
1199        let syntax = Syntax(InnerSyntax {
1200            block_start: self.block_start.unwrap_or(default.block_start),
1201            block_end: self.block_end.unwrap_or(default.block_end),
1202            expr_start: self.expr_start.unwrap_or(default.expr_start),
1203            expr_end: self.expr_end.unwrap_or(default.expr_end),
1204            comment_start: self.comment_start.unwrap_or(default.comment_start),
1205            comment_end: self.comment_end.unwrap_or(default.comment_end),
1206        });
1207
1208        for (s, k, is_closing) in [
1209            (syntax.block_start, "opening block", false),
1210            (syntax.block_end, "closing block", true),
1211            (syntax.expr_start, "opening expression", false),
1212            (syntax.expr_end, "closing expression", true),
1213            (syntax.comment_start, "opening comment", false),
1214            (syntax.comment_end, "closing comment", true),
1215        ] {
1216            if s.len() < 2 {
1217                return Err(format!(
1218                    "delimiters must be at least two characters long. \
1219                        The {k} delimiter ({s:?}) is too short",
1220                ));
1221            } else if s.len() > 32 {
1222                return Err(format!(
1223                    "delimiters must be at most 32 characters long. \
1224                        The {k} delimiter ({:?}...) is too long",
1225                    &s[..(16..=s.len())
1226                        .find(|&i| s.is_char_boundary(i))
1227                        .unwrap_or(s.len())],
1228                ));
1229            } else if s.chars().any(char::is_whitespace) {
1230                return Err(format!(
1231                    "delimiters may not contain white spaces. \
1232                        The {k} delimiter ({s:?}) contains white spaces",
1233                ));
1234            } else if is_closing
1235                && ['(', '-', '+', '~', '.', '>', '<', '&', '|', '!']
1236                    .contains(&s.chars().next().unwrap())
1237            {
1238                return Err(format!(
1239                    "closing delimiters may not start with operators. \
1240                        The {k} delimiter ({s:?}) starts with operator `{}`",
1241                    s.chars().next().unwrap(),
1242                ));
1243            }
1244        }
1245
1246        for ((s1, k1), (s2, k2)) in [
1247            (
1248                (syntax.block_start, "block"),
1249                (syntax.expr_start, "expression"),
1250            ),
1251            (
1252                (syntax.block_start, "block"),
1253                (syntax.comment_start, "comment"),
1254            ),
1255            (
1256                (syntax.expr_start, "expression"),
1257                (syntax.comment_start, "comment"),
1258            ),
1259        ] {
1260            if s1.starts_with(s2) || s2.starts_with(s1) {
1261                let (s1, k1, s2, k2) = match s1.len() < s2.len() {
1262                    true => (s1, k1, s2, k2),
1263                    false => (s2, k2, s1, k1),
1264                };
1265                return Err(format!(
1266                    "an opening delimiter may not be the prefix of another delimiter. \
1267                        The {k1} delimiter ({s1:?}) clashes with the {k2} delimiter ({s2:?})",
1268                ));
1269            }
1270        }
1271
1272        Ok(syntax)
1273    }
1274}
1275
/// The nesting level of nodes and expressions.
///
/// The level counts down from [`Level::MAX_DEPTH`] to 0. Once the value would reach below 0,
/// [`Level::nest()`] / [`LevelGuard::nest()`] will return an error. The same [`Level`] instance is
/// shared across all usages in a [`Parsed::new()`] / [`Ast::from_str()`] call, using a reference
/// to an interior mutable counter.
///
/// The wrapped [`Cell`] holds the number of levels that may still be entered.
#[derive(Debug, Clone)]
struct Level(Cell<usize>);
1284
1285impl Default for Level {
1286    #[inline]
1287    fn default() -> Self {
1288        Self(Cell::new(Level::MAX_DEPTH))
1289    }
1290}
1291
1292impl Level {
1293    const MAX_DEPTH: usize = 128;
1294
1295    /// Acquire a [`LevelGuard`] without decrementing the counter, to be used with loops.
1296    fn guard(&self) -> LevelGuard<'_> {
1297        LevelGuard {
1298            level: self,
1299            count: 0,
1300        }
1301    }
1302
1303    /// Decrement the remaining level counter, and return a [`LevelGuard`] that increments it again
1304    /// when it's dropped.
1305    fn nest<'a: 'l, 'l>(&self, i: &InputStream<'a, 'l>) -> ParseResult<'a, LevelGuard<'_>> {
1306        self.nest_multiple(i, 1)
1307    }
1308
1309    /// Decrement the remaining level counter by `count`, and return a [`LevelGuard`] that
1310    /// increments it again when it's dropped.
1311    fn nest_multiple<'a: 'l, 'l>(
1312        &self,
1313        i: &InputStream<'a, 'l>,
1314        count: usize,
1315    ) -> ParseResult<'a, LevelGuard<'_>> {
1316        if let Some(new_level) = self.0.get().checked_sub(count) {
1317            self.0.set(new_level);
1318            Ok(LevelGuard { level: self, count })
1319        } else {
1320            Self::_fail(i)
1321        }
1322    }
1323
1324    #[cold]
1325    #[inline(never)]
1326    fn _fail<'a: 'l, 'l, T>(i: &InputStream<'a, 'l>) -> ParseResult<'a, T> {
1327        let msg = "your template code is too deeply nested, or the last expression is too complex";
1328        Err(ErrorContext::new(msg, i).cut())
1329    }
1330}
1331
/// Used to keep track how often [`LevelGuard::nest()`] was called and to re-increment the
/// remaining level counter when it is dropped / falls out of scope.
#[must_use]
#[derive(Debug)]
struct LevelGuard<'l> {
    /// The shared counter the consumed levels are returned to.
    level: &'l Level,
    /// How many levels this guard has consumed so far and will give back on drop.
    count: usize,
}
1340
1341impl Drop for LevelGuard<'_> {
1342    fn drop(&mut self) {
1343        self.level.0.set(self.level.0.get() + self.count);
1344    }
1345}
1346
1347impl LevelGuard<'_> {
1348    /// Used to decrement the level multiple times, e.g. for every iteration of a loop.
1349    fn nest<'a: 'l, 'l>(&mut self, i: &InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1350        if let Some(new_level) = self.level.0.get().checked_sub(1) {
1351            self.level.0.set(new_level);
1352            self.count += 1;
1353            Ok(())
1354        } else {
1355            Level::_fail(i)
1356        }
1357    }
1358}
1359
1360fn filter<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, Filter<'a>> {
1361    preceded(('|', not('|')), cut_err(Filter::parse)).parse_next(i)
1362}
1363
/// Shortens `path` for display by removing the leading components it shares with `base`.
///
/// `path` is canonicalized first; `base` (generally the directory the program is running in)
/// is compared as given. For example:
///
/// ```text
/// base: /a/b/c
/// path: /a/b/c/d/e.txt
/// ```
///
/// `strip_common` will return `d/e.txt`.
///
/// The path is returned in full instead if
/// - it cannot be canonicalized (then `path` is printed as passed in), or
/// - nothing would be left after stripping, i.e. all of its components are shared with `base`.
#[must_use]
pub fn strip_common(base: &Path, path: &Path) -> String {
    let Ok(canonical) = path.canonicalize() else {
        return path.display().to_string();
    };

    let mut remaining = canonical.components().peekable();
    for base_component in base.components() {
        match remaining.peek().copied() {
            // `path` ran out of components before `base` did.
            None => return canonical.display().to_string(),
            Some(component) if component == base_component => {
                remaining.next();
            }
            // First difference: everything from here on is kept.
            Some(_) => break,
        }
    }

    let tail: Vec<_> = remaining
        .map(|component| component.as_os_str().to_string_lossy())
        .collect();
    if tail.is_empty() {
        canonical.display().to_string()
    } else {
        tail.join(std::path::MAIN_SEPARATOR_STR)
    }
}
1401
/// Returns `false` for the path keywords `self`, `Self`, `super` and `crate`, and `true` for
/// every other string.
#[inline]
pub(crate) fn can_be_variable_name(name: &str) -> bool {
    const RESERVED: [&str; 4] = ["self", "Self", "super", "crate"];
    !RESERVED.contains(&name)
}
1406
/// The primitive integer types; each variant corresponds to one entry of `INTEGER_TYPES`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntKind {
    I8,
    I16,
    I32,
    I64,
    I128,
    Isize,
    U8,
    U16,
    U32,
    U64,
    U128,
    Usize,
}
1422
/// The primitive floating point types; each variant corresponds to one entry of `FLOAT_TYPES`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FloatKind {
    F16,
    F32,
    F64,
    F128,
}
1430
/// Either an integer or a floating point type; the value type of the `NUM_TYPES` table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NumKind {
    /// One of the primitive integer types.
    Int(IntKind),
    /// One of the primitive floating point types.
    Float(FloatKind),
}
1436
/// Primitive integer types. Also used as number suffixes.
///
/// Maps the spelling of each type to its [`IntKind`].
const INTEGER_TYPES: &[(&str, IntKind)] = &[
    ("i8", IntKind::I8),
    ("i16", IntKind::I16),
    ("i32", IntKind::I32),
    ("i64", IntKind::I64),
    ("i128", IntKind::I128),
    ("isize", IntKind::Isize),
    ("u8", IntKind::U8),
    ("u16", IntKind::U16),
    ("u32", IntKind::U32),
    ("u64", IntKind::U64),
    ("u128", IntKind::U128),
    ("usize", IntKind::Usize),
];
1452
/// Primitive floating point types. Also used as number suffixes.
///
/// Maps the spelling of each type to its [`FloatKind`].
const FLOAT_TYPES: &[(&str, FloatKind)] = &[
    ("f16", FloatKind::F16),
    ("f32", FloatKind::F32),
    ("f64", FloatKind::F64),
    ("f128", FloatKind::F128),
];
1460
1461/// Primitive numeric types. Also used as number suffixes.
1462const NUM_TYPES: &[(&str, NumKind)] = &{
1463    let mut list = [("", NumKind::Int(IntKind::I8)); INTEGER_TYPES.len() + FLOAT_TYPES.len()];
1464    let mut i = 0;
1465    let mut o = 0;
1466    while i < INTEGER_TYPES.len() {
1467        let (name, value) = INTEGER_TYPES[i];
1468        list[o] = (name, NumKind::Int(value));
1469        i += 1;
1470        o += 1;
1471    }
1472    let mut i = 0;
1473    while i < FLOAT_TYPES.len() {
1474        let (name, value) = FLOAT_TYPES[i];
1475        list[o] = (name, NumKind::Float(value));
1476        i += 1;
1477        o += 1;
1478    }
1479    list
1480};
1481
1482/// Complete list of named primitive types.
1483const PRIMITIVE_TYPES: &[&str] = &{
1484    let mut list = [""; NUM_TYPES.len() + 1];
1485    let mut i = 0;
1486    let mut o = 0;
1487    while i < NUM_TYPES.len() {
1488        list[o] = NUM_TYPES[i].0;
1489        i += 1;
1490        o += 1;
1491    }
1492    list[o] = "bool";
1493    list
1494};
1495
/// Length in bytes of the longest entry in `RUST_KEYWORDS`, and the fixed size every entry is
/// stored with.
const MAX_RUST_KEYWORD_LEN: usize = 8;
1497
/// Keyword table used by `is_rust_keyword()`, bucketed by keyword length.
///
/// Index `n` of the outer array holds all keywords that are exactly `n` bytes long; lengths 0
/// and 1 have no keywords. Every keyword is stored as a fixed-size array of
/// `MAX_RUST_KEYWORD_LEN` ASCII characters.
// NOTE(review): `is_rust_keyword()` compares against a zero-padded buffer, so
// `AsciiStr::new_sized` presumably pads shorter keywords with zeros — confirm in `ascii_str`.
const RUST_KEYWORDS: &[&[[AsciiChar; MAX_RUST_KEYWORD_LEN]]; MAX_RUST_KEYWORD_LEN + 1] = &{
    const NO_KWS: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[];
    const KW2: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("as"),
        AsciiStr::new_sized("do"),
        AsciiStr::new_sized("fn"),
        AsciiStr::new_sized("if"),
        AsciiStr::new_sized("in"),
    ];
    const KW3: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("box"),
        AsciiStr::new_sized("dyn"),
        AsciiStr::new_sized("for"),
        AsciiStr::new_sized("gen"),
        AsciiStr::new_sized("let"),
        AsciiStr::new_sized("mod"),
        AsciiStr::new_sized("mut"),
        AsciiStr::new_sized("pub"),
        AsciiStr::new_sized("ref"),
        AsciiStr::new_sized("try"),
        AsciiStr::new_sized("use"),
    ];
    const KW4: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("else"),
        AsciiStr::new_sized("enum"),
        AsciiStr::new_sized("impl"),
        AsciiStr::new_sized("loop"),
        AsciiStr::new_sized("move"),
        AsciiStr::new_sized("priv"),
        AsciiStr::new_sized("self"),
        AsciiStr::new_sized("Self"),
        AsciiStr::new_sized("true"),
        AsciiStr::new_sized("type"),
    ];
    const KW5: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("async"),
        AsciiStr::new_sized("await"),
        AsciiStr::new_sized("break"),
        AsciiStr::new_sized("const"),
        AsciiStr::new_sized("crate"),
        AsciiStr::new_sized("false"),
        AsciiStr::new_sized("final"),
        AsciiStr::new_sized("macro"),
        AsciiStr::new_sized("match"),
        AsciiStr::new_sized("super"),
        AsciiStr::new_sized("trait"),
        AsciiStr::new_sized("union"),
        AsciiStr::new_sized("where"),
        AsciiStr::new_sized("while"),
        AsciiStr::new_sized("yield"),
    ];
    const KW6: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("become"),
        AsciiStr::new_sized("extern"),
        AsciiStr::new_sized("return"),
        AsciiStr::new_sized("static"),
        AsciiStr::new_sized("struct"),
        AsciiStr::new_sized("typeof"),
        AsciiStr::new_sized("unsafe"),
        // NOTE(review): `caller` is not a Rust keyword; presumably reserved by the template
        // language itself — confirm.
        AsciiStr::new_sized("caller"),
    ];
    const KW7: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("unsized"),
        AsciiStr::new_sized("virtual"),
    ];
    const KW8: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("abstract"),
        AsciiStr::new_sized("continue"),
        AsciiStr::new_sized("override"),
    ];

    // The position in this array is the keyword length in bytes.
    [NO_KWS, NO_KWS, KW2, KW3, KW4, KW5, KW6, KW7, KW8]
};
1571
1572pub fn is_rust_keyword(ident: &str) -> bool {
1573    let ident_len = ident.len();
1574    if ident_len > MAX_RUST_KEYWORD_LEN {
1575        return false;
1576    }
1577    let kws = RUST_KEYWORDS[ident.len()];
1578
1579    let mut padded_ident = [0; MAX_RUST_KEYWORD_LEN];
1580    padded_ident[..ident_len].copy_from_slice(ident.as_bytes());
1581
1582    // Since the individual buckets are quite short, a linear search is faster than a binary search.
1583    for probe in kws {
1584        if padded_ident == *AsciiChar::slice_as_bytes(probe) {
1585            return true;
1586        }
1587    }
1588    false
1589}
1590
/// Expands to an `Err` value holding a cut error with the given message and span.
///
/// `$message` and `$span` are both converted with [`Into`] into the fields of an
/// [`ErrorContext`]. The conversion happens inside a closure that is only run by
/// `cut_context_err()`, which wraps the context in `ErrMode::Cut`.
///
/// The macro is an expression, not a statement: use it as `return cut_error!(msg, span);`.
macro_rules! cut_error {
    ($message:expr, $span:expr $(,)?) => {{
        // Fully qualified imports keep the expansion independent of the names in scope at the
        // call site.
        use ::std::convert::Into;
        use ::std::option::Option::Some;
        use $crate::ErrorContext;

        $crate::cut_context_err(
            #[cold]
            #[inline(always)]
            move || ErrorContext {
                span: Into::into($span),
                message: Some(Into::into($message)),
            },
        )
    }};
}

pub(crate) use cut_error;
1609
1610#[cold]
1611#[inline(never)]
1612fn cut_context_err<'a, T>(gen_err: impl FnOnce() -> ErrorContext) -> ParseResult<'a, T> {
1613    Err(ErrMode::Cut(gen_err()))
1614}
1615
/// A standard library hash set that uses `FxBuildHasher` instead of the default hasher.
type HashSet<T> = std::collections::hash_set::HashSet<T, FxBuildHasher>;
1617
#[cfg(test)]
mod test {
    use std::path::Path;

    use super::*;

    /// `strip_common()` canonicalizes its input, so this test works on real filesystem entries.
    #[cfg(not(windows))]
    #[test]
    fn test_strip_common() {
        // Full path is returned instead of empty when the entire path is in common.
        assert_eq!(strip_common(Path::new("home"), Path::new("home")), "home");

        let cwd = std::env::current_dir().expect("current_dir failed");

        // We need actual existing paths for `canonicalize` to work, so let's do that.
        let entry = cwd
            .read_dir()
            .expect("read_dir failed")
            .flatten()
            .find(|candidate| candidate.path().is_file())
            .expect("no entry");

        // Since they have the complete path in common except for the folder entry name, it should
        // return only the folder entry name.
        let entry_name = entry.file_name();
        assert_eq!(
            strip_common(&cwd, &entry.path()),
            entry_name.to_string_lossy()
        );

        // In this case it cannot canonicalize `/a/b/c` so it returns the path as is.
        assert_eq!(strip_common(&cwd, Path::new("/a/b/c")), "/a/b/c");
    }

    /// Runs `parser` on `input` and returns the unparsed rest together with the parsed value.
    #[track_caller]
    fn parse_peek<'a: 'l, 'l, T>(
        state: &'l State<'l>,
        parser: impl FnOnce(&mut InputStream<'a, 'l>) -> ParseResult<'a, T>,
        input: &'a str,
    ) -> ParseResult<'a, (&'a str, T)> {
        let mut stream = InputStream {
            input: LocatingSlice::new(input),
            state,
        };
        let value = parser(&mut stream)?;
        Ok((**stream, value))
    }

    #[test]
    fn test_num_lit() {
        let s = State::default();

        // Should fail. A leading `.` is not supported because Rust wants a number before it.
        for input in [".", ".1", ".1E-02"] {
            assert!(parse_peek(&s, num_lit, input).is_err(), "input: {input:?}");
        }

        // Should succeed: (input, unparsed rest, parsed number).
        let cases = [
            ("1.2E-02", "", Num::Float("1.2E-02", None)),
            ("4e3", "", Num::Float("4e3", None)),
            ("4e+_3", "", Num::Float("4e+_3", None)),
            // A `_` directly after the `.` denotes a field.
            ("1._0", "._0", Num::Int("1", None)),
            ("1_.0", "", Num::Float("1_.0", None)),
            // Not supported (voluntarily because of `1..` syntax).
            ("1.", ".", Num::Int("1", None)),
            ("1_.", ".", Num::Int("1_", None)),
            ("1_2.", ".", Num::Int("1_2", None)),
            // Numbers with suffixes
            ("-1usize", "", Num::Int("-1", Some(IntKind::Usize))),
            ("123_f32", "", Num::Float("123_", Some(FloatKind::F32))),
            (
                "1_.2_e+_3_f64|into_isize",
                "|into_isize",
                Num::Float("1_.2_e+_3_", Some(FloatKind::F64)),
            ),
            ("4e3f128", "", Num::Float("4e3", Some(FloatKind::F128))),
        ];
        for (input, rest, expected) in cases {
            assert_eq!(
                parse_peek(&s, num_lit, input).unwrap(),
                (rest, expected),
                "input: {input:?}",
            );
        }
    }

    #[test]
    fn test_char_lit() {
        let lit = |s: &'static str| crate::CharLit {
            prefix: None,
            content: s,
        };
        let s = State::default();

        // Should succeed: (input, expected content). The whole input is consumed every time.
        let accepted = [
            ("'a'", "a"),
            ("'字'", "字"),
            // Escaped single characters.
            ("'\\\"'", "\\\""),
            ("'\\''", "\\'"),
            ("'\\t'", "\\t"),
            ("'\\n'", "\\n"),
            ("'\\r'", "\\r"),
            ("'\\0'", "\\0"),
            // Escaped ascii characters (up to `0x7F`).
            ("'\\x12'", "\\x12"),
            ("'\\x02'", "\\x02"),
            ("'\\x6a'", "\\x6a"),
            ("'\\x7F'", "\\x7F"),
            // Escaped unicode characters (up to `0x10FFFF`).
            ("'\\u{A}'", "\\u{A}"),
            ("'\\u{10}'", "\\u{10}"),
            ("'\\u{aa}'", "\\u{aa}"),
            ("'\\u{10FFFF}'", "\\u{10FFFF}"),
        ];
        for (input, content) in accepted {
            assert_eq!(
                parse_peek(&s, char_lit, input).unwrap(),
                ("", lit(content)),
                "input: {input:?}",
            );
        }

        // Check with `b` prefix.
        let binary = crate::CharLit {
            prefix: Some(crate::CharPrefix::Binary),
            content: "a",
        };
        assert_eq!(parse_peek(&s, char_lit, "b'a'").unwrap(), ("", binary));

        // Should fail.
        let rejected = [
            "''",
            "'\\o'",
            "'\\x'",
            "'\\x1'",
            "'\\x80'",
            "'\\u'",
            "'\\u{}'",
            "'\\u{110000}'",
        ];
        for input in rejected {
            assert!(parse_peek(&s, char_lit, input).is_err(), "input: {input:?}");
        }
    }

    #[test]
    fn test_str_lit() {
        // The expected literal for `<prefix>"hello"`.
        let hello = |prefix: StrPrefix| StrLit {
            prefix: Some(prefix),
            content: "hello",
            contains_null: false,
            contains_unicode_character: false,
            contains_unicode_escape: false,
            contains_high_ascii: false,
        };
        let s = State::default();

        assert_eq!(
            parse_peek(&s, str_lit, r#"b"hello""#).unwrap(),
            ("", hello(StrPrefix::Binary))
        );
        assert_eq!(
            parse_peek(&s, str_lit, r#"c"hello""#).unwrap(),
            ("", hello(StrPrefix::CLike))
        );
        assert!(parse_peek(&s, str_lit, r#"d"hello""#).is_err());
    }

    #[test]
    fn test_is_rust_keyword() {
        for keyword in ["caller", "super", "become"] {
            assert!(is_rust_keyword(keyword), "{keyword:?}");
        }
        for other in ["supeeeer", "sur"] {
            assert!(!is_rust_keyword(other), "{other:?}");
        }
    }

    #[test]
    fn test_check_base_digits() {
        // (digits, base, whether the digits are valid in that base)
        let cases = [
            ("10", 2, true),
            ("13", 2, false),
            ("13", 8, true),
            ("79", 8, false),
            // Checking that it's case insensitive.
            ("13F", 16, true),
            ("13f", 16, true),
            // Checking that `_` is allowed.
            ("13_f", 16, true),
        ];
        for (digits, base, valid) in cases {
            assert_eq!(
                check_base_digits(digits, base, 0..1).is_ok(),
                valid,
                "digits: {digits:?}, base: {base}",
            );
        }
    }
}