Skip to main content

askama_parser/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![deny(elided_lifetimes_in_paths)]
3#![deny(unreachable_pub)]
4#![allow(clippy::vec_box)] // intentional, less copying
5#![doc = include_str!("../README.md")]
6
7mod ascii_str;
8pub mod expr;
9pub mod node;
10mod target;
11#[cfg(test)]
12mod tests;
13
14use std::borrow::{Borrow, BorrowMut, Cow};
15use std::cell::Cell;
16use std::env::current_dir;
17use std::ops::{Deref, DerefMut, Range};
18use std::path::Path;
19use std::sync::Arc;
20use std::{fmt, str};
21
22use rustc_hash::FxBuildHasher;
23use winnow::ascii::take_escaped;
24use winnow::combinator::{
25    alt, cond, cut_err, delimited, empty, fail, not, opt, peek, preceded, repeat, terminated,
26};
27use winnow::error::ErrMode;
28use winnow::stream::{AsChar, Location, Stream};
29use winnow::token::{any, none_of, one_of, take, take_while};
30use winnow::{LocatingSlice, ModalParser, ModalResult, Parser, Stateful};
31
32use crate::ascii_str::{AsciiChar, AsciiStr};
33pub use crate::expr::{AssociatedItem, Expr, Filter, PathComponent, TyGenerics};
34pub use crate::node::Node;
35pub use crate::target::{NamedTarget, Target};
36
37mod _parsed {
38    use std::path::Path;
39    use std::sync::Arc;
40    use std::{fmt, mem};
41
42    use super::node::Node;
43    use super::{Ast, ParseError, Syntax};
44
45    pub struct Parsed {
46        // `source` must outlive `ast`, so `ast` must be declared before `source`
47        ast: Ast<'static>,
48        #[allow(dead_code)]
49        source: Arc<str>,
50    }
51
52    impl Parsed {
53        /// If `file_path` is `None`, it means the `source` is an inline template. Therefore, if
54        /// a parsing error occurs, we won't display the path as it wouldn't be useful.
55        pub fn new(
56            source: Arc<str>,
57            file_path: Option<Arc<Path>>,
58            syntax: &Syntax<'_>,
59        ) -> Result<Self, ParseError> {
60            // Self-referential borrowing: `self` will keep the source alive as `String`,
61            // internally we will transmute it to `&'static str` to satisfy the compiler.
62            // However, we only expose the nodes with a lifetime limited to `self`.
63            let src = unsafe { mem::transmute::<&str, &'static str>(source.as_ref()) };
64            let ast = Ast::from_str(src, file_path, syntax)?;
65            Ok(Self { ast, source })
66        }
67
68        // The return value's lifetime must be limited to `self` to uphold the unsafe invariant.
69        #[must_use]
70        pub fn nodes(&self) -> &[Box<Node<'_>>] {
71            &self.ast.nodes
72        }
73
74        #[must_use]
75        pub fn source(&self) -> &str {
76            &self.source
77        }
78    }
79
80    impl fmt::Debug for Parsed {
81        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
82            f.debug_struct("Parsed")
83                .field("nodes", &self.ast.nodes)
84                .finish_non_exhaustive()
85        }
86    }
87
88    impl PartialEq for Parsed {
89        fn eq(&self, other: &Self) -> bool {
90            self.ast.nodes == other.ast.nodes
91        }
92    }
93
94    impl Default for Parsed {
95        fn default() -> Self {
96            Self {
97                ast: Ast::default(),
98                source: "".into(),
99            }
100        }
101    }
102}
103
104pub use _parsed::Parsed;
105
106type InputStream<'a, 'l> = Stateful<LocatingSlice<&'a str>, &'l State<'l>>;
107
108#[derive(Debug, Default)]
109pub struct Ast<'a> {
110    nodes: Vec<Box<Node<'a>>>,
111}
112
113impl<'a> Ast<'a> {
114    /// If `file_path` is `None`, it means the `source` is an inline template. Therefore, if
115    /// a parsing error occurs, we won't display the path as it wouldn't be useful.
116    pub fn from_str(
117        src: &'a str,
118        file_path: Option<Arc<Path>>,
119        syntax: &Syntax<'_>,
120    ) -> Result<Ast<'a>, ParseError> {
121        let state = State {
122            syntax: *syntax,
123            ..State::default()
124        };
125        let mut src = InputStream {
126            input: LocatingSlice::new(src),
127            state: &state,
128        };
129        match Node::parse_template(&mut src) {
130            Ok(nodes) if src.is_empty() => Ok(Self { nodes }),
131            Ok(_) | Err(ErrMode::Incomplete(_)) => unreachable!(),
132            Err(
133                ErrMode::Backtrack(ErrorContext { span, message, .. })
134                | ErrMode::Cut(ErrorContext { span, message, .. }),
135            ) => Err(ParseError {
136                message,
137                offset: span.start,
138                file_path,
139            }),
140        }
141    }
142
143    #[must_use]
144    pub fn nodes(&self) -> &[Box<Node<'a>>] {
145        &self.nodes
146    }
147}
148
149#[derive(Clone, Copy)]
150/// Struct used to wrap types with their associated "span" which is used when generating errors
151/// in the code generation.
152#[repr(C)] // rationale: `WithSpan<Box<T>` needs to have the same layout as `WithSpan<&T>`.
153pub struct WithSpan<T> {
154    inner: T,
155    span: Span,
156}
157
/// A location in `&'a str`, stored as the byte range `start..end`.
///
/// A `start` of `usize::MAX` marks the absence of a location, see [`Span::no_span()`].
#[derive(Clone, Copy, Debug)]
pub struct Span {
    start: usize,
    end: usize,
}
164
165impl Default for Span {
166    #[inline]
167    fn default() -> Self {
168        Self::no_span()
169    }
170}
171
172impl From<&InputStream<'_, '_>> for Span {
173    #[inline]
174    fn from(i: &InputStream<'_, '_>) -> Self {
175        (*i).into()
176    }
177}
178
179impl From<InputStream<'_, '_>> for Span {
180    #[inline]
181    fn from(mut i: InputStream<'_, '_>) -> Self {
182        let start = i.current_token_start();
183        i.finish();
184        Self {
185            start,
186            end: i.current_token_start(),
187        }
188    }
189}
190
191impl From<Range<usize>> for Span {
192    #[inline]
193    #[track_caller]
194    fn from(range: Range<usize>) -> Self {
195        Span::new(range)
196    }
197}
198
199impl Span {
200    #[inline]
201    pub const fn no_span() -> Span {
202        Self {
203            start: usize::MAX,
204            end: usize::MAX,
205        }
206    }
207
208    #[inline]
209    #[track_caller]
210    pub fn new(range: Range<usize>) -> Self {
211        let Range { start, end } = range;
212        debug_assert!(start <= end);
213        Span { start, end }
214    }
215
216    #[inline]
217    pub fn byte_range(self) -> Option<Range<usize>> {
218        (self.start != usize::MAX).then_some(self.start..self.end)
219    }
220
221    /// Returns an empty [`Span`] that points to the start of `self`.
222    #[inline]
223    pub fn start(self) -> Self {
224        Self {
225            start: self.start,
226            end: self.start,
227        }
228    }
229
230    /// Returns an empty [`Span`] that points to the end of `self`.
231    #[inline]
232    pub fn end(self) -> Self {
233        Self {
234            start: self.end,
235            end: self.end,
236        }
237    }
238
239    /// Splits `self` at `mid` into two spanned strings.
240    #[track_caller]
241    pub fn split_at(self, mid: usize) -> (Self, Self) {
242        let Some(Range { start, end }) = self.byte_range() else {
243            return (self, self);
244        };
245
246        let mid = start.checked_add(mid).unwrap();
247        assert!(mid <= end);
248
249        let start = Self { start, end: mid };
250        let end = Self { start: mid, end };
251        (start, end)
252    }
253
254    /// The substring in `source` contained in [`self.byte_range()`][Self::byte_range].
255    #[inline]
256    pub fn as_infix_of<'a>(&self, source: &'a str) -> Option<&'a str> {
257        self.byte_range().and_then(|range| source.get(range))
258    }
259
260    /// The substring in `source` starting from `self.start`.
261    #[inline]
262    pub fn as_suffix_of<'a>(&self, source: &'a str) -> Option<&'a str> {
263        // No need to check if `self.start != usize::MAX`:
264        // `source` cannot be longer than `isize::MAX`, cf. [`std::alloc`].
265        source.get(self.start..)
266    }
267
268    pub fn is_overlapping(&self, other: Span) -> bool {
269        (self.start < other.end) & (other.start < self.end)
270    }
271}
272
273impl<T> WithSpan<T> {
274    #[inline]
275    #[track_caller]
276    pub fn new(inner: T, span: impl Into<Span>) -> Self {
277        Self {
278            inner,
279            span: span.into(),
280        }
281    }
282
283    #[inline]
284    pub const fn no_span(inner: T) -> Self {
285        Self {
286            inner,
287            span: Span::no_span(),
288        }
289    }
290
291    #[inline]
292    pub fn span(&self) -> Span {
293        self.span
294    }
295
296    #[inline]
297    pub fn deconstruct(self) -> (T, Span) {
298        let Self { inner, span } = self;
299        (inner, span)
300    }
301}
302
303impl WithSpan<&str> {
304    /// Returns an empty [`Span`] that points to the start of the contained string.
305    #[inline]
306    pub fn start(self) -> Self {
307        let (inner, span) = self.deconstruct();
308        Self::new(&inner[..0], span.start())
309    }
310
311    /// Returns an empty [`Span`] that points to the end of the contained string.
312    #[inline]
313    pub fn end(self) -> Self {
314        let (inner, span) = self.deconstruct();
315        Self::new(&inner[inner.len()..], span.end())
316    }
317
318    /// Splits `self` at `mid` into two spanned strings.
319    #[track_caller]
320    pub fn split_at(self, mid: usize) -> (Self, Self) {
321        let (inner, span) = self.deconstruct();
322        let (front, back) = inner.split_at(mid);
323        let (front_span, back_span) = span.split_at(mid);
324        (Self::new(front, front_span), Self::new(back, back_span))
325    }
326}
327
328impl<T> Deref for WithSpan<T> {
329    type Target = T;
330
331    #[inline]
332    fn deref(&self) -> &Self::Target {
333        &self.inner
334    }
335}
336
337impl<T> DerefMut for WithSpan<T> {
338    #[inline]
339    fn deref_mut(&mut self) -> &mut Self::Target {
340        &mut self.inner
341    }
342}
343
344impl<T: fmt::Debug> fmt::Debug for WithSpan<T> {
345    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
346        self.inner.fmt(f)
347    }
348}
349
350impl<T: PartialEq, R: AsRef<T>> PartialEq<R> for WithSpan<T> {
351    #[inline]
352    fn eq(&self, other: &R) -> bool {
353        // We never want to compare the span information.
354        self.inner == *other.as_ref()
355    }
356}
357
358impl<T: PartialOrd, R: AsRef<T>> PartialOrd<R> for WithSpan<T> {
359    #[inline]
360    fn partial_cmp(&self, other: &R) -> Option<std::cmp::Ordering> {
361        self.inner.partial_cmp(other.as_ref())
362    }
363}
364
365impl<T: Eq> Eq for WithSpan<T> {}
366
367impl<T: Ord> Ord for WithSpan<T> {
368    #[inline]
369    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
370        self.inner.cmp(&other.inner)
371    }
372}
373
374impl<T: std::hash::Hash> std::hash::Hash for WithSpan<T> {
375    #[inline]
376    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
377        self.inner.hash(state);
378    }
379}
380
381impl<T> AsRef<T> for WithSpan<T> {
382    #[inline]
383    fn as_ref(&self) -> &T {
384        &self.inner
385    }
386}
387
388impl<T> Borrow<T> for WithSpan<T> {
389    #[inline]
390    fn borrow(&self) -> &T {
391        &self.inner
392    }
393}
394
395impl<T> BorrowMut<T> for WithSpan<T> {
396    #[inline]
397    fn borrow_mut(&mut self) -> &mut T {
398        &mut self.inner
399    }
400}
401
/// The error returned when a template could not be parsed.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseError {
    /// An optional description of the problem.
    pub message: Option<Cow<'static, str>>,
    /// The start of the span at which the error was reported.
    pub offset: usize,
    /// The path of the template file, or `None` for inline templates.
    pub file_path: Option<Arc<Path>>,
}
408
409impl std::error::Error for ParseError {}
410
411impl fmt::Display for ParseError {
412    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
413        let ParseError {
414            message,
415            file_path,
416            offset,
417        } = self;
418
419        if let Some(message) = message {
420            writeln!(f, "{message}")?;
421        }
422
423        let path = file_path
424            .as_ref()
425            .and_then(|path| Some(strip_common(&current_dir().ok()?, path)));
426        match path {
427            Some(path) => write!(f, "failed to parse template source\n  --> {path}@{offset}"),
428            None => write!(f, "failed to parse template source near offset {offset}"),
429        }
430    }
431}
432
433pub(crate) type ParseErr<'a> = ErrMode<ErrorContext>;
434pub(crate) type ParseResult<'a, T = &'a str> = Result<T, ParseErr<'a>>;
435
436/// This type is used to handle `nom` errors and in particular to add custom error messages.
437/// It used to generate `ParserError`.
438///
439/// It cannot be used to replace `ParseError` because it expects a generic, which would make
440/// `askama`'s users experience less good (since this generic is only needed for `nom`).
441#[derive(Debug)]
442pub(crate) struct ErrorContext {
443    pub(crate) span: Span,
444    pub(crate) message: Option<Cow<'static, str>>,
445}
446
447impl ErrorContext {
448    #[cold]
449    fn unclosed(kind: &str, tag: &str, span: impl Into<Span>) -> Self {
450        Self {
451            span: span.into(),
452            message: Some(format!("unclosed {kind}, missing {tag:?}").into()),
453        }
454    }
455
456    #[cold]
457    #[inline]
458    fn new(message: impl Into<Cow<'static, str>>, span: impl Into<Span>) -> Self {
459        Self {
460            span: span.into(),
461            message: Some(message.into()),
462        }
463    }
464
465    #[inline]
466    fn backtrack(self) -> ErrMode<Self> {
467        ErrMode::Backtrack(self)
468    }
469
470    #[inline]
471    fn cut(self) -> ErrMode<Self> {
472        ErrMode::Cut(self)
473    }
474}
475
476impl<'a: 'l, 'l> winnow::error::ParserError<InputStream<'a, 'l>> for ErrorContext {
477    type Inner = Self;
478
479    #[inline]
480    fn from_input(input: &InputStream<'a, 'l>) -> Self {
481        Self {
482            span: input.into(),
483            message: None,
484        }
485    }
486
487    #[inline(always)]
488    fn into_inner(self) -> Result<Self::Inner, Self> {
489        Ok(self)
490    }
491}
492
493fn skip_ws0<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
494    take_while(0.., |c: char| c.is_ascii_whitespace())
495        .void()
496        .parse_next(i)
497}
498
499fn skip_ws1<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
500    take_while(1.., |c: char| c.is_ascii_whitespace())
501        .void()
502        .parse_next(i)
503}
504
505fn ws<'a: 'l, 'l, O>(
506    inner: impl ModalParser<InputStream<'a, 'l>, O, ErrorContext>,
507) -> impl ModalParser<InputStream<'a, 'l>, O, ErrorContext> {
508    delimited(skip_ws0, inner, skip_ws0)
509}
510
511fn keyword<'a: 'l, 'l>(k: &str) -> impl ModalParser<InputStream<'a, 'l>, &'a str, ErrorContext> {
512    identifier.verify(move |v: &str| v == k)
513}
514
515fn identifier<'a: 'l, 'l>(input: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
516    let head = any.verify(|&c| c == '_' || unicode_ident::is_xid_start(c));
517    let tail = take_while(.., unicode_ident::is_xid_continue);
518    (head, tail).take().parse_next(input)
519}
520
521fn bool_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
522    alt((keyword("false"), keyword("true"))).parse_next(i)
523}
524
525#[derive(Debug, Clone, Copy, PartialEq)]
526pub enum Num<'a> {
527    Int(&'a str, Option<IntKind>),
528    Float(&'a str, Option<FloatKind>),
529}
530
531fn check_base_digits<'a>(digits: &'a str, base: u32, span: Range<usize>) -> ParseResult<'a, ()> {
532    let allowed_digits: &[char] = match base {
533        2 => &['0', '1'],
534        8 => &['0', '1', '2', '3', '4', '5', '6', '7'],
535        16 => &[
536            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
537        ],
538        _ => panic!("unsupported base `{base}`"),
539    };
540
541    for digit in digits.chars() {
542        let lower = digit.to_ascii_lowercase();
543        if lower != '_' && !allowed_digits.iter().any(|c| *c == digit || *c == lower) {
544            let allowed = allowed_digits.iter().collect::<String>();
545            let base = match base {
546                2 => 'b',
547                8 => 'o',
548                16 => 'x',
549                _ => unreachable!(),
550            };
551            return cut_error!(
552                format!("only expected `{allowed}` digits for `0{base}` integers, found `{digit}`"),
553                span,
554            );
555        }
556    }
557    Ok(())
558}
559
560fn num_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, Num<'a>> {
561    fn num_lit_suffix<'a: 'l, 'l, T: Copy>(
562        kind: &'a str,
563        list: &[(&str, T)],
564        i: &mut InputStream<'a, 'l>,
565    ) -> ParseResult<'a, T> {
566        let (suffix, span) = identifier.with_span().parse_next(i)?;
567        if let Some(value) = list
568            .iter()
569            .copied()
570            .find_map(|(name, value)| (name == suffix).then_some(value))
571        {
572            Ok(value)
573        } else {
574            cut_error!(format!("unknown {kind} suffix `{suffix}`"), span)
575        }
576    }
577
578    // Equivalent to <https://github.com/rust-lang/rust/blob/e3f909b2bbd0b10db6f164d466db237c582d3045/compiler/rustc_lexer/src/lib.rs#L587-L620>.
579    let int_with_base = (opt('-'), |i: &mut _| {
580        let ((base, kind), span) = preceded('0', alt(('b'.value(2), 'o'.value(8), 'x'.value(16))))
581            .with_taken()
582            .with_span()
583            .parse_next(i)?;
584        match opt(separated_digits(if base == 16 { base } else { 10 }, false)).parse_next(i)? {
585            Some(digits) => check_base_digits(digits, base, span),
586            None => cut_error!(format!("expected digits after `{kind}`"), span),
587        }
588    });
589
590    // Equivalent to <https://github.com/rust-lang/rust/blob/e3f909b2bbd0b10db6f164d466db237c582d3045/compiler/rustc_lexer/src/lib.rs#L626-L653>:
591    // no `_` directly after the decimal point `.`, or between `e` and `+/-`.
592    let float = |i: &mut InputStream<'a, 'l>| -> ParseResult<'a, ()> {
593        let has_dot = opt(('.', separated_digits(10, true))).parse_next(i)?;
594        let has_exp = opt(|i: &mut _| {
595            let ((kind, op), span) = (one_of(['e', 'E']), opt(one_of(['+', '-'])))
596                .with_span()
597                .parse_next(i)?;
598            match opt(separated_digits(10, op.is_none())).parse_next(i)? {
599                Some(_) => Ok(()),
600                None => {
601                    cut_error!(
602                        format!("expected decimal digits, `+` or `-` after exponent `{kind}`"),
603                        span,
604                    )
605                }
606            }
607        })
608        .parse_next(i)?;
609        match (has_dot, has_exp) {
610            (Some(_), _) | (_, Some(())) => Ok(()),
611            _ => fail(i),
612        }
613    };
614
615    let num = if let Some(num) = opt(int_with_base.take()).parse_next(i)? {
616        let suffix = opt(|i: &mut _| num_lit_suffix("integer", INTEGER_TYPES, i)).parse_next(i)?;
617        Num::Int(num, suffix)
618    } else {
619        let (float, num) = preceded((opt('-'), separated_digits(10, true)), opt(float))
620            .with_taken()
621            .parse_next(i)?;
622        if float.is_some() {
623            let suffix = opt(|i: &mut _| num_lit_suffix("float", FLOAT_TYPES, i)).parse_next(i)?;
624            Num::Float(num, suffix)
625        } else {
626            let suffix = opt(|i: &mut _| num_lit_suffix("number", NUM_TYPES, i)).parse_next(i)?;
627            match suffix {
628                Some(NumKind::Int(kind)) => Num::Int(num, Some(kind)),
629                Some(NumKind::Float(kind)) => Num::Float(num, Some(kind)),
630                None => Num::Int(num, None),
631            }
632        }
633    };
634    Ok(num)
635}
636
637/// Underscore separated digits of the given base, unless `start` is true this may start
638/// with an underscore.
639fn separated_digits<'a: 'l, 'l>(
640    radix: u32,
641    start: bool,
642) -> impl ModalParser<InputStream<'a, 'l>, &'a str, ErrorContext> {
643    (
644        cond(!start, repeat(0.., '_').map(|()| ())),
645        one_of(move |ch: char| ch.is_digit(radix)),
646        repeat(0.., one_of(move |ch: char| ch == '_' || ch.is_digit(radix))).map(|()| ()),
647    )
648        .take()
649}
650
/// The prefix of a prefixed string literal.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum StrPrefix {
    /// `b"..."`
    Binary,
    /// `c"..."`
    CLike,
}

impl StrPrefix {
    /// The character that is written in front of the string literal.
    #[must_use]
    pub fn to_char(self) -> char {
        match self {
            StrPrefix::CLike => 'c',
            StrPrefix::Binary => 'b',
        }
    }
}

impl fmt::Display for StrPrefix {
    /// Writes the prefix character, see [`StrPrefix::to_char()`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let prefix = self.to_char();
        fmt::Write::write_char(f, prefix)
    }
}
674
675#[derive(Clone, Debug, PartialEq)]
676pub struct StrLit<'a> {
677    /// the unparsed (but validated) content
678    pub content: &'a str,
679    /// whether the string literal is unprefixed, a cstring or binary slice
680    pub prefix: Option<StrPrefix>,
681    /// contains a NUL character, either escaped `'\0'` or the very characters;
682    /// not allowed in cstring literals
683    pub contains_null: bool,
684    /// contains a non-ASCII character, either as `\u{123456}` or as an unescaped character;
685    /// not allowed in binary slices
686    pub contains_unicode_character: bool,
687    /// contains unicode escape sequences like `\u{12}` (regardless of its range);
688    /// not allowed in binary slices
689    pub contains_unicode_escape: bool,
690    /// contains a non-ASCII range escape sequence like `\x80`;
691    /// not allowed in unprefix strings
692    pub contains_high_ascii: bool,
693}
694
695fn str_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, StrLit<'a>> {
696    // <https://doc.rust-lang.org/reference/tokens.html#r-lex.token.literal.str.syntax>
697
698    fn inner<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, StrLit<'a>> {
699        #[derive(Debug, Clone, PartialEq, Eq)]
700        enum Sequence<'a> {
701            Text(&'a str),
702            Close,
703            Escape,
704            CrLf,
705            Cr(Range<usize>),
706        }
707
708        let mut contains_null = false;
709        let mut contains_unicode_character = false;
710        let mut contains_unicode_escape = false;
711        let mut contains_high_ascii = false;
712
713        while !i.is_empty() {
714            let seq = alt((
715                repeat::<_, _, (), _, _>(1.., none_of(['\r', '\\', '"']))
716                    .take()
717                    .map(Sequence::Text),
718                ('\r'.span(), opt('\n')).map(|(span, has_lf)| match has_lf {
719                    Some(_) => Sequence::CrLf,
720                    None => Sequence::Cr(span),
721                }),
722                '\\'.value(Sequence::Escape),
723                peek('"').value(Sequence::Close),
724            ))
725            .parse_next(i)?;
726
727            match seq {
728                Sequence::Text(s) => {
729                    contains_unicode_character =
730                        contains_unicode_character || s.bytes().any(|c: u8| !c.is_ascii());
731                    contains_null = contains_null || s.bytes().any(|c: u8| c == 0);
732                    continue;
733                }
734                Sequence::CrLf => continue,
735                Sequence::Cr(span) => {
736                    return cut_error!(
737                        "a bare CR (Mac linebreak) is not allowed in string literals, \
738                        use NL (Unix linebreak) or CRNL (Windows linebreak) instead, \
739                        or type `\\r` explicitly",
740                        span,
741                    );
742                }
743                Sequence::Close => break,
744                Sequence::Escape => {}
745            }
746
747            match any.parse_next(i)? {
748                '\'' | '"' | 'n' | 'r' | 't' | '\\' => continue,
749                '0' => {
750                    contains_null = true;
751                    continue;
752                }
753                'x' => {
754                    let code = take_while(2, AsChar::is_hex_digit).parse_next(i)?;
755                    match u8::from_str_radix(code, 16).unwrap() {
756                        0 => contains_null = true,
757                        128.. => contains_high_ascii = true,
758                        _ => {}
759                    }
760                }
761                'u' => {
762                    contains_unicode_escape = true;
763                    let (code, span) = delimited('{', take_while(1..=6, AsChar::is_hex_digit), '}')
764                        .with_span()
765                        .parse_next(i)?;
766                    match u32::from_str_radix(code, 16).unwrap() {
767                        0 => contains_null = true,
768                        0xd800..0xe000 => {
769                            return cut_error!("unicode escape must not be a surrogate", span);
770                        }
771                        0x110000.. => {
772                            return cut_error!("unicode escape must be at most 10FFFF", span);
773                        }
774                        128.. => contains_unicode_character = true,
775                        _ => {}
776                    }
777                }
778                _ => return fail(i),
779            }
780        }
781
782        Ok(StrLit {
783            content: "",
784            prefix: None,
785            contains_null,
786            contains_unicode_character,
787            contains_unicode_escape,
788            contains_high_ascii,
789        })
790    }
791
792    let ((prefix, lit), span) = (
793        terminated(
794            opt(alt((
795                'b'.value(StrPrefix::Binary),
796                'c'.value(StrPrefix::CLike),
797            ))),
798            '"',
799        ),
800        opt(terminated(inner.with_taken(), '"')),
801    )
802        .with_span()
803        .parse_next(i)?;
804
805    let Some((mut lit, content)) = lit else {
806        return cut_error!("unclosed or broken string", span);
807    };
808    lit.content = content;
809    lit.prefix = prefix;
810
811    let msg = match prefix {
812        Some(StrPrefix::Binary) => {
813            if lit.contains_unicode_character {
814                Some("non-ASCII character in byte string literal")
815            } else if lit.contains_unicode_escape {
816                Some("unicode escape in byte string")
817            } else {
818                None
819            }
820        }
821        Some(StrPrefix::CLike) => lit
822            .contains_null
823            .then_some("null characters in C string literals are not supported"),
824        None => lit.contains_high_ascii.then_some("out of range hex escape"),
825    };
826    if let Some(msg) = msg {
827        return cut_error!(msg, span);
828    }
829
830    not_suffix_with_hash(i)?;
831    Ok(lit)
832}
833
834fn not_suffix_with_hash<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
835    if let Some(span) = opt(identifier.span()).parse_next(i)? {
836        return cut_error!(
837            "you are missing a space to separate two string literals",
838            span,
839        );
840    }
841    Ok(())
842}
843
844fn str_lit_without_prefix<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
845    let (lit, span) = str_lit.with_span().parse_next(i)?;
846
847    let kind = match lit.prefix {
848        Some(StrPrefix::Binary) => Some("binary slice"),
849        Some(StrPrefix::CLike) => Some("cstring"),
850        None => None,
851    };
852    if let Some(kind) = kind {
853        return cut_error!(
854            format!("expected an unprefixed normal string, not a {kind}"),
855            span,
856        );
857    }
858
859    Ok(lit.content)
860}
861
/// The prefix of a prefixed character literal.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CharPrefix {
    /// `b'...'`
    Binary,
}
866
867#[derive(Clone, Debug, PartialEq)]
868pub struct CharLit<'a> {
869    pub prefix: Option<CharPrefix>,
870    pub content: &'a str,
871}
872
873// Information about allowed character escapes is available at:
874// <https://doc.rust-lang.org/reference/tokens.html#character-literals>.
875fn char_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, CharLit<'a>> {
876    let ((prefix, _, content, is_closed), span) = (
877        alt(('b'.value(Some(CharPrefix::Binary)), empty.value(None))),
878        '\'',
879        opt(take_escaped(none_of(['\\', '\'']), '\\', any)),
880        opt('\''),
881    )
882        .with_span()
883        .parse_next(i)?;
884
885    if is_closed.is_none() {
886        if let Some(prefix) = prefix {
887            return cut_error!(
888                match prefix {
889                    CharPrefix::Binary => "unterminated byte literal",
890                },
891                span,
892            );
893        } else {
894            return fail(i);
895        }
896    }
897
898    let content = match content.unwrap_or_default() {
899        "" => {
900            return cut_error!(
901                match prefix {
902                    Some(CharPrefix::Binary) => "empty byte literal",
903                    None => "empty character literal",
904                },
905                span,
906            );
907        }
908        content => content,
909    };
910
911    let mut content_i = content;
912    let Ok(c) = Char::parse(&mut content_i) else {
913        return cut_error!("invalid character", span);
914    };
915    if !content_i.is_empty() {
916        let (c, s) = match prefix {
917            Some(CharPrefix::Binary) => ("byte", "binary string"),
918            None => ("character", "string"),
919        };
920        return cut_error!(
921            format!(
922                "cannot have multiple characters in a {c} literal, use `{}\"...\"` to write a {s}",
923                match prefix {
924                    Some(CharPrefix::Binary) => "b",
925                    None => "",
926                }
927            ),
928            span,
929        );
930    }
931
932    let (nb, max_value, err1, err2) = match c {
933        Char::Literal(c) | Char::Escaped(c) => match prefix {
934            Some(CharPrefix::Binary) if !c.is_ascii() => {
935                return cut_error!("non-ASCII character in byte literal", span);
936            }
937            _ => return Ok(CharLit { prefix, content }),
938        },
939        Char::AsciiEscape(nb) => (
940            nb,
941            // `0x7F` is the maximum value for a `\x` escaped character.
942            0x7F,
943            "invalid character in ascii escape",
944            "must be a character in the range [\\x00-\\x7f]",
945        ),
946        Char::UnicodeEscape(nb) => {
947            match prefix {
948                Some(CharPrefix::Binary) => {
949                    return cut_error!(
950                        "cannot use unicode escape in byte string in byte literal",
951                        span,
952                    );
953                }
954                None => (
955                    nb,
956                    // `0x10FFFF` is the maximum value for a `\u` escaped character.
957                    0x0010_FFFF,
958                    "invalid character in unicode escape",
959                    "unicode escape must be at most 10FFFF",
960                ),
961            }
962        }
963    };
964
965    let Ok(nb) = u32::from_str_radix(nb, 16) else {
966        return cut_error!(err1, span);
967    };
968    if nb > max_value {
969        return cut_error!(err2, span);
970    }
971
972    Ok(CharLit { prefix, content })
973}
974
/// The syntactic forms a single character inside a char literal can take.
#[derive(Clone, Copy)]
enum Char<'a> {
    /// A plain character written as-is, without a leading backslash.
    Literal(char),
    /// A simple backslash escape (like `\n`); holds the character it stands for and needs no
    /// further validation.
    Escaped(char),
    /// An ascii escape (like `\x12`); holds the hexadecimal digits after `\x`.
    AsciiEscape(&'a str),
    /// A unicode escape (like `\u{12}`); holds the hexadecimal digits between the braces.
    UnicodeEscape(&'a str),
}
987
988impl<'a> Char<'a> {
989    fn parse(i: &mut &'a str) -> ModalResult<Self, ()> {
990        let unescaped = none_of(('\\', '\'')).map(Self::Literal);
991        let escaped = preceded(
992            '\\',
993            alt((
994                'n'.value(Self::Escaped('\n')),
995                'r'.value(Self::Escaped('\r')),
996                't'.value(Self::Escaped('\t')),
997                '\\'.value(Self::Escaped('\\')),
998                '0'.value(Self::Escaped('\0')),
999                '\''.value(Self::Escaped('\'')),
1000                // Not useful but supported by rust.
1001                '"'.value(Self::Escaped('"')),
1002                ('x', take_while(2, |c: char| c.is_ascii_hexdigit()))
1003                    .map(|(_, s)| Self::AsciiEscape(s)),
1004                (
1005                    "u{",
1006                    take_while(1..=6, |c: char| c.is_ascii_hexdigit()),
1007                    '}',
1008                )
1009                    .map(|(_, s, _)| Self::UnicodeEscape(s)),
1010            )),
1011        );
1012        alt((unescaped, escaped)).parse_next(i)
1013    }
1014}
1015
1016#[derive(Clone, Debug, PartialEq)]
1017pub enum PathOrIdentifier<'a> {
1018    Path(Vec<PathComponent<'a>>),
1019    Identifier(WithSpan<&'a str>),
1020}
1021
1022fn path_or_identifier<'a: 'l, 'l>(
1023    i: &mut InputStream<'a, 'l>,
1024) -> ParseResult<'a, PathOrIdentifier<'a>> {
1025    let mut p = |i: &mut _| {
1026        let root = ws(opt(terminated(empty.span(), "::")));
1027        let start = PathComponent::parse;
1028        let tail = opt(repeat(1.., preceded(ws("::"), PathComponent::parse)).map(|v: Vec<_>| v));
1029
1030        let (root, start, rest) = (root, start, tail).parse_next(i)?;
1031        Ok((root, start, rest.unwrap_or_default()))
1032    };
1033    let (root, start, rest) = p.parse_next(i)?;
1034
1035    // The returned identifier can be assumed to be path if:
1036    // - it is an absolute path (starts with `::`), or
1037    // - it has multiple components (at least one `::`), or
1038    // - the first letter is uppercase
1039    match (root, start, rest) {
1040        (None, arg, tail)
1041            if tail.is_empty()
1042                && arg.generics.is_none()
1043                && arg
1044                    .name
1045                    .chars()
1046                    .next()
1047                    .is_none_or(|c| c == '_' || c.is_lowercase()) =>
1048        {
1049            Ok(PathOrIdentifier::Identifier(arg.name))
1050        }
1051        (root, start, tail) => {
1052            let mut path = if let Some(root) = root {
1053                let mut path = Vec::with_capacity(2 + tail.len());
1054                path.push(PathComponent {
1055                    name: WithSpan::new("", root),
1056                    generics: None,
1057                });
1058                path
1059            } else {
1060                Vec::with_capacity(1 + tail.len())
1061            };
1062            path.push(start);
1063            path.extend(tail);
1064            Ok(PathOrIdentifier::Path(path))
1065        }
1066    }
1067}
1068
1069#[derive(Debug, Clone, Default)]
1070struct State<'a> {
1071    syntax: Syntax<'a>,
1072    loop_depth: Cell<usize>,
1073    level: Level,
1074}
1075
1076fn block_start<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1077    i.state.syntax.block_start.void().parse_next(i)
1078}
1079
1080fn block_end<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1081    let (control, span) = alt((
1082        i.state.syntax.block_end.value(None),
1083        peek(delimited('%', alt(('-', '~', '+')).map(Some), '}')),
1084        fail, // rollback on partial matches in the previous line
1085    ))
1086    .with_span()
1087    .parse_next(i)?;
1088
1089    let Some(control) = control else {
1090        return Ok(());
1091    };
1092
1093    let err = ErrorContext::new(
1094        format!(
1095            "unclosed block, you likely meant to apply whitespace control: \"{}{}\"",
1096            control.escape_default(),
1097            i.state.syntax.block_end.escape_default(),
1098        ),
1099        span,
1100    );
1101    Err(err.backtrack())
1102}
1103
1104fn expr_start<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1105    i.state.syntax.expr_start.void().parse_next(i)
1106}
1107
1108fn expr_end<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1109    i.state.syntax.expr_end.void().parse_next(i)
1110}
1111
1112impl State<'_> {
1113    fn enter_loop(&self) {
1114        self.loop_depth.set(self.loop_depth.get() + 1);
1115    }
1116
1117    fn leave_loop(&self) {
1118        self.loop_depth.set(self.loop_depth.get() - 1);
1119    }
1120
1121    fn is_in_loop(&self) -> bool {
1122        self.loop_depth.get() > 0
1123    }
1124}
1125
1126#[derive(Default, Hash, PartialEq, Clone, Copy)]
1127pub struct Syntax<'a>(InnerSyntax<'a>);
1128
// Wrapped by `Syntax`, so that the fields stay readable but are not writable through it.
/// The individual delimiters of a template syntax.
#[derive(Clone, Copy, Hash, PartialEq)]
pub struct InnerSyntax<'a> {
    /// Opens a block (`{%` by default).
    pub block_start: &'a str,
    /// Closes a block (`%}` by default).
    pub block_end: &'a str,
    /// Opens an expression (`{{` by default).
    pub expr_start: &'a str,
    /// Closes an expression (`}}` by default).
    pub expr_end: &'a str,
    /// Opens a comment (`{#` by default).
    pub comment_start: &'a str,
    /// Closes a comment (`#}` by default).
    pub comment_end: &'a str,
}
1139
1140impl<'a> Deref for Syntax<'a> {
1141    type Target = InnerSyntax<'a>;
1142
1143    #[inline]
1144    fn deref(&self) -> &Self::Target {
1145        &self.0
1146    }
1147}
1148
1149impl Default for InnerSyntax<'static> {
1150    fn default() -> Self {
1151        Self {
1152            block_start: "{%",
1153            block_end: "%}",
1154            expr_start: "{{",
1155            expr_end: "}}",
1156            comment_start: "{#",
1157            comment_end: "#}",
1158        }
1159    }
1160}
1161
1162impl fmt::Debug for Syntax<'_> {
1163    #[inline]
1164    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1165        fmt_syntax("Syntax", self, f)
1166    }
1167}
1168
1169impl fmt::Debug for InnerSyntax<'_> {
1170    #[inline]
1171    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1172        fmt_syntax("InnerSyntax", self, f)
1173    }
1174}
1175
1176fn fmt_syntax(name: &str, inner: &InnerSyntax<'_>, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1177    f.debug_struct(name)
1178        .field("block_start", &inner.block_start)
1179        .field("block_end", &inner.block_end)
1180        .field("expr_start", &inner.expr_start)
1181        .field("expr_end", &inner.expr_end)
1182        .field("comment_start", &inner.comment_start)
1183        .field("comment_end", &inner.comment_end)
1184        .finish()
1185}
1186
/// User-supplied description of a custom syntax.
///
/// Every delimiter is optional; [`SyntaxBuilder::to_syntax()`] fills unset delimiters with the
/// defaults and validates the result.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq)]
#[cfg_attr(feature = "config", derive(serde_derive::Deserialize))]
pub struct SyntaxBuilder<'a> {
    /// Name of the syntax.
    pub name: &'a str,
    /// Custom block opening delimiter, if any.
    pub block_start: Option<&'a str>,
    /// Custom block closing delimiter, if any.
    pub block_end: Option<&'a str>,
    /// Custom expression opening delimiter, if any.
    pub expr_start: Option<&'a str>,
    /// Custom expression closing delimiter, if any.
    pub expr_end: Option<&'a str>,
    /// Custom comment opening delimiter, if any.
    pub comment_start: Option<&'a str>,
    /// Custom comment closing delimiter, if any.
    pub comment_end: Option<&'a str>,
}
1198
1199impl<'a> SyntaxBuilder<'a> {
1200    pub fn to_syntax(&self) -> Result<Syntax<'a>, String> {
1201        let default = InnerSyntax::default();
1202        let syntax = Syntax(InnerSyntax {
1203            block_start: self.block_start.unwrap_or(default.block_start),
1204            block_end: self.block_end.unwrap_or(default.block_end),
1205            expr_start: self.expr_start.unwrap_or(default.expr_start),
1206            expr_end: self.expr_end.unwrap_or(default.expr_end),
1207            comment_start: self.comment_start.unwrap_or(default.comment_start),
1208            comment_end: self.comment_end.unwrap_or(default.comment_end),
1209        });
1210
1211        for (s, k, is_closing) in [
1212            (syntax.block_start, "opening block", false),
1213            (syntax.block_end, "closing block", true),
1214            (syntax.expr_start, "opening expression", false),
1215            (syntax.expr_end, "closing expression", true),
1216            (syntax.comment_start, "opening comment", false),
1217            (syntax.comment_end, "closing comment", true),
1218        ] {
1219            if s.len() < 2 {
1220                return Err(format!(
1221                    "delimiters must be at least two characters long. \
1222                    The {k} delimiter ({s:?}) is too short",
1223                ));
1224            } else if s.len() > 32 {
1225                return Err(format!(
1226                    "delimiters must be at most 32 characters long. \
1227                    The {k} delimiter ({:?}...) is too long",
1228                    &s[..(16..=s.len())
1229                        .find(|&i| s.is_char_boundary(i))
1230                        .unwrap_or(s.len())],
1231                ));
1232            } else if s.chars().any(char::is_whitespace) {
1233                return Err(format!(
1234                    "delimiters may not contain white spaces. \
1235                    The {k} delimiter ({s:?}) contains white spaces",
1236                ));
1237            } else if is_closing
1238                && ['(', '-', '+', '~', '.', '>', '<', '&', '|', '!']
1239                    .contains(&s.chars().next().unwrap())
1240            {
1241                return Err(format!(
1242                    "closing delimiters may not start with operators. \
1243                    The {k} delimiter ({s:?}) starts with operator `{}`",
1244                    s.chars().next().unwrap(),
1245                ));
1246            }
1247        }
1248
1249        // likely to cause catastrophic backtracking in the parser
1250        for infix in [
1251            "&", "&&", "&=", "^", "^=", ",", ".", "..", "...", "..=", "=", "==", ">=", ">", "<=",
1252            "<", "-", "-=", "!=", "!", "|", "|=", "||", "%", "%=", "+", "+=", "<<", "<<=", ">>",
1253            ">>=", "/", "/=", "*", "*=",
1254        ] {
1255            match syntax.expr_end.strip_prefix(infix) {
1256                Some("") => {
1257                    return Err(format!(
1258                        "the closing expression delimiter `{}` must not be a string that could be \
1259                        mistaken for a binary operator",
1260                        syntax.expr_end.escape_debug(),
1261                    ));
1262                }
1263                Some(tail) if tail.as_bytes().iter().all(|c| b"&-!*".contains(c)) => {
1264                    return Err(format!(
1265                        "the closing expression delimiter `{}` must not be a string that could be \
1266                        mistaken for a binary operator `{infix}` followed by a (sequence of) \
1267                        prefix operator(s)",
1268                        syntax.expr_end.escape_debug(),
1269                    ));
1270                }
1271                _ => continue,
1272            }
1273        }
1274
1275        for ((s1, k1), (s2, k2)) in [
1276            (
1277                (syntax.block_start, "block"),
1278                (syntax.expr_start, "expression"),
1279            ),
1280            (
1281                (syntax.block_start, "block"),
1282                (syntax.comment_start, "comment"),
1283            ),
1284            (
1285                (syntax.expr_start, "expression"),
1286                (syntax.comment_start, "comment"),
1287            ),
1288        ] {
1289            if s1.starts_with(s2) || s2.starts_with(s1) {
1290                let (s1, k1, s2, k2) = match s1.len() < s2.len() {
1291                    true => (s1, k1, s2, k2),
1292                    false => (s2, k2, s1, k1),
1293                };
1294                return Err(format!(
1295                    "an opening delimiter may not be the prefix of another delimiter. \
1296                    The {k1} delimiter ({s1:?}) clashes with the {k2} delimiter ({s2:?})",
1297                ));
1298            }
1299        }
1300
1301        Ok(syntax)
1302    }
1303}
1304
/// Remaining budget for nesting nodes and expressions.
///
/// The counter starts at [`Level::MAX_DEPTH`] and is decremented on every nesting step. When a
/// step would take it below 0, [`Level::nest()`] / [`LevelGuard::nest()`] return an error
/// instead. One [`Level`] instance is shared across all usages in a [`Parsed::new()`] /
/// [`Ast::from_str()`] call; the counter is interior mutable, so a shared reference suffices.
#[derive(Clone, Debug)]
struct Level(Cell<usize>);
1313
1314impl Default for Level {
1315    #[inline]
1316    fn default() -> Self {
1317        Self(Cell::new(Level::MAX_DEPTH))
1318    }
1319}
1320
1321impl Level {
1322    const MAX_DEPTH: usize = 128;
1323
1324    /// Acquire a [`LevelGuard`] without decrementing the counter, to be used with loops.
1325    fn guard(&self) -> LevelGuard<'_> {
1326        LevelGuard {
1327            level: self,
1328            count: 0,
1329        }
1330    }
1331
1332    /// Decrement the remaining level counter, and return a [`LevelGuard`] that increments it again
1333    /// when it's dropped.
1334    fn nest<'a: 'l, 'l>(&self, i: &InputStream<'a, 'l>) -> ParseResult<'a, LevelGuard<'_>> {
1335        self.nest_multiple(i, 1)
1336    }
1337
1338    /// Decrement the remaining level counter by `count`, and return a [`LevelGuard`] that
1339    /// increments it again when it's dropped.
1340    fn nest_multiple<'a: 'l, 'l>(
1341        &self,
1342        i: &InputStream<'a, 'l>,
1343        count: usize,
1344    ) -> ParseResult<'a, LevelGuard<'_>> {
1345        if let Some(new_level) = self.0.get().checked_sub(count) {
1346            self.0.set(new_level);
1347            Ok(LevelGuard { level: self, count })
1348        } else {
1349            Self::_fail(i)
1350        }
1351    }
1352
1353    #[cold]
1354    #[inline(never)]
1355    fn _fail<'a: 'l, 'l, T>(i: &InputStream<'a, 'l>) -> ParseResult<'a, T> {
1356        let msg = "your template code is too deeply nested, or the last expression is too complex";
1357        Err(ErrorContext::new(msg, i).cut())
1358    }
1359}
1360
1361/// Used to keep track how often [`LevelGuard::nest()`] was called and to re-increment the
1362/// remaining level counter when it is dropped / falls out of scope.
1363#[must_use]
1364#[derive(Debug)]
1365struct LevelGuard<'l> {
1366    level: &'l Level,
1367    count: usize,
1368}
1369
1370impl Drop for LevelGuard<'_> {
1371    fn drop(&mut self) {
1372        self.level.0.set(self.level.0.get() + self.count);
1373    }
1374}
1375
1376impl LevelGuard<'_> {
1377    /// Used to decrement the level multiple times, e.g. for every iteration of a loop.
1378    fn nest<'a: 'l, 'l>(&mut self, i: &InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1379        if let Some(new_level) = self.level.0.get().checked_sub(1) {
1380            self.level.0.set(new_level);
1381            self.count += 1;
1382            Ok(())
1383        } else {
1384            Level::_fail(i)
1385        }
1386    }
1387}
1388
1389fn filter<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, Filter<'a>> {
1390    preceded(('|', not(one_of(['|', '=']))), cut_err(Filter::parse)).parse_next(i)
1391}
1392
/// Returns `path` with the leading components it shares with `base` removed.
///
/// The goal of this function is to reduce the path length based on the `base` argument
/// (generally the path where the program is running into). For example:
///
/// ```text
/// current dir: /a/b/c
/// path:        /a/b/c/d/e.txt
/// ```
///
/// `strip_common` will return `d/e.txt`.
///
/// `path` is canonicalized before the comparison. The path is returned in full (canonicalized
/// if possible, otherwise exactly as given) when:
///
/// - it cannot be canonicalized, e.g. because it does not exist,
/// - it is identical to `base` or an ancestor of `base`, so nothing would be left, or
/// - it has no leading component in common with `base`.
#[must_use]
pub fn strip_common(base: &Path, path: &Path) -> String {
    let Ok(path) = path.canonicalize() else {
        return path.display().to_string();
    };

    // Advance `remaining` past every leading component that also starts `base`.
    let mut remaining = path.components();
    for base_component in base.components() {
        let mut lookahead = remaining.clone();
        match lookahead.next() {
            // `path` ran out first: it is an ancestor of `base`.
            None => return path.display().to_string(),
            Some(component) if component == base_component => remaining = lookahead,
            Some(_) => break,
        }
    }

    // Take the rest as a sub-path of the canonical path instead of re-joining the components
    // with a separator: a root or prefix component already carries its own separator, so
    // joining would produce e.g. `//a/b` when nothing could be stripped.
    let stripped = remaining.as_path();
    if stripped.as_os_str().is_empty() {
        path.display().to_string()
    } else {
        stripped.display().to_string()
    }
}
1430
/// Returns `false` for the names `self`, `Self`, `super` and `crate`, and `true` for anything
/// else.
#[inline]
pub(crate) fn can_be_variable_name(name: &str) -> bool {
    match name {
        "self" | "Self" | "super" | "crate" => false,
        _ => true,
    }
}
1435
/// The primitive integer types, one variant per type.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IntKind {
    /// `i8`
    I8,
    /// `i16`
    I16,
    /// `i32`
    I32,
    /// `i64`
    I64,
    /// `i128`
    I128,
    /// `isize`
    Isize,
    /// `u8`
    U8,
    /// `u16`
    U16,
    /// `u32`
    U32,
    /// `u64`
    U64,
    /// `u128`
    U128,
    /// `usize`
    Usize,
}
1451
/// The primitive floating point types, one variant per type.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FloatKind {
    /// `f16`
    F16,
    /// `f32`
    F32,
    /// `f64`
    F64,
    /// `f128`
    F128,
}
1459
1460#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1461enum NumKind {
1462    Int(IntKind),
1463    Float(FloatKind),
1464}
1465
1466/// Primitive integer types. Also used as number suffixes.
1467const INTEGER_TYPES: &[(&str, IntKind)] = &[
1468    ("i8", IntKind::I8),
1469    ("i16", IntKind::I16),
1470    ("i32", IntKind::I32),
1471    ("i64", IntKind::I64),
1472    ("i128", IntKind::I128),
1473    ("isize", IntKind::Isize),
1474    ("u8", IntKind::U8),
1475    ("u16", IntKind::U16),
1476    ("u32", IntKind::U32),
1477    ("u64", IntKind::U64),
1478    ("u128", IntKind::U128),
1479    ("usize", IntKind::Usize),
1480];
1481
1482/// Primitive floating point types. Also used as number suffixes.
1483const FLOAT_TYPES: &[(&str, FloatKind)] = &[
1484    ("f16", FloatKind::F16),
1485    ("f32", FloatKind::F32),
1486    ("f64", FloatKind::F64),
1487    ("f128", FloatKind::F128),
1488];
1489
1490/// Primitive numeric types. Also used as number suffixes.
1491const NUM_TYPES: &[(&str, NumKind)] = &{
1492    let mut list = [("", NumKind::Int(IntKind::I8)); INTEGER_TYPES.len() + FLOAT_TYPES.len()];
1493    let mut i = 0;
1494    let mut o = 0;
1495    while i < INTEGER_TYPES.len() {
1496        let (name, value) = INTEGER_TYPES[i];
1497        list[o] = (name, NumKind::Int(value));
1498        i += 1;
1499        o += 1;
1500    }
1501    let mut i = 0;
1502    while i < FLOAT_TYPES.len() {
1503        let (name, value) = FLOAT_TYPES[i];
1504        list[o] = (name, NumKind::Float(value));
1505        i += 1;
1506        o += 1;
1507    }
1508    list
1509};
1510
1511/// Complete list of named primitive types.
1512const PRIMITIVE_TYPES: &[&str] = &{
1513    let mut list = [""; NUM_TYPES.len() + 1];
1514    let mut i = 0;
1515    let mut o = 0;
1516    while i < NUM_TYPES.len() {
1517        list[o] = NUM_TYPES[i].0;
1518        i += 1;
1519        o += 1;
1520    }
1521    list[o] = "bool";
1522    list
1523};
1524
/// Length in bytes of the longest entry in `RUST_KEYWORDS`; every entry is stored in an array
/// of this size.
const MAX_RUST_KEYWORD_LEN: usize = 8;
1526
1527const RUST_KEYWORDS: &[&[[AsciiChar; MAX_RUST_KEYWORD_LEN]]; MAX_RUST_KEYWORD_LEN + 1] = &{
1528    const NO_KWS: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[];
1529    const KW2: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
1530        AsciiStr::new_sized("as"),
1531        AsciiStr::new_sized("do"),
1532        AsciiStr::new_sized("fn"),
1533        AsciiStr::new_sized("if"),
1534        AsciiStr::new_sized("in"),
1535    ];
1536    const KW3: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
1537        AsciiStr::new_sized("box"),
1538        AsciiStr::new_sized("dyn"),
1539        AsciiStr::new_sized("for"),
1540        AsciiStr::new_sized("gen"),
1541        AsciiStr::new_sized("let"),
1542        AsciiStr::new_sized("mod"),
1543        AsciiStr::new_sized("mut"),
1544        AsciiStr::new_sized("pub"),
1545        AsciiStr::new_sized("ref"),
1546        AsciiStr::new_sized("try"),
1547        AsciiStr::new_sized("use"),
1548    ];
1549    const KW4: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
1550        AsciiStr::new_sized("else"),
1551        AsciiStr::new_sized("enum"),
1552        AsciiStr::new_sized("impl"),
1553        AsciiStr::new_sized("loop"),
1554        AsciiStr::new_sized("move"),
1555        AsciiStr::new_sized("priv"),
1556        AsciiStr::new_sized("self"),
1557        AsciiStr::new_sized("Self"),
1558        AsciiStr::new_sized("true"),
1559        AsciiStr::new_sized("type"),
1560    ];
1561    const KW5: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
1562        AsciiStr::new_sized("async"),
1563        AsciiStr::new_sized("await"),
1564        AsciiStr::new_sized("break"),
1565        AsciiStr::new_sized("const"),
1566        AsciiStr::new_sized("crate"),
1567        AsciiStr::new_sized("false"),
1568        AsciiStr::new_sized("final"),
1569        AsciiStr::new_sized("macro"),
1570        AsciiStr::new_sized("match"),
1571        AsciiStr::new_sized("super"),
1572        AsciiStr::new_sized("trait"),
1573        AsciiStr::new_sized("union"),
1574        AsciiStr::new_sized("where"),
1575        AsciiStr::new_sized("while"),
1576        AsciiStr::new_sized("yield"),
1577    ];
1578    const KW6: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
1579        AsciiStr::new_sized("become"),
1580        AsciiStr::new_sized("extern"),
1581        AsciiStr::new_sized("return"),
1582        AsciiStr::new_sized("static"),
1583        AsciiStr::new_sized("struct"),
1584        AsciiStr::new_sized("typeof"),
1585        AsciiStr::new_sized("unsafe"),
1586        AsciiStr::new_sized("caller"),
1587    ];
1588    const KW7: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
1589        AsciiStr::new_sized("unsized"),
1590        AsciiStr::new_sized("virtual"),
1591    ];
1592    const KW8: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
1593        AsciiStr::new_sized("abstract"),
1594        AsciiStr::new_sized("continue"),
1595        AsciiStr::new_sized("override"),
1596    ];
1597
1598    [NO_KWS, NO_KWS, KW2, KW3, KW4, KW5, KW6, KW7, KW8]
1599};
1600
1601pub fn is_rust_keyword(ident: &str) -> bool {
1602    let ident_len = ident.len();
1603    if ident_len > MAX_RUST_KEYWORD_LEN {
1604        return false;
1605    }
1606    let kws = RUST_KEYWORDS[ident.len()];
1607
1608    let mut padded_ident = [0; MAX_RUST_KEYWORD_LEN];
1609    padded_ident[..ident_len].copy_from_slice(ident.as_bytes());
1610
1611    // Since the individual buckets are quite short, a linear search is faster than a binary search.
1612    for probe in kws {
1613        if padded_ident == *AsciiChar::slice_as_bytes(probe) {
1614            return true;
1615        }
1616    }
1617    false
1618}
1619
/// Evaluates to an `Err` carrying a "cut" error built from a message and a span.
///
/// Expands to a call to `$crate::cut_context_err` with a closure that creates the
/// `ErrorContext`: `$span` is converted with `Into::into`, and `$message` is converted with
/// `Into::into` and wrapped in `Some`. A trailing comma after `$span` is accepted.
macro_rules! cut_error {
    ($message:expr, $span:expr $(,)?) => {{
        // Everything the expansion needs is imported by absolute path.
        use ::std::convert::Into;
        use ::std::option::Option::Some;
        use $crate::ErrorContext;

        $crate::cut_context_err(
            // The error value is only constructed inside this closure, which is marked cold.
            #[cold]
            #[inline(always)]
            move || ErrorContext {
                span: Into::into($span),
                message: Some(Into::into($message)),
            },
        )
    }};
}
1636
1637pub(crate) use cut_error;
1638
1639#[cold]
1640#[inline(never)]
1641fn cut_context_err<'a, T>(gen_err: impl FnOnce() -> ErrorContext) -> ParseResult<'a, T> {
1642    Err(ErrMode::Cut(gen_err()))
1643}
1644
1645type HashSet<T> = std::collections::hash_set::HashSet<T, FxBuildHasher>;
1646
1647#[cold]
1648#[inline(never)]
1649fn deny_any_rust_token<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1650    // https://docs.rs/syn/2.0.114/src/syn/token.rs.html#748-795
1651    const PUNCTUATIONS: &[&str] = &[
1652        "&", "&&", "&=", "@", "^", "^=", ":", ",", "$", ".", "..", "...", "..=", "=", "==", "=>",
1653        ">=", ">", "<-", "<=", "<", "-", "-=", "!=", "!", "|", "|=", "||", "::", "%", "%=", "+",
1654        "+=", "#", "?", "->", ";", "<<", "<<=", ">>", ">>=", "/", "/=", "*", "*=", "~",
1655        // not a punctuation per se, but a likely typo
1656        "\"", "'",
1657    ];
1658
1659    const ONE: &[u8] = &{
1660        const LEN: usize = {
1661            let mut i = 0;
1662            let mut o = 0;
1663            while i < PUNCTUATIONS.len() {
1664                if PUNCTUATIONS[i].len() == 1 {
1665                    o += 1;
1666                }
1667                i += 1;
1668            }
1669            o
1670        };
1671
1672        let mut result = [0; LEN];
1673        let mut i = 0;
1674        let mut o = 0;
1675        while i < PUNCTUATIONS.len() {
1676            if let &[c] = PUNCTUATIONS[i].as_bytes() {
1677                result[o] = c;
1678                o += 1;
1679            }
1680            i += 1;
1681        }
1682        result
1683    };
1684
1685    const TWO: &[[u8; 2]] = &{
1686        const LEN: usize = {
1687            let mut i = 0;
1688            let mut o = 0;
1689            while i < PUNCTUATIONS.len() {
1690                if PUNCTUATIONS[i].len() == 2 {
1691                    o += 1;
1692                }
1693                i += 1;
1694            }
1695            o
1696        };
1697
1698        let mut result = [*b"12"; LEN];
1699        let mut i = 0;
1700        let mut o = 0;
1701        while i < PUNCTUATIONS.len() {
1702            if let &[a, b] = PUNCTUATIONS[i].as_bytes() {
1703                result[o] = [a, b];
1704                o += 1;
1705            }
1706            i += 1;
1707        }
1708        result
1709    };
1710
1711    const THREE: &[[u8; 3]] = &{
1712        const LEN: usize = {
1713            let mut i = 0;
1714            let mut o = 0;
1715            while i < PUNCTUATIONS.len() {
1716                if PUNCTUATIONS[i].len() == 3 {
1717                    o += 1;
1718                }
1719                i += 1;
1720            }
1721            o
1722        };
1723
1724        let mut result = [*b"123"; LEN];
1725        let mut i = 0;
1726        let mut o = 0;
1727        while i < PUNCTUATIONS.len() {
1728            if let &[a, b, c] = PUNCTUATIONS[i].as_bytes() {
1729                result[o] = [a, b, c];
1730                o += 1;
1731            }
1732            i += 1;
1733        }
1734        result
1735    };
1736
1737    // https://docs.rs/syn/2.0.114/src/syn/token.rs.html#692-746
1738    const KEYWORDS: &[&str] = &[
1739        "abstract", "as", "async", "auto", "await", "become", "box", "break", "const", "continue",
1740        "crate", "default", "do", "dyn", "else", "enum", "extern", "final", "fn", "for", "if",
1741        "impl", "in", "let", "loop", "macro", "match", "mod", "move", "mut", "override", "priv",
1742        "pub", "raw", "ref", "return", "Self", "self", "static", "struct", "super", "trait", "try",
1743        "type", "typeof", "union", "unsafe", "unsized", "use", "virtual", "where", "while",
1744        "yield", // not a keyword in rust, but in askama
1745        "is",
1746    ];
1747
1748    fn any_rust_token<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
1749        alt((
1750            take(3usize).verify(|s: &str| {
1751                if let Ok(s) = s.as_bytes().try_into() {
1752                    THREE.contains(&s)
1753                } else {
1754                    false
1755                }
1756            }),
1757            take(2usize).verify(|s: &str| {
1758                if let Ok(s) = s.as_bytes().try_into() {
1759                    TWO.contains(&s)
1760                } else {
1761                    false
1762                }
1763            }),
1764            take(1usize).verify(|s: &str| {
1765                if let [c] = s.as_bytes() {
1766                    ONE.contains(c)
1767                } else {
1768                    false
1769                }
1770            }),
1771            identifier.verify(|s: &str| KEYWORDS.contains(&s)),
1772        ))
1773        .parse_next(i)
1774    }
1775
1776    let (token, span) = any_rust_token.with_span().parse_next(i)?;
1777    cut_error!(
1778        format!(
1779            "the token `{}` was not expected at this point in the expression",
1780            token.escape_debug(),
1781        ),
1782        span
1783    )
1784}
1785
1786#[cfg(test)]
1787mod test {
1788    use std::path::Path;
1789
1790    use super::*;
1791
1792    #[cfg(not(windows))]
1793    #[test]
1794    fn test_strip_common() {
1795        // Full path is returned instead of empty when the entire path is in common.
1796        assert_eq!(strip_common(Path::new("home"), Path::new("home")), "home");
1797
1798        let cwd = std::env::current_dir().expect("current_dir failed");
1799
1800        // We need actual existing paths for `canonicalize` to work, so let's do that.
1801        let entry = cwd
1802            .read_dir()
1803            .expect("read_dir failed")
1804            .filter_map(std::result::Result::ok)
1805            .find(|f| f.path().is_file())
1806            .expect("no entry");
1807
1808        // Since they have the complete path in common except for the folder entry name, it should
1809        // return only the folder entry name.
1810        assert_eq!(
1811            strip_common(&cwd, &entry.path()),
1812            entry.file_name().to_string_lossy()
1813        );
1814
1815        // In this case it cannot canonicalize `/a/b/c` so it returns the path as is.
1816        assert_eq!(strip_common(&cwd, Path::new("/a/b/c")), "/a/b/c");
1817    }
1818
1819    #[track_caller]
1820    fn parse_peek<'a: 'l, 'l, T>(
1821        state: &'l State<'l>,
1822        parser: impl FnOnce(&mut InputStream<'a, 'l>) -> ParseResult<'a, T>,
1823        input: &'a str,
1824    ) -> ParseResult<'a, (&'a str, T)> {
1825        let mut i = InputStream {
1826            input: LocatingSlice::new(input),
1827            state,
1828        };
1829        let value = parser(&mut i)?;
1830        Ok((**i, value))
1831    }
1832
/// Checks which inputs `num_lit` accepts, how much of the input it consumes, and which `Num`
/// variant / suffix kind it reports.
#[test]
fn test_num_lit() {
    let s = State::default();

    // Inputs that must be rejected. A lone `.` is not a number, and a leading `.` is not
    // supported because Rust wants a number before the `.`.
    for input in [".", ".1", ".1E-02"] {
        assert!(parse_peek(&s, num_lit, input).is_err());
    }

    // Inputs that must be accepted: `(input, unconsumed rest, expected literal)`.
    let accepted = [
        ("1.2E-02", "", Num::Float("1.2E-02", None)),
        ("4e3", "", Num::Float("4e3", None)),
        ("4e+_3", "", Num::Float("4e+_3", None)),
        // A `_` directly after the `.` denotes a field.
        ("1._0", "._0", Num::Int("1", None)),
        ("1_.0", "", Num::Float("1_.0", None)),
        // Not supported (voluntarily because of `1..` syntax).
        ("1.", ".", Num::Int("1", None)),
        ("1_.", ".", Num::Int("1_", None)),
        ("1_2.", ".", Num::Int("1_2", None)),
        // Numbers with suffixes.
        ("-1usize", "", Num::Int("-1", Some(IntKind::Usize))),
        ("123_f32", "", Num::Float("123_", Some(FloatKind::F32))),
        (
            "1_.2_e+_3_f64|into_isize",
            "|into_isize",
            Num::Float("1_.2_e+_3_", Some(FloatKind::F64)),
        ),
        ("4e3f128", "", Num::Float("4e3", Some(FloatKind::F128))),
    ];
    for (input, rest, expected) in accepted {
        assert_eq!(
            parse_peek(&s, num_lit, input).unwrap(),
            (rest, expected)
        );
    }
}
1898
/// Checks that `char_lit` accepts plain, escaped and `b`-prefixed character literals, keeping
/// the content between the quotes verbatim, and rejects malformed ones.
#[test]
fn test_char_lit() {
    let s = State::default();

    // Literals without a prefix: `(input, expected content)`.
    let unprefixed = [
        ("'a'", "a"),
        ("'字'", "字"),
        // Escaped single characters.
        ("'\\\"'", "\\\""),
        ("'\\''", "\\'"),
        ("'\\t'", "\\t"),
        ("'\\n'", "\\n"),
        ("'\\r'", "\\r"),
        ("'\\0'", "\\0"),
        // Escaped ascii characters (up to `0x7F`).
        ("'\\x12'", "\\x12"),
        ("'\\x02'", "\\x02"),
        ("'\\x6a'", "\\x6a"),
        ("'\\x7F'", "\\x7F"),
        // Escaped unicode characters (up to `0x10FFFF`).
        ("'\\u{A}'", "\\u{A}"),
        ("'\\u{10}'", "\\u{10}"),
        ("'\\u{aa}'", "\\u{aa}"),
        ("'\\u{10FFFF}'", "\\u{10FFFF}"),
    ];
    for (input, content) in unprefixed {
        let expected = crate::CharLit {
            prefix: None,
            content,
        };
        assert_eq!(parse_peek(&s, char_lit, input).unwrap(), ("", expected));
    }

    // Check with `b` prefix.
    let binary = crate::CharLit {
        prefix: Some(crate::CharPrefix::Binary),
        content: "a",
    };
    assert_eq!(parse_peek(&s, char_lit, "b'a'").unwrap(), ("", binary));

    // Should fail.
    let rejected = [
        "''",
        "'\\o'",
        "'\\x'",
        "'\\x1'",
        "'\\x80'",
        "'\\u'",
        "'\\u{}'",
        "'\\u{110000}'",
    ];
    for input in rejected {
        assert!(parse_peek(&s, char_lit, input).is_err());
    }
}
1977
/// Checks that `str_lit` reports the `b` and `c` prefixes and rejects an unknown one.
#[test]
fn test_str_lit() {
    let s = State::default();

    // `(input, expected prefix)`; for these inputs every `contains_*` flag is expected unset.
    let prefixed = [
        (r#"b"hello""#, StrPrefix::Binary),
        (r#"c"hello""#, StrPrefix::CLike),
    ];
    for (input, prefix) in prefixed {
        let expected = StrLit {
            prefix: Some(prefix),
            content: "hello",
            contains_null: false,
            contains_unicode_character: false,
            contains_unicode_escape: false,
            contains_high_ascii: false,
        };
        assert_eq!(parse_peek(&s, str_lit, input).unwrap(), ("", expected));
    }

    // A `d` prefix must be rejected.
    assert!(parse_peek(&s, str_lit, r#"d"hello""#).is_err());
}
2011
/// Checks `is_rust_keyword` against a few names it must accept and a few it must reject.
#[test]
fn test_is_rust_keyword() {
    for keyword in ["caller", "super", "become"] {
        assert!(is_rust_keyword(keyword));
    }
    // Near misses of `super` must not be reported as keywords.
    for ident in ["supeeeer", "sur"] {
        assert!(!is_rust_keyword(ident));
    }
}
2020
/// Checks that `check_base_digits` only accepts digits valid for the given base.
#[test]
fn test_check_base_digits() {
    // `(digits, base)` pairs that must pass.
    let valid = [
        ("10", 2),
        ("13", 8),
        // Checking that it's case insensitive.
        ("13F", 16),
        ("13f", 16),
        // Checking that `_` is allowed.
        ("13_f", 16),
    ];
    for (digits, base) in valid {
        assert!(check_base_digits(digits, base, 0..1).is_ok());
    }

    // `(digits, base)` pairs containing a digit that is out of range for the base.
    let invalid = [("13", 2), ("79", 8)];
    for (digits, base) in invalid {
        assert!(check_base_digits(digits, base, 0..1).is_err());
    }
}
2033}