Skip to main content

askama_parser/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![deny(elided_lifetimes_in_paths)]
3#![deny(unreachable_pub)]
4#![allow(clippy::vec_box)] // intentional, less copying
5#![doc = include_str!("../README.md")]
6
7mod ascii_str;
8pub mod expr;
9pub mod node;
10mod target;
11#[cfg(test)]
12mod tests;
13
14use std::borrow::{Borrow, BorrowMut, Cow};
15use std::cell::Cell;
16use std::env::current_dir;
17use std::ops::{Deref, DerefMut, Range};
18use std::path::Path;
19use std::sync::Arc;
20use std::{fmt, str};
21
22use rustc_hash::FxBuildHasher;
23use winnow::ascii::take_escaped;
24use winnow::combinator::{
25    alt, cond, cut_err, delimited, empty, fail, not, opt, peek, preceded, repeat, terminated,
26};
27use winnow::error::ErrMode;
28use winnow::stream::{AsChar, Location, Stream};
29use winnow::token::{any, none_of, one_of, take, take_while};
30use winnow::{LocatingSlice, ModalParser, ModalResult, Parser, Stateful};
31
32use crate::ascii_str::{AsciiChar, AsciiStr};
33pub use crate::expr::{AssociatedItem, Expr, Filter, PathComponent, TyGenerics, TyGenericsKind};
34pub use crate::node::{LetValueOrBlock, Node};
35pub use crate::target::{NamedTarget, Target};
36
mod _parsed {
    use std::path::Path;
    use std::sync::Arc;
    use std::{fmt, mem};

    use super::node::Node;
    use super::{Ast, ParseError, Syntax};

    /// An [`Ast`] bundled together with the source text its nodes borrow from.
    ///
    /// The type is self-referential: `ast` holds string slices that point into `source`.
    /// The module is private so that the fields cannot be touched from the outside.
    pub struct Parsed {
        // `source` must outlive `ast`, so `ast` must be declared before `source`
        // (struct fields are dropped in declaration order).
        ast: Ast<'static>,
        // Never read directly by the AST accessors; it only keeps the text alive.
        #[allow(dead_code)]
        source: Arc<str>,
    }

    impl Parsed {
        /// Parses `source` using `syntax` and keeps `source` alive next to the resulting AST.
        ///
        /// If `file_path` is `None`, it means the `source` is an inline template. Therefore, if
        /// a parsing error occurs, we won't display the path as it wouldn't be useful.
        ///
        /// # Errors
        ///
        /// Returns the [`ParseError`] produced by [`Ast::from_str`] if parsing fails.
        pub fn new(
            source: Arc<str>,
            file_path: Option<Arc<Path>>,
            syntax: &Syntax<'_>,
        ) -> Result<Self, ParseError> {
            // Self-referential borrowing: `self` will keep the source alive as `Arc<str>`,
            // internally we will transmute it to `&'static str` to satisfy the compiler.
            // However, we only expose the nodes with a lifetime limited to `self`.
            //
            // SAFETY: the string data lives in the `Arc<str>` allocation, which is moved into
            // `Self` together with `ast` and is not mutated afterwards; `ast` is dropped before
            // `source`, and `nodes()` only hands out borrows bounded by `&self`.
            let src = unsafe { mem::transmute::<&str, &'static str>(source.as_ref()) };
            let ast = Ast::from_str(src, file_path, syntax)?;
            Ok(Self { ast, source })
        }

        /// The top-level nodes of the parsed template.
        // The return value's lifetime must be limited to `self` to uphold the unsafe invariant.
        #[must_use]
        pub fn nodes(&self) -> &[Box<Node<'_>>] {
            &self.ast.nodes
        }

        /// The template source text that was parsed.
        #[must_use]
        pub fn source(&self) -> &str {
            &self.source
        }
    }

    // Only the nodes are printed; the source text is omitted (`finish_non_exhaustive`).
    impl fmt::Debug for Parsed {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.debug_struct("Parsed")
                .field("nodes", &self.ast.nodes)
                .finish_non_exhaustive()
        }
    }

    // Equality is defined by the nodes alone; `source` is not compared directly.
    impl PartialEq for Parsed {
        fn eq(&self, other: &Self) -> bool {
            self.ast.nodes == other.ast.nodes
        }
    }

    // The default value is an empty AST over an empty source string.
    impl Default for Parsed {
        fn default() -> Self {
            Self {
                ast: Ast::default(),
                source: "".into(),
            }
        }
    }
}
103
104pub use _parsed::Parsed;
105
/// Input type consumed by the parsers in this file: the template source wrapped in a
/// [`LocatingSlice`], paired with a shared reference to the parser [`State`].
type InputStream<'a, 'l> = Stateful<LocatingSlice<&'a str>, &'l State<'l>>;
107
/// The parsed representation of a template: a list of top-level [`Node`]s that borrow
/// from the source text with lifetime `'a`.
#[derive(Debug, Default)]
pub struct Ast<'a> {
    nodes: Vec<Box<Node<'a>>>,
}
112
113impl<'a> Ast<'a> {
114    /// If `file_path` is `None`, it means the `source` is an inline template. Therefore, if
115    /// a parsing error occurs, we won't display the path as it wouldn't be useful.
116    pub fn from_str(
117        src: &'a str,
118        file_path: Option<Arc<Path>>,
119        syntax: &Syntax<'_>,
120    ) -> Result<Ast<'a>, ParseError> {
121        let state = State {
122            syntax: *syntax,
123            ..State::default()
124        };
125        let mut src = InputStream {
126            input: LocatingSlice::new(src),
127            state: &state,
128        };
129        match Node::parse_template(&mut src) {
130            Ok(nodes) if src.is_empty() => Ok(Self { nodes }),
131            Ok(_) | Err(ErrMode::Incomplete(_)) => unreachable!(),
132            Err(
133                ErrMode::Backtrack(ErrorContext { span, message, .. })
134                | ErrMode::Cut(ErrorContext { span, message, .. }),
135            ) => Err(ParseError {
136                message,
137                offset: span.start,
138                file_path,
139            }),
140        }
141    }
142
143    #[must_use]
144    pub fn nodes(&self) -> &[Box<Node<'a>>] {
145        &self.nodes
146    }
147}
148
#[derive(Clone, Copy)]
/// Struct used to wrap types with their associated "span" which is used when generating errors
/// in the code generation.
///
/// Comparison, ordering and hashing of a `WithSpan<T>` only look at the wrapped value;
/// the span is ignored.
#[repr(C)] // rationale: `WithSpan<Box<T>>` needs to have the same layout as `WithSpan<&T>`.
pub struct WithSpan<T> {
    inner: T,
    span: Span,
}
157
/// A location in `&'a str`
///
/// The location is stored as a pair of byte offsets `start..end`. A `start` of
/// `usize::MAX` is the sentinel for "no span" (see [`Span::no_span`] and
/// [`Span::byte_range`]).
#[derive(Debug, Clone, Copy)]
pub struct Span {
    start: usize,
    end: usize,
}
164
165impl Default for Span {
166    #[inline]
167    fn default() -> Self {
168        Self::no_span()
169    }
170}
171
172impl From<&InputStream<'_, '_>> for Span {
173    #[inline]
174    fn from(i: &InputStream<'_, '_>) -> Self {
175        (*i).into()
176    }
177}
178
179impl From<InputStream<'_, '_>> for Span {
180    #[inline]
181    fn from(mut i: InputStream<'_, '_>) -> Self {
182        let start = i.current_token_start();
183        i.finish();
184        Self {
185            start,
186            end: i.current_token_start(),
187        }
188    }
189}
190
191impl From<Range<usize>> for Span {
192    #[inline]
193    #[track_caller]
194    fn from(range: Range<usize>) -> Self {
195        Span::new(range)
196    }
197}
198
199impl Span {
200    #[inline]
201    pub const fn no_span() -> Span {
202        Self {
203            start: usize::MAX,
204            end: usize::MAX,
205        }
206    }
207
208    #[inline]
209    #[track_caller]
210    pub fn new(range: Range<usize>) -> Self {
211        let Range { start, end } = range;
212        debug_assert!(start <= end);
213        Span { start, end }
214    }
215
216    #[inline]
217    pub fn byte_range(self) -> Option<Range<usize>> {
218        (self.start != usize::MAX).then_some(self.start..self.end)
219    }
220
221    /// Returns an empty [`Span`] that points to the start of `self`.
222    #[inline]
223    pub fn start(self) -> Self {
224        Self {
225            start: self.start,
226            end: self.start,
227        }
228    }
229
230    /// Returns an empty [`Span`] that points to the end of `self`.
231    #[inline]
232    pub fn end(self) -> Self {
233        Self {
234            start: self.end,
235            end: self.end,
236        }
237    }
238
239    /// Splits `self` at `mid` into two spanned strings.
240    #[track_caller]
241    pub fn split_at(self, mid: usize) -> (Self, Self) {
242        let Some(Range { start, end }) = self.byte_range() else {
243            return (self, self);
244        };
245
246        let mid = start.checked_add(mid).unwrap();
247        assert!(mid <= end);
248
249        let start = Self { start, end: mid };
250        let end = Self { start: mid, end };
251        (start, end)
252    }
253
254    /// The substring in `source` contained in [`self.byte_range()`][Self::byte_range].
255    #[inline]
256    pub fn as_infix_of<'a>(&self, source: &'a str) -> Option<&'a str> {
257        self.byte_range().and_then(|range| source.get(range))
258    }
259
260    /// The substring in `source` starting from `self.start`.
261    #[inline]
262    pub fn as_suffix_of<'a>(&self, source: &'a str) -> Option<&'a str> {
263        // No need to check if `self.start != usize::MAX`:
264        // `source` cannot be longer than `isize::MAX`, cf. [`std::alloc`].
265        source.get(self.start..)
266    }
267
268    pub fn is_overlapping(&self, other: Span) -> bool {
269        (self.start < other.end) & (other.start < self.end)
270    }
271}
272
273impl<T> WithSpan<T> {
274    #[inline]
275    #[track_caller]
276    pub fn new(inner: T, span: impl Into<Span>) -> Self {
277        Self {
278            inner,
279            span: span.into(),
280        }
281    }
282
283    #[inline]
284    pub const fn no_span(inner: T) -> Self {
285        Self {
286            inner,
287            span: Span::no_span(),
288        }
289    }
290
291    #[inline]
292    pub fn span(&self) -> Span {
293        self.span
294    }
295
296    #[inline]
297    pub fn deconstruct(self) -> (T, Span) {
298        let Self { inner, span } = self;
299        (inner, span)
300    }
301}
302
303impl WithSpan<&str> {
304    /// Returns an empty [`Span`] that points to the start of the contained string.
305    #[inline]
306    pub fn start(self) -> Self {
307        let (inner, span) = self.deconstruct();
308        Self::new(&inner[..0], span.start())
309    }
310
311    /// Returns an empty [`Span`] that points to the end of the contained string.
312    #[inline]
313    pub fn end(self) -> Self {
314        let (inner, span) = self.deconstruct();
315        Self::new(&inner[inner.len()..], span.end())
316    }
317
318    /// Splits `self` at `mid` into two spanned strings.
319    #[track_caller]
320    pub fn split_at(self, mid: usize) -> (Self, Self) {
321        let (inner, span) = self.deconstruct();
322        let (front, back) = inner.split_at(mid);
323        let (front_span, back_span) = span.split_at(mid);
324        (Self::new(front, front_span), Self::new(back, back_span))
325    }
326}
327
328impl<T> Deref for WithSpan<T> {
329    type Target = T;
330
331    #[inline]
332    fn deref(&self) -> &Self::Target {
333        &self.inner
334    }
335}
336
337impl<T> DerefMut for WithSpan<T> {
338    #[inline]
339    fn deref_mut(&mut self) -> &mut Self::Target {
340        &mut self.inner
341    }
342}
343
344impl<T: fmt::Debug> fmt::Debug for WithSpan<T> {
345    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
346        self.inner.fmt(f)
347    }
348}
349
350impl<T: PartialEq, R: AsRef<T>> PartialEq<R> for WithSpan<T> {
351    #[inline]
352    fn eq(&self, other: &R) -> bool {
353        // We never want to compare the span information.
354        self.inner == *other.as_ref()
355    }
356}
357
358impl<T: PartialOrd, R: AsRef<T>> PartialOrd<R> for WithSpan<T> {
359    #[inline]
360    fn partial_cmp(&self, other: &R) -> Option<std::cmp::Ordering> {
361        self.inner.partial_cmp(other.as_ref())
362    }
363}
364
// Marker impl: equality of `WithSpan<T>` is the equality of the wrapped `T` (spans are ignored).
impl<T: Eq> Eq for WithSpan<T> {}
366
367impl<T: Ord> Ord for WithSpan<T> {
368    #[inline]
369    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
370        self.inner.cmp(&other.inner)
371    }
372}
373
374impl<T: std::hash::Hash> std::hash::Hash for WithSpan<T> {
375    #[inline]
376    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
377        self.inner.hash(state);
378    }
379}
380
381impl<T> AsRef<T> for WithSpan<T> {
382    #[inline]
383    fn as_ref(&self) -> &T {
384        &self.inner
385    }
386}
387
388impl<T> Borrow<T> for WithSpan<T> {
389    #[inline]
390    fn borrow(&self) -> &T {
391        &self.inner
392    }
393}
394
395impl<T> BorrowMut<T> for WithSpan<T> {
396    #[inline]
397    fn borrow_mut(&mut self) -> &mut T {
398        &mut self.inner
399    }
400}
401
/// Error returned when a template cannot be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Optional description of what went wrong.
    pub message: Option<Cow<'static, str>>,
    /// Offset into the template source at which the error was detected.
    pub offset: usize,
    /// Path of the template file, or `None` for inline templates.
    pub file_path: Option<Arc<Path>>,
}

// No `source()` override: the error does not wrap another error.
impl std::error::Error for ParseError {}
410
411impl fmt::Display for ParseError {
412    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
413        let ParseError {
414            message,
415            file_path,
416            offset,
417        } = self;
418
419        if let Some(message) = message {
420            writeln!(f, "{message}")?;
421        }
422
423        let path = file_path
424            .as_ref()
425            .and_then(|path| Some(strip_common(&current_dir().ok()?, path)));
426        match path {
427            Some(path) => write!(f, "failed to parse template source\n  --> {path}@{offset}"),
428            None => write!(f, "failed to parse template source near offset {offset}"),
429        }
430    }
431}
432
/// Error type of the parsers in this file: an [`ErrorContext`] wrapped in winnow's [`ErrMode`].
pub(crate) type ParseErr<'a> = ErrMode<ErrorContext>;
/// Result type of the parsers in this file; the success type defaults to `&'a str`.
pub(crate) type ParseResult<'a, T = &'a str> = Result<T, ParseErr<'a>>;
435
/// This type is used to handle `winnow` errors and in particular to add custom error messages.
/// It is used to generate [`ParseError`].
///
/// It is kept separate from the public `ParseError`. The original rationale was that it
/// expected a generic which is only needed by the parser library and would make the
/// experience of `askama`'s users less good.
/// NOTE(review): the struct currently has no generic parameter, so this rationale may be
/// outdated -- confirm.
#[derive(Debug)]
pub(crate) struct ErrorContext {
    /// Location the error refers to.
    pub(crate) span: Span,
    /// Custom error message, if any.
    pub(crate) message: Option<Cow<'static, str>>,
}
446
447impl ErrorContext {
448    #[cold]
449    fn unclosed(kind: &str, tag: &str, span: impl Into<Span>) -> Self {
450        Self {
451            span: span.into(),
452            message: Some(format!("unclosed {kind}, missing {tag:?}").into()),
453        }
454    }
455
456    #[cold]
457    #[inline]
458    fn new(message: impl Into<Cow<'static, str>>, span: impl Into<Span>) -> Self {
459        Self {
460            span: span.into(),
461            message: Some(message.into()),
462        }
463    }
464
465    #[inline]
466    fn backtrack(self) -> ErrMode<Self> {
467        ErrMode::Backtrack(self)
468    }
469
470    #[inline]
471    fn cut(self) -> ErrMode<Self> {
472        ErrMode::Cut(self)
473    }
474}
475
476impl<'a: 'l, 'l> winnow::error::ParserError<InputStream<'a, 'l>> for ErrorContext {
477    type Inner = Self;
478
479    #[inline]
480    fn from_input(input: &InputStream<'a, 'l>) -> Self {
481        Self {
482            span: input.into(),
483            message: None,
484        }
485    }
486
487    #[inline(always)]
488    fn into_inner(self) -> Result<Self::Inner, Self> {
489        Ok(self)
490    }
491}
492
493fn skip_ws0<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
494    take_while(0.., |c: char| c.is_ascii_whitespace())
495        .void()
496        .parse_next(i)
497}
498
499fn skip_ws1<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
500    take_while(1.., |c: char| c.is_ascii_whitespace())
501        .void()
502        .parse_next(i)
503}
504
505fn ws<'a: 'l, 'l, O>(
506    inner: impl ModalParser<InputStream<'a, 'l>, O, ErrorContext>,
507) -> impl ModalParser<InputStream<'a, 'l>, O, ErrorContext> {
508    delimited(skip_ws0, inner, skip_ws0)
509}
510
511fn keyword<'a: 'l, 'l>(k: &str) -> impl ModalParser<InputStream<'a, 'l>, &'a str, ErrorContext> {
512    identifier.verify(move |v: &str| v == k)
513}
514
515fn identifier<'a: 'l, 'l>(input: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
516    let head = any.verify(|&c| c == '_' || unicode_ident::is_xid_start(c));
517    let tail = take_while(.., unicode_ident::is_xid_continue);
518    (head, tail).take().parse_next(input)
519}
520
521fn bool_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
522    alt((keyword("false"), keyword("true"))).parse_next(i)
523}
524
/// A numeric literal as written in the template source.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Num<'a> {
    /// Integer literal: the text before the suffix and the optional integer type suffix.
    Int(&'a str, Option<IntKind>),
    /// Float literal: the text before the suffix and the optional float type suffix.
    Float(&'a str, Option<FloatKind>),
}
530
531fn check_base_digits<'a>(digits: &'a str, base: u32, span: Range<usize>) -> ParseResult<'a, ()> {
532    let allowed_digits: &[char] = match base {
533        2 => &['0', '1'],
534        8 => &['0', '1', '2', '3', '4', '5', '6', '7'],
535        16 => &[
536            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
537        ],
538        _ => panic!("unsupported base `{base}`"),
539    };
540
541    for digit in digits.chars() {
542        let lower = digit.to_ascii_lowercase();
543        if lower != '_' && !allowed_digits.iter().any(|c| *c == digit || *c == lower) {
544            let allowed = allowed_digits.iter().collect::<String>();
545            let base = match base {
546                2 => 'b',
547                8 => 'o',
548                16 => 'x',
549                _ => unreachable!(),
550            };
551            return cut_error!(
552                format!("only expected `{allowed}` digits for `0{base}` integers, found `{digit}`"),
553                span,
554            );
555        }
556    }
557    Ok(())
558}
559
/// Parses a numeric literal: an integer with a `0b`/`0o`/`0x` prefix, a decimal integer,
/// or a float, each optionally followed by a type suffix.
///
/// An unknown suffix, a base prefix without digits, a digit that is invalid for the base,
/// and an exponent without digits are reported as cut errors.
fn num_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, Num<'a>> {
    // Parses an identifier and looks it up in `list`; `kind` is only used in the error message.
    fn num_lit_suffix<'a: 'l, 'l, T: Copy>(
        kind: &'a str,
        list: &[(&str, T)],
        i: &mut InputStream<'a, 'l>,
    ) -> ParseResult<'a, T> {
        let (suffix, span) = identifier.with_span().parse_next(i)?;
        if let Some(value) = list
            .iter()
            .copied()
            .find_map(|(name, value)| (name == suffix).then_some(value))
        {
            Ok(value)
        } else {
            cut_error!(format!("unknown {kind} suffix `{suffix}`"), span)
        }
    }

    // Equivalent to <https://github.com/rust-lang/rust/blob/e3f909b2bbd0b10db6f164d466db237c582d3045/compiler/rustc_lexer/src/lib.rs#L587-L620>.
    let int_with_base = (opt('-'), |i: &mut _| {
        // `base` is the numeric radix, `kind` the matched prefix text used in the error message.
        let ((base, kind), span) = preceded('0', alt(('b'.value(2), 'o'.value(8), 'x'.value(16))))
            .with_taken()
            .with_span()
            .parse_next(i)?;
        // Binary and octal literals are first read with decimal digits, so that an
        // out-of-range digit is reported by `check_base_digits` instead of ending the literal.
        match opt(separated_digits(if base == 16 { base } else { 10 }, false)).parse_next(i)? {
            Some(digits) => check_base_digits(digits, base, span),
            None => cut_error!(format!("expected digits after `{kind}`"), span),
        }
    });

    // Equivalent to <https://github.com/rust-lang/rust/blob/e3f909b2bbd0b10db6f164d466db237c582d3045/compiler/rustc_lexer/src/lib.rs#L626-L653>:
    // no `_` directly after the decimal point `.`, or between `e` and `+/-`.
    let float = |i: &mut InputStream<'a, 'l>| -> ParseResult<'a, ()> {
        let has_dot = opt(('.', separated_digits(10, true))).parse_next(i)?;
        let has_exp = opt(|i: &mut _| {
            let ((kind, op), span) = (one_of(['e', 'E']), opt(one_of(['+', '-'])))
                .with_span()
                .parse_next(i)?;
            match opt(separated_digits(10, op.is_none())).parse_next(i)? {
                Some(_) => Ok(()),
                None => {
                    cut_error!(
                        format!("expected decimal digits, `+` or `-` after exponent `{kind}`"),
                        span,
                    )
                }
            }
        })
        .parse_next(i)?;
        // Only a fraction and/or an exponent makes the number a float.
        match (has_dot, has_exp) {
            (Some(_), _) | (_, Some(())) => Ok(()),
            _ => fail(i),
        }
    };

    // Integers with a base prefix are tried first, then decimal integers and floats.
    let num = if let Some(num) = opt(int_with_base.take()).parse_next(i)? {
        let suffix = opt(|i: &mut _| num_lit_suffix("integer", INTEGER_TYPES, i)).parse_next(i)?;
        Num::Int(num, suffix)
    } else {
        // `num` is the complete text (sign, digits and the optional float part).
        let (float, num) = preceded((opt('-'), separated_digits(10, true)), opt(float))
            .with_taken()
            .parse_next(i)?;
        if float.is_some() {
            let suffix = opt(|i: &mut _| num_lit_suffix("float", FLOAT_TYPES, i)).parse_next(i)?;
            Num::Float(num, suffix)
        } else {
            // Plain decimal digits: the suffix decides between integer and float.
            let suffix = opt(|i: &mut _| num_lit_suffix("number", NUM_TYPES, i)).parse_next(i)?;
            match suffix {
                Some(NumKind::Int(kind)) => Num::Int(num, Some(kind)),
                Some(NumKind::Float(kind)) => Num::Float(num, Some(kind)),
                None => Num::Int(num, None),
            }
        }
    };
    Ok(num)
}
636
637/// Underscore separated digits of the given base, unless `start` is true this may start
638/// with an underscore.
639fn separated_digits<'a: 'l, 'l>(
640    radix: u32,
641    start: bool,
642) -> impl ModalParser<InputStream<'a, 'l>, &'a str, ErrorContext> {
643    (
644        cond(!start, repeat(0.., '_').map(|()| ())),
645        one_of(move |ch: char| ch.is_digit(radix)),
646        repeat(0.., one_of(move |ch: char| ch == '_' || ch.is_digit(radix))).map(|()| ()),
647    )
648        .take()
649}
650
/// The prefix of a prefixed string literal.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum StrPrefix {
    /// Prefix `b`.
    Binary,
    /// Prefix `c`.
    CLike,
}

impl StrPrefix {
    /// The character that spells this prefix in source code.
    #[must_use]
    pub fn to_char(self) -> char {
        match self {
            StrPrefix::CLike => 'c',
            StrPrefix::Binary => 'b',
        }
    }
}

/// Writes the single prefix character, without applying any formatting options.
impl fmt::Display for StrPrefix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let prefix = self.to_char();
        fmt::Write::write_char(f, prefix)
    }
}
674
/// A string literal together with the properties collected while validating it.
#[derive(Clone, Debug, PartialEq)]
pub struct StrLit<'a> {
    /// the unparsed (but validated) content
    pub content: &'a str,
    /// whether the string literal is unprefixed, a cstring or binary slice
    pub prefix: Option<StrPrefix>,
    /// contains a NUL character, either escaped (e.g. `\0`) or as the raw character itself;
    /// not allowed in cstring literals
    pub contains_null: bool,
    /// contains a non-ASCII character, either as `\u{123456}` or as an unescaped character;
    /// not allowed in binary slices
    pub contains_unicode_character: bool,
    /// contains unicode escape sequences like `\u{12}` (regardless of its range);
    /// not allowed in binary slices
    pub contains_unicode_escape: bool,
    /// contains a non-ASCII range escape sequence like `\x80`;
    /// not allowed in unprefixed strings
    pub contains_high_ascii: bool,
}
694
/// Parses a string literal with an optional `b` or `c` prefix and validates its content.
///
/// The returned [`StrLit`] holds the text between the quotes and the properties
/// collected while scanning it. A missing closing quote, a bare CR, an invalid unicode
/// escape, content that is not allowed for the given prefix, and an identifier directly
/// after the closing quote are reported as cut errors.
fn str_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, StrLit<'a>> {
    // <https://doc.rust-lang.org/reference/tokens.html#r-lex.token.literal.str.syntax>

    // Scans the content between the quotes without consuming the closing quote.
    // `content` and `prefix` of the returned value are placeholders filled in by the caller.
    fn inner<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, StrLit<'a>> {
        #[derive(Debug, Clone, PartialEq, Eq)]
        enum Sequence<'a> {
            // A run of characters without CR, backslash or double quote.
            Text(&'a str),
            // The closing double quote (only peeked, not consumed).
            Close,
            // A backslash; the escape kind is read afterwards.
            Escape,
            // CR followed by LF.
            CrLf,
            // A CR that is not followed by LF, with its location.
            Cr(Range<usize>),
        }

        let mut contains_null = false;
        let mut contains_unicode_character = false;
        let mut contains_unicode_escape = false;
        let mut contains_high_ascii = false;

        while !i.is_empty() {
            let seq = alt((
                repeat::<_, _, (), _, _>(1.., none_of(['\r', '\\', '"']))
                    .take()
                    .map(Sequence::Text),
                ('\r'.span(), opt('\n')).map(|(span, has_lf)| match has_lf {
                    Some(_) => Sequence::CrLf,
                    None => Sequence::Cr(span),
                }),
                '\\'.value(Sequence::Escape),
                peek('"').value(Sequence::Close),
            ))
            .parse_next(i)?;

            match seq {
                Sequence::Text(s) => {
                    contains_unicode_character =
                        contains_unicode_character || s.bytes().any(|c: u8| !c.is_ascii());
                    contains_null = contains_null || s.bytes().any(|c: u8| c == 0);
                    continue;
                }
                Sequence::CrLf => continue,
                Sequence::Cr(span) => {
                    return cut_error!(
                        "a bare CR (Mac linebreak) is not allowed in string literals, \
                        use NL (Unix linebreak) or CRNL (Windows linebreak) instead, \
                        or type `\\r` explicitly",
                        span,
                    );
                }
                Sequence::Close => break,
                // Fall through to the escape handling below.
                Sequence::Escape => {}
            }

            // The character right after the backslash selects the kind of escape.
            match any.parse_next(i)? {
                '\'' | '"' | 'n' | 'r' | 't' | '\\' => continue,
                '0' => {
                    contains_null = true;
                    continue;
                }
                'x' => {
                    // Exactly two hex digits were matched, so the conversion cannot fail.
                    let code = take_while(2, AsChar::is_hex_digit).parse_next(i)?;
                    match u8::from_str_radix(code, 16).unwrap() {
                        0 => contains_null = true,
                        128.. => contains_high_ascii = true,
                        _ => {}
                    }
                }
                'u' => {
                    contains_unicode_escape = true;
                    // One to six hex digits were matched, so the conversion cannot fail.
                    let (code, span) = delimited('{', take_while(1..=6, AsChar::is_hex_digit), '}')
                        .with_span()
                        .parse_next(i)?;
                    match u32::from_str_radix(code, 16).unwrap() {
                        0 => contains_null = true,
                        0xd800..0xe000 => {
                            return cut_error!("unicode escape must not be a surrogate", span);
                        }
                        0x110000.. => {
                            return cut_error!("unicode escape must be at most 10FFFF", span);
                        }
                        128.. => contains_unicode_character = true,
                        _ => {}
                    }
                }
                // Any other character after the backslash is not a known escape.
                _ => return fail(i),
            }
        }

        Ok(StrLit {
            content: "",
            prefix: None,
            contains_null,
            contains_unicode_character,
            contains_unicode_escape,
            contains_high_ascii,
        })
    }

    // `span` covers the prefix, the opening quote and, if it was parsed, the content with
    // its closing quote. `lit` is `None` if the content or the closing quote is broken.
    let ((prefix, lit), span) = (
        terminated(
            opt(alt((
                'b'.value(StrPrefix::Binary),
                'c'.value(StrPrefix::CLike),
            ))),
            '"',
        ),
        opt(terminated(inner.with_taken(), '"')),
    )
        .with_span()
        .parse_next(i)?;

    let Some((mut lit, content)) = lit else {
        return cut_error!("unclosed or broken string", span);
    };
    // Fill in the placeholders left by `inner`.
    lit.content = content;
    lit.prefix = prefix;

    // Reject content that is not allowed for the given kind of literal.
    let msg = match prefix {
        Some(StrPrefix::Binary) => {
            if lit.contains_unicode_character {
                Some("non-ASCII character in byte string literal")
            } else if lit.contains_unicode_escape {
                Some("unicode escape in byte string")
            } else {
                None
            }
        }
        Some(StrPrefix::CLike) => lit
            .contains_null
            .then_some("null characters in C string literals are not supported"),
        None => lit.contains_high_ascii.then_some("out of range hex escape"),
    };
    if let Some(msg) = msg {
        return cut_error!(msg, span);
    }

    // An identifier must not follow the closing quote directly.
    not_suffix_with_hash(i)?;
    Ok(lit)
}
833
834fn not_suffix_with_hash<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
835    if let Some(span) = opt(identifier.span()).parse_next(i)? {
836        return cut_error!(
837            "you are missing a space to separate two string literals",
838            span,
839        );
840    }
841    Ok(())
842}
843
844fn str_lit_without_prefix<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
845    let (lit, span) = str_lit.with_span().parse_next(i)?;
846
847    let kind = match lit.prefix {
848        Some(StrPrefix::Binary) => Some("binary slice"),
849        Some(StrPrefix::CLike) => Some("cstring"),
850        None => None,
851    };
852    if let Some(kind) = kind {
853        return cut_error!(
854            format!("expected an unprefixed normal string, not a {kind}"),
855            span,
856        );
857    }
858
859    Ok(lit.content)
860}
861
/// The prefix of a prefixed character literal.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum CharPrefix {
    /// Prefix `b`, i.e. a byte literal.
    Binary,
}
866
/// A character or byte literal.
#[derive(Clone, Debug, PartialEq)]
pub struct CharLit<'a> {
    /// `Some` for a prefixed literal, `None` for a plain character literal.
    pub prefix: Option<CharPrefix>,
    /// The text between the quotes, with escape sequences left as written.
    pub content: &'a str,
}
872
873// Information about allowed character escapes is available at:
874// <https://doc.rust-lang.org/reference/tokens.html#character-literals>.
875fn char_lit<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, CharLit<'a>> {
876    let ((prefix, _, content, is_closed), span) = (
877        alt(('b'.value(Some(CharPrefix::Binary)), empty.value(None))),
878        '\'',
879        opt(take_escaped(none_of(['\\', '\'']), '\\', any)),
880        opt('\''),
881    )
882        .with_span()
883        .parse_next(i)?;
884
885    if is_closed.is_none() {
886        if let Some(prefix) = prefix {
887            return cut_error!(
888                match prefix {
889                    CharPrefix::Binary => "unterminated byte literal",
890                },
891                span,
892            );
893        } else {
894            return fail(i);
895        }
896    }
897
898    let content = match content.unwrap_or_default() {
899        "" => {
900            return cut_error!(
901                match prefix {
902                    Some(CharPrefix::Binary) => "empty byte literal",
903                    None => "empty character literal",
904                },
905                span,
906            );
907        }
908        content => content,
909    };
910
911    let mut content_i = content;
912    let Ok(c) = Char::parse(&mut content_i) else {
913        return cut_error!("invalid character", span);
914    };
915    if !content_i.is_empty() {
916        let (c, s) = match prefix {
917            Some(CharPrefix::Binary) => ("byte", "binary string"),
918            None => ("character", "string"),
919        };
920        return cut_error!(
921            format!(
922                "cannot have multiple characters in a {c} literal, use `{}\"...\"` to write a {s}",
923                match prefix {
924                    Some(CharPrefix::Binary) => "b",
925                    None => "",
926                }
927            ),
928            span,
929        );
930    }
931
932    let (nb, max_value, err1, err2) = match c {
933        Char::Literal(c) | Char::Escaped(c) => match prefix {
934            Some(CharPrefix::Binary) if !c.is_ascii() => {
935                return cut_error!("non-ASCII character in byte literal", span);
936            }
937            _ => return Ok(CharLit { prefix, content }),
938        },
939        Char::AsciiEscape(nb) => (
940            nb,
941            // `0x7F` is the maximum value for a `\x` escaped character.
942            0x7F,
943            "invalid character in ascii escape",
944            "must be a character in the range [\\x00-\\x7f]",
945        ),
946        Char::UnicodeEscape(nb) => {
947            match prefix {
948                Some(CharPrefix::Binary) => {
949                    return cut_error!(
950                        "cannot use unicode escape in byte string in byte literal",
951                        span,
952                    );
953                }
954                None => (
955                    nb,
956                    // `0x10FFFF` is the maximum value for a `\u` escaped character.
957                    0x0010_FFFF,
958                    "invalid character in unicode escape",
959                    "unicode escape must be at most 10FFFF",
960                ),
961            }
962        }
963    };
964
965    let Ok(nb) = u32::from_str_radix(nb, 16) else {
966        return cut_error!(err1, span);
967    };
968    if nb > max_value {
969        return cut_error!(err2, span);
970    }
971
972    Ok(CharLit { prefix, content })
973}
974
/// Represents the different kinds of char declarations, as produced by [`Char::parse`].
#[derive(Copy, Clone)]
enum Char<'a> {
    /// Any character that is not escaped.
    Literal(char),
    /// An escaped character (like `\n`) which doesn't require any extra check.
    Escaped(char),
    /// Ascii escape (like `\x12`). Holds the hexadecimal digits that follow `\x`.
    AsciiEscape(&'a str),
    /// Unicode escape (like `\u{12}`). Holds the hexadecimal digits between the braces.
    UnicodeEscape(&'a str),
}
987
988impl<'a> Char<'a> {
989    fn parse(i: &mut &'a str) -> ModalResult<Self, ()> {
990        let unescaped = none_of(('\\', '\'')).map(Self::Literal);
991        let escaped = preceded(
992            '\\',
993            alt((
994                'n'.value(Self::Escaped('\n')),
995                'r'.value(Self::Escaped('\r')),
996                't'.value(Self::Escaped('\t')),
997                '\\'.value(Self::Escaped('\\')),
998                '0'.value(Self::Escaped('\0')),
999                '\''.value(Self::Escaped('\'')),
1000                // Not useful but supported by rust.
1001                '"'.value(Self::Escaped('"')),
1002                ('x', take_while(2, |c: char| c.is_ascii_hexdigit()))
1003                    .map(|(_, s)| Self::AsciiEscape(s)),
1004                (
1005                    "u{",
1006                    take_while(1..=6, |c: char| c.is_ascii_hexdigit()),
1007                    '}',
1008                )
1009                    .map(|(_, s, _)| Self::UnicodeEscape(s)),
1010            )),
1011        );
1012        alt((unescaped, escaped)).parse_next(i)
1013    }
1014}
1015
/// Either a path or a plain identifier, as returned by `path_or_identifier()`.
#[derive(Clone, Debug, PartialEq)]
pub enum PathOrIdentifier<'a> {
    /// A path. `path_or_identifier()` puts a component with an empty name first when the input
    /// started with `::`.
    Path(Vec<PathComponent<'a>>),
    /// A single name. `path_or_identifier()` returns this for a lone component without generics
    /// whose name is empty or starts with `_` or a lowercase letter.
    Identifier(WithSpan<&'a str>),
}
1021
1022fn path_or_identifier<'a: 'l, 'l>(
1023    i: &mut InputStream<'a, 'l>,
1024) -> ParseResult<'a, PathOrIdentifier<'a>> {
1025    let mut p = |i: &mut _| {
1026        let root = ws(opt(terminated(empty.span(), "::")));
1027        let start = PathComponent::parse;
1028        let tail = opt(repeat(1.., preceded(ws("::"), PathComponent::parse)).map(|v: Vec<_>| v));
1029
1030        let (root, start, rest) = (root, start, tail).parse_next(i)?;
1031        Ok((root, start, rest.unwrap_or_default()))
1032    };
1033    let (root, start, rest) = p.parse_next(i)?;
1034
1035    // The returned identifier can be assumed to be path if:
1036    // - it is an absolute path (starts with `::`), or
1037    // - it has multiple components (at least one `::`), or
1038    // - the first letter is uppercase
1039    match (root, start, rest) {
1040        (None, arg, tail)
1041            if tail.is_empty()
1042                && arg.generics.is_none()
1043                && arg
1044                    .name
1045                    .chars()
1046                    .next()
1047                    .is_none_or(|c| c == '_' || c.is_lowercase()) =>
1048        {
1049            Ok(PathOrIdentifier::Identifier(arg.name))
1050        }
1051        (root, start, tail) => {
1052            let mut path = if let Some(root) = root {
1053                let mut path = Vec::with_capacity(2 + tail.len());
1054                path.push(PathComponent {
1055                    name: WithSpan::new("", root),
1056                    generics: None,
1057                });
1058                path
1059            } else {
1060                Vec::with_capacity(1 + tail.len())
1061            };
1062            path.push(start);
1063            path.extend(tail);
1064            Ok(PathOrIdentifier::Path(path))
1065        }
1066    }
1067}
1068
/// State that the parsers reach through the `state` field of the input stream.
#[derive(Debug, Clone, Default)]
struct State<'a> {
    /// The delimiters used to recognize blocks, expressions and comments.
    syntax: Syntax<'a>,
    /// Number of loops currently entered; see `enter_loop()` / `leave_loop()`.
    loop_depth: Cell<usize>,
    /// Remaining nesting budget; see [`Level`].
    level: Level,
}
1075
1076fn block_start<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1077    i.state.syntax.block_start.void().parse_next(i)
1078}
1079
1080fn block_end<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1081    let (control, span) = alt((
1082        i.state.syntax.block_end.value(None),
1083        peek(delimited('%', alt(('-', '~', '+')).map(Some), '}')),
1084        fail, // rollback on partial matches in the previous line
1085    ))
1086    .with_span()
1087    .parse_next(i)?;
1088
1089    let Some(control) = control else {
1090        return Ok(());
1091    };
1092
1093    let err = ErrorContext::new(
1094        format!(
1095            "unclosed block, you likely meant to apply whitespace control: \"{}{}\"",
1096            control.escape_default(),
1097            i.state.syntax.block_end.escape_default(),
1098        ),
1099        span,
1100    );
1101    Err(err.backtrack())
1102}
1103
1104fn expr_start<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1105    i.state.syntax.expr_start.void().parse_next(i)
1106}
1107
1108fn expr_end<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1109    i.state.syntax.expr_end.void().parse_next(i)
1110}
1111
1112impl State<'_> {
1113    fn enter_loop(&self) {
1114        self.loop_depth.set(self.loop_depth.get() + 1);
1115    }
1116
1117    fn leave_loop(&self) {
1118        self.loop_depth.set(self.loop_depth.get() - 1);
1119    }
1120
1121    fn is_in_loop(&self) -> bool {
1122        self.loop_depth.get() > 0
1123    }
1124}
1125
/// The set of delimiters used to parse a template.
///
/// The inner [`InnerSyntax`] is a private field; its values are read through `Deref`.
#[derive(Default, Hash, PartialEq, Clone, Copy)]
pub struct Syntax<'a>(InnerSyntax<'a>);
1128
// This abstraction ensures that the fields are readable, but not writable.
/// The delimiter strings of a [`Syntax`].
#[derive(Hash, PartialEq, Clone, Copy)]
pub struct InnerSyntax<'a> {
    /// Opening block delimiter, `{%` by default.
    pub block_start: &'a str,
    /// Closing block delimiter, `%}` by default.
    pub block_end: &'a str,
    /// Opening expression delimiter, `{{` by default.
    pub expr_start: &'a str,
    /// Closing expression delimiter, `}}` by default.
    pub expr_end: &'a str,
    /// Opening comment delimiter, `{#` by default.
    pub comment_start: &'a str,
    /// Closing comment delimiter, `#}` by default.
    pub comment_end: &'a str,
}
1139
1140impl<'a> Deref for Syntax<'a> {
1141    type Target = InnerSyntax<'a>;
1142
1143    #[inline]
1144    fn deref(&self) -> &Self::Target {
1145        &self.0
1146    }
1147}
1148
1149impl Default for InnerSyntax<'static> {
1150    fn default() -> Self {
1151        Self {
1152            block_start: "{%",
1153            block_end: "%}",
1154            expr_start: "{{",
1155            expr_end: "}}",
1156            comment_start: "{#",
1157            comment_end: "#}",
1158        }
1159    }
1160}
1161
1162impl fmt::Debug for Syntax<'_> {
1163    #[inline]
1164    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1165        fmt_syntax("Syntax", self, f)
1166    }
1167}
1168
/// Formats as a struct named `InnerSyntax`; the field output is produced by `fmt_syntax()`.
impl fmt::Debug for InnerSyntax<'_> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt_syntax("InnerSyntax", self, f)
    }
}
1175
1176fn fmt_syntax(name: &str, inner: &InnerSyntax<'_>, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1177    f.debug_struct(name)
1178        .field("block_start", &inner.block_start)
1179        .field("block_end", &inner.block_end)
1180        .field("expr_start", &inner.expr_start)
1181        .field("expr_end", &inner.expr_end)
1182        .field("comment_start", &inner.comment_start)
1183        .field("comment_end", &inner.comment_end)
1184        .finish()
1185}
1186
/// Optional overrides for the template delimiters; turned into a [`Syntax`] by
/// [`SyntaxBuilder::to_syntax()`].
///
/// Every delimiter left as `None` falls back to the value of `InnerSyntax::default()`.
#[derive(Debug, Default, Clone, Copy, Hash, PartialEq)]
#[cfg_attr(feature = "config", derive(serde_derive::Deserialize))]
pub struct SyntaxBuilder<'a> {
    /// Name of this syntax.
    // NOTE(review): not read by `to_syntax()`; presumably used by callers to select a syntax.
    pub name: &'a str,
    /// Override for the opening block delimiter.
    pub block_start: Option<&'a str>,
    /// Override for the closing block delimiter.
    pub block_end: Option<&'a str>,
    /// Override for the opening expression delimiter.
    pub expr_start: Option<&'a str>,
    /// Override for the closing expression delimiter.
    pub expr_end: Option<&'a str>,
    /// Override for the opening comment delimiter.
    pub comment_start: Option<&'a str>,
    /// Override for the closing comment delimiter.
    pub comment_end: Option<&'a str>,
}
1198
1199impl<'a> SyntaxBuilder<'a> {
1200    pub fn to_syntax(&self) -> Result<Syntax<'a>, String> {
1201        let default = InnerSyntax::default();
1202        let syntax = Syntax(InnerSyntax {
1203            block_start: self.block_start.unwrap_or(default.block_start),
1204            block_end: self.block_end.unwrap_or(default.block_end),
1205            expr_start: self.expr_start.unwrap_or(default.expr_start),
1206            expr_end: self.expr_end.unwrap_or(default.expr_end),
1207            comment_start: self.comment_start.unwrap_or(default.comment_start),
1208            comment_end: self.comment_end.unwrap_or(default.comment_end),
1209        });
1210
1211        for (s, k, is_closing) in [
1212            (syntax.block_start, "opening block", false),
1213            (syntax.block_end, "closing block", true),
1214            (syntax.expr_start, "opening expression", false),
1215            (syntax.expr_end, "closing expression", true),
1216            (syntax.comment_start, "opening comment", false),
1217            (syntax.comment_end, "closing comment", true),
1218        ] {
1219            if s.len() < 2 {
1220                return Err(format!(
1221                    "delimiters must be at least two characters long. \
1222                    The {k} delimiter ({s:?}) is too short",
1223                ));
1224            } else if s.len() > 32 {
1225                return Err(format!(
1226                    "delimiters must be at most 32 characters long. \
1227                    The {k} delimiter ({:?}...) is too long",
1228                    &s[..(16..=s.len())
1229                        .find(|&i| s.is_char_boundary(i))
1230                        .unwrap_or(s.len())],
1231                ));
1232            } else if s.chars().any(char::is_whitespace) {
1233                return Err(format!(
1234                    "delimiters may not contain white spaces. \
1235                    The {k} delimiter ({s:?}) contains white spaces",
1236                ));
1237            } else if is_closing
1238                && ['(', '-', '+', '~', '.', '>', '<', '&', '|', '!']
1239                    .contains(&s.chars().next().unwrap())
1240            {
1241                return Err(format!(
1242                    "closing delimiters may not start with operators. \
1243                    The {k} delimiter ({s:?}) starts with operator `{}`",
1244                    s.chars().next().unwrap(),
1245                ));
1246            }
1247        }
1248
1249        // likely to cause catastrophic backtracking in the parser
1250        for infix in [
1251            "&", "&&", "&=", "^", "^=", ",", ".", "..", "...", "..=", "=", "==", ">=", ">", "<=",
1252            "<", "-", "-=", "!=", "!", "|", "|=", "||", "%", "%=", "+", "+=", "<<", "<<=", ">>",
1253            ">>=", "/", "/=", "*", "*=",
1254        ] {
1255            match syntax.expr_end.strip_prefix(infix) {
1256                Some("") => {
1257                    return Err(format!(
1258                        "the closing expression delimiter `{}` must not be a string that could be \
1259                        mistaken for a binary operator",
1260                        syntax.expr_end.escape_debug(),
1261                    ));
1262                }
1263                Some(tail) if tail.as_bytes().iter().all(|c| b"&-!*".contains(c)) => {
1264                    return Err(format!(
1265                        "the closing expression delimiter `{}` must not be a string that could be \
1266                        mistaken for a binary operator `{infix}` followed by a (sequence of) \
1267                        prefix operator(s)",
1268                        syntax.expr_end.escape_debug(),
1269                    ));
1270                }
1271                _ => continue,
1272            }
1273        }
1274
1275        for ((s1, k1), (s2, k2)) in [
1276            (
1277                (syntax.block_start, "block"),
1278                (syntax.expr_start, "expression"),
1279            ),
1280            (
1281                (syntax.block_start, "block"),
1282                (syntax.comment_start, "comment"),
1283            ),
1284            (
1285                (syntax.expr_start, "expression"),
1286                (syntax.comment_start, "comment"),
1287            ),
1288        ] {
1289            if s1.starts_with(s2) || s2.starts_with(s1) {
1290                let (s1, k1, s2, k2) = match s1.len() < s2.len() {
1291                    true => (s1, k1, s2, k2),
1292                    false => (s2, k2, s1, k1),
1293                };
1294                return Err(format!(
1295                    "an opening delimiter may not be the prefix of another delimiter. \
1296                    The {k1} delimiter ({s1:?}) clashes with the {k2} delimiter ({s2:?})",
1297                ));
1298            }
1299        }
1300
1301        Ok(syntax)
1302    }
1303}
1304
/// The nesting level of nodes and expressions.
///
/// The level counts down from [`Level::MAX_DEPTH`] to 0. Once the value would reach below 0,
/// [`Level::nest()`] / [`LevelGuard::nest()`] will return an error. The same [`Level`] instance is
/// shared across all usages in a [`Parsed::new()`] / [`Ast::from_str()`] call, using a reference
/// to an interior mutable counter.
///
/// The wrapped [`Cell`] holds the number of nesting levels that are still available.
#[derive(Debug, Clone)]
struct Level(Cell<usize>);
1313
1314impl Default for Level {
1315    #[inline]
1316    fn default() -> Self {
1317        Self(Cell::new(Level::MAX_DEPTH))
1318    }
1319}
1320
1321impl Level {
1322    const MAX_DEPTH: usize = 128;
1323
1324    /// Acquire a [`LevelGuard`] without decrementing the counter, to be used with loops.
1325    fn guard(&self) -> LevelGuard<'_> {
1326        LevelGuard {
1327            level: self,
1328            count: 0,
1329        }
1330    }
1331
1332    /// Decrement the remaining level counter, and return a [`LevelGuard`] that increments it again
1333    /// when it's dropped.
1334    fn nest<'a: 'l, 'l>(&self, i: &InputStream<'a, 'l>) -> ParseResult<'a, LevelGuard<'_>> {
1335        self.nest_multiple(i, 1)
1336    }
1337
1338    /// Decrement the remaining level counter by `count`, and return a [`LevelGuard`] that
1339    /// increments it again when it's dropped.
1340    fn nest_multiple<'a: 'l, 'l>(
1341        &self,
1342        i: &InputStream<'a, 'l>,
1343        count: usize,
1344    ) -> ParseResult<'a, LevelGuard<'_>> {
1345        if let Some(new_level) = self.0.get().checked_sub(count) {
1346            self.0.set(new_level);
1347            Ok(LevelGuard { level: self, count })
1348        } else {
1349            Self::_fail(i)
1350        }
1351    }
1352
1353    #[cold]
1354    #[inline(never)]
1355    fn _fail<'a: 'l, 'l, T>(i: &InputStream<'a, 'l>) -> ParseResult<'a, T> {
1356        let msg = "your template code is too deeply nested, or the last expression is too complex";
1357        Err(ErrorContext::new(msg, i).cut())
1358    }
1359}
1360
/// Used to keep track how often [`LevelGuard::nest()`] was called and to re-increment the
/// remaining level counter when it is dropped / falls out of scope.
#[must_use]
#[derive(Debug)]
struct LevelGuard<'l> {
    /// The counter that this guard took levels from.
    level: &'l Level,
    /// How many levels this guard currently holds and gives back on drop.
    count: usize,
}
1369
1370impl Drop for LevelGuard<'_> {
1371    fn drop(&mut self) {
1372        self.level.0.set(self.level.0.get() + self.count);
1373    }
1374}
1375
1376impl LevelGuard<'_> {
1377    /// Used to decrement the level multiple times, e.g. for every iteration of a loop.
1378    fn nest<'a: 'l, 'l>(&mut self, i: &InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1379        if let Some(new_level) = self.level.0.get().checked_sub(1) {
1380            self.level.0.set(new_level);
1381            self.count += 1;
1382            Ok(())
1383        } else {
1384            Level::_fail(i)
1385        }
1386    }
1387}
1388
1389fn filter<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, Filter<'a>> {
1390    preceded(('|', not(one_of(['|', '=']))), cut_err(Filter::parse)).parse_next(i)
1391}
1392
/// Strips from `path` the leading components it has in common with `base`.
///
/// The goal of this function is to reduce the path length based on the `base` argument
/// (generally the path where the program is running into). For example:
///
/// ```text
/// current dir: /a/b/c
/// path:        /a/b/c/d/e.txt
/// ```
///
/// `strip_common` will return `d/e.txt`.
///
/// `path` is canonicalized first. The path is returned in full (never empty) if
/// - it cannot be canonicalized (then it is returned exactly as given),
/// - `base` has more components than the canonical path, or
/// - the whole canonical path is in common with `base`.
///
/// If the two paths share no leading component at all, the canonical path is returned as is.
#[must_use]
pub fn strip_common(base: &Path, path: &Path) -> String {
    let canonical = match path.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => return path.display().to_string(),
    };
    let mut remaining = canonical.components().peekable();

    for base_component in base.components() {
        let Some(component) = remaining.peek() else {
            return canonical.display().to_string();
        };
        if *component != base_component {
            break;
        }
        remaining.next();
    }

    // Rebuild the rest as a path instead of joining the component strings by hand: a remaining
    // root component would otherwise be followed by a second separator (e.g. `//tmp`).
    let stripped: std::path::PathBuf = remaining.collect();
    if stripped.as_os_str().is_empty() {
        canonical.display().to_string()
    } else {
        stripped.display().to_string()
    }
}
1430
/// Returns `false` for `self`, `Self`, `super` and `crate`, and `true` for any other name.
#[inline]
pub(crate) fn can_be_variable_name(name: &str) -> bool {
    const FORBIDDEN: [&str; 4] = ["self", "Self", "super", "crate"];
    !FORBIDDEN.contains(&name)
}
1435
/// The primitive integer types; each variant corresponds to the Rust type of the same name
/// (see `INTEGER_TYPES` and the `Display` implementation).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntKind {
    I8,
    I16,
    I32,
    I64,
    I128,
    Isize,
    U8,
    U16,
    U32,
    U64,
    U128,
    Usize,
}
1451
1452impl fmt::Display for IntKind {
1453    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1454        f.write_str(match self {
1455            Self::I8 => "i8",
1456            Self::I16 => "i16",
1457            Self::I32 => "i32",
1458            Self::I64 => "i64",
1459            Self::I128 => "i128",
1460            Self::Isize => "isize",
1461            Self::U8 => "u8",
1462            Self::U16 => "u16",
1463            Self::U32 => "u32",
1464            Self::U64 => "u64",
1465            Self::U128 => "u128",
1466            Self::Usize => "usize",
1467        })
1468    }
1469}
1470
/// The primitive floating point types; each variant corresponds to the Rust type of the same
/// name (see `FLOAT_TYPES`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FloatKind {
    F16,
    F32,
    F64,
    F128,
}
1478
/// Either kind of primitive numeric type; the value type of `NUM_TYPES`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NumKind {
    /// An integer type.
    Int(IntKind),
    /// A floating point type.
    Float(FloatKind),
}
1484
/// Primitive integer types. Also used as number suffixes.
///
/// Each entry pairs the type name with its [`IntKind`].
const INTEGER_TYPES: &[(&str, IntKind)] = &[
    ("i8", IntKind::I8),
    ("i16", IntKind::I16),
    ("i32", IntKind::I32),
    ("i64", IntKind::I64),
    ("i128", IntKind::I128),
    ("isize", IntKind::Isize),
    ("u8", IntKind::U8),
    ("u16", IntKind::U16),
    ("u32", IntKind::U32),
    ("u64", IntKind::U64),
    ("u128", IntKind::U128),
    ("usize", IntKind::Usize),
];
1500
/// Primitive floating point types. Also used as number suffixes.
///
/// Each entry pairs the type name with its [`FloatKind`].
const FLOAT_TYPES: &[(&str, FloatKind)] = &[
    ("f16", FloatKind::F16),
    ("f32", FloatKind::F32),
    ("f64", FloatKind::F64),
    ("f128", FloatKind::F128),
];
1508
1509/// Primitive numeric types. Also used as number suffixes.
1510const NUM_TYPES: &[(&str, NumKind)] = &{
1511    let mut list = [("", NumKind::Int(IntKind::I8)); INTEGER_TYPES.len() + FLOAT_TYPES.len()];
1512    let mut i = 0;
1513    let mut o = 0;
1514    while i < INTEGER_TYPES.len() {
1515        let (name, value) = INTEGER_TYPES[i];
1516        list[o] = (name, NumKind::Int(value));
1517        i += 1;
1518        o += 1;
1519    }
1520    let mut i = 0;
1521    while i < FLOAT_TYPES.len() {
1522        let (name, value) = FLOAT_TYPES[i];
1523        list[o] = (name, NumKind::Float(value));
1524        i += 1;
1525        o += 1;
1526    }
1527    list
1528};
1529
1530/// Complete list of named primitive types.
1531const PRIMITIVE_TYPES: &[&str] = &{
1532    let mut list = [""; NUM_TYPES.len() + 1];
1533    let mut i = 0;
1534    let mut o = 0;
1535    while i < NUM_TYPES.len() {
1536        list[o] = NUM_TYPES[i].0;
1537        i += 1;
1538        o += 1;
1539    }
1540    list[o] = "bool";
1541    list
1542};
1543
/// Length in bytes of the longest entry in `RUST_KEYWORDS`; also the size of each padded entry.
const MAX_RUST_KEYWORD_LEN: usize = 8;
1545
/// The keywords recognized by `is_rust_keyword()`, bucketed by length: the bucket at index
/// `n` holds the keywords that are `n` bytes long. Buckets 0 and 1 are empty.
///
/// Every entry is stored in a fixed-size array of `MAX_RUST_KEYWORD_LEN` elements.
// NOTE(review): `caller` is not a Rust keyword; presumably it is reserved by the template
// language itself. Confirm before removing it.
const RUST_KEYWORDS: &[&[[AsciiChar; MAX_RUST_KEYWORD_LEN]]; MAX_RUST_KEYWORD_LEN + 1] = &{
    const NO_KWS: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[];
    const KW2: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("as"),
        AsciiStr::new_sized("do"),
        AsciiStr::new_sized("fn"),
        AsciiStr::new_sized("if"),
        AsciiStr::new_sized("in"),
    ];
    const KW3: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("box"),
        AsciiStr::new_sized("dyn"),
        AsciiStr::new_sized("for"),
        AsciiStr::new_sized("gen"),
        AsciiStr::new_sized("let"),
        AsciiStr::new_sized("mod"),
        AsciiStr::new_sized("mut"),
        AsciiStr::new_sized("pub"),
        AsciiStr::new_sized("ref"),
        AsciiStr::new_sized("try"),
        AsciiStr::new_sized("use"),
    ];
    const KW4: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("else"),
        AsciiStr::new_sized("enum"),
        AsciiStr::new_sized("impl"),
        AsciiStr::new_sized("loop"),
        AsciiStr::new_sized("move"),
        AsciiStr::new_sized("priv"),
        AsciiStr::new_sized("self"),
        AsciiStr::new_sized("Self"),
        AsciiStr::new_sized("true"),
        AsciiStr::new_sized("type"),
    ];
    const KW5: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("async"),
        AsciiStr::new_sized("await"),
        AsciiStr::new_sized("break"),
        AsciiStr::new_sized("const"),
        AsciiStr::new_sized("crate"),
        AsciiStr::new_sized("false"),
        AsciiStr::new_sized("final"),
        AsciiStr::new_sized("macro"),
        AsciiStr::new_sized("match"),
        AsciiStr::new_sized("super"),
        AsciiStr::new_sized("trait"),
        AsciiStr::new_sized("union"),
        AsciiStr::new_sized("where"),
        AsciiStr::new_sized("while"),
        AsciiStr::new_sized("yield"),
    ];
    const KW6: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("become"),
        AsciiStr::new_sized("extern"),
        AsciiStr::new_sized("return"),
        AsciiStr::new_sized("static"),
        AsciiStr::new_sized("struct"),
        AsciiStr::new_sized("typeof"),
        AsciiStr::new_sized("unsafe"),
        AsciiStr::new_sized("caller"),
    ];
    const KW7: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("unsized"),
        AsciiStr::new_sized("virtual"),
    ];
    const KW8: &[[AsciiChar; MAX_RUST_KEYWORD_LEN]] = &[
        AsciiStr::new_sized("abstract"),
        AsciiStr::new_sized("continue"),
        AsciiStr::new_sized("override"),
    ];

    // Indexed by keyword length, so the two leading buckets stay empty.
    [NO_KWS, NO_KWS, KW2, KW3, KW4, KW5, KW6, KW7, KW8]
};
1619
1620pub fn is_rust_keyword(ident: &str) -> bool {
1621    let ident_len = ident.len();
1622    if ident_len > MAX_RUST_KEYWORD_LEN {
1623        return false;
1624    }
1625    let kws = RUST_KEYWORDS[ident.len()];
1626
1627    let mut padded_ident = [0; MAX_RUST_KEYWORD_LEN];
1628    padded_ident[..ident_len].copy_from_slice(ident.as_bytes());
1629
1630    // Since the individual buckets are quite short, a linear search is faster than a binary search.
1631    for probe in kws {
1632        if padded_ident == *AsciiChar::slice_as_bytes(probe) {
1633            return true;
1634        }
1635    }
1636    false
1637}
1638
/// Expands to a call of `cut_context_err()`, i.e. to an `Err` holding a cut (non-backtracking)
/// [`ErrorContext`] built from `$message` and `$span`.
///
/// Both arguments are converted with [`Into`]. They are evaluated inside a `move` closure
/// that is handed to `cut_context_err()`, which keeps the error construction out of line.
macro_rules! cut_error {
    ($message:expr, $span:expr $(,)?) => {{
        // Absolute paths, so the expansion does not depend on what the call site imported.
        use ::std::convert::Into;
        use ::std::option::Option::Some;
        use $crate::ErrorContext;

        $crate::cut_context_err(
            #[cold]
            #[inline(always)]
            move || ErrorContext {
                span: Into::into($span),
                message: Some(Into::into($message)),
            },
        )
    }};
}
1655
1656pub(crate) use cut_error;
1657
1658#[cold]
1659#[inline(never)]
1660fn cut_context_err<'a, T>(gen_err: impl FnOnce() -> ErrorContext) -> ParseResult<'a, T> {
1661    Err(ErrMode::Cut(gen_err()))
1662}
1663
/// A hash set that uses [`FxBuildHasher`] instead of the standard library's default hasher.
type HashSet<T> = std::collections::hash_set::HashSet<T, FxBuildHasher>;
1665
1666fn deny_any_rust_token<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a, ()> {
1667    let (token, span) = any_rust_token.with_span().parse_next(i)?;
1668    cut_error!(
1669        format!(
1670            "the token `{}` was not expected at this point in the expression",
1671            token.escape_debug(),
1672        ),
1673        span
1674    )
1675}
1676
1677#[cold]
1678#[inline(never)]
1679fn any_rust_token<'a: 'l, 'l>(i: &mut InputStream<'a, 'l>) -> ParseResult<'a> {
1680    // https://docs.rs/syn/2.0.114/src/syn/token.rs.html#748-795
1681    const PUNCTUATIONS: &[&str] = &[
1682        "&", "&&", "&=", "@", "^", "^=", ":", ",", "$", ".", "..", "...", "..=", "=", "==", "=>",
1683        ">=", ">", "<-", "<=", "<", "-", "-=", "!=", "!", "|", "|=", "||", "::", "%", "%=", "+",
1684        "+=", "#", "?", "->", ";", "<<", "<<=", ">>", ">>=", "/", "/=", "*", "*=", "~",
1685        // not a punctuation per se, but a likely typo
1686        "\"", "'", "(", ")", "[", "]", "{", "}",
1687    ];
1688
1689    const ONE: &[u8] = &{
1690        const LEN: usize = {
1691            let mut i = 0;
1692            let mut o = 0;
1693            while i < PUNCTUATIONS.len() {
1694                if PUNCTUATIONS[i].len() == 1 {
1695                    o += 1;
1696                }
1697                i += 1;
1698            }
1699            o
1700        };
1701
1702        let mut result = [0; LEN];
1703        let mut i = 0;
1704        let mut o = 0;
1705        while i < PUNCTUATIONS.len() {
1706            if let &[c] = PUNCTUATIONS[i].as_bytes() {
1707                result[o] = c;
1708                o += 1;
1709            }
1710            i += 1;
1711        }
1712        result
1713    };
1714
1715    const TWO: &[[u8; 2]] = &{
1716        const LEN: usize = {
1717            let mut i = 0;
1718            let mut o = 0;
1719            while i < PUNCTUATIONS.len() {
1720                if PUNCTUATIONS[i].len() == 2 {
1721                    o += 1;
1722                }
1723                i += 1;
1724            }
1725            o
1726        };
1727
1728        let mut result = [*b"12"; LEN];
1729        let mut i = 0;
1730        let mut o = 0;
1731        while i < PUNCTUATIONS.len() {
1732            if let &[a, b] = PUNCTUATIONS[i].as_bytes() {
1733                result[o] = [a, b];
1734                o += 1;
1735            }
1736            i += 1;
1737        }
1738        result
1739    };
1740
1741    const THREE: &[[u8; 3]] = &{
1742        const LEN: usize = {
1743            let mut i = 0;
1744            let mut o = 0;
1745            while i < PUNCTUATIONS.len() {
1746                if PUNCTUATIONS[i].len() == 3 {
1747                    o += 1;
1748                }
1749                i += 1;
1750            }
1751            o
1752        };
1753
1754        let mut result = [*b"123"; LEN];
1755        let mut i = 0;
1756        let mut o = 0;
1757        while i < PUNCTUATIONS.len() {
1758            if let &[a, b, c] = PUNCTUATIONS[i].as_bytes() {
1759                result[o] = [a, b, c];
1760                o += 1;
1761            }
1762            i += 1;
1763        }
1764        result
1765    };
1766
1767    // https://docs.rs/syn/2.0.114/src/syn/token.rs.html#692-746
1768    const KEYWORDS: &[&str] = &[
1769        "abstract", "as", "async", "auto", "await", "become", "box", "break", "const", "continue",
1770        "crate", "default", "do", "dyn", "else", "enum", "extern", "final", "fn", "for", "if",
1771        "impl", "in", "let", "loop", "macro", "match", "mod", "move", "mut", "override", "priv",
1772        "pub", "raw", "ref", "return", "Self", "self", "static", "struct", "super", "trait", "try",
1773        "type", "typeof", "union", "unsafe", "unsized", "use", "virtual", "where", "while",
1774        "yield", // not a keyword in rust, but in askama
1775        "is",
1776    ];
1777
1778    alt((
1779        take(3usize).verify(|s: &str| {
1780            if let Ok(s) = s.as_bytes().try_into() {
1781                THREE.contains(&s)
1782            } else {
1783                false
1784            }
1785        }),
1786        take(2usize).verify(|s: &str| {
1787            if let Ok(s) = s.as_bytes().try_into() {
1788                TWO.contains(&s)
1789            } else {
1790                false
1791            }
1792        }),
1793        take(1usize).verify(|s: &str| {
1794            if let [c] = s.as_bytes() {
1795                ONE.contains(c)
1796            } else {
1797                false
1798            }
1799        }),
1800        identifier.verify(|s: &str| KEYWORDS.contains(&s)),
1801    ))
1802    .parse_next(i)
1803}
1804
1805#[cfg(test)]
1806mod test {
1807    use std::path::Path;
1808
1809    use super::*;
1810
1811    #[cfg(not(windows))]
1812    #[test]
1813    fn test_strip_common() {
1814        // Full path is returned instead of empty when the entire path is in common.
1815        assert_eq!(strip_common(Path::new("home"), Path::new("home")), "home");
1816
1817        let cwd = std::env::current_dir().expect("current_dir failed");
1818
1819        // We need actual existing paths for `canonicalize` to work, so let's do that.
1820        let entry = cwd
1821            .read_dir()
1822            .expect("read_dir failed")
1823            .filter_map(std::result::Result::ok)
1824            .find(|f| f.path().is_file())
1825            .expect("no entry");
1826
1827        // Since they have the complete path in common except for the folder entry name, it should
1828        // return only the folder entry name.
1829        assert_eq!(
1830            strip_common(&cwd, &entry.path()),
1831            entry.file_name().to_string_lossy()
1832        );
1833
1834        // In this case it cannot canonicalize `/a/b/c` so it returns the path as is.
1835        assert_eq!(strip_common(&cwd, Path::new("/a/b/c")), "/a/b/c");
1836    }
1837
1838    #[track_caller]
1839    fn parse_peek<'a: 'l, 'l, T>(
1840        state: &'l State<'l>,
1841        parser: impl FnOnce(&mut InputStream<'a, 'l>) -> ParseResult<'a, T>,
1842        input: &'a str,
1843    ) -> ParseResult<'a, (&'a str, T)> {
1844        let mut i = InputStream {
1845            input: LocatingSlice::new(input),
1846            state,
1847        };
1848        let value = parser(&mut i)?;
1849        Ok((**i, value))
1850    }
1851
/// Checks which inputs `num_lit` accepts, how much of the input it leaves untouched, and which
/// `Num` it produces.
#[test]
fn test_num_lit() {
    let s = State::default();

    // Each entry is `(input, expectation)`: `None` means the parser must reject the input,
    // `Some((rest, num))` means it must succeed, leaving `rest` unparsed and yielding `num`.
    let cases = [
        // Should fail.
        (".", None),
        // Should succeed.
        ("1.2E-02", Some(("", Num::Float("1.2E-02", None)))),
        ("4e3", Some(("", Num::Float("4e3", None)))),
        ("4e+_3", Some(("", Num::Float("4e+_3", None)))),
        // Not supported because Rust wants a number before the `.`.
        (".1", None),
        (".1E-02", None),
        // A `_` directly after the `.` denotes a field.
        ("1._0", Some(("._0", Num::Int("1", None)))),
        ("1_.0", Some(("", Num::Float("1_.0", None)))),
        // Not supported (voluntarily because of `1..` syntax).
        ("1.", Some((".", Num::Int("1", None)))),
        ("1_.", Some((".", Num::Int("1_", None)))),
        ("1_2.", Some((".", Num::Int("1_2", None)))),
        // Numbers with suffixes
        ("-1usize", Some(("", Num::Int("-1", Some(IntKind::Usize))))),
        ("123_f32", Some(("", Num::Float("123_", Some(FloatKind::F32))))),
        (
            "1_.2_e+_3_f64|into_isize",
            Some((
                "|into_isize",
                Num::Float("1_.2_e+_3_", Some(FloatKind::F64)),
            )),
        ),
        ("4e3f128", Some(("", Num::Float("4e3", Some(FloatKind::F128))))),
    ];

    for (input, expectation) in cases {
        let outcome = parse_peek(&s, num_lit, input);
        match expectation {
            Some(expected) => assert_eq!(
                outcome.unwrap(),
                expected,
                "unexpected result for input {input:?}"
            ),
            None => assert!(outcome.is_err(), "input {input:?} should be rejected"),
        }
    }
}
1917
/// Checks which character literals `char_lit` accepts and that the text between the quotes is
/// returned verbatim (escape sequences included).
#[test]
fn test_char_lit() {
    let s = State::default();

    // Literals without a prefix that must be consumed entirely. The expected `content` is the
    // input with its opening and closing quote (one byte each) sliced off.
    let accepted = [
        "'a'",
        "'字'",
        // Escaped single characters.
        r#"'\"'"#,
        r"'\''",
        r"'\t'",
        r"'\n'",
        r"'\r'",
        r"'\0'",
        // Escaped ascii characters (up to `0x7F`).
        r"'\x12'",
        r"'\x02'",
        r"'\x6a'",
        r"'\x7F'",
        // Escaped unicode characters (up to `0x10FFFF`).
        r"'\u{A}'",
        r"'\u{10}'",
        r"'\u{aa}'",
        r"'\u{10FFFF}'",
    ];
    for input in accepted {
        let expected = crate::CharLit {
            prefix: None,
            content: &input[1..input.len() - 1],
        };
        assert_eq!(
            parse_peek(&s, char_lit, input).unwrap(),
            ("", expected),
            "unexpected result for input {input}"
        );
    }

    // Check with `b` prefix.
    let binary = crate::CharLit {
        prefix: Some(crate::CharPrefix::Binary),
        content: "a",
    };
    assert_eq!(parse_peek(&s, char_lit, "b'a'").unwrap(), ("", binary));

    // Should fail.
    let rejected = [
        r"''",
        r"'\o'",
        r"'\x'",
        r"'\x1'",
        r"'\x80'",
        r"'\u'",
        r"'\u{}'",
        r"'\u{110000}'",
    ];
    for input in rejected {
        assert!(
            parse_peek(&s, char_lit, input).is_err(),
            "input {input} should be rejected"
        );
    }
}
1996
/// Checks that `str_lit` recognizes the `b` and `c` string prefixes and rejects an unknown one.
#[test]
fn test_str_lit() {
    let s = State::default();

    // The two accepted inputs only differ in their prefix, so the expected value is built from
    // it; every `contains_*` flag is expected to be unset for the plain `hello` content.
    let hello = |prefix: StrPrefix| StrLit {
        prefix: Some(prefix),
        content: "hello",
        contains_null: false,
        contains_unicode_character: false,
        contains_unicode_escape: false,
        contains_high_ascii: false,
    };

    let accepted = [
        (r#"b"hello""#, StrPrefix::Binary),
        (r#"c"hello""#, StrPrefix::CLike),
    ];
    for (input, prefix) in accepted {
        assert_eq!(
            parse_peek(&s, str_lit, input).unwrap(),
            ("", hello(prefix)),
            "unexpected result for input {input}"
        );
    }

    // `d` is not a known string prefix.
    assert!(parse_peek(&s, str_lit, r#"d"hello""#).is_err());
}
2030
/// Checks `is_rust_keyword` against a few identifiers it must accept and a few it must not.
#[test]
fn test_is_rust_keyword() {
    for keyword in ["caller", "super", "become"] {
        assert!(
            is_rust_keyword(keyword),
            "{keyword:?} should be reported as a keyword"
        );
    }

    // Near misses: a stretched-out `super` and a short non-keyword.
    for ident in ["supeeeer", "sur"] {
        assert!(
            !is_rust_keyword(ident),
            "{ident:?} should not be reported as a keyword"
        );
    }
}
2039
/// Checks that `check_base_digits` accepts exactly the digit strings that are valid for the
/// given base.
#[test]
fn test_check_base_digits() {
    // `(digits, base, valid)`: whether `digits` is expected to pass the check for `base`.
    let cases = [
        ("10", 2, true),
        ("13", 2, false),
        ("13", 8, true),
        ("79", 8, false),
        // Checking that it's case insensitive.
        ("13F", 16, true),
        ("13f", 16, true),
        // Checking that `_` is allowed.
        ("13_f", 16, true),
    ];

    for (digits, base, valid) in cases {
        assert_eq!(
            check_base_digits(digits, base, 0..1).is_ok(),
            valid,
            "digits {digits:?} in base {base}"
        );
    }
}
2052}