// bracket/lexer.rs

1//! Iterator for grammar tokens.
2use logos::{Lexer as Lex, Logos, Span};
3
/// Identity type for the lexer modes.
///
/// Every token enum below declares `#[logos(extras = Extras)]`, so all
/// modes share this empty state and their lexers can `morph()` into one
/// another without carrying any data across the switch.
#[derive(Clone, Default)]
pub struct Extras;
7
/// Tokens for the document and nested blocks.
///
/// This is the top-level lexer mode; the `Start*`/`End*` tokens cause
/// the `Lexer` iterator to switch into the corresponding sub-mode.
#[derive(Logos, Clone, Debug, Eq, PartialEq)]
#[logos(extras = Extras)]
// NOTE(review): this `identifier` subpattern is declared but never
// referenced by any pattern in this enum — possibly leftover from the
// `Parameters` definition below; confirm before removing.
#[logos(subpattern identifier = r#"[^\s"!#%&'()*+,./;<=>@\[/\]^`{|}~]"#)]
pub enum Block {
    /// Start a raw block: `{{{{` (optional `~`), plus trailing tabs/spaces.
    #[regex(r"\{\{\{\{~?[\t ]*")]
    StartRawBlock,

    /// Start a raw comment: `{{!--`.
    #[regex(r"\{\{!--")]
    StartRawComment,

    /// Start a raw (escaped) statement: `\{{` or `\{{{`.
    #[regex(r"\\\{\{\{?")]
    StartRawStatement,

    /// Start a comment: `{{!`.
    #[regex(r"\{\{!")]
    StartComment,

    /// Start a statement: `{{` or `{{{`, optional `~`, trailing tabs/spaces.
    #[regex(r"\{\{\{?~?[\t ]*")]
    StartStatement,

    /// Start a block scope: `{{` (optional `~`) followed by `#`.
    #[regex(r"\{\{\~?[\t ]*#[\t ]*")]
    StartBlockScope,

    /// Start a link: `[[`, optionally preceded by a backslash escape.
    #[regex(r"\\?\[\[")]
    StartLink,

    /// End a block scope: `{{` (optional `~`) followed by `/`.
    #[regex(r"\{\{\~?[\t ]*/")]
    EndBlockScope,

    /// End a raw block: `{{{{` (optional `~`) followed by `/`.
    #[regex(r"\{\{\{\{~?[\t ]*/")]
    EndRawBlock,

    /// Text token: a single character (`.` does not match `\n`).
    #[regex(r".")]
    Text,

    /// Newline token.
    #[token("\n")]
    Newline,

    /// Error token emitted when no other pattern matches.
    #[error]
    Error,
}
61
/// Tokens for raw comments.
///
/// Raw comments can contain statements and blocks which will
/// not be rendered. They begin with `{{!--` and are terminated
/// with `--}}`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
#[logos(extras = Extras)]
pub enum RawComment {
    /// Text token: a single non-newline character.
    #[regex(r".")]
    Text,

    /// End of raw comment: `--}}` (returns the lexer to `Block` mode).
    #[regex(r"--\}\}")]
    End,

    /// Newline token.
    #[token("\n")]
    Newline,

    /// Error token emitted when no other pattern matches.
    #[error]
    Error,
}
86
/// Tokens for raw statements.
///
/// Raw statements are single-line statements escaped with a
/// backslash, for example: `\{{title}}`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
#[logos(extras = Extras)]
pub enum RawStatement {
    /// Text token: a single non-newline character.
    #[regex(r".")]
    Text,

    /// End of raw statement: `}}` or `}}}`, with an optional leading `~`.
    #[regex(r"~?\}?\}\}")]
    End,

    /// Newline token.
    #[token("\n")]
    Newline,

    /// Error token emitted when no other pattern matches.
    #[error]
    Error,
}
110
/// Tokens for comments.
///
/// Comments may **not** contain statements and blocks.
/// They begin with `{{!` and are terminated with `}}`.
///
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
#[logos(extras = Extras)]
pub enum Comment {
    /// Text token: a single non-newline character.
    #[regex(r".")]
    Text,

    /// End of comment: `}}` (returns the lexer to `Block` mode).
    #[regex(r"\}\}")]
    End,

    /// Newline token.
    #[token("\n")]
    Newline,

    /// Error token emitted when no other pattern matches.
    #[error]
    Error,
}
135
/// Tokens for parameters.
///
/// Parameters are converted to a call statement by the parser and must
/// represent all the tokens in a statement (`{{...}}`) and the start
/// of a block (`{{# block}}...{{/block}}`).
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
#[logos(extras = Extras)]
// Identifiers are any run of characters excluding whitespace and the
// listed punctuation/delimiter characters.
#[logos(subpattern identifier = r#"[^\s"!#%&'()*+,./;<=>@\[/\]^`{|}~]"#)]
pub enum Parameters {
    /// Token for a partial instruction.
    #[token(r">")]
    Partial,

    /// Token for the `else` conditional keyword.
    #[token(r"else")]
    ElseKeyword,

    /// Token for the explicit `this` keyword.
    #[token(r"this")]
    ExplicitThisKeyword,

    /// Token for explicit `this` notation using a dot and a slash.
    #[token("./")]
    ExplicitThisDotSlash,

    /// Token for a reference to a parent scope (`../`).
    #[token("../")]
    ParentRef,

    /// Token for a valid identifier.
    #[regex(r"(?&identifier)+", priority = 2)]
    Identifier,

    /// Token for a local identifier (preceded by an `@` symbol).
    #[regex(r"@(?&identifier)+")]
    LocalIdentifier,

    /// Token for the delimiter between path components (`.` or `/`).
    #[regex(r"[./]")]
    PathDelimiter,

    /// Token that starts a double-quoted string literal
    /// (switches the lexer into `DoubleQuoteString` mode).
    #[token("\"")]
    DoubleQuoteString,

    /// Token that starts a single-quoted string literal
    /// (switches the lexer into `SingleQuoteString` mode).
    #[token("'")]
    SingleQuoteString,

    /// Token that starts a raw literal using square brackets
    /// (switches the lexer into `Array` mode).
    #[token("[")]
    StartArray,

    /// Token that starts a sub-expression.
    #[token("(", priority = 3)]
    StartSubExpression,

    /// Token that ends a sub-expression.
    #[token(")")]
    EndSubExpression,

    /// Token for key/value pairs (hash parameters): `key=`.
    #[regex(r"(?&identifier)+=")]
    HashKey,

    /// Token for numeric values (optional sign, fraction and exponent).
    // NOTE: Must have higher priority than identifier
    // NOTE: otherwise numbers become identifiers
    #[regex(r"-?([0-9]+\.)?[0-9]+((e|E)[+-]?[0-9]+)?", priority = 3)]
    Number,

    /// Token for the `true` keyword.
    #[token("true")]
    True,

    /// Token for the `false` keyword.
    #[token("false")]
    False,

    /// Token for the `null` keyword.
    #[token("null")]
    Null,

    /// Token for whitespace delimiters (spaces and tabs).
    #[regex(r"[ \t]+")]
    WhiteSpace,

    /// Token for the end of a statement or block open tag:
    /// two to four closing braces with an optional leading `~`.
    #[regex(r"~?\}?\}?\}\}")]
    End,

    /// Newline token.
    #[token("\n")]
    Newline,

    /// Error token emitted when no other pattern matches.
    #[error]
    Error,
}
235
/// Tokens for double-quoted string literals.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
#[logos(extras = Extras)]
pub enum DoubleQuoteString {
    /// Text token: a run of characters excluding backslash,
    /// double quote and newline.
    // NOTE(review): a backslash followed by anything other than `n` or
    // `"` lexes as `Error` — confirm that is the intended behavior.
    #[regex(r#"[^\\"\n]+"#)]
    Text,

    /// Escaped newline token (`\n` as two characters).
    #[token("\\n")]
    EscapedNewline,

    /// Escaped quote (`\"`).
    #[token(r#"\""#)]
    Escaped,

    /// End of the string literal (returns to `Parameters` mode).
    #[token("\"")]
    End,

    /// Newline token.
    #[token("\n")]
    Newline,

    /// Error token emitted when no other pattern matches.
    #[error]
    Error,
}
264
/// Tokens for single-quoted string literals.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
#[logos(extras = Extras)]
pub enum SingleQuoteString {
    /// Text token: a run of characters excluding backslash,
    /// single quote and newline.
    #[regex(r#"[^\\'\n]+"#)]
    Text,

    /// Escaped newline token (`\n` as two characters).
    #[token("\\n")]
    EscapedNewline,

    /// Escaped quote (`\'`).
    #[token(r#"\'"#)]
    Escaped,

    /// End of the string literal (returns to `Parameters` mode).
    #[token("'")]
    End,

    /// Newline token.
    #[token("\n")]
    Newline,

    /// Error token emitted when no other pattern matches.
    #[error]
    Error,
}
293
/// Tokens for square bracket raw literals.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
#[logos(extras = Extras)]
pub enum Array {
    /// Text token: a run of characters excluding `]` and newline.
    #[regex(r#"[^\]\n]+"#)]
    Text,

    //#[token("\\n")]
    //EscapedNewline,
    /// Escaped bracket (`\]`).
    #[token(r#"\]"#)]
    Escaped,

    /// End of the raw literal (returns to `Parameters` mode).
    #[token("]")]
    End,

    /// Newline token.
    #[token("\n")]
    Newline,

    /// Error token emitted when no other pattern matches.
    #[error]
    Error,
}
320
321/// Tokens for links.
322#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
323#[logos(extras = Extras)]
324pub enum Link {
325    /// Text token.
326    #[regex(r#"[^\\|\]]+"#)]
327    Text,
328
329    /// Pipe delimiter token.
330    #[token("|")]
331    Pipe,
332
333    /// Escaped newline token.
334    #[token("\\n")]
335    EscapedNewline,
336
337    /// Escaped pipe token.
338    #[token(r#"\|"#)]
339    EscapedPipe,
340
341    /// Escaped bracket token.
342    #[token(r#"\]"#)]
343    Escaped,
344
345    /// End of square bracket literal.
346    #[token(r"]]")]
347    End,
348
349    /// Newline token.
350    #[token("\n")]
351    Newline,
352
353    /// Error token.
354    #[error]
355    Error,
356}
357
/// Enumeration of the token types.
///
/// Each variant wraps the mode-specific token alongside the byte span
/// it was lexed from.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token {
    /// Block token.
    Block(Block, Span),
    /// Raw comment token.
    RawComment(RawComment, Span),
    /// Raw statement token.
    RawStatement(RawStatement, Span),
    /// Comment token.
    Comment(Comment, Span),
    /// Token for call parameters.
    Parameters(Parameters, Span),
    /// Token for a double-quoted string literal.
    DoubleQuoteString(DoubleQuoteString, Span),
    /// Token for a single-quoted string literal.
    SingleQuoteString(SingleQuoteString, Span),
    /// Token for a raw square bracket literal.
    Array(Array, Span),
    /// Token for links.
    Link(Link, Span),
}
380
381impl Token {
382    /// Get the span for a token.
383    pub fn span(&self) -> &Span {
384        match self {
385            Token::Block(_, ref span) => span,
386            Token::RawComment(_, ref span) => span,
387            Token::RawStatement(_, ref span) => span,
388            Token::Comment(_, ref span) => span,
389            Token::Parameters(_, ref span) => span,
390            Token::DoubleQuoteString(_, ref span) => span,
391            Token::SingleQuoteString(_, ref span) => span,
392            Token::Array(_, ref span) => span,
393            Token::Link(_, ref span) => span,
394        }
395    }
396
397    /// Determine if a token should be treated as text.
398    pub fn is_text(&self) -> bool {
399        match self {
400            Token::Block(ref t, _) => t == &Block::Text || t == &Block::Newline,
401            Token::RawComment(ref t, _) => {
402                t == &RawComment::Text || t == &RawComment::Newline
403            }
404            Token::RawStatement(ref t, _) => {
405                t == &RawStatement::Text || t == &RawStatement::Newline
406            }
407            Token::Comment(ref t, _) => {
408                t == &Comment::Text || t == &Comment::Newline
409            }
410            Token::Parameters(_, _) => false,
411            Token::DoubleQuoteString(_, _) => false,
412            Token::SingleQuoteString(_, _) => false,
413            Token::Array(_, _) => false,
414            Token::Link(_, _) => false,
415        }
416    }
417
418    /// Determine if a token is the newline token.
419    pub fn is_newline(&self) -> bool {
420        match *self {
421            Token::RawComment(ref lex, _) => lex == &RawComment::Newline,
422            Token::RawStatement(ref lex, _) => lex == &RawStatement::Newline,
423            Token::Comment(ref lex, _) => lex == &Comment::Newline,
424            //Token::RawBlock(ref lex, _) => lex == &Block::Newline,
425            Token::Block(ref lex, _) => lex == &Block::Newline,
426            Token::Parameters(ref lex, _) => lex == &Parameters::Newline,
427            Token::DoubleQuoteString(ref lex, _) => {
428                lex == &DoubleQuoteString::Newline
429            }
430            Token::SingleQuoteString(ref lex, _) => {
431                lex == &SingleQuoteString::Newline
432            }
433            Token::Array(ref lex, _) => lex == &Array::Newline,
434            Token::Link(ref lex, _) => lex == &Link::Newline,
435        }
436    }
437}
438
// Lexer modes, one variant per token enum.
//
// The active variant determines which logos lexer consumes the next
// token; the `Iterator` implementation for `Lexer` swaps variants by
// `morph()`ing the current lexer when start/end tokens are seen.
enum Modes<'source> {
    Block(Lex<'source, Block>),
    RawComment(Lex<'source, RawComment>),
    RawStatement(Lex<'source, RawStatement>),
    Comment(Lex<'source, Comment>),
    Parameters(Lex<'source, Parameters>),
    DoubleQuoteString(Lex<'source, DoubleQuoteString>),
    SingleQuoteString(Lex<'source, SingleQuoteString>),
    Array(Lex<'source, Array>),
    Link(Lex<'source, Link>),
}
450
impl<'source> Modes<'source> {
    // Create the initial mode: a top-level `Block` lexer over the
    // full source.
    fn new(s: &'source str) -> Self {
        Self::Block(Block::lexer(s))
    }
}
456
/// Iterator for a stream of grammar tokens.
pub struct Lexer<'source> {
    // Current lexer mode; swapped as start/end tokens are seen.
    mode: Modes<'source>,
}
461
462impl<'source> Lexer<'source> {
463    /// Utility for switching the lexer to parameters mode.
464    ///
465    /// Must be called immediately after creating the lexer otherwise
466    /// it is not guaranteed to change the lexer mode.
467    pub(crate) fn set_parameters_mode(&mut self) {
468        match &mut self.mode {
469            Modes::Block(lexer) => {
470                self.mode = Modes::Parameters(lexer.to_owned().morph())
471            }
472            _ => {}
473        }
474    }
475
476    /// Consume nodes until we can return to the top-level mode.
477    ///
478    /// This is used during *lint* mode to move back to the top-level
479    /// parsing mode.
480    pub(crate) fn until_mode(&mut self) -> Option<Token> {
481        while let Some(token) = self.next() {
482            match token {
483                Token::Block(_, _) => return Some(token),
484                _ => {}
485            }
486        }
487        None
488    }
489}
490
491/// Clone lexers as we switch between modes.
492impl<'source> Iterator for Lexer<'source> {
493    type Item = Token;
494    fn next(&mut self) -> Option<Self::Item> {
495        match &mut self.mode {
496            Modes::Block(lexer) => {
497                let result = lexer.next();
498                let span = lexer.span();
499
500                if let Some(token) = result {
501                    if Block::StartRawBlock == token {
502                        self.mode = Modes::Parameters(lexer.to_owned().morph());
503                    } else if Block::EndRawBlock == token {
504                        self.mode = Modes::Parameters(lexer.to_owned().morph());
505                    } else if Block::StartRawComment == token {
506                        self.mode = Modes::RawComment(lexer.to_owned().morph());
507                    } else if Block::StartRawStatement == token {
508                        self.mode =
509                            Modes::RawStatement(lexer.to_owned().morph());
510                    } else if Block::StartComment == token {
511                        self.mode = Modes::Comment(lexer.to_owned().morph());
512                    } else if Block::StartStatement == token {
513                        self.mode = Modes::Parameters(lexer.to_owned().morph());
514                    } else if Block::StartBlockScope == token {
515                        self.mode = Modes::Parameters(lexer.to_owned().morph());
516                    } else if Block::EndBlockScope == token {
517                        self.mode = Modes::Parameters(lexer.to_owned().morph());
518                    } else if Block::StartLink == token {
519                        self.mode = Modes::Link(lexer.to_owned().morph());
520                    }
521                    Some(Token::Block(token, span))
522                } else {
523                    None
524                }
525            }
526            Modes::RawComment(lexer) => {
527                let result = lexer.next();
528                let span = lexer.span();
529
530                if let Some(token) = result {
531                    if RawComment::End == token {
532                        self.mode = Modes::Block(lexer.to_owned().morph());
533                    }
534                    Some(Token::RawComment(token, span))
535                } else {
536                    None
537                }
538            }
539            Modes::RawStatement(lexer) => {
540                let result = lexer.next();
541                let span = lexer.span();
542
543                if let Some(token) = result {
544                    if RawStatement::End == token {
545                        self.mode = Modes::Block(lexer.to_owned().morph());
546                    }
547                    Some(Token::RawStatement(token, span))
548                } else {
549                    None
550                }
551            }
552            Modes::Comment(lexer) => {
553                let result = lexer.next();
554                let span = lexer.span();
555
556                if let Some(token) = result {
557                    if Comment::End == token {
558                        self.mode = Modes::Block(lexer.to_owned().morph());
559                    }
560                    Some(Token::Comment(token, span))
561                } else {
562                    None
563                }
564            }
565            Modes::Parameters(lexer) => {
566                let result = lexer.next();
567                let span = lexer.span();
568
569                if let Some(token) = result {
570                    if Parameters::DoubleQuoteString == token {
571                        self.mode =
572                            Modes::DoubleQuoteString(lexer.to_owned().morph());
573                    } else if Parameters::SingleQuoteString == token {
574                        self.mode =
575                            Modes::SingleQuoteString(lexer.to_owned().morph());
576                    } else if Parameters::StartArray == token {
577                        self.mode = Modes::Array(lexer.to_owned().morph());
578                    } else if Parameters::End == token {
579                        self.mode = Modes::Block(lexer.to_owned().morph());
580                    }
581                    Some(Token::Parameters(token, span))
582                } else {
583                    None
584                }
585            }
586            Modes::DoubleQuoteString(lexer) => {
587                let result = lexer.next();
588                let span = lexer.span();
589
590                if let Some(token) = result {
591                    if DoubleQuoteString::End == token {
592                        self.mode = Modes::Parameters(lexer.to_owned().morph());
593                    }
594                    Some(Token::DoubleQuoteString(token, span))
595                } else {
596                    None
597                }
598            }
599            Modes::SingleQuoteString(lexer) => {
600                let result = lexer.next();
601                let span = lexer.span();
602
603                if let Some(token) = result {
604                    if SingleQuoteString::End == token {
605                        self.mode = Modes::Parameters(lexer.to_owned().morph());
606                    }
607                    Some(Token::SingleQuoteString(token, span))
608                } else {
609                    None
610                }
611            }
612            Modes::Array(lexer) => {
613                let result = lexer.next();
614                let span = lexer.span();
615
616                if let Some(token) = result {
617                    if Array::End == token {
618                        self.mode = Modes::Parameters(lexer.to_owned().morph());
619                    }
620                    Some(Token::Array(token, span))
621                } else {
622                    None
623                }
624            }
625            Modes::Link(lexer) => {
626                let result = lexer.next();
627                let span = lexer.span();
628
629                if let Some(token) = result {
630                    if Link::End == token {
631                        self.mode = Modes::Block(lexer.to_owned().morph());
632                    }
633                    Some(Token::Link(token, span))
634                } else {
635                    None
636                }
637            }
638        }
639    }
640}
641
642fn normalize(tokens: Vec<Token>) -> Vec<Token> {
643    let mut normalized: Vec<Token> = Vec::new();
644    let mut span: Option<Span> = None;
645
646    for t in tokens.into_iter() {
647        if t.is_text() {
648            if let Some(ref mut span) = span {
649                span.end = t.span().end;
650            } else {
651                span = Some(t.span().clone());
652            }
653        } else {
654            if let Some(span) = span.take() {
655                normalized.push(Token::Block(Block::Text, span));
656                normalized.push(t);
657            } else {
658                normalized.push(t);
659            }
660        }
661    }
662
663    if let Some(span) = span.take() {
664        normalized.push(Token::Block(Block::Text, span));
665    }
666
667    normalized
668}
669
670/// Get a token iterator for the given source template.
671///
672/// The returned iterator will emit tokens of type `Token`.
673pub fn lex(s: &str) -> Lexer {
674    Lexer {
675        mode: Modes::new(s),
676    }
677}
678
679/// Collect the input source into a vector of tokens.
680///
681/// If the normalized flag is given consecutive text tokens
682/// are coalesced into a single token.
683///
684/// The normalized flag is useful for test cases; the parser
685/// will perform it's own normalization to reduce the number of
686/// passes on the token stream.
687pub fn collect(s: &str, normalized: bool) -> Vec<Token> {
688    let tokens = lex(s).collect();
689    if normalized {
690        normalize(tokens)
691    } else {
692        tokens
693    }
694}