Skip to main content

tokel_engine/
syntax.rs

1//! The Abstract Syntax Tree (AST) for Tokel.
2//!
3//! This module contains the data structures and `syn::parse::Parse` implementations
4//! that represent the Tokel grammar. It is responsible for taking a raw
5//! `proc_macro2::TokenStream` and structuring it into a recursive AST that can be
6//! evaluated bottom-up by the engine.
7//!
8//! # Grammar Mapping
9//!
10//! The types in this module directly correspond to the formal EBNF grammar:
11//!
12//! * **`TokelStream`**: The root sequence of elements. Represents `TokelStream ::= Element*`.
13//! * **`Element`**: An individual unit in the stream. It is either a standard Rust token tree
14//!   (`TokelTree`) or an expansion block (`[ Block ] Pipeline`).
15//! * **`Block`**: The inner contents of an expansion block (the `< TokelStream >` part).
16//! * **`Pipeline`**: A sequence of one or more transformers attached to the end of an
17//!   expansion block (e.g., `:case[[pascal]]:prefix[[Get]]`).
//! * **`Pipe`**: A single transformation operation (like `case`) and its optional
//!   argument parsed from double brackets (`[[ ... ]]`).
20//!
21//! # Parsing Strategy
22//!
23//! The parser performs a deep traversal of the incoming token stream. When it encounters
24//! standard Rust delimiters (`()`, `{}`, or `[]` that do not start with `<`), it recursively
25//! parses their inner contents as a new `TokelStream`.
26//!
27//! This recursive parsing ensures that expansion blocks deeply nested inside standard Rust
28//! code, `None`-delimited macro capture groups, or even inside transformer arguments, are
29//! accurately located and represented in the final AST.
30
31use std::{iter, ops::Not};
32
33use syn::{
34    Token,
35    parse::{Parse, ParseStream, discouraged::Speculative},
36    token::Bracket,
37};
38
39use proc_macro2::{Delimiter, Ident, Literal, Punct, Span, TokenStream, TokenTree};
40
41/// The `:`-punctuated sequence of [`Pipe`], to form a complete pipeline for an expansion block.
42#[derive(Debug, Clone)]
43pub struct Pipeline(pub Vec<Pipe>);
44
45impl Parse for Pipeline {
46    fn parse(input: ParseStream) -> syn::Result<Self> {
47        let mut target_list = Vec::new();
48
49        while input.peek(Token![:]) {
50            target_list.push(input.parse::<Pipe>()?);
51        }
52
53        Ok(Self(target_list))
54    }
55}
56
57/// A pass of a single transformer with the provided argument.
58#[derive(Debug, Clone)]
59pub struct Pipe {
60    /// The preceding `:` token.
61    pub colon_token: Token![:],
62
63    /// The name of the transformer that is to be used within the active context.
64    pub name: Ident,
65
66    /// The argument that has been passed to the respective transformer.
67    pub argument: Option<((Bracket, Bracket), TokelStream)>,
68}
69
70impl Parse for Pipe {
71    fn parse(input: ParseStream) -> syn::Result<Self> {
72        let colon_token: Token![:] = input.parse()?;
73
74        let name: Ident = input.parse()?;
75
76        let argument = None;
77
78        // NOTE: Determine whether we are looking at a `[[...]]` textually.
79        if input.peek(Bracket) {
80            let fork = input.fork();
81
82            let content_left;
83
84            let bracket_left = syn::bracketed!(content_left in fork);
85
86            if content_left.peek(Bracket) {
87                // NOTE: Advance to the fork, as it was indeed an argument.
88                input.advance_to(&fork);
89
90                let content_right;
91
92                let bracket_right = syn::bracketed!(content_right in content_left);
93
94                let argument = (
95                    (bracket_left, bracket_right),
96                    content_right.parse::<TokelStream>()?,
97                );
98
99                let argument = Some(argument);
100
101                Ok(Self {
102                    colon_token,
103                    name,
104                    argument,
105                })
106            } else
107            /* NOTE: Inner token-stream does not contain another bracket, ignore the top-level bracketed group. */
108            {
109                Ok(Self {
110                    colon_token,
111                    name,
112                    argument,
113                })
114            }
115        } else {
116            Ok(Self {
117                colon_token,
118                name,
119                argument,
120            })
121        }
122    }
123}
124
125/// An expansion block.
126#[derive(Debug, Clone)]
127pub struct Block {
128    /// The less-than token part of this expansion block.
129    pub lt_token: Token![<],
130
131    /// The tokel-specific token-stream contained within this expansion block.
132    pub stream: TokelStream,
133
134    /// The greater-than token part of this expansion block.
135    pub gt_token: Token![>],
136}
137
138impl Parse for Block {
139    fn parse(input: ParseStream) -> syn::Result<Self> {
140        let lt_token = input.parse()?;
141
142        let mut tree_list = Vec::new();
143
144        while input.is_empty().not() {
145            tree_list.push(input.parse::<TokenTree>()?);
146        }
147
148        let Some(last_tree) = tree_list.pop() else {
149            return Err(syn::Error::new_spanned(
150                lt_token,
151                "missmatched chevron token",
152            ));
153        };
154
155        let gt_token = {
156            let mut token_stream = TokenStream::new();
157
158            token_stream.extend(iter::once(last_tree));
159
160            syn::parse2(token_stream)?
161        };
162
163        let stream = {
164            let mut token_stream = TokenStream::new();
165
166            token_stream.extend(tree_list);
167
168            syn::parse2(token_stream)?
169        };
170
171        Ok(Self {
172            lt_token,
173            stream,
174            gt_token,
175        })
176    }
177}
178
179/// An element inside a tokel-processed token-stream.
180///
181/// An element can be either an expansion block or a token tree.
182#[derive(Debug, Clone)]
183pub enum Element {
184    /// An expansion block in a tokel-stream.
185    Block {
186        /// The expansion block content.
187        block: Block,
188
189        /// The pipeline to be used for the token-stream.
190        pipeline: Option<Pipeline>,
191    },
192
193    /// An unmodified tokel-tree.
194    Tree(TokelTree),
195}
196
197impl Parse for Element {
198    fn parse(input: ParseStream) -> syn::Result<Self> {
199        match input.parse::<TokenTree>()? {
200            TokenTree::Group(group) => {
201                let mut iter = group.stream().into_iter();
202
203                iter.next()
204                    .and_then(|first| iter.last().map(|last| (first, last)))
205                    .and_then(|(first, last)| match (first, last) {
206                        (TokenTree::Punct(left), TokenTree::Punct(right))
207                            if left.as_char() == '<' && right.as_char() == '>' =>
208                        {
209                            Some(syn::parse2::<Block>(group.stream()).map(|block| {
210                                (input.peek(Token![:]) && input.peek2(syn::Ident))
211                                    .then(|| input.parse::<Pipeline>())
212                                    .transpose()
213                                    .map(|pipeline| Self::Block { block, pipeline })
214                            }))
215                        }
216                        _ => None,
217                    })
218                    .filter(|_| group.delimiter() == Delimiter::Bracket)
219                    .transpose()?
220                    .unwrap_or_else(|| {
221                        let (delimiter, span, tokens) =
222                            (group.delimiter(), group.span(), group.stream());
223
224                        let stream = syn::parse2(tokens)?;
225
226                        Ok(Self::Tree(TokelTree::Group(TokelGroup {
227                            delimiter,
228                            span,
229                            stream,
230                        })))
231                    })
232            }
233            target_tree => {
234                let target_variant = match target_tree {
235                    TokenTree::Ident(ident) => TokelTree::Ident(ident),
236                    TokenTree::Punct(punct) => TokelTree::Punct(punct),
237                    TokenTree::Literal(literal) => TokelTree::Literal(literal),
238                    TokenTree::Group(..) => unreachable!(),
239                };
240
241                Ok(Self::Tree(target_variant))
242            }
243        }
244    }
245}
246
247/// A tokel-specific `TokenTree`.
248///
249/// This is used to perform recursive-descent expansion of all tokens.
250#[derive(Debug, Clone)]
251pub enum TokelTree {
252    /// A delimited sequence of tokens.
253    Group(TokelGroup),
254
255    /// An identifier.
256    Ident(Ident),
257
258    /// A literal.
259    Literal(Literal),
260
261    /// A punctuation fragment.
262    Punct(Punct),
263}
264
265/// A tokel-specific `TokenGroup`.
266#[derive(Debug, Clone)]
267pub struct TokelGroup {
268    /// The delimiter of this tokel-group.
269    ///
270    /// Note that any `None`-delimited [`TokenTree`] is acknowledged and will be processed verbatim.
271    pub delimiter: Delimiter,
272
273    /// A delimiter span.
274    pub span: Span,
275
276    /// The tokel-specific token-stream that this tokel-group embeds.
277    pub stream: TokelStream,
278}
279
280/// A tokel-specific stream of source-level elements.
281///
282/// See the [`Element`] item for further information.
283#[derive(Debug, Clone)]
284#[repr(transparent)]
285pub struct TokelStream(pub Vec<Element>);
286
287impl Parse for TokelStream {
288    fn parse(input: ParseStream) -> syn::Result<Self> {
289        let mut target_list = Vec::new();
290
291        loop {
292            if input.is_empty() {
293                break Ok(Self(target_list));
294            }
295
296            target_list.push(input.parse()?);
297        }
298    }
299}
300
#[cfg(test)]
mod tests {
    use super::*;
    use quote::quote;

    /// Parses `tokens` as a [`TokelStream`], panicking if the parse fails.
    #[track_caller]
    fn parse_stream(tokens: proc_macro2::TokenStream) -> TokelStream {
        syn::parse2(tokens).unwrap()
    }

    #[test]
    fn parse_empty_stream() {
        let TokelStream(elements) = parse_stream(quote! {});

        assert!(elements.is_empty(), "Stream should have exactly 0 elements");
    }

    #[test]
    fn parse_standard_rust_tokens() {
        let TokelStream(elements) = parse_stream(quote! { pub fn hello() {} });

        // `pub`, `fn`, `hello`, `()`, `{}`
        assert_eq!(elements.len(), 5);

        assert!(
            elements
                .iter()
                .all(|element| matches!(element, Element::Tree(_))),
            "Expected only standard trees, found an expansion block!"
        );
    }

    #[test]
    fn parse_simple_chevron_block() {
        let ast = parse_stream(quote! { [< a b c >] });

        assert_eq!(ast.0.len(), 1, "Should parse as exactly one element");

        let Element::Block { block, pipeline } = &ast.0[0] else {
            panic!("Expected an Element::Block");
        };

        // The block holds the three identifiers `a`, `b` and `c`.
        assert_eq!(block.stream.0.len(), 3);
        // Nothing follows the block, so no pipeline is attached.
        assert!(pipeline.is_none());
    }

    #[test]
    fn parse_block_with_pipeline() {
        let ast = parse_stream(quote! { [< x >]:case:append[[_suffix]] });

        let Element::Block {
            pipeline: Some(pipeline),
            ..
        } = &ast.0[0]
        else {
            panic!("Expected an Element::Block with a Pipeline");
        };

        let pipes = &pipeline.0;
        assert_eq!(pipes.len(), 2);

        // First stage: `case`, without an argument.
        let case = &pipes[0];
        assert_eq!(case.name.to_string(), "case");
        assert!(case.argument.is_none());

        // Second stage: `append`, carrying `[[_suffix]]`.
        let append = &pipes[1];
        assert_eq!(append.name.to_string(), "append");

        let (_, argument) = append
            .argument
            .as_ref()
            .expect("Expected args for append");
        assert_eq!(argument.0.len(), 1); // `_suffix`
    }

    #[test]
    fn parse_nested_blocks() {
        let ast = parse_stream(quote! { [< [< inner >]:first >]:second });

        // Outer block.
        let Element::Block {
            block: outer_block,
            pipeline: Some(outer_pipeline),
        } = &ast.0[0]
        else {
            panic!("Expected an outer Element::Block");
        };

        assert_eq!(outer_pipeline.0[0].name.to_string(), "second");
        assert_eq!(outer_block.stream.0.len(), 1);

        // Inner block.
        let Element::Block {
            block: inner_block,
            pipeline: Some(inner_pipeline),
        } = &outer_block.stream.0[0]
        else {
            panic!("Expected a nested Element::Block");
        };

        assert_eq!(inner_block.stream.0.len(), 1); // `inner`
        assert_eq!(inner_pipeline.0[0].name.to_string(), "first");
    }

    #[test]
    fn parse_recursive_group_traversal() {
        // An expansion block placed inside ordinary Rust parentheses `()`.
        let ast = parse_stream(quote! { ( [< target >]:transform ) });

        assert_eq!(ast.0.len(), 1);

        let Element::Tree(TokelTree::Group(group)) = &ast.0[0] else {
            panic!("Expected an Element::Tree(TokelTree::Group)");
        };

        assert_eq!(group.delimiter, Delimiter::Parenthesis);
        assert_eq!(group.stream.0.len(), 1);
        assert!(matches!(group.stream.0[0], Element::Block { .. }));
    }

    #[test]
    fn parse_block_with_mixed_contents() {
        // A block may hold ordinary Rust tokens and inner blocks side by side.
        let ast = parse_stream(quote! { [< let x = [< inner >]; >] });

        let Element::Block { block, .. } = &ast.0[0] else {
            panic!("Expected an Element::Block");
        };

        let inner_elements = block.stream.0.as_slice();
        assert_eq!(inner_elements.len(), 5);

        assert!(matches!(
            inner_elements,
            [
                Element::Tree(TokelTree::Ident(_)), // `let`
                Element::Tree(TokelTree::Ident(_)), // `x`
                Element::Tree(TokelTree::Punct(_)), // `=`
                Element::Block { .. },              // `[< inner >]`
                Element::Tree(TokelTree::Punct(_)), // `;`
            ]
        ));
    }

    #[test]
    fn parse_double_bracket_lookalike_ignore() {
        // Standard multidimensional arrays must not be mistaken for pipeline arguments.
        let ast = parse_stream(quote! { [[1, 2], [3, 4]] });

        // The result is one ordinary group (the outer bracket) containing two
        // inner bracket groups separated by a comma.
        assert_eq!(ast.0.len(), 1);

        let Element::Tree(TokelTree::Group(group)) = &ast.0[0] else {
            panic!("Expected a standard array grouping, not an expansion block");
        };

        assert_eq!(group.delimiter, Delimiter::Bracket);
        assert_eq!(group.stream.0.len(), 3); // `[1, 2]`, `,`, `[3, 4]`
    }

    #[test]
    fn parse_looks_like_block_but_missing_gt() {
        // The group opens with `<` but its last token is not `>`, as in the
        // array-like `[<MyType as Trait>::Assoc]`.
        let ast = parse_stream(quote! { [<MyType as Trait>::Assoc] });

        assert_eq!(ast.0.len(), 1);

        // Without a trailing `>` the group must fall back to a standard
        // `TokelTree::Group`.
        let Element::Tree(TokelTree::Group(group)) = &ast.0[0] else {
            panic!("Parser incorrectly interpreted a path-in-array as an expansion block!");
        };

        assert_eq!(group.delimiter, Delimiter::Bracket);
        // The group keeps `<`, `MyType`, `as`, etc. as plain tokens.
        assert_eq!(group.stream.0.len(), 8);
        assert!(matches!(
            &group.stream.0[0],
            Element::Tree(TokelTree::Punct(p)) if p.as_char() == '<'
        ));
    }

    #[test]
    fn parse_looks_like_block_but_missing_lt() {
        // Starts with an ordinary identifier and ends with `>`.
        // Example: `[ T::Assoc > ]` (not usually valid Rust, but a valid TokenStream).
        let ast = parse_stream(quote! { [ T::Assoc > ] });

        assert_eq!(ast.0.len(), 1);

        let Element::Tree(TokelTree::Group(group)) = &ast.0[0] else {
            panic!(
                "Parser interpreted a sequence ending in `>` as an expansion block without a starting `<`"
            );
        };

        assert_eq!(group.delimiter, Delimiter::Bracket);
        // The trailing `>` must still be present as the last element.
        assert!(matches!(
            group.stream.0.last().unwrap(),
            Element::Tree(TokelTree::Punct(p)) if p.as_char() == '>'
        ));
    }

    #[test]
    fn parse_pipeline_lookalike_on_standard_group() {
        // An ordinary bracketed group immediately followed by `:ident`.
        // The group does not start with `<` and end with `>`, so it is not a block
        // and the `:ident` must stay behind as ordinary tokens.
        let ast = parse_stream(quote! { [ standard_rust_array ]:transform });

        // Three separate elements: the group `[]`, the punct `:`, the ident `transform`.
        assert_eq!(ast.0.len(), 3);

        assert!(matches!(
            ast.0.as_slice(),
            [
                Element::Tree(TokelTree::Group(_)),
                Element::Tree(TokelTree::Punct(colon)),
                Element::Tree(TokelTree::Ident(_)),
            ] if colon.as_char() == ':'
        ));
    }

    #[test]
    fn parse_valid_block_with_complex_inner_rust_code() {
        // An expansion block whose contents use `<` and `>` themselves.
        // The block must extend to the LAST `>`, not the first one.
        let ast = parse_stream(quote! { [< HashMap::<String, Vec<u8>>::new() >] });

        let Element::Block { block, pipeline } = &ast.0[0] else {
            panic!("Failed to parse a block containing inner angle brackets");
        };

        assert!(pipeline.is_none());

        // The inner stream should be `HashMap`, `::`, `<`, `String`, `,`, `Vec`, `<`,
        // `u8`, `>`, `>`, `::`, `new`, `()`. Stopping at the first `>` would make the
        // assertions below fail.
        let inner = block.stream.0.as_slice();

        // The last tokens before the block's closing `>` are `new` and `()`.
        assert!(matches!(
            inner,
            [.., Element::Tree(TokelTree::Group(call))]
                if call.delimiter == Delimiter::Parenthesis
        ));
        assert!(matches!(
            inner,
            [.., Element::Tree(TokelTree::Ident(method)), _] if method == "new"
        ));
    }

    #[test]
    fn parse_nested_pipeline_arguments() {
        // A transformer argument `[[ ... ]]` is itself a Tokel stream, so it may
        // contain an expansion block with its own pipeline.
        let ast = parse_stream(quote! { [< main >]:append[[ [< suffix >]:case ]] });

        let Element::Block {
            pipeline: Some(pipeline),
            ..
        } = &ast.0[0]
        else {
            panic!("Expected outer block");
        };

        let (_, args_stream) = pipeline.0[0].argument.as_ref().unwrap();

        // The argument stream holds exactly one element: the inner `Element::Block`.
        assert_eq!(args_stream.0.len(), 1);

        let Element::Block {
            block: inner_block,
            pipeline: Some(inner_pipeline),
        } = &args_stream.0[0]
        else {
            panic!("Expected the argument to parse as an inner expansion block");
        };

        assert_eq!(inner_pipeline.0[0].name.to_string(), "case");

        // The inner block contains `suffix`.
        let Element::Tree(TokelTree::Ident(ident)) = &inner_block.stream.0[0] else {
            panic!("Expected identifier 'suffix'");
        };
        assert_eq!(ident, "suffix");
    }
}