lady_deirdre_derive/lib.rs
1////////////////////////////////////////////////////////////////////////////////
2// This file is part of "Lady Deirdre", a compiler front-end foundation //
3// technology. //
4// //
5// This work is proprietary software with source-available code. //
6// //
7// To copy, use, distribute, or contribute to this work, you must agree to //
8// the terms of the General License Agreement: //
9// //
10// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md //
11// //
12// The agreement grants a Basic Commercial License, allowing you to use //
13// this work in non-commercial and limited commercial products with a total //
14// gross revenue cap. To remove this commercial limit for one of your //
15// products, you must acquire a Full Commercial License. //
16// //
17// If you contribute to the source code, documentation, or related materials, //
18// you must grant me an exclusive license to these contributions. //
19// Contributions are governed by the "Contributions" section of the General //
20// License Agreement. //
21// //
22// Copying the work in parts is strictly forbidden, except as permitted //
23// under the General License Agreement. //
24// //
25// If you do not or cannot agree to the terms of this Agreement, //
26// do not use this work. //
27// //
28// This work is provided "as is", without any warranties, express or implied, //
29// except where such disclaimers are legally invalid. //
30// //
31// Copyright (c) 2024 Ilya Lakhin (Илья Александрович Лахин). //
32// All rights reserved. //
33////////////////////////////////////////////////////////////////////////////////
34
35//todo consider replacing HashMap with AHashMap
36
37//TODO check warnings regularly
38#![allow(warnings)]
39
40//! # Lady Deirdre Macros Crate
41//!
42//! This is a helper crate for the [main crate](https://docs.rs/lady-deirdre/latest/lady_deirdre/)
43//! of Lady Deirdre, compiler front-end foundation technology.
44//!
45//! The derive macros in this crate offer default implementations for
46//! the [Token] (lexical scanner), [Node] (syntax parser), and [Feature] (semantic object)
47//! traits used by the main crate.
48//!
49//! ## Links
50//!
51//! - [Source Code](https://github.com/Eliah-Lakhin/lady-deirdre)
52//! - [Main Crate](https://crates.io/crates/lady-deirdre)
53//! - [API Documentation](https://docs.rs/lady-deirdre)
54//! - [User Guide](https://lady-deirdre.lakhin.com/)
55//! - [Examples](https://github.com/Eliah-Lakhin/lady-deirdre/tree/master/work/crates/examples)
56//! - [License Agreement](https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md)
57//!
58//! ## Copyright
59//!
60//! This work is proprietary software with source-available code.
61//!
62//! To copy, use, distribute, or contribute to this work, you must agree to the
63//! terms and conditions of the [General License Agreement](https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md).
64//!
65//! For an explanation of the licensing terms, see the
66//! [F.A.Q.](https://github.com/Eliah-Lakhin/lady-deirdre/tree/master/FAQ.md)
67//!
68//! Copyright (c) 2024 Ilya Lakhin (Илья Александрович Лахин). All rights reserved.
69
70extern crate core;
71extern crate proc_macro;
72#[macro_use]
73extern crate quote;
74#[macro_use]
75extern crate syn;
76
77use std::str::FromStr;
78
79use proc_macro2::TokenStream;
80use quote::ToTokens;
81
82use crate::{feature::FeatureInput, node::NodeInput, token::TokenInput, utils::system_panic};
83
84mod feature;
85mod node;
86mod token;
87mod utils;
88
89/// A canonical implementation of Lady Deirdre's lexical scanner.
90///
91/// This macro implements a Token trait on the enum type,
92/// where the enum variants denote individual variants of the token with
93/// the regular expressions defining their parsing rules.
94///
95/// The generated scanner is a minimal finite state machine that unions the
96/// specified regular expression rules.
97///
98/// ## Macro Application Outline
99///
100/// ```ignore
101/// // Copy and Eq implementations are required by the Token trait.
102/// #[derive(Token, Clone, Copy, PartialEq, Eq)]
103///
104/// // U8 representation is required by the Token macro.
105/// #[repr(u8)]
106///
107/// // An optional instruction that alternates the macro output.
108/// //
109/// // Possible <mode> values are:
110/// //
111/// // - The `output` mode or nothing.
112/// // Prints the full macro output to the terminal using panic.
113/// //
114/// // - The `meta` mode.
115/// // Prints the generator's metadata such as the time the generator spent
116/// // to optimize the scanner's state machine.
117/// //
118/// // - The `dry` mode.
119/// // Checks correctness of the macro application, but does not produce any
120/// // output.
121/// //
122/// // - The `decl` mode.
123/// // Produces the normal output of the macro with all Rust spans erased.
124/// // This is useful when the macro is being applied inside the declarative
125/// // macro.
126/// #[dump(<mode>)]
127///
128/// // An optional instruction that sets the state machine optimization strategy.
129/// //
130/// // Possible <strategy> values are:
131/// //
132/// // - The `flat` strategy. Uses a heuristic approach to optimize
133/// // the state machine. The result is almost always the same as with
134/// // the "deep" strategy, but there is no strong guarantee that
135/// // the finite state machine will be optimized to the minimal form.
136/// //
/// // - The `deep` strategy. Guarantees to optimize the finite state-machine
138/// // to the canonical form.
139/// //
140/// // The default value is `flat` for the debug target (`debug_assertions`
141/// // feature is enabled); otherwise, the strategy is `deep`.
142/// #[opt(<strategy>)]
143///
144/// // An optional lookback attribute that sets the Token::LOOKBACK value.
145/// //
146/// // This value denotes the number of Unicode characters the scanner needs
147/// // to step back to rescan a fragment of the source code text.
148/// //
/// // When omitted, the value is set to 1 by default.
150/// #[lookback(1)]
151///
152/// // Optional inline expressions that you can use inside other expressions
153/// // by name (specified before the "=" sign): `Foo | 'x' & Bar`.
154/// //
155/// // You can refer to the inline expression only after the definition
156/// // (recursive application is not possible).
157/// //
158/// // The names must be unique in the namespace of other inline expressions
159/// // and the enum variants.
160/// #[define(Foo = <reg expr>)]
161/// #[define(Bar = <reg expr>)]
162/// enum MyToken {
163/// // A variant with discriminant 0 is required.
164/// // This variant denotes an end-of-input token (`Token::eoi()` value).
165/// EOI = 0,
166///
167/// // A variant with discriminant 1 is required.
168/// // This variant denotes the source code text fragments that cannot
169/// // be recognized by the scanner (`Token::mismatch()` value).
170/// Unknown = 1,
171///
172/// // Required for the parsable variants.
173/// // Specifies the token scanning expression.
174/// #[rule(<reg expr>)]
175///
176/// // Optional.
177/// //
178/// // Calls the `<rust expr>` expression when the `<reg expr>` matches
179/// // the scanned fragment.
180/// //
181/// // The `<rust expr>` must return a valid token variant that corresponds
182/// // to this fragment. The `fragment` variable of type `&str` can be used
183/// // inside the constructor expression.
184/// #[constructor(<rust expr>)]
185///
186/// // Optional.
187/// //
188/// // Specifies the value of the `Token::describe` function that returns
189/// // an end-user display description of the token variant.
190/// //
191/// // When two strings provided, the second string corresponds to
192/// // the verbose version of the description.
193/// //
194/// // When only one string provided, the string specifies the short and
195/// // verbose descriptions both.
196/// //
197/// // If the macro attribute omitted, the `Token::describe` would
198/// // return None for this token variant.
199/// #[describe("short", "verbose")]
200///
201/// // Optional.
202/// //
203/// // Specifies the priority of the rule over other rules.
/// // This helps the scanner to resolve ambiguities between the scanning
205/// // rules when several rules could match the same string fragments.
206/// //
207/// // For example, the general identifier scanning rule could conflict
208/// // with the keyword rule. In this case, the keyword rule should have
209/// // a higher priority.
210/// //
211/// // Rules with higher priority values supersede the rules with lower
212/// // priority values.
213/// //
214/// // The default priority is zero.
215/// #[priority(<signed integer number>)]
216///
217/// // The `= <num>` discriminant is optional but if specified,
218/// // it will match the `Token::rule()` value.
219/// ParsableVariant,
220///
221/// // Variants without the `#[rule(...)]` macro attribute are allowed.
222/// //
223/// // They will not be scanned by the generated scanner, but you can
224/// // return them from the constructor expressions of the parsable variant.
225/// //
226/// // The `#[describe(...)]` macro attribute is also allowed for
227/// // the variants without rules.
228/// UnparseableVariant,
229/// }
230/// ```
231///
232/// ## Regular Expressions
233///
234/// ### Example
235///
236/// ```ignore
237/// | "word"
238/// | 'w' & 'o' & 'r' & 'd'
239/// | 'x' & ('y' | 'z')
240/// | ['1'..'9', 'X', 'a'..'c']
/// | "optional"?
242/// | "zero or more repetition"*
243/// | "one or more repetition"+
244/// ```
245///
246/// ### Precedence
247///
248/// The `&` concatenation operator has a higher priority over
249/// the `|` alternation operator.
250///
251/// The unary operators (`+`, `*`, `?`) have the highest priority.
252///
253/// The binary operators (`|` and `&`) are left associative.
254///
255/// Parentheses `(<reg expr>)` group the inner expressions.
256///
257/// The `&` operator can be omitted: `"foo" & "bar"` means the same as `"foo" "bar"`.
258///
259/// The alternate expressions (denoted by the `|` operator) can start with
260/// the pipe character like in the example above.
261///
262/// ### Requirements
263///
264/// The expression specified in the `#[rule(<reg expr>)]` macro attribute
265/// must match at least one character.
266///
267/// The inline expression `#[define(<name> = <reg expr>)]` could match
268/// empty strings.
269///
270/// The parsable variants with the same priority must match distinct string
271/// fragments. The priority could be overridden using the `#[priority(<num>)]`
272/// macro attribute.
273///
274/// ### Debugging
275///
276/// The "dump" operator (e.g., `"foo" | dump("b" & "a" & "r")`) enforces the
277/// macro program to print the surrounding regular expression state machine
278/// transitions to the terminal using panic.
279///
280/// ### Operators
281///
282/// - String fragment: `"foo"`. Matches a sequence of the Unicode characters
283/// denoted by the string literal.
284///
285/// - Single character: `'Y'`. Matches a single Unicode character.
286///
287/// - Any Unicode character: `.`. Matches any single Unicode character.
288///
289/// - Any character in the set: `['a', 'c', '1'..'8']`.
290/// Matches any Unicode character within the specified set. The character
291/// ranges (`'1'..'8'`) denote the Unicode characters in the range starting
292/// from the lower bound to the upper bound **inclusive**. The lower bound
293/// must be less than or equal to the upper bound.
294///
295/// - Any character outside of the set: `^['a', 'c', '1'..'8']`.
296/// The inverted version of the previous operator that matches any character
297/// outside of the specified set.
298///
299/// - Case-insensitive matching: `i("foo")`.
300/// Matches "foo", "FOO", "Foo", and other case-insensitive variants.
301///
302/// - Any Unicode uppercase character: `$upper`.
303///
304/// - Any Unicode lowercase character: `$lower`.
305///
306/// - Any Unicode numeric character: `$num`.
307///
308/// - Any Unicode whitespace character: `$space`.
309///
310/// - Any Unicode alphabetic character: `$alpha`.
311///
312/// - Any Unicode identifier's start character: `$xid_start`.
313///
314/// - Any Unicode identifier's continuation character: `$xid_continue`.
315///
316/// - A class of the character property combinations: `${alpha | num | space}`.
317/// The property names can be any combination of the names listed above.
318///
319/// - A concatenation of the rules: `<expr1> & <expr2>` or just `<expr1> <expr2>`.
320/// Matches `<expr1>`, then matches `<expr2>`. The concatenation expression
321/// matches the string fragment if and only if both operands match
322/// the substrings of the fragment string.
323///
324/// - A union of the rules: `<expr1> | <expr2>` or `| <expr1> | <expr2>`.
325/// Matches either `<expr1>` or `<expr2>`. The union expression matches
326/// the string fragment if and only if at least one operand matches this
327/// fragment.
328///
329/// - Non-empty repetition: `<expr>+`. Applies the `<expr>` rule one or more
330/// times. The repetition expression matches the string fragment if and only
331/// if at least one application of the `<expr>` rule is satisfied.
332///
333/// - Possibly empty repetition: `<expr>*`. Applies the `<expr>` rule zero or
334/// more times. If the `<expr>` cannot be applied one or more times,
335/// the operator matches an empty string.
336///
337/// - Optional application: `<expr>?`. Attempts to apply the `<expr>` rule.
338/// If the `<expr>` cannot be applied, the operator matches an empty string.
339///
340/// - Inline expression: `FOO`. Inlines the expression defined previously using
341/// the `#[define(FOO = <expr>)]` macro attribute.
342///
343/// - Debug dump: `dump(<expr>)`. Enforces the macro program to print the state
344/// machine transitions of the `<expr>` rule to the terminal.
345#[proc_macro_derive(
346 Token,
347 attributes(define, lookback, rule, priority, constructor, describe, opt, dump)
348)]
349pub fn token(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
350 let input = parse_macro_input!(input as TokenInput);
351
352 let declarative = input.dump.is_declarative();
353
354 output_stream(declarative, input.into_token_stream())
355}
356
357/// A canonical implementation of Lady Deirdre's syntax parser.
358///
359/// This macro implements the syntax component (Node and AbstractNode traits)
360/// and the semantic entry points (Grammar and AbstractFeature traits)
361/// of the programming language grammar on enum types.
362///
363/// The enum variants denote individual syntax tree node variants, and their
364/// parsing rules. The enum variant fields represent the state of the node.
365/// In particular, through the variant fields, the syntax tree establishes
366/// parent-child relations between the tree nodes.
367///
368/// The parsing rules are described in terms of the LL(1) grammar expressions,
369/// but the macro enables the possibility to implement the individual node
370/// parsing procedure using the user-defined function where you can implement
371/// custom recursive-descend parsing logic with potentially unlimited lookahead
372/// and the left recursion.
373///
374/// The generated parser is capable of automatically recovering from
375/// the syntax errors.
376///
377/// ## Macro Application Outline
378///
379/// ```ignore
380/// #[derive(Node)]
381///
382/// // Required.
383/// //
384/// // Denotes the lexical component of the grammar (`Node::Token` type).
385/// #[token(MyToken)]
386///
387/// // Optional.
388/// //
389/// // Sets the syntax tree classifier type (`Grammar::Classifier` type).
390/// //
391/// // When omitted, the classifier is set to the VoidClassifier.
392/// #[classifier(<classifier type>)]
393///
394/// // Optional.
395/// //
396/// // Specifies the semantic entry-point for the common semantics shared across
397/// // all documents in the Analyzer.
398/// //
399/// // The type of this field must implement the `Feature` trait.
400/// //
401/// // If omitted, the default common semantics will be `VoidFeature<MyNode>`
402/// #[semantics(<common semantics type>)]
403///
404/// // Optional.
405/// //
406/// // Defines the expression that will be automatically parsed zero or more
407/// // times between every consumed token in the node's parse rules.
408/// //
409/// // The trivia expression usually enumerates whitespace tokens, comment
410/// // rules and similar syntactically useless things
/// // (e.g., `#[trivia($Whitespace | InlineComment)]`).
412/// //
413/// // The <parse expr> expression is allowed to parse an empty sequence of
414/// // tokens.
415/// //
416/// // When omitted, the default trivia expression is an empty expression.
417/// //
418/// // You can manually override trivia expression of each parsable rule.
419/// #[trivia(<parse expr>)]
420///
421/// // Optional.
422/// //
423/// // Defines panic error recovery configuration of the parsable rules.
424/// //
425/// // By default, the panic recovery is unlimited (`Recovery::unlimited()`).
426/// //
427/// // Using this macro attribute you can specify the recovery halting
428/// // tokens (the `Recovery::unexpected` tokens), and the token groups
429/// // (the `Recovery::group` pairs).
430/// //
431/// // The <config> is a sequence of elements delimited by `,` comma, where each
432/// // element is either a halting `$Token`, or a group pair `[$Start, $End]`.
433/// //
434/// // Example: `#[recovery($Semicolon, [$OpenBrace, $CloseBrace])]`.
435/// //
436/// // You can manually override the recovery configuration of each parsable rule.
437/// #[recovery(<config>)]
438///
439/// // An optional instruction that alternates the macro output.
440/// //
441/// // Possible <mode> values are:
442/// //
443/// // - The `output` mode or nothing.
444/// // Prints the full macro output to the terminal using panic.
445/// //
446/// // - The `trivia` mode.
447/// // Prints the parser's common trivia parsing function (the function
448/// // generated from the `#[trivia(...)]` expression of the enum type).
449/// //
450/// // - The `meta` mode.
451/// // Prints the generator's metadata such as the time the generator spent
452/// // to statically optimize the syntax parser.
453/// //
454/// // - The `dry` mode.
455/// // Checks correctness of the macro application, but does not produce any
456/// // output.
457/// //
458/// // - The `decl` mode.
459/// // Produces the normal output of the macro with all Rust spans erased.
460/// // This is useful when the macro is being applied inside the declarative
461/// // macro.
462/// #[dump(<mode>)]
463///
464/// // Optional inline expressions that you can use inside other expressions
465/// // by name (specified before the "=" sign): `Foo | $Token & Bar`.
466/// //
467/// // You can refer to the inline expression only after the definition
468/// // (recursive application is not possible).
469/// //
470/// // The names must be unique in the namespace of other inline expressions
471/// // and the enum variants.
472/// #[define(Foo = <parse expr>)]
473/// #[define(Bar = <parse expr>)]
474///
475/// enum MyNode {
476/// // Must be applied to exactly one parseable variant that represents
477/// // the root node of the syntax tree.
478/// #[root]
479///
480/// // Optional if the variant has a #[denote(...)] attribute.
481/// //
482/// // Specifies the parsing rule of the variant.
483/// //
484/// // The macro uses this expression to generate the parser, and
485/// // to reveal the leftmost set of tokens of the parsing rule.
486/// #[rule(<parse expr>)]
487///
488/// // Optional.
489/// //
490/// // Overrides the parser generated by the macro with the user-defined
491/// // parser. Only applicable when the variant has a #[rule(...)] attribute
492/// // that would define the leftmost token set of the parser.
493/// //
494/// // The <rust expr> must return an instance of the Node. Inside the
495/// // <rust expr> you can use the "session" variable which is a mutable
496/// // reference to the SyntaxSession of the current parsing state from
497/// // which you should parse the node.
498/// //
499/// // Typically, inside the <rust expr> you would call a user-defined
500/// // parser function with the "session" argument.
501/// #[parser(<rust expr>)]
502///
503/// // Optional if the variant has a #[rule(...)] attribute.
504/// //
505/// // Specifies the NodeRule number of this variant.
506/// //
507/// // Possible syntax is `<const_name>`, `<const_name> = <int_value>`,
508/// // or `<int_value>`.
509/// //
510/// // When the <const name> specified, the macro will generate type's
511/// // constant with the value: `MyNode::Foo == 10`.
512/// //
513/// // When the <int_value> specified, the number must be unique across
514/// // all denoted variants with the #[denote(...)] attribute.
515/// //
516/// // If the <int_value> omitted, the macro will assign the unique value
517/// // automatically.
518/// #[denote(FOO = 10)]
519///
520/// // Optional if the variant has a #[rule(...)] attribute.
521/// //
522/// // Specifies the value of the `AbstractNode::describe` function that
523/// // returns an end-user display description of the node variant.
524/// //
525/// // When two strings provided, the second string corresponds to
526/// // the verbose version of the description.
527/// //
528/// // When only one string provided, the string specifies the short and
529/// // verbose descriptions both.
530/// //
531/// // If the macro attribute omitted, the `AbstractNode::describe` would
532/// // return None for this node variant.
533/// #[describe("short", "verbose")]
534///
535/// // Optional. Only applicable when the variant has a #[rule(..)]
536/// // attribute, and does not have overridden parser.
537/// //
538/// // Overrides default variant's constructor. The <rust expr> must return
539/// // an instance of the Node. Inside the <rust expr> you can use variables
540/// // of the rule expression capture keys and the "session" variable of
541/// // the SyntaxSession type from which you can read the current parsing
542/// // state.
543/// #[constructor(some_constructor(session, foo, bar, baz))]
544///
545/// // Optional. Only applicable when the variant has a #[rule(..)]
546/// // attribute, and does not have overridden parser.
547/// //
548/// // Overrides trivia parsing expression of this node. See #[trivia(...)]
549/// // type attribute description above for details.
550/// #[trivia(<parse expr>)]
551///
552/// // Optional. Only applicable when the variant has a #[rule(..)]
553/// // attribute, and does not have overridden parser.
554/// //
555/// // Overrides recovery configuration of the generated parser of this
556/// // node. See #[recovery(...)] type attribute description above
557/// // for details.
558/// #[recovery(<config>)]
559///
560/// // Optional.
561/// //
562/// // Instructs the macro that the generated parsers should bypass
563/// // caching of this node when descending into the parser of this rule.
564/// //
565/// // When omitted, the macro will consider that the variant's parser is
566/// // "primary", and it will enforce the parsing environment to cache the
567/// // node whenever possible.
568/// #[secondary]
569///
570/// // Optional.
571/// //
572/// // Tells the macro that this Node variant is the root of the semantics
573/// // scope branch (the `Grammar::is_scope()` function would return true
574/// // for such variants).
575/// #[scope]
576///
577///
/// // An optional instruction that enforces the macro to print debug
/// // metadata for this node.
580/// //
581/// // Possible <mode> values are:
582/// //
583/// // - The `output` mode or nothing.
584/// // Prints the generated parsing function to the terminal using panic.
585/// //
586/// // - The `trivia` mode.
587/// // Prints the parser's overridden trivia parsing function
588/// // (the function generated from the `#[trivia(...)]` expression of
589/// // the variant).
590/// #[dump(<mode>)]
591///
592/// Variant {
593/// // Optional. Sets the reference of this node in the syntax tree.
594/// #[node]
595/// node: NodeRef,
596///
597/// // Optional. Sets the reference to the parent node of this node
598/// // in the syntax tree
599/// #[parent]
600/// parent: NodeRef,
601///
602/// // Optional.
603/// //
604/// // A child of this node.
605/// //
606/// // The field name must match one of the capturing operator's keys
607/// // from the rule's expression (e.g., "foo: FooNode") if the variant
608/// // has a #[rule(...)] attribute which was not overridden by
609/// // the #[parser(...)] attribute.
610/// //
611/// // The type of the field is NodeRef, TokenRef, Vec<NodeRef>,
612/// // or Vec<TokenRef> depending on the capture type and repetition.
613/// //
614/// // Note that all captures specified in the #[rule(...)] attribute
615/// // must be covered by the variant fields unless the variant has
616/// // an overridden constructor (via the #[constructor(...)]
617/// // attribute). The overridden constructor allows to change
618/// // the logic of the variant fields initialization.
619/// #[child]
620/// foo: NodeRef,
621///
622/// // Optional unless any other denoted variant already has
623/// // a #[semantics] field.
624/// //
625/// // Specifies the semantic entry-point of this node variant.
626/// //
627/// // The type of the field must be Semantics type parametrized with
628/// // the Feature implementation describing the semantics of the node.
629/// //
630/// // If the variant does not have semantics you can use
631/// // `Semantics<VoidFeature<MyNode>>` as the field type.
632/// #[semantics]
/// semantics: Semantics<VariantSemantics>,
634///
635/// // Required for arbitrary fields of the node variants.
636/// //
637/// // The <rust expr> specifies a constructor of the field value,
638/// // that the generated parser will use to initialize the field.
639/// //
640/// // The <rust expr> could be omitted (`#[default]`). In this case
641/// // the parser will use the Default implementation of the field's type.
642/// //
643/// // This macro attribute is only applicable when the variant
644/// // has a #[rule(...)] attribute, and it does not have overridden
645/// // #[constructor(...)] or overridden #[parser(..)] attributes
/// // because the overridden Constructor or Parser are specifying
647/// // the initialization logic explicitly.
648/// #[default(<rust expr>)]
649/// custom_field: CustomType,
650/// },
651/// }
652/// ```
653///
654/// ### Variants Denotation
655///
656/// Denoted enum variants are variants that either have
657/// a `#[rule(...)]` macro attribute or at least a `#[denote(...)]` attribute.
658///
659/// Denoted variants are eligible variants of the nodes of the syntax tree.
660///
661/// The macro allows you to describe their field structure (i.e., to annotate
662/// the fields with the `#[parent]`, `#[child]`, and other macro attributes),
663/// and it will generate the traits functions in accordance with these
664/// descriptions.
665///
666/// The denotation of the variant is important because in this case, the variant
667/// at least receives a NodeRule number through which it could be referred to.
668///
669/// Non-denoted variants are just normal enum variants that don't relate
670/// to the syntax. The macro ignores them.
671///
672/// ### Parsable variants
673///
/// The enum variants are considered to be "parsable" if they have a
675/// `#[rule(...)]` attribute. These variants become implicitly "denoted".
676///
/// Parseability only makes sense for the Node macro logic because
678/// by specifying the Rule attribute, you are exposing the leftmost set of
679/// the variant's parser. The macro needs to know this set to properly implement
680/// descend transitions between the parsing procedures when you refer this
681/// variant in another parsing expression.
682///
683/// However, if you do not intend to refer to the variant in the parsing
684/// expression, and you want to parse the variant manually in one of the custom
685/// parsers, you only need to denote the variant (explicitly) using
686/// the `#[denote(...)]` macro attribute.
687///
688/// ```ignore
689/// #[derive(Node)]
690/// enum MyNode {
691/// // You can refer to this variant in another rule.
692/// #[rule(...)]
693/// DenotedVariant1 { ... },
694///
695/// // You can refer to this variant in another rule,
696/// // and in the custom parsers using the `MyNode::VARIANT_2` rule number.
697/// #[rule(...)]
698/// #[denote(VARIANT_2)]
699/// #[describe(...)]
700/// DenotedVariant2 { ... },
701///
702/// // You cannot refer to this variant in another rule,
703/// // but you can refer to it in the custom parsers using
704/// // the `MyNode::VARIANT_3` rule number.
705/// #[denote(VARIANT_3)]
706/// #[describe(...)]
707/// UnparsableDenotedVariant { ... },
708/// }
709/// ```
710///
711/// ### Custom Parsers
712///
713/// By default, the macro generates a parsing function for each parsable
714/// variant (a variant with the `#[rule(...)]` macro attribute) based on the
715/// parsing expression specified in the rule.
716///
717/// You can manually override the function with your own custom parsing
718/// function using the `#[parser(...)]` macro attribute.
719///
720/// ```ignore
721/// #[derive(Node)]
722/// enum MyNode {
723/// #[rule(...)]
724/// #[parser(custom_parser(session))]
725/// ParsableVariant { ... },
726/// }
727///
728/// fn custom_parser<'a>(session: &mut impl SyntaxSession<'a, MyNode>) -> MyNode {
729/// // custom parser implementation of the MyNode::ParsableVariant variant
730/// }
731/// ```
732///
733/// Inside the parser's expression, you can use the "session" variable, which is
734/// a reference to the current state of the SyntaxSession from which the node
735/// should be parsed. The expression must return an instance of the parsable
736/// node variant. Usually, this expression is a custom parsing function call.
737///
738/// Note that even though the parser is customized, the `#[rule(...)]` expression
739/// is still required. You don't need to implement the full parser in the rule
740/// expression, but you should enumerate the leftmost tokens so that the macro
741/// will be aware of how to descend into the node's parser. Usually, you can just
742/// enumerate the tokens with the union operator: `#[rule($TokenA | $TokenB | $TokenC)]`.
743///
744/// ### Ascending Relations
745///
746/// It is recommended that each denoted variant would have `#[node]` and
747/// `#[parent]` variant fields.
748///
749/// The `#[node]` field is the NodeRef that will point to this node instance.
750///
751/// The `#[parent]` field is the NodeRef that will point to the parent node
752/// instance of this node.
753///
754/// The macro-generated parser of the variant will automatically set these
755/// values. Inside the custom parser, you should set them manually (you can use
756/// the `SyntaxSession::node()` and `SyntaxSession::parent` functions for this
757/// purpose).
758///
759/// These field values could then be fetched using
760/// the `AbstractNode::node_ref()` and `AbstractNode::parent_ref()` functions
761/// accordingly. The parsing environment would use
762/// the `AbstractNode::set_parent_ref()` function to update the parent reference
763/// during incremental reparsing when the reparser attempts to "transplant"
764/// the branch of the syntax tree.
765///
766/// The macro requires that either all denoted variants have `#[node]` and
767/// `#[parent]` fields, or none of them.
768///
769/// ```ignore
770/// #[derive(Node)]
771/// enum MyNode {
772/// #[rule(...)]
773/// Variant1 {
774/// #[node]
775/// node: NodeRef,
776/// #[parent]
777/// parent: NodeRef,
778/// },
779///
780/// #[rule(...)]
781/// Variant2 {
782/// #[node]
783/// node: NodeRef,
784/// #[parent]
785/// parent: NodeRef,
786/// },
787/// }
788/// ```
789///
790/// ### Descending Relations
791///
792/// The parent-child relations are established through the system of captures.
793///
794/// When the parsing expression reads a token or descends into another rule,
795/// you can capture the TokenRef / NodeRef reference of the result and
796/// put it into the variant fields annotated with the `#[child]` macro
797/// attribute.
798///
799/// ```ignore
800/// #[derive(Node)]
801/// enum MyNode {
802/// #[rule(foo: $FooToken & bar: BarNode? & baz: BazNode*)]
803/// Variant {
804/// #[child]
805/// foo: TokenRef,
806/// #[child]
807/// bar: NodeRef,
808/// #[child]
809/// baz: Vec<NodeRef>,
810/// },
811/// }
812/// ```
813///
814/// When the expression captures a token, it should be put into the TokenRef
815/// field. When it captures a node, it should be put into the NodeRef field.
816///
817/// If the expression could be captured more than once (like a `baz: BazNode*`
818/// in the example above), it should be put into Vec.
819///
820/// If the expression capturing is optional (`bar: BarNode?` captures no more
821/// than once), the field type should still be the TokenRef / NodeRef. When
822/// the parser didn't capture the value, it will set the field to
823/// the `TokenRef::nil()` / `NodeRef::nil()` accordingly.
824///
825/// The custom user-defined parser should follow this convention too.
826///
827/// The `#[child]` macro attribute informs the macro that the field is subject
828/// to capturing. In particular, the implementation of `AbstractNode::capture`
829/// and related functions uses these fields to represent the AbstractNode’s
830/// captures.
831///
832/// Properly denoting children is particularly important for a number of
833/// built-in features, including syntax tree traversal
834/// (SyntaxTree::traverse_tree()), which rely on the children metadata.
835///
836/// ### Semantics
837///
838/// The macro automatically implements the Grammar and AbstractFeature traits
839/// for the derived enum type, making the type eligible for the Analyzer.
840///
841/// To bind the semantic entry points, you should specify the `#[semantics]`
842/// variant fields in all denoted variants.
843///
844/// Even if the variant does not have semantic features, you have to implement
845/// the field for semantic consistency. In this case, you can use
846/// the VoidFeature helper type.
847///
848/// ```ignore
849/// #[derive(Node)]
850/// enum MyNode {
851/// #[rule(...)]
852/// Variant1 {
853/// #[semantics]
854/// semantics: Semantics<Variant1Semantics>,
855/// },
856///
857/// #[rule(...)]
858/// Variant2 {
859/// #[semantics]
860/// semantics: Semantics<VoidFeature<MyNode>>,
861/// }
862/// }
863/// ```
864///
865/// The macro requires that either all denoted variants have semantics, or none
866/// of them.
867///
868/// You may have at most one variant field annotated with the `#[semantics]`
869/// macro attribute.
870///
871/// ### Error Recovery
872///
/// The parsers generated by the macro are subject to error recovery, which
/// is a heuristic process based on the static analysis of the specified
875/// syntax rules.
876///
877/// The exact procedure is not specified and could change over time in
878/// the minor versions of the crate to improve the recovery logic.
879///
880/// The recovery mechanism uses at least the "panic" recovery approach when
881/// appropriate. As the author of the syntax grammar, you can specify
882/// the panic recovery configurations for the entire grammar or
883/// per individual variants using the `#[recovery(<config>)]` attribute.
884///
885/// ## Parsing Expressions
886///
887/// Parsing expressions are regex-like expressions that describe the parsing
888/// rules in terms of LL(1) grammars.
889///
890/// ### Example
891///
892/// ```ignore
893/// | $TokenA & ($TokenB & VariantX) & VariantY
894/// | $OptionalToken?
895/// | ZeroOrMoreRepetition*
896/// | ZeroOrMoreRepetition*{$WithDelimiter}
897/// | OneOrMoreRepetition+
898/// | OneOrMoreRepetition+{$WithDelimiter}
899/// | token_capture: $SomeToken
900/// | many_nodes_capture: SomeVariant*
901/// ```
902///
903/// ### Precedence
904///
905/// The `&` concatenation operator has a higher priority over
906/// the `|` alternation operator.
907///
908/// The unary operators (`+`, `*`, `?`) have higher priority than
909/// the `&` concatenation operator.
910///
911/// The capturing operator (`a: Foo`) has the highest priority.
912///
913/// The binary operators (`|` and `&`) are left associative.
914///
915/// Parentheses `(<parse expr>)` group the inner expressions.
916///
917/// The `&` operator can be omitted: `Foo & Bar` means the same as `Foo Bar`.
918///
919/// The alternate expressions (denoted by the `|` operator) can start with
920/// the pipe character like in the example above.
921///
922/// ### Requirements
923///
924/// The expression specified in the `#[rule(<parse expr>)]` macro attribute
925/// must match at least one token (possibly implicitly via descending into
926/// other parse rules).
927///
928/// The inline expression `#[define(<name> = <parse expr>)]` could match
929/// empty token sequences.
930///
931/// Left recursion is forbidden: the `#[rule(...)]` expression cannot
932/// descend into itself as the first step of the parsing, neither directly nor
933/// indirectly through other rules.
934///
935/// Each parsing expression has an associated _leftmost set_, a set of tokens
936/// through which the parser starts to parse the expression (possibly indirectly
937/// by descending into another parse rule).
938///
939/// The generated parser makes a decision to descend into the subrules based
940/// on the leftmost set of the subrules. Therefore, rules that descend must be
941/// unambiguous in the parsing step.
942///
943/// For example, the expression `$Foo | Bar` would be ambiguous if the Bar's
944/// variant has a `$Foo` token in its leftmost set.
945///
946/// ### Debugging
947///
948/// The "dump" operator (e.g., `$Foo | dump($A & x: B & Y: $C)`) enforces the
949/// macro program to print the surrounding expression's inner state machine
950/// transitions, leftmost set, and the captures to the terminal using panic.
951///
952/// ### Operators
953///
954/// - Single token match: `$SomeToken`.
955///
956/// - Any token match: `.`. Matches any single token from the alphabet of
957/// available tokens.
958///
959/// - Any token except the tokens in the set: `^[$TokenA | $TokenB | $TokenC]`.
960/// Matches any single token from the alphabet of available tokens, except
961/// the enumerated tokens.
962///
963/// - Descending or inline: `Foo`. If "Foo" is a parsable variant, descends
964/// into this variant's parsing rule. If "Foo" is an inline expression
965/// defined with the `#[define(Foo = <expr>)]` macro attribute, copies
966/// this `<expr>` expression in place as it is.
967///
968/// - A concatenation of the rules: `<expr1> & <expr2>` or just `<expr1> <expr2>`.
969/// Matches `<expr1>`, then matches `<expr2>`. The concatenation expression
970/// matches the token sequence if and only if both operands match
971/// the subsequences of the sequence.
972///
973/// - A union of the rules: `<expr1> | <expr2>` or `| <expr1> | <expr2>`.
974/// Matches either `<expr1>` or `<expr2>`. The union expression matches
975/// the token sequence if and only if at least one operand matches this
976/// sequence.
977///
978/// - Non-empty repetition: `<expr>+`. Applies the `<expr>` rule one or more
979/// times. The repetition expression matches the token sequence if and only
980/// if at least one application of the `<expr>` rule is satisfied.
981///
982/// - Non-empty repetition with delimiter: `<expr>+{<del_expr>}`. Same as
983/// the normal non-empty repetition, but requires the `<del_expr>`
984/// expression to be present between each match of the `<expr>` expression.
985///
986/// - Possibly empty repetition: `<expr>*`. Applies the `<expr>` rule zero or
987/// more times. If the `<expr>` cannot be applied one or more times,
988/// the operator matches an empty token sequence.
989///
990/// - Possibly empty repetition with delimiter: `<expr>*{<del_expr>}`. Same as
991/// the normal possibly empty repetition, but requires the `<del_expr>`
992/// expression to be present between each match of the `<expr>` expression.
993///
/// - Optional application: `<expr>?`. Attempts to apply the `<expr>` rule.
///   If the `<expr>` cannot be applied, the operator matches an empty token
///   sequence.
996///
997/// - Result capture: `<key>: <expr>`. If `<expr>` is a token match, matches
998/// and captures the TokenRef of the token. If `<expr>` is a subrule
999/// descending, descends into the rule, and captures the NodeRef result of
1000/// this rule. Otherwise, if `<expr>` is a complex expression, spreads
1001/// the capturing operator to all inner token matches and rule descendings.
1002/// The `<key>` is an identifier of the capture (basically, variant field's
1003/// name) to which the captured value should be assigned or pushed.
1004///
1005/// - Debug dump: `dump(<expr>)`. Enforces the macro program to print
1006/// the inner state machine transitions, leftmost set, and the captures
1007/// of the surrounding `<expr>` expression to the terminal.
1008#[proc_macro_derive(
1009 Node,
1010 attributes(
1011 token,
1012 classifier,
1013 define,
1014 trivia,
1015 recovery,
1016 rule,
1017 root,
1018 denote,
1019 constructor,
1020 secondary,
1021 parser,
1022 default,
1023 node,
1024 parent,
1025 child,
1026 semantics,
1027 describe,
1028 scope,
1029 dump,
1030 )
1031)]
1032pub fn node(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
1033 let input = parse_macro_input!(input as NodeInput);
1034
1035 let declarative = input.dump.is_declarative();
1036
1037 output_stream(declarative, input.into_token_stream())
1038}
1039
1040/// A canonical implementation of Lady Deirdre's syntax tree's node semantic
1041/// object.
1042///
1043/// This macro implements the Feature and AbstractFeature traits on the struct
1044/// types, making these types eligible for use as the generic parameter of
1045/// the Semantics object and as field types of other Feature objects.
1046///
1047/// ## Macro Application Outline
1048///
1049/// ```ignore
1050/// #[derive(Feature)]
1051///
1052/// // Specifies the grammar to which this Feature belongs (`Feature::Node` type).
1053/// #[node(MyNode)]
1054///
1055/// // An optional instruction that alternates the macro output.
1056/// //
1057/// // Possible <mode> values are:
1058/// //
1059/// // - The `output` mode or nothing.
1060/// // Prints the full macro output to the terminal using panic.
1061/// //
1062/// // - The `dry` mode.
1063/// // Checks correctness of the macro application, but does not produce any
1064/// // output.
1065/// //
1066/// // - The `decl` mode.
1067/// // Produces the normal output of the macro with all Rust spans erased.
1068/// // This is useful when the macro is being applied inside the declarative
1069/// // macro.
1070/// #[dump(<mode>)]
1071///
/// // The macro exposes the inner fields of the struct through
/// // the `AbstractFeature::feature()` and `AbstractFeature::feature_keys()`
/// // functions, which have the same visibility as the type itself.
1075/// pub(super) struct SomeFeature {
1076/// // Will be exposed.
1077/// pub(super) foo: Attr<FooFn>,
1078///
1079/// // Will not be exposed, visibility is different from the type's visibility.
1080/// bar: Attr<BarFn>,
1081///
1082/// // BazFeature must also implement the Feature and AbstractFeature traits too.
1083/// pub(super) baz: BazFeature,
1084///
1085/// // This macro attribute is optional, and denotes that the semantics
1086/// // Attribute or a Feature is subject to invalidation when
1087/// // the `Feature::invalidate()` function is called for the "SomeFeature"
1088/// // type.
1089/// //
/// // This attribute only makes sense to use if the "SomeFeature"
1091/// // will be used as a part of the semantics of the syntax tree node
1092/// // denoted as a scope; otherwise, the #[scoped] marker will be ignored.
1093/// //
/// // In practice, you should only annotate the fields of the struct with
/// // the #[scoped] macro attribute that are supposed to be entry points of
/// // the semantic model.
1097/// #[scoped]
1098/// pub(super) scoped_attr: Attr<InputFn>,
1099/// }
1100/// ```
1101///
1102/// Structs with anonymous fields are also derivable:
1103///
1104/// ```ignore
1105/// #[derive(Feature)]
1106/// #[node(MyNode)]
1107/// pub(super) struct SomeFeature(
1108/// pub(super) Attr<FooFn>,
1109/// Attr<BarFn>,
1110/// pub(super) BazFeature,
1111/// #[scoped] pub(super) Attr<InputFn>,
1112/// );
1113/// ```
1114#[proc_macro_derive(Feature, attributes(node, scoped, dump))]
1115pub fn feature(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
1116 let input = parse_macro_input!(input as FeatureInput);
1117
1118 let declarative = input.dump.is_declarative();
1119
1120 output_stream(declarative, input.into_token_stream())
1121}
1122
1123fn output_stream(declarative: bool, stream: TokenStream) -> proc_macro::TokenStream {
1124 match declarative {
1125 true => match TokenStream::from_str(&stream.to_string()) {
1126 Ok(stream) => stream.into(),
1127 Err(error) => system_panic!("Spans erasure failure. {error}",),
1128 },
1129 false => stream.into(),
1130 }
1131}