//! `lexit::macros` — declarative macros for building `TokenDefinition`s and
//! assembling them into a `Language`.

/// Internal macro used by other token definition macros to create a `TokenDefinition`.
///
/// This macro provides a consistent way to construct `TokenDefinition` instances
/// by delegating to `TokenDefinition::new`. It's not intended for direct use.
///
/// # Arguments
/// * `$name`: The name of the token as a string literal.
/// * `$regex_str`: The regular expression pattern for the token as a string literal.
/// * `$priority`: An integer representing the token's priority.
/// * `$to_store`: A boolean indicating whether to store the matched text.
/// * `$behavior`: The `TokenBehavior` for the token.
// NOTE(review): the `__lexor_` prefix predates the crate's rename to "lexit";
// renaming it would touch every macro in this file, so it is kept as-is here.
#[macro_export]
// `#[macro_export]` is required so the `$crate::__lexor_create_token_definition!`
// paths inside the public macros resolve at downstream call sites, but the macro
// is an implementation detail — hide it from rustdoc and the public API surface.
#[doc(hidden)]
macro_rules! __lexor_create_token_definition {
    ($name:expr, $regex_str:expr, $priority:expr, $to_store:expr, $behavior:expr) => {
        $crate::language::TokenDefinition::new(
            $name.to_string(),
            $regex_str,
            $behavior,
            $priority,
            $to_store,
        )
    };
}
24
/// Builds a `TokenDefinition` for a **keyword**.
///
/// Keywords are the reserved words of a language. The produced definition
/// never stores the matched text (`to_store_match` is `false`) and uses
/// `TokenBehavior::None`.
///
/// # Arguments
/// * `$name`: Token name, e.g. "IF_KEYWORD".
/// * `$regex_str`: Regex for the keyword, e.g. `r"if\b"` to respect word boundaries.
/// * `$priority`: Match priority of the keyword.
///
/// # Returns
/// A `Result<TokenDefinition, String>`; `Ok` when the regex is valid.
///
/// # Examples
/// ```
/// use lexit::keyword;
/// let if_token = keyword!("IF_KEYWORD", r"if\b", 100);
/// assert!(if_token.is_ok());
/// ```
#[macro_export]
macro_rules! keyword {
    ($name:expr, $regex_str:expr, $priority:expr) => {
        $crate::__lexor_create_token_definition!(
            $name,
            $regex_str,
            $priority,
            false,
            $crate::language::TokenBehavior::None
        )
    };
}
57
/// Builds a general-purpose `TokenDefinition`.
///
/// Unlike `keyword!`, the caller decides via `$to_store` whether the matched
/// text is kept on the token. The behavior is always `TokenBehavior::None`.
///
/// # Arguments
/// * `$name`: Token name, e.g. "IDENTIFIER".
/// * `$regex_str`: Regex pattern for the token.
/// * `$priority`: Match priority of the token.
/// * `$to_store`: `true` to keep the matched text, `false` to discard it.
///
/// # Returns
/// A `Result<TokenDefinition, String>`.
///
/// # Examples
/// ```
/// use lexit::token;
/// let identifier = token!("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*", 50, true);
/// assert!(identifier.is_ok());
/// ```
#[macro_export]
macro_rules! token {
    ($name:expr, $regex_str:expr, $priority:expr, $to_store:expr) => {
        $crate::__lexor_create_token_definition!(
            $name,
            $regex_str,
            $priority,
            $to_store,
            $crate::language::TokenBehavior::None
        )
    };
}
90
/// Builds a `TokenDefinition` for input the lexer should **ignore**.
///
/// Ignored tokens — whitespace being the typical case — are matched but never
/// appear in the output token stream. `to_store_match` is always `false` and
/// the behavior is `TokenBehavior::Ignore`.
///
/// # Arguments
/// * `$name`: Token name, e.g. "WHITESPACE".
/// * `$regex_str`: Regex pattern for the ignored input.
/// * `$priority`: Match priority of the token.
///
/// # Returns
/// A `Result<TokenDefinition, String>`.
///
/// # Examples
/// ```
/// use lexit::ignore_token;
/// let whitespace = ignore_token!("WHITESPACE", r"\s+", 10);
/// assert!(whitespace.is_ok());
/// ```
#[macro_export]
macro_rules! ignore_token {
    ($name:expr, $regex_str:expr, $priority:expr) => {
        $crate::__lexor_create_token_definition!(
            $name,
            $regex_str,
            $priority,
            false,
            $crate::language::TokenBehavior::Ignore
        )
    };
}
122
/// Builds a `TokenDefinition` for an **opening paired delimiter** (e.g. `(` or `{`).
///
/// Pair tokens let the lexer track balanced delimiters; the `$counterpart`
/// names the closing token this one must be balanced against.
/// `to_store_match` is always `false`.
///
/// # Arguments
/// * `$name`: Token name of the opener, e.g. "LEFT_PAREN".
/// * `$regex_str`: Regex pattern for the opener.
/// * `$counterpart`: Name of the matching closing token, e.g. "RIGHT_PAREN".
/// * `$priority`: Match priority of the token.
///
/// # Returns
/// A `Result<TokenDefinition, String>`.
///
/// # Examples
/// ```
/// use lexit::open_pair;
/// let left_brace = open_pair!("LEFT_BRACE", r"\{", "RIGHT_BRACE", 90);
/// assert!(left_brace.is_ok());
/// ```
#[macro_export]
macro_rules! open_pair {
    ($name:expr, $regex_str:expr, $counterpart:expr, $priority:expr) => {
        $crate::__lexor_create_token_definition!(
            $name,
            $regex_str,
            $priority,
            false,
            $crate::language::TokenBehavior::Pair($crate::language::PairDefinition::new(
                $crate::language::PairDirection::Open,
                $counterpart.to_string(),
            ))
        )
    };
}
158
/// Builds a `TokenDefinition` for a **closing paired delimiter** (e.g. `)` or `}`).
///
/// Pair tokens let the lexer track balanced delimiters; the `$counterpart`
/// names the opening token this one must be balanced against.
/// `to_store_match` is always `false`.
///
/// # Arguments
/// * `$name`: Token name of the closer, e.g. "RIGHT_PAREN".
/// * `$regex_str`: Regex pattern for the closer.
/// * `$counterpart`: Name of the matching opening token, e.g. "LEFT_PAREN".
/// * `$priority`: Match priority of the token.
///
/// # Returns
/// A `Result<TokenDefinition, String>`.
///
/// # Examples
/// ```
/// use lexit::close_pair;
/// let right_brace = close_pair!("RIGHT_BRACE", r"\}", "LEFT_BRACE", 90);
/// assert!(right_brace.is_ok());
/// ```
#[macro_export]
macro_rules! close_pair {
    ($name:expr, $regex_str:expr, $counterpart:expr, $priority:expr) => {
        $crate::__lexor_create_token_definition!(
            $name,
            $regex_str,
            $priority,
            false,
            $crate::language::TokenBehavior::Pair($crate::language::PairDefinition::new(
                $crate::language::PairDirection::Close,
                $counterpart.to_string(),
            ))
        )
    };
}
194
/// Builds a `TokenDefinition` that starts an **ignored region** which runs
/// until a second regex matches.
///
/// The typical use is multi-line comments: everything from the start match up
/// to the end match is dropped. `to_store_match` is always `false`.
///
/// # Arguments
/// * `$name`: Token name, e.g. "MULTI_LINE_COMMENT".
/// * `$regex_str`: Regex opening the ignored region, e.g. `r"/\*"`.
/// * `$end_regex_str`: Regex closing the ignored region, e.g. `r"\*/"`.
/// * `$priority`: Match priority of the token.
///
/// # Returns
/// A `Result<TokenDefinition, String>`.
///
/// # Examples
/// ```
/// use lexit::ignore_until;
/// let multi_comment = ignore_until!("MULTI_LINE_COMMENT", r"/\*", r"\*/", 5);
/// assert!(multi_comment.is_ok());
/// ```
#[macro_export]
macro_rules! ignore_until {
    ($name:expr, $regex_str:expr, $end_regex_str:expr, $priority:expr) => {
        $crate::__lexor_create_token_definition!(
            $name,
            $regex_str,
            $priority,
            false,
            $crate::language::TokenBehavior::IgnoreUntil($end_regex_str.to_string())
        )
    };
}
228
/// Assembles a complete `Language` from a list of token definition results.
///
/// Takes a comma-separated (optionally trailing-comma) list of expressions
/// evaluating to `Result<TokenDefinition, String>` — usually invocations of
/// `keyword!`, `token!`, `ignore_token!`, `open_pair!`, `close_pair!`, or
/// `ignore_until!` — collects them into a `Vec`, and delegates validation and
/// construction to `Language::new_from_results`.
///
/// # Arguments
/// * `$( $token_def:expr ),*`: Expressions yielding
///   `Result<TokenDefinition, String>`.
///
/// # Returns
/// A `Result<Language, String>`. Any failed definition, or an invalid
/// language overall (e.g. duplicate token names, unmatched pairs), produces
/// an `Err`.
///
/// # Examples
/// ```
/// use lexit::{define_language, keyword, token, open_pair, close_pair};
///
/// let my_language = define_language! {
///     keyword!("FN", r"fn\b", 100),
///     token!("IDENT", r"[a-z_]+", 90, true),
///     open_pair!("L_PAREN", r"\(", "R_PAREN", 80),
///     close_pair!("R_PAREN", r"\)", "L_PAREN", 80),
/// };
///
/// assert!(my_language.is_ok());
/// let language = my_language.unwrap();
/// assert_eq!(language.get_token_definitions().len(), 4);
/// ```
#[macro_export]
macro_rules! define_language {
    ( $( $token_def:expr ),* $(,)? ) => {
        $crate::language::Language::new_from_results(vec![$($token_def),*])
    };
}
269
#[cfg(test)]
mod tests {
    use crate::language::{PairDefinition, PairDirection, TokenBehavior, TokenDefinition};

    /// Builds the `TokenDefinition` a macro invocation is expected to expand
    /// to, by calling `TokenDefinition::new` directly with the same fields.
    fn build_expected(
        name: &str,
        regex_str: &str,
        priority: i32,
        to_store_match: bool,
        behavior: TokenBehavior,
    ) -> TokenDefinition {
        TokenDefinition::new(name.to_string(), regex_str, behavior, priority, to_store_match)
            .expect("test failed in creating token definition")
    }

    #[test]
    fn test_keyword_macro() {
        // `keyword!` must not store the match and must use `TokenBehavior::None`.
        let expected = build_expected("TEST_KEYWORD", r"test", 100, false, TokenBehavior::None);
        assert_eq!(keyword!("TEST_KEYWORD", r"test", 100).unwrap(), expected);
    }

    #[test]
    fn test_token_macro() {
        // `token!` forwards the caller's `to_store` flag.
        let expected = build_expected("TEST_IDENT", r"[a-z]+", 50, true, TokenBehavior::None);
        assert_eq!(token!("TEST_IDENT", r"[a-z]+", 50, true).unwrap(), expected);
    }

    #[test]
    fn test_ignore_token_macro() {
        // `ignore_token!` always sets `TokenBehavior::Ignore` and never stores.
        let expected = build_expected("WHITESPACE", r"\s+", 10, false, TokenBehavior::Ignore);
        assert_eq!(ignore_token!("WHITESPACE", r"\s+", 10).unwrap(), expected);
    }

    #[test]
    fn test_open_pair_macro() {
        // `open_pair!` wires the counterpart name into an `Open` pair behavior.
        let behavior = TokenBehavior::Pair(PairDefinition::new(
            PairDirection::Open,
            "R_BRACE".to_string(),
        ));
        let expected = build_expected("L_BRACE", r"\{", 90, false, behavior);
        assert_eq!(open_pair!("L_BRACE", r"\{", "R_BRACE", 90).unwrap(), expected);
    }

    #[test]
    fn test_close_pair_macro() {
        // `close_pair!` wires the counterpart name into a `Close` pair behavior.
        let behavior = TokenBehavior::Pair(PairDefinition::new(
            PairDirection::Close,
            "L_BRACE".to_string(),
        ));
        let expected = build_expected("R_BRACE", r"\}", 90, false, behavior);
        assert_eq!(close_pair!("R_BRACE", r"\}", "L_BRACE", 90).unwrap(), expected);
    }

    #[test]
    fn test_ignore_until_macro() {
        // `ignore_until!` carries the end regex inside `TokenBehavior::IgnoreUntil`.
        let behavior = TokenBehavior::IgnoreUntil(r"\*/".to_string());
        let expected = build_expected("MULTI_COMMENT", r"/\*", 5, false, behavior);
        assert_eq!(ignore_until!("MULTI_COMMENT", r"/\*", r"\*/", 5).unwrap(), expected);
    }

    #[test]
    fn test_define_language_macro_success() {
        // A mixed set of definitions (trailing comma included) must yield a
        // valid `Language` containing exactly the four named tokens.
        let language_result = define_language! {
            keyword!("MODULE", r"^module\b", 100),
            token!("IDENTIFIER", r"^[a-zA-Z_][a-zA-Z0-9_]*", 50, true),
            open_pair!("LEFT_PAREN", r"^\(", "RIGHT_PAREN", 90),
            close_pair!("RIGHT_PAREN", r"^\)", "LEFT_PAREN", 90),
        };

        assert!(language_result.is_ok());
        let language = language_result.unwrap();
        let definitions = language.get_token_definitions();

        assert_eq!(definitions.len(), 4);
        for name in ["MODULE", "IDENTIFIER", "LEFT_PAREN", "RIGHT_PAREN"] {
            assert!(definitions.iter().any(|t| t.get_name() == name));
        }
    }
}
384}