lexit/macros.rs
1#[macro_export]
2/// Internal macro used by other token definition macros to create a `TokenDefinition`.
3///
4/// This macro provides a consistent way to construct `TokenDefinition` instances
5/// by delegating to `TokenDefinition::new`. It's not intended for direct use.
6///
7/// # Arguments
8/// * `$name`: The name of the token as a string literal.
9/// * `$regex_str`: The regular expression pattern for the token as a string literal.
10/// * `$priority`: An integer representing the token's priority.
11/// * `$to_store`: A boolean indicating whether to store the matched text.
12/// * `$behavior`: The `TokenBehavior` for the token.
13macro_rules! __lexor_create_token_definition {
14 ($name:expr, $regex_str:expr, $priority:expr, $to_store:expr, $behavior:expr) => {
15 $crate::language::TokenDefinition::new(
16 $name.to_string(),
17 $regex_str,
18 $behavior,
19 $priority,
20 $to_store,
21 )
22 };
23}
24
25#[macro_export]
26/// Creates a `TokenDefinition` for a **keyword**.
27///
28/// Keywords are typically reserved words in a language. This macro sets
29/// `to_store_match` to `false` and `TokenBehavior` to `None` by default.
30///
31/// # Arguments
32/// * `$name`: The name of the keyword token (e.g., "IF_KEYWORD").
33/// * `$regex_str`: The regex pattern for the keyword (e.g., `r"if\b"` for word boundary).
34/// * `$priority`: The priority of the keyword.
35///
36/// # Returns
37/// A `Result<TokenDefinition, String>` which can be unwrapped to get the definition
38/// if the regex is valid.
39///
40/// # Examples
41/// ```
42/// use lexit::keyword;
43/// let if_token = keyword!("IF_KEYWORD", r"if\b", 100);
44/// assert!(if_token.is_ok());
45/// ```
46macro_rules! keyword {
47 ($name:expr, $regex_str:expr, $priority:expr) => {
48 $crate::__lexor_create_token_definition!(
49 $name,
50 $regex_str,
51 $priority,
52 false,
53 $crate::language::TokenBehavior::None
54 )
55 };
56}
57
58#[macro_export]
59/// Creates a generic `TokenDefinition`.
60///
61/// This macro allows full control over whether the matched text should be stored.
62/// The `TokenBehavior` is set to `None` by default.
63///
64/// # Arguments
65/// * `$name`: The name of the token (e.g., "IDENTIFIER").
66/// * `$regex_str`: The regex pattern for the token.
67/// * `$priority`: The priority of the token.
68/// * `$to_store`: A boolean; `true` to store the matched text, `false` otherwise.
69///
70/// # Returns
71/// A `Result<TokenDefinition, String>`.
72///
73/// # Examples
74/// ```
75/// use lexit::token;
76/// let identifier = token!("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*", 50, true);
77/// assert!(identifier.is_ok());
78/// ```
79macro_rules! token {
80 ($name:expr, $regex_str:expr, $priority:expr, $to_store:expr) => {
81 $crate::__lexor_create_token_definition!(
82 $name,
83 $regex_str,
84 $priority,
85 $to_store,
86 $crate::language::TokenBehavior::None
87 )
88 };
89}
90
91#[macro_export]
92/// Creates a `TokenDefinition` for a token that should be **ignored** by the lexer.
93///
94/// Ignored tokens (like whitespace) are recognized but
95/// not included in the final token stream. `to_store_match` is always `false`.
96///
97/// # Arguments
98/// * `$name`: The name of the ignored token (e.g., "WHITESPACE").
99/// * `$regex_str`: The regex pattern for the ignored token.
100/// * `$priority`: The priority of the ignored token.
101///
102/// # Returns
103/// A `Result<TokenDefinition, String>`.
104///
105/// # Examples
106/// ```
107/// use lexit::ignore_token;
108/// let whitespace = ignore_token!("WHITESPACE", r"\s+", 10);
109/// assert!(whitespace.is_ok());
110/// ```
111macro_rules! ignore_token {
112 ($name:expr, $regex_str:expr, $priority:expr) => {
113 $crate::__lexor_create_token_definition!(
114 $name,
115 $regex_str,
116 $priority,
117 false,
118 $crate::language::TokenBehavior::Ignore
119 )
120 };
121}
122
123#[macro_export]
124/// Creates a `TokenDefinition` for an **opening paired token** (e.g., `(` or `{`).
125///
126/// Paired tokens help the lexer track balanced delimiters. `to_store_match` is
127/// always `false`.
128///
129/// # Arguments
130/// * `$name`: The name of the opening pair token (e.g., "LEFT_PAREN").
131/// * `$regex_str`: The regex pattern for the opening pair.
132/// * `$counterpart`: The name of its corresponding closing pair token (e.g., "RIGHT_PAREN").
133/// * `$priority`: The priority of the token.
134///
135/// # Returns
136/// A `Result<TokenDefinition, String>`.
137///
138/// # Examples
139/// ```
140/// use lexit::open_pair;
141/// let left_brace = open_pair!("LEFT_BRACE", r"\{", "RIGHT_BRACE", 90);
142/// assert!(left_brace.is_ok());
143/// ```
144macro_rules! open_pair {
145 ($name:expr, $regex_str:expr, $counterpart:expr, $priority:expr) => {
146 $crate::__lexor_create_token_definition!(
147 $name,
148 $regex_str,
149 $priority,
150 false,
151 $crate::language::TokenBehavior::Pair($crate::language::PairDefinition::new(
152 $crate::language::PairDirection::Open,
153 $counterpart.to_string(),
154 ))
155 )
156 };
157}
158
159#[macro_export]
160/// Creates a `TokenDefinition` for a **closing paired token** (e.g., `)` or `}`).
161///
162/// Paired tokens help the lexer track balanced delimiters. `to_store_match` is
163/// always `false`.
164///
165/// # Arguments
166/// * `$name`: The name of the closing pair token (e.g., "RIGHT_PAREN").
167/// * `$regex_str`: The regex pattern for the closing pair.
168/// * `$counterpart`: The name of its corresponding opening pair token (e.g., "LEFT_PAREN").
169/// * `$priority`: The priority of the token.
170///
171/// # Returns
172/// A `Result<TokenDefinition, String>`.
173///
174/// # Examples
175/// ```
176/// use lexit::close_pair;
177/// let right_brace = close_pair!("RIGHT_BRACE", r"\}", "LEFT_BRACE", 90);
178/// assert!(right_brace.is_ok());
179/// ```
180macro_rules! close_pair {
181 ($name:expr, $regex_str:expr, $counterpart:expr, $priority:expr) => {
182 $crate::__lexor_create_token_definition!(
183 $name,
184 $regex_str,
185 $priority,
186 false,
187 $crate::language::TokenBehavior::Pair($crate::language::PairDefinition::new(
188 $crate::language::PairDirection::Close,
189 $counterpart.to_string(),
190 ))
191 )
192 };
193}
194
195#[macro_export]
196/// Creates a `TokenDefinition` for a token that marks the **start of an ignored block**
197/// until a specific end regex is matched.
198///
199/// This is typically used for multi-line comments. The content between the start
200/// token and the end regex is ignored. `to_store_match` is always `false`.
201///
202/// # Arguments
203/// * `$name`: The name of the ignore-until token (e.g., "MULTI_LINE_COMMENT").
204/// * `$regex_str`: The regex pattern that starts the ignored block (e.g., `r"/\*" `).
205/// * `$end_regex_str`: The regex pattern that ends the ignored block (e.g., `r"\*/" `).
206/// * `$priority`: The priority of the token.
207///
208/// # Returns
209/// A `Result<TokenDefinition, String>`.
210///
211/// # Examples
212/// ```
213/// use lexit::ignore_until;
214/// let multi_comment = ignore_until!("MULTI_LINE_COMMENT", r"/\*", r"\*/", 5);
215/// assert!(multi_comment.is_ok());
216/// ```
217macro_rules! ignore_until {
218 ($name:expr, $regex_str:expr, $end_regex_str:expr, $priority:expr) => {
219 $crate::__lexor_create_token_definition!(
220 $name,
221 $regex_str,
222 $priority,
223 false,
224 $crate::language::TokenBehavior::IgnoreUntil($end_regex_str.to_string())
225 )
226 };
227}
228
229#[macro_export]
230/// Defines a complete `Language` by providing a list of `TokenDefinition` results.
231///
232/// This macro simplifies the creation of a `Language` object by taking a
233/// comma-separated list of `Result<TokenDefinition, String>` expressions
234/// (typically generated by other token macros like `keyword!`, `token!`, etc.).
235/// It collects these into a `Vec` and uses `Language::new_from_results` for validation
236/// and final `Language` creation.
237///
238/// # Arguments
239/// * `$( $token_def:expr ),*`: A comma-separated list of expressions that evaluate to
240/// `Result<TokenDefinition, String>`.
241///
242/// # Returns
243/// A `Result<Language, String>`. If any of the provided token definitions
244/// are errors, or if the resulting `Language` definition is invalid (e.g.,
245/// duplicate token names, unmatched pairs), an `Err` will be returned.
246///
247/// # Examples
248/// ```
249/// use lexit::{define_language, keyword, token, open_pair, close_pair};
250///
251/// let my_language = define_language! {
252/// keyword!("FN", r"fn\b", 100),
253/// token!("IDENT", r"[a-z_]+", 90, true),
254/// open_pair!("L_PAREN", r"\(", "R_PAREN", 80),
255/// close_pair!("R_PAREN", r"\)", "L_PAREN", 80),
256/// };
257///
258/// assert!(my_language.is_ok());
259/// let language = my_language.unwrap();
260/// assert_eq!(language.get_token_definitions().len(), 4);
261/// ```
262macro_rules! define_language {
263 ( $( $token_def:expr ),* $(,)? ) => {
264 $crate::language::Language::new_from_results(
265 vec![ $( $token_def ),* ],
266 )
267 };
268}
269
270#[cfg(test)]
271mod tests {
272 use crate::language::{PairDefinition, PairDirection, TokenBehavior, TokenDefinition};
273
274 fn create_expected_token(
275 name: &str,
276 regex_str: &str,
277 priority: i32,
278 to_store_match: bool,
279 behavior: TokenBehavior,
280 ) -> TokenDefinition {
281 TokenDefinition::new(
282 name.to_string(),
283 regex_str,
284 behavior,
285 priority,
286 to_store_match,
287 )
288 .expect("test failed in creating token definition")
289 }
290
291 #[test]
292 fn test_keyword_macro() {
293 let result = keyword!("TEST_KEYWORD", r"test", 100);
294 let expected =
295 create_expected_token("TEST_KEYWORD", r"test", 100, false, TokenBehavior::None);
296 assert_eq!(result.unwrap(), expected);
297 }
298
299 #[test]
300 fn test_token_macro() {
301 let result = token!("TEST_IDENT", r"[a-z]+", 50, true);
302 let expected =
303 create_expected_token("TEST_IDENT", r"[a-z]+", 50, true, TokenBehavior::None);
304 assert_eq!(result.unwrap(), expected);
305 }
306
307 #[test]
308 fn test_ignore_token_macro() {
309 let result = ignore_token!("WHITESPACE", r"\s+", 10);
310 let expected =
311 create_expected_token("WHITESPACE", r"\s+", 10, false, TokenBehavior::Ignore);
312 assert_eq!(result.unwrap(), expected);
313 }
314
315 #[test]
316 fn test_open_pair_macro() {
317 let result = open_pair!("L_BRACE", r"\{", "R_BRACE", 90);
318 let expected = create_expected_token(
319 "L_BRACE",
320 r"\{",
321 90,
322 false,
323 TokenBehavior::Pair(PairDefinition::new(
324 PairDirection::Open,
325 "R_BRACE".to_string(),
326 )),
327 );
328 assert_eq!(result.unwrap(), expected);
329 }
330
331 #[test]
332 fn test_close_pair_macro() {
333 let result = close_pair!("R_BRACE", r"\}", "L_BRACE", 90);
334 let expected = create_expected_token(
335 "R_BRACE",
336 r"\}",
337 90,
338 false,
339 TokenBehavior::Pair(PairDefinition::new(
340 PairDirection::Close,
341 "L_BRACE".to_string(),
342 )),
343 );
344 assert_eq!(result.unwrap(), expected);
345 }
346
347 #[test]
348 fn test_ignore_until_macro() {
349 let result = ignore_until!("MULTI_COMMENT", r"/\*", r"\*/", 5);
350 let expected = create_expected_token(
351 "MULTI_COMMENT",
352 r"/\*",
353 5,
354 false,
355 TokenBehavior::IgnoreUntil(r"\*/".to_string()),
356 );
357 assert_eq!(result.unwrap(), expected);
358 }
359
360 #[test]
361 fn test_define_language_macro_success() {
362 let module_token = keyword!("MODULE", r"^module\b", 100);
363 let identifier_token = token!("IDENTIFIER", r"^[a-zA-Z_][a-zA-Z0-9_]*", 50, true);
364 let open_paren_token = open_pair!("LEFT_PAREN", r"^\(", "RIGHT_PAREN", 90);
365 let close_paren_token = close_pair!("RIGHT_PAREN", r"^\)", "LEFT_PAREN", 90);
366
367 let language_result = define_language! {
368 module_token,
369 identifier_token,
370 open_paren_token,
371 close_paren_token,
372 };
373
374 assert!(language_result.is_ok());
375 let language = language_result.unwrap();
376 let definitions = language.get_token_definitions();
377
378 assert_eq!(definitions.len(), 4);
379 assert!(definitions.iter().any(|t| t.get_name() == "MODULE"));
380 assert!(definitions.iter().any(|t| t.get_name() == "IDENTIFIER"));
381 assert!(definitions.iter().any(|t| t.get_name() == "LEFT_PAREN"));
382 assert!(definitions.iter().any(|t| t.get_name() == "RIGHT_PAREN"));
383 }
384}