//! Procedural macros for the Gazelle parser generator.
//!
//! This crate provides the `gazelle!` macro that allows defining grammars
//! in Rust with type-safe parsers generated at compile time.
//!
//! # Example
//!
//! ```
//! use gazelle_macros::gazelle;
//! use gazelle::Precedence;
//!
//! gazelle! {
//!     grammar expr {
//!         start expr;
//!         terminals {
//!             NUM: _,
//!             LPAREN, RPAREN,
//!             prec OP: _
//!         }
//!         expr = NUM => num
//!              | expr OP expr => binop
//!              | LPAREN expr RPAREN => paren;
//!     }
//! }
//!
//! struct Eval;
//! impl gazelle::ErrorType for Eval {
//!     type Error = core::convert::Infallible;
//! }
//! impl expr::Types for Eval {
//!     type Num = f64;
//!     type Op = char;
//!     type Expr = f64;
//! }
//! impl gazelle::Action<expr::Expr<Eval>> for Eval {
//!     fn build(&mut self, node: expr::Expr<Eval>) -> Result<f64, Self::Error> {
//!         Ok(match node {
//!             expr::Expr::Num(n) => n,
//!             expr::Expr::Binop(l, op, r) => match op {
//!                 '+' => l + r, '-' => l - r, '*' => l * r, '/' => l / r, _ => 0.0,
//!             },
//!             expr::Expr::Paren(e) => e,
//!         })
//!     }
//! }
//!
//! let mut parser = expr::Parser::<Eval>::new();
//! let mut eval = Eval;
//! parser.push(expr::Terminal::Num(1.0), &mut eval).unwrap();
//! parser.push(expr::Terminal::Op('+', Precedence::Left(1)), &mut eval).unwrap();
//! parser.push(expr::Terminal::Num(2.0), &mut eval).unwrap();
//! parser.push(expr::Terminal::Op('*', Precedence::Left(2)), &mut eval).unwrap();
//! parser.push(expr::Terminal::Num(3.0), &mut eval).unwrap();
//! let result = parser.finish(&mut eval).map_err(|(_, e)| e).unwrap();
//! assert_eq!(result, 7.0);  // 1 + (2 * 3)
//! ```

58use proc_macro::TokenStream;
59use proc_macro2::TokenTree;
60
61use gazelle::meta::{AstBuilder, Terminal};
62
63/// Define a grammar and generate a type-safe parser.
64///
65/// See the crate-level documentation for usage examples.
66#[proc_macro]
67pub fn gazelle(input: TokenStream) -> TokenStream {
68    let input2: proc_macro2::TokenStream = input.into();
69
70    match parse_and_generate(input2) {
71        Ok(tokens) => tokens.into(),
72        Err(msg) => {
73            let err = format!("compile_error!({:?});", msg);
74            err.parse().unwrap()
75        }
76    }
77}
78
79fn parse_and_generate(input: proc_macro2::TokenStream) -> Result<proc_macro2::TokenStream, String> {
80    let (derives, visibility, name, source) = lex_token_stream(input)?;
81
82    let apply_derives = |ctx: &mut gazelle::codegen::CodegenContext| {
83        for d in &derives {
84            ctx.derives.insert(d.clone());
85        }
86    };
87
88    let grammar_def = match source {
89        GrammarSource::Inline(tokens) => {
90            if tokens.is_empty() {
91                return Err("Empty grammar".to_string());
92            }
93            gazelle::meta::parse_tokens_typed(tokens)?
94        }
95        GrammarSource::File(path) => {
96            let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
97                .map_err(|_| "CARGO_MANIFEST_DIR not set".to_string())?;
98            let full_path = std::path::Path::new(&manifest_dir).join(&path);
99            let content = std::fs::read_to_string(&full_path)
100                .map_err(|e| format!("Failed to read {}: {}", full_path.display(), e))?;
101            let grammar_def = gazelle::parse_grammar(&content)?;
102
103            // Emit include_bytes! so cargo tracks the file for recompilation
104            let mut ctx = gazelle::codegen::CodegenContext::from_grammar(
105                &grammar_def,
106                &name,
107                &visibility,
108                true,
109            )?;
110            apply_derives(&mut ctx);
111            let mut tokens = gazelle::codegen::generate_tokens(&ctx)?;
112            let abs = full_path
113                .canonicalize()
114                .map_err(|e| format!("Failed to canonicalize {}: {}", full_path.display(), e))?;
115            let abs_str = abs.to_str().ok_or("Non-UTF8 path")?;
116            let include: proc_macro2::TokenStream =
117                format!("const _: &[u8] = include_bytes!({:?});", abs_str)
118                    .parse()
119                    .map_err(|e| format!("Failed to generate include_bytes: {}", e))?;
120            tokens.extend(include);
121            return Ok(tokens);
122        }
123    };
124
125    let mut ctx =
126        gazelle::codegen::CodegenContext::from_grammar(&grammar_def, &name, &visibility, true)?;
127    apply_derives(&mut ctx);
128    gazelle::codegen::generate_tokens(&ctx)
129}
130
/// Where the grammar definition comes from.
enum GrammarSource {
    /// Grammar written inline in the macro body, already lexed into terminals.
    Inline(Vec<Terminal<AstBuilder>>),
    /// Path to an external grammar file, relative to `CARGO_MANIFEST_DIR`.
    File(String),
}
135
136/// Lex a proc_macro2::TokenStream into Terminals.
137/// Returns (derives, visibility_string, name, source).
138///
139/// Expected formats:
140///   `[#[derive(Debug, Clone)]] [pub] grammar Name { grammar_content... }`   — inline
141///   `[#[derive(Debug, Clone)]] [pub] grammar Name = "path/to/file.gzl"`     — file include
142fn lex_token_stream(
143    input: proc_macro2::TokenStream,
144) -> Result<(Vec<String>, String, String, GrammarSource), String> {
145    let mut iter = input.into_iter().peekable();
146
147    // Check for #[derive(...)] attribute
148    let mut derives = Vec::new();
149    if matches!(iter.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '#') {
150        iter.next(); // consume '#'
151        match iter.next() {
152            Some(TokenTree::Group(g))
153                if matches!(g.delimiter(), proc_macro2::Delimiter::Bracket) =>
154            {
155                let mut attr_iter = g.stream().into_iter().peekable();
156                match attr_iter.next() {
157                    Some(TokenTree::Ident(id)) if id == "derive" => {}
158                    other => {
159                        return Err(format!("Expected `derive` in attribute, got {:?}", other));
160                    }
161                }
162                match attr_iter.next() {
163                    Some(TokenTree::Group(g2))
164                        if matches!(g2.delimiter(), proc_macro2::Delimiter::Parenthesis) =>
165                    {
166                        for tt in g2.stream() {
167                            if let TokenTree::Ident(id) = tt {
168                                derives.push(id.to_string());
169                            }
170                            // skip commas
171                        }
172                    }
173                    other => {
174                        return Err(format!("Expected `(...)` after `derive`, got {:?}", other));
175                    }
176                }
177            }
178            other => return Err(format!("Expected `[...]` after `#`, got {:?}", other)),
179        }
180    }
181
182    // Check for visibility (pub, pub(crate), etc.)
183    let visibility = if matches!(iter.peek(), Some(TokenTree::Ident(id)) if *id == "pub") {
184        iter.next(); // consume "pub"
185
186        // Check for (crate) or (super) etc.
187        if matches!(iter.peek(), Some(TokenTree::Group(g)) if matches!(g.delimiter(), proc_macro2::Delimiter::Parenthesis))
188        {
189            let group = iter.next().unwrap();
190            format!("pub{} ", group)
191        } else {
192            "pub ".to_string()
193        }
194    } else {
195        String::new()
196    };
197
198    // Expect `grammar` keyword
199    match iter.next() {
200        Some(TokenTree::Ident(id)) if id == "grammar" => {}
201        other => return Err(format!("Expected `grammar` keyword, got {:?}", other)),
202    }
203
204    // Extract grammar name
205    let name = match iter.next() {
206        Some(TokenTree::Ident(id)) => id.to_string(),
207        other => {
208            return Err(format!(
209                "Expected grammar name after `grammar`, got {:?}",
210                other
211            ));
212        }
213    };
214
215    // File include: `grammar Name = "path.gzl"`
216    if matches!(iter.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '=') {
217        iter.next(); // consume '='
218        match iter.next() {
219            Some(TokenTree::Literal(lit)) => {
220                let s = lit.to_string();
221                // Strip surrounding quotes
222                if s.starts_with('"') && s.ends_with('"') {
223                    let path = s[1..s.len() - 1].to_string();
224                    return Ok((derives, visibility, name, GrammarSource::File(path)));
225                }
226                return Err(format!("Expected string literal after `=`, got {}", s));
227            }
228            other => return Err(format!("Expected file path after `=`, got {:?}", other)),
229        }
230    }
231
232    // Inline: `grammar Name { ... }`
233    let content = match iter.next() {
234        Some(TokenTree::Group(g)) if matches!(g.delimiter(), proc_macro2::Delimiter::Brace) => {
235            g.stream()
236        }
237        other => {
238            return Err(format!(
239                "Expected {{ or = after grammar name, got {:?}",
240                other
241            ));
242        }
243    };
244
245    let mut tokens = Vec::new();
246    let mut inner_iter = content.into_iter().peekable();
247    lex_tokens(&mut inner_iter, &mut tokens)?;
248
249    Ok((derives, visibility, name, GrammarSource::Inline(tokens)))
250}
251
/// Resolve backslash escapes in a string literal's contents.
///
/// Recognizes `\n`, `\t`, `\r`, `\\`, `\"`, and `\0`; any other escaped
/// character is kept with its backslash intact (so regex escapes such as
/// `\+` or `\*` survive unchanged). Errors if the input ends mid-escape.
fn unescape_string(s: &str) -> Result<String, String> {
    let mut result = String::with_capacity(s.len());
    let mut it = s.chars();
    while let Some(ch) = it.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }
        let escaped = it
            .next()
            .ok_or_else(|| "unexpected end of string after backslash".to_string())?;
        match escaped {
            'n' => result.push('\n'),
            't' => result.push('\t'),
            'r' => result.push('\r'),
            '\\' => result.push('\\'),
            '"' => result.push('"'),
            '0' => result.push('\0'),
            // Pass through other escapes (like \+, \*, etc.) as-is for regex
            other => {
                result.push('\\');
                result.push(other);
            }
        }
    }
    Ok(result)
}
277
/// Recursively lex a `proc_macro2` token stream into grammar `Terminal`s,
/// appending to `tokens`.
///
/// Delimited groups are flattened: `{...}` becomes `Lbrace ... Rbrace` and
/// `(...)` becomes `Lparen ... Rparen`, since proc-macro token trees nest
/// delimiters while the grammar parser expects a flat token sequence.
///
/// Returns `Err` with a message for any punctuation, delimiter, or literal
/// the grammar language does not recognize.
fn lex_tokens(
    iter: &mut std::iter::Peekable<proc_macro2::token_stream::IntoIter>,
    tokens: &mut Vec<Terminal<AstBuilder>>,
) -> Result<(), String> {
    while let Some(tt) = iter.next() {
        match tt {
            TokenTree::Ident(id) => {
                let s = id.to_string();
                // Keywords and modifiers of the grammar DSL; anything else
                // is a plain identifier.
                match s.as_str() {
                    "start" => tokens.push(Terminal::KwStart),
                    "terminals" => tokens.push(Terminal::KwTerminals),
                    "prec" | "shift" | "reduce" | "conflict" => {
                        tokens.push(Terminal::Modifier(s));
                    }
                    "expect" => tokens.push(Terminal::KwExpect),
                    "_" => tokens.push(Terminal::Underscore),
                    _ => tokens.push(Terminal::Ident(s)),
                }
            }
            TokenTree::Punct(p) => {
                let c = p.as_char();
                match c {
                    '{' => tokens.push(Terminal::Lbrace),
                    '}' => tokens.push(Terminal::Rbrace),
                    ',' => tokens.push(Terminal::Comma),
                    '|' => tokens.push(Terminal::Pipe),
                    ';' => tokens.push(Terminal::Semi),
                    '?' => tokens.push(Terminal::Question),
                    '*' => tokens.push(Terminal::Star),
                    '+' => tokens.push(Terminal::Plus),
                    '%' => tokens.push(Terminal::Percent),
                    ':' => {
                        tokens.push(Terminal::Colon);
                    }
                    '=' => {
                        // Check for => (fat arrow): the `=` must be joint
                        // (no whitespace) with a following `>` token.
                        if p.spacing() == proc_macro2::Spacing::Joint
                            && let Some(TokenTree::Punct(p2)) = iter.peek()
                            && p2.as_char() == '>'
                        {
                            iter.next();
                            tokens.push(Terminal::FatArrow);
                            continue;
                        }
                        tokens.push(Terminal::Eq);
                    }
                    _ => return Err(format!("Unexpected punctuation: {}", c)),
                }
            }
            // Groups carry their delimiters implicitly; re-emit them as
            // explicit terminals and recurse into the group contents.
            TokenTree::Group(g) => match g.delimiter() {
                proc_macro2::Delimiter::Brace => {
                    tokens.push(Terminal::Lbrace);
                    let mut inner_iter = g.stream().into_iter().peekable();
                    lex_tokens(&mut inner_iter, tokens)?;
                    tokens.push(Terminal::Rbrace);
                }
                proc_macro2::Delimiter::Parenthesis => {
                    tokens.push(Terminal::Lparen);
                    let mut inner_iter = g.stream().into_iter().peekable();
                    lex_tokens(&mut inner_iter, tokens)?;
                    tokens.push(Terminal::Rparen);
                }
                _ => return Err(format!("Unexpected group delimiter: {:?}", g.delimiter())),
            },
            TokenTree::Literal(lit) => {
                let s = lit.to_string();
                // Check if it's a number (integer literal)
                if s.chars().all(|c| c.is_ascii_digit()) {
                    tokens.push(Terminal::Num(s));
                } else if s.starts_with('"') {
                    // Regular string literal — strip quotes and unescape
                    let inner = &s[1..s.len() - 1];
                    let value = unescape_string(inner)
                        .map_err(|e| format!("Invalid string literal: {}", e))?;
                    tokens.push(Terminal::Regex(value));
                } else if s.starts_with("r\"") || s.starts_with("r#") {
                    // Raw string literal — strip r, hashes, and quotes
                    let after_r = &s[1..];
                    let hashes = after_r.bytes().take_while(|&b| b == b'#').count();
                    // Skip hashes + opening quote, remove closing quote + hashes
                    let inner = &after_r[hashes + 1..after_r.len() - hashes - 1];
                    tokens.push(Terminal::Regex(inner.to_string()));
                } else {
                    return Err(format!("Unexpected literal in grammar: {}", s));
                }
            }
        }
    }
    Ok(())
}