1use proc_macro::TokenStream;
59use proc_macro2::TokenTree;
60
61use gazelle::meta::{AstBuilder, Terminal};
62
63#[proc_macro]
67pub fn gazelle(input: TokenStream) -> TokenStream {
68 let input2: proc_macro2::TokenStream = input.into();
69
70 match parse_and_generate(input2) {
71 Ok(tokens) => tokens.into(),
72 Err(msg) => {
73 let err = format!("compile_error!({:?});", msg);
74 err.parse().unwrap()
75 }
76 }
77}
78
79fn parse_and_generate(input: proc_macro2::TokenStream) -> Result<proc_macro2::TokenStream, String> {
80 let (derives, visibility, name, source) = lex_token_stream(input)?;
81
82 let apply_derives = |ctx: &mut gazelle::codegen::CodegenContext| {
83 for d in &derives {
84 ctx.derives.insert(d.clone());
85 }
86 };
87
88 let grammar_def = match source {
89 GrammarSource::Inline(tokens) => {
90 if tokens.is_empty() {
91 return Err("Empty grammar".to_string());
92 }
93 gazelle::meta::parse_tokens_typed(tokens)?
94 }
95 GrammarSource::File(path) => {
96 let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
97 .map_err(|_| "CARGO_MANIFEST_DIR not set".to_string())?;
98 let full_path = std::path::Path::new(&manifest_dir).join(&path);
99 let content = std::fs::read_to_string(&full_path)
100 .map_err(|e| format!("Failed to read {}: {}", full_path.display(), e))?;
101 let grammar_def = gazelle::parse_grammar(&content)?;
102
103 let mut ctx = gazelle::codegen::CodegenContext::from_grammar(
105 &grammar_def,
106 &name,
107 &visibility,
108 true,
109 )?;
110 apply_derives(&mut ctx);
111 let mut tokens = gazelle::codegen::generate_tokens(&ctx)?;
112 let abs = full_path
113 .canonicalize()
114 .map_err(|e| format!("Failed to canonicalize {}: {}", full_path.display(), e))?;
115 let abs_str = abs.to_str().ok_or("Non-UTF8 path")?;
116 let include: proc_macro2::TokenStream =
117 format!("const _: &[u8] = include_bytes!({:?});", abs_str)
118 .parse()
119 .map_err(|e| format!("Failed to generate include_bytes: {}", e))?;
120 tokens.extend(include);
121 return Ok(tokens);
122 }
123 };
124
125 let mut ctx =
126 gazelle::codegen::CodegenContext::from_grammar(&grammar_def, &name, &visibility, true)?;
127 apply_derives(&mut ctx);
128 gazelle::codegen::generate_tokens(&ctx)
129}
130
/// Where the grammar text comes from.
enum GrammarSource {
    // Grammar written inline inside the macro body, already lexed into
    // the meta-grammar's terminal tokens.
    Inline(Vec<Terminal<AstBuilder>>),
    // Path to an external grammar file, as written after `=` in the macro
    // invocation (resolved against CARGO_MANIFEST_DIR by the caller).
    File(String),
}
135
136fn lex_token_stream(
143 input: proc_macro2::TokenStream,
144) -> Result<(Vec<String>, String, String, GrammarSource), String> {
145 let mut iter = input.into_iter().peekable();
146
147 let mut derives = Vec::new();
149 if matches!(iter.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '#') {
150 iter.next(); match iter.next() {
152 Some(TokenTree::Group(g))
153 if matches!(g.delimiter(), proc_macro2::Delimiter::Bracket) =>
154 {
155 let mut attr_iter = g.stream().into_iter().peekable();
156 match attr_iter.next() {
157 Some(TokenTree::Ident(id)) if id == "derive" => {}
158 other => {
159 return Err(format!("Expected `derive` in attribute, got {:?}", other));
160 }
161 }
162 match attr_iter.next() {
163 Some(TokenTree::Group(g2))
164 if matches!(g2.delimiter(), proc_macro2::Delimiter::Parenthesis) =>
165 {
166 for tt in g2.stream() {
167 if let TokenTree::Ident(id) = tt {
168 derives.push(id.to_string());
169 }
170 }
172 }
173 other => {
174 return Err(format!("Expected `(...)` after `derive`, got {:?}", other));
175 }
176 }
177 }
178 other => return Err(format!("Expected `[...]` after `#`, got {:?}", other)),
179 }
180 }
181
182 let visibility = if matches!(iter.peek(), Some(TokenTree::Ident(id)) if *id == "pub") {
184 iter.next(); if matches!(iter.peek(), Some(TokenTree::Group(g)) if matches!(g.delimiter(), proc_macro2::Delimiter::Parenthesis))
188 {
189 let group = iter.next().unwrap();
190 format!("pub{} ", group)
191 } else {
192 "pub ".to_string()
193 }
194 } else {
195 String::new()
196 };
197
198 match iter.next() {
200 Some(TokenTree::Ident(id)) if id == "grammar" => {}
201 other => return Err(format!("Expected `grammar` keyword, got {:?}", other)),
202 }
203
204 let name = match iter.next() {
206 Some(TokenTree::Ident(id)) => id.to_string(),
207 other => {
208 return Err(format!(
209 "Expected grammar name after `grammar`, got {:?}",
210 other
211 ));
212 }
213 };
214
215 if matches!(iter.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '=') {
217 iter.next(); match iter.next() {
219 Some(TokenTree::Literal(lit)) => {
220 let s = lit.to_string();
221 if s.starts_with('"') && s.ends_with('"') {
223 let path = s[1..s.len() - 1].to_string();
224 return Ok((derives, visibility, name, GrammarSource::File(path)));
225 }
226 return Err(format!("Expected string literal after `=`, got {}", s));
227 }
228 other => return Err(format!("Expected file path after `=`, got {:?}", other)),
229 }
230 }
231
232 let content = match iter.next() {
234 Some(TokenTree::Group(g)) if matches!(g.delimiter(), proc_macro2::Delimiter::Brace) => {
235 g.stream()
236 }
237 other => {
238 return Err(format!(
239 "Expected {{ or = after grammar name, got {:?}",
240 other
241 ));
242 }
243 };
244
245 let mut tokens = Vec::new();
246 let mut inner_iter = content.into_iter().peekable();
247 lex_tokens(&mut inner_iter, &mut tokens)?;
248
249 Ok((derives, visibility, name, GrammarSource::Inline(tokens)))
250}
251
/// Decodes the escape sequences a quoted literal's token text may contain
/// (`\n`, `\t`, `\r`, `\\`, `\"`, `\0`).
///
/// Any other `\x` pair is passed through verbatim (backslash included) so
/// regex escapes like `\d` or `\s` survive intact. Errors only when the
/// input ends on a lone trailing backslash.
fn unescape_string(s: &str) -> Result<String, String> {
    let mut decoded = String::with_capacity(s.len());
    let mut rest = s.chars();

    while let Some(ch) = rest.next() {
        if ch != '\\' {
            decoded.push(ch);
            continue;
        }
        let escaped = rest
            .next()
            .ok_or_else(|| String::from("unexpected end of string after backslash"))?;
        match escaped {
            'n' => decoded.push('\n'),
            't' => decoded.push('\t'),
            'r' => decoded.push('\r'),
            '\\' => decoded.push('\\'),
            '"' => decoded.push('"'),
            '0' => decoded.push('\0'),
            // Unknown escape: keep it literal for downstream regex parsing.
            other => {
                decoded.push('\\');
                decoded.push(other);
            }
        }
    }

    Ok(decoded)
}
277
/// Lexes a proc-macro token stream into the meta-grammar's `Terminal` tokens,
/// appending to `tokens`.
///
/// proc-macro groups (`{...}`, `(...)`) arrive as nested trees, so this
/// recurses into them and re-emits explicit open/close terminals to flatten
/// the stream. The iterator is `Peekable` because `=>` must be fused from two
/// adjacent punct tokens.
fn lex_tokens(
    iter: &mut std::iter::Peekable<proc_macro2::token_stream::IntoIter>,
    tokens: &mut Vec<Terminal<AstBuilder>>,
) -> Result<(), String> {
    while let Some(tt) = iter.next() {
        match tt {
            // Identifiers: grammar keywords get dedicated terminals, anything
            // else becomes a plain `Ident`.
            TokenTree::Ident(id) => {
                let s = id.to_string();
                match s.as_str() {
                    "start" => tokens.push(Terminal::KwStart),
                    "terminals" => tokens.push(Terminal::KwTerminals),
                    "prec" | "shift" | "reduce" | "conflict" => {
                        tokens.push(Terminal::Modifier(s));
                    }
                    "expect" => tokens.push(Terminal::KwExpect),
                    "_" => tokens.push(Terminal::Underscore),
                    _ => tokens.push(Terminal::Ident(s)),
                }
            }
            TokenTree::Punct(p) => {
                let c = p.as_char();
                match c {
                    // `{`/`}` as bare puncts should not occur (braces arrive
                    // as groups), but map them for completeness.
                    '{' => tokens.push(Terminal::Lbrace),
                    '}' => tokens.push(Terminal::Rbrace),
                    ',' => tokens.push(Terminal::Comma),
                    '|' => tokens.push(Terminal::Pipe),
                    ';' => tokens.push(Terminal::Semi),
                    '?' => tokens.push(Terminal::Question),
                    '*' => tokens.push(Terminal::Star),
                    '+' => tokens.push(Terminal::Plus),
                    '%' => tokens.push(Terminal::Percent),
                    ':' => {
                        tokens.push(Terminal::Colon);
                    }
                    '=' => {
                        // Fuse `=` + `>` into `FatArrow`, but only when the
                        // `=` has Joint spacing (i.e. `=>` was written with no
                        // whitespace between the characters).
                        if p.spacing() == proc_macro2::Spacing::Joint
                            && let Some(TokenTree::Punct(p2)) = iter.peek()
                            && p2.as_char() == '>'
                        {
                            iter.next();
                            tokens.push(Terminal::FatArrow);
                            continue;
                        }
                        tokens.push(Terminal::Eq);
                    }
                    _ => return Err(format!("Unexpected punctuation: {}", c)),
                }
            }
            // Groups: recurse into the body, bracketing it with explicit
            // open/close terminals so the parser sees a flat stream.
            TokenTree::Group(g) => match g.delimiter() {
                proc_macro2::Delimiter::Brace => {
                    tokens.push(Terminal::Lbrace);
                    let mut inner_iter = g.stream().into_iter().peekable();
                    lex_tokens(&mut inner_iter, tokens)?;
                    tokens.push(Terminal::Rbrace);
                }
                proc_macro2::Delimiter::Parenthesis => {
                    tokens.push(Terminal::Lparen);
                    let mut inner_iter = g.stream().into_iter().peekable();
                    lex_tokens(&mut inner_iter, tokens)?;
                    tokens.push(Terminal::Rparen);
                }
                _ => return Err(format!("Unexpected group delimiter: {:?}", g.delimiter())),
            },
            TokenTree::Literal(lit) => {
                // Classify by token text: all-digits => number; `"..."` =>
                // regex with escapes decoded; raw string => regex verbatim.
                let s = lit.to_string();
                if s.chars().all(|c| c.is_ascii_digit()) {
                    tokens.push(Terminal::Num(s));
                } else if s.starts_with('"') {
                    let inner = &s[1..s.len() - 1];
                    let value = unescape_string(inner)
                        .map_err(|e| format!("Invalid string literal: {}", e))?;
                    tokens.push(Terminal::Regex(value));
                } else if s.starts_with("r\"") || s.starts_with("r#") {
                    // Strip the leading `r`, the `#` fence, and the quotes;
                    // raw strings carry no escapes, so no decoding.
                    let after_r = &s[1..];
                    let hashes = after_r.bytes().take_while(|&b| b == b'#').count();
                    let inner = &after_r[hashes + 1..after_r.len() - hashes - 1];
                    tokens.push(Terminal::Regex(inner.to_string()));
                } else {
                    // Char literals, floats, suffixed numbers, etc. are not
                    // part of the grammar syntax.
                    return Err(format!("Unexpected literal in grammar: {}", s));
                }
            }
        }
    }
    Ok(())
}