autozig_engine/
scanner.rs

1//! Source code scanner to extract Zig code from autozig! macros using syn AST
2//! parsing
3
4use std::{
5    fs,
6    path::Path,
7};
8
9use anyhow::{
10    Context,
11    Result,
12};
13use syn::{
14    visit::Visit,
15    Macro,
16};
17use walkdir::WalkDir;
18
/// Scanner for extracting Zig code from Rust source files.
///
/// Holds the two directories the scan works with: the tree that is searched
/// for `.rs` files and the base directory used to resolve external Zig files.
#[derive(Debug, Clone)]
pub struct ZigCodeScanner {
    /// Root of the directory tree that is walked looking for `.rs` files.
    src_dir: std::path::PathBuf,
    /// Base directory that `include_zig!` paths are joined onto.
    manifest_dir: std::path::PathBuf,
}
24
25impl ZigCodeScanner {
26    pub fn new(src_dir: impl AsRef<Path>) -> Self {
27        // Get manifest dir from environment or use src_dir parent
28        let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
29            .ok()
30            .and_then(|d| std::path::PathBuf::from(d).canonicalize().ok())
31            .unwrap_or_else(|| {
32                src_dir
33                    .as_ref()
34                    .parent()
35                    .unwrap_or(src_dir.as_ref())
36                    .to_path_buf()
37            });
38
39        Self {
40            src_dir: src_dir.as_ref().to_path_buf(),
41            manifest_dir,
42        }
43    }
44
45    /// Scan all .rs files and extract Zig code using AST parsing
46    pub fn scan(&self) -> Result<String> {
47        let mut consolidated_zig = String::new();
48        let mut has_std_import = false;
49
50        for entry in WalkDir::new(&self.src_dir)
51            .into_iter()
52            .filter_map(|e| e.ok())
53        {
54            let path = entry.path();
55            if path.extension().map_or(false, |ext| ext == "rs") {
56                let content = fs::read_to_string(path)
57                    .with_context(|| format!("Failed to read {}", path.display()))?;
58
59                // Parse the Rust file into an AST
60                match syn::parse_file(&content) {
61                    Ok(file) => {
62                        let mut visitor = AutozigVisitor::default();
63                        visitor.visit_file(&file);
64
65                        // Process embedded Zig code
66                        for zig_code in visitor.zig_code {
67                            consolidated_zig.push_str(&zig_code);
68                            consolidated_zig.push('\n');
69                        }
70
71                        // Process external Zig files
72                        for external_file in visitor.external_files {
73                            let external_path = self.manifest_dir.join(&external_file);
74                            match fs::read_to_string(&external_path) {
75                                Ok(external_content) => {
76                                    consolidated_zig.push_str(&format!(
77                                        "\n// From external file: {}\n",
78                                        external_file
79                                    ));
80
81                                    // Remove duplicate std import and other common imports
82                                    let cleaned_content = remove_duplicate_imports(
83                                        &external_content,
84                                        &mut has_std_import,
85                                    );
86                                    consolidated_zig.push_str(&cleaned_content);
87                                    consolidated_zig.push('\n');
88                                },
89                                Err(e) => {
90                                    eprintln!(
91                                        "Warning: Failed to read external Zig file {}: {}",
92                                        external_path.display(),
93                                        e
94                                    );
95                                },
96                            }
97                        }
98                    },
99                    Err(e) => {
100                        eprintln!("Warning: Failed to parse {}: {}", path.display(), e);
101                        // Continue scanning other files
102                    },
103                }
104            }
105        }
106
107        Ok(consolidated_zig)
108    }
109}
110
111/// AST visitor to extract autozig! and include_zig! macro contents
112#[derive(Default)]
113struct AutozigVisitor {
114    zig_code: Vec<String>,
115    external_files: Vec<String>,
116}
117
118impl<'ast> Visit<'ast> for AutozigVisitor {
119    fn visit_macro(&mut self, node: &'ast Macro) {
120        // Check if this is an autozig! macro
121        if node.path.is_ident("autozig") {
122            // Extract the token stream and convert to string
123            let tokens = node.tokens.to_string();
124
125            // The tokens will be in the format: { ... }
126            // We need to extract the content and split by ---
127            if let Some(zig_code) = extract_zig_from_tokens(&tokens) {
128                self.zig_code.push(zig_code);
129            }
130        }
131        // Check if this is an include_zig! macro
132        else if node.path.is_ident("include_zig") {
133            // Extract file path from tokens
134            // Format: include_zig!("path/to/file.zig", { ... })
135            let tokens = node.tokens.to_string();
136            if let Some(file_path) = extract_file_path_from_tokens(&tokens) {
137                self.external_files.push(file_path);
138            }
139        }
140
141        // Continue visiting nested items
142        syn::visit::visit_macro(self, node);
143    }
144
145    fn visit_item_macro(&mut self, node: &'ast syn::ItemMacro) {
146        // Visit the macro itself
147        self.visit_macro(&node.mac);
148    }
149}
150
/// Extract the file path from `include_zig!` macro tokens.
///
/// Expected format: `("path/to/file.zig", { ... })` or just
/// `("path/to/file.zig")`; the surrounding parentheses are optional.
///
/// The path is the contents of the first string literal in `tokens`. For an
/// ordinary literal the escapes `\"`, `\\`, `\'`, `\n`, `\r`, `\t` and `\0`
/// are decoded, so `"dir\\file.zig"` yields `dir\file.zig` and an escaped
/// quote does not end the path early. Any other escape is copied through
/// unchanged. A raw literal (`r"..."` / `r#"..."#`) is returned verbatim up to
/// its next `"`.
///
/// Returns `None` when `tokens` holds no terminated string literal.
fn extract_file_path_from_tokens(tokens: &str) -> Option<String> {
    let content = tokens.trim();

    // Remove outer parentheses if present
    let content = content
        .strip_prefix('(')
        .and_then(|inner| inner.strip_suffix(')'))
        .unwrap_or(content);

    let open = content.find('"')?;
    let before = &content[..open];
    let body = &content[open + 1..];

    // Raw strings have no escape sequences: take everything up to the next
    // quote as-is.
    if before.ends_with('r') || before.ends_with('#') {
        let close = body.find('"')?;
        return Some(body[..close].to_string());
    }

    let mut path = String::new();
    let mut chars = body.chars();
    while let Some(ch) = chars.next() {
        match ch {
            '"' => return Some(path),
            '\\' => match chars.next()? {
                '"' => path.push('"'),
                '\\' => path.push('\\'),
                '\'' => path.push('\''),
                'n' => path.push('\n'),
                'r' => path.push('\r'),
                't' => path.push('\t'),
                '0' => path.push('\0'),
                other => {
                    // Escape forms not decoded here are kept as written.
                    path.push('\\');
                    path.push(other);
                },
            },
            other => path.push(other),
        }
    }

    // Ran out of input before the closing quote
    None
}
174
/// Extract Zig code from macro tokens.
///
/// `tokens` is the stringified body of an `autozig!` invocation, optionally
/// still wrapped in braces. Everything before the first `---` separator is
/// the Zig section; without a separator the whole body is. Returns `None`
/// when that section is empty.
///
/// The spacing that token stringification inserts is then repaired so the
/// result is valid Zig again: whitespace after `@` is removed (`@ import` ->
/// `@import`) and `[ * ]`, `[ ]`, `[ 0 .. len ]` and `.. ` are tightened.
///
/// String and character literals are copied through untouched: neither the
/// separator search nor the spacing repairs look inside them, so text such as
/// `"a @ b"` or `"---"` survives unchanged.
fn extract_zig_from_tokens(tokens: &str) -> Option<String> {
    // Remove outer braces if present
    let content = tokens.trim();
    let content = content
        .strip_prefix('{')
        .and_then(|inner| inner.strip_suffix('}'))
        .unwrap_or(content);

    // Zig code comes before the first `---` that is not part of a literal.
    let literals = literal_ranges(content);
    let separator_pos = content
        .match_indices("---")
        .map(|(pos, _)| pos)
        .find(|pos| !literals.iter().any(|lit| lit.contains(pos)))
        .unwrap_or(content.len());
    let zig_section = content[..separator_pos].trim();

    if zig_section.is_empty() {
        return None;
    }

    // Alternate between code (repaired) and literals (copied verbatim).
    let mut fixed = String::with_capacity(zig_section.len());
    let mut cursor = 0;
    for lit in literal_ranges(zig_section) {
        fixed.push_str(&tidy_zig_code(&zig_section[cursor..lit.start]));
        fixed.push_str(&zig_section[lit.start..lit.end]);
        cursor = lit.end;
    }
    fixed.push_str(&tidy_zig_code(&zig_section[cursor..]));

    Some(fixed)
}

/// Byte ranges of the string (`"..."`) and character (`'x'`, `'\n'`) literals
/// in `src`, in order of appearance. An unterminated string runs to the end of
/// `src`; a lone `'` that does not open a character literal is ignored.
fn literal_ranges(src: &str) -> Vec<std::ops::Range<usize>> {
    let mut ranges = Vec::new();
    let mut chars = src.char_indices().peekable();

    while let Some((start, ch)) = chars.next() {
        match ch {
            '"' => {
                let mut end = src.len();
                while let Some((idx, c)) = chars.next() {
                    match c {
                        // Skip the escaped character so `\"` does not close
                        // the literal.
                        '\\' => {
                            chars.next();
                        },
                        '"' => {
                            end = idx + 1;
                            break;
                        },
                        _ => {},
                    }
                }
                ranges.push(start..end);
            },
            '\'' => {
                let rest = &src[start + 1..];
                let mut body = rest.chars();
                let first = body.next();
                let second = body.next();
                // Length in bytes of everything after the opening quote,
                // closing quote included.
                let len = match (first, second) {
                    (Some('\\'), _) => rest
                        .char_indices()
                        .skip(2)
                        .find(|&(_, c)| c == '\'')
                        .map(|(idx, _)| idx + 1),
                    (Some(c), Some('\'')) if c != '\'' => Some(c.len_utf8() + 1),
                    _ => None,
                };
                if let Some(len) = len {
                    let end = start + 1 + len;
                    ranges.push(start..end);
                    // Move the main cursor past the literal.
                    while chars.peek().map_or(false, |&(idx, _)| idx < end) {
                        chars.next();
                    }
                }
            },
            _ => {},
        }
    }

    ranges
}

/// Repair token-stream spacing in a stretch of Zig code that contains no
/// literals.
fn tidy_zig_code(code: &str) -> String {
    // Drop whitespace directly after `@` so builtins such as `@import`,
    // `@floatFromInt` or `@sqrt` are spelled as one token again.
    let mut joined = String::with_capacity(code.len());
    let mut after_at = false;
    for ch in code.chars() {
        if after_at && ch.is_whitespace() {
            continue;
        }
        after_at = ch == '@';
        joined.push(ch);
    }

    joined
        // Fix array syntax spacing
        .replace("[ * ]", "[*]")
        .replace("[ ]", "[]")
        // Fix range syntax
        .replace("[ 0 .. len ]", "[0..len]")
        .replace(".. ", "..")
}
235
/// Strip repeated `std` imports from the text of an external Zig file.
///
/// A line counts as a std import when, ignoring surrounding whitespace, it
/// starts with `const std` and contains `@import("std")`. Such a line is kept
/// only while `has_std_import` is still `false`; keeping it sets the flag, so
/// later occurrences (in this call or in later calls sharing the flag) are
/// dropped. This prevents "duplicate struct member name" errors when several
/// files are merged.
///
/// Every retained line is written back followed by `\n`.
fn remove_duplicate_imports(content: &str, has_std_import: &mut bool) -> String {
    let retained = content.lines().filter(|raw| {
        let stripped = raw.trim();
        let is_std_import =
            stripped.starts_with("const std") && stripped.contains("@import(\"std\")");

        if !is_std_import {
            // Keep all other lines
            return true;
        }

        // `replace` hands back the previous flag value: keep the line only if
        // no std import had been seen before it.
        !std::mem::replace(&mut *has_std_import, true)
    });

    let mut kept = String::new();
    for raw in retained {
        kept.push_str(raw);
        kept.push('\n');
    }
    kept
}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// With a `---` separator only the Zig half of the body is returned.
    #[test]
    fn test_extract_zig_from_tokens() {
        let macro_body = r#"{
            const std = @import("std");
            export fn add(a: i32, b: i32) i32 {
                return a + b;
            }
            ---
            fn add(a: i32, b: i32) -> i32;
        }"#;

        let zig = extract_zig_from_tokens(macro_body).unwrap();

        // Zig code is present
        for expected in ["export fn add", "const std"] {
            assert!(zig.contains(expected));
        }
        // The separator and the Rust signature (`->` is Rust-specific return
        // type syntax) are gone
        for unexpected in ["---", "-> i32;"] {
            assert!(!zig.contains(unexpected));
        }
    }

    /// Without a separator the whole body is treated as Zig code.
    #[test]
    fn test_extract_without_separator() {
        let macro_body = r#"{
            const std = @import("std");
            export fn multiply(a: i32, b: i32) i32 {
                return a * b;
            }
        }"#;

        let zig = extract_zig_from_tokens(macro_body).unwrap();
        assert!(zig.contains("export fn multiply"));
    }

    /// The std import is kept once and dropped on every later call that
    /// shares the same flag.
    #[test]
    fn test_remove_duplicate_imports() {
        let zig_file = r#"const std = @import("std");

export fn test() void {}
"#;
        let mut seen_std = false;

        let first = remove_duplicate_imports(zig_file, &mut seen_std);
        assert!(first.contains("const std"));
        assert!(seen_std);

        // Second call should remove the import
        let second = remove_duplicate_imports(zig_file, &mut seen_std);
        assert!(!second.contains("const std"));
        assert!(second.contains("export fn test"));
    }
}