Skip to main content

aster/map/
analyzer.rs

//! Code analyzer.
//!
//! Analyzes source files and extracts symbol and structure information.
4
5use std::collections::{HashMap, HashSet};
6use std::path::{Path, PathBuf};
7
8use crate::map::types::*;
9
/// Maps a file's extension to the language label used by the analyzer.
///
/// The comparison is case-sensitive. A path with no extension, a non-UTF-8
/// extension, or an extension that is not listed below yields `"unknown"`.
fn detect_language(file_path: &Path) -> &'static str {
    file_path
        .extension()
        .and_then(|raw| raw.to_str())
        .map_or("unknown", |ext| match ext {
            "ts" | "tsx" => "typescript",
            "js" | "jsx" | "mjs" | "cjs" => "javascript",
            "py" => "python",
            "go" => "go",
            "rs" => "rust",
            "java" => "java",
            "c" | "h" => "c",
            "cpp" | "hpp" | "cc" => "cpp",
            "rb" => "ruby",
            "php" => "php",
            "swift" => "swift",
            "kt" => "kotlin",
            "scala" => "scala",
            "cs" => "csharp",
            "sh" | "bash" => "bash",
            _ => "unknown",
        })
}
31
/// Glob patterns selecting the files to analyze when the caller supplies no
/// include list of its own.
const DEFAULT_INCLUDE: &[&str] = &[
    // TypeScript / JavaScript sources.
    "**/*.ts",
    "**/*.tsx",
    "**/*.js",
    "**/*.jsx",
    // Other languages scanned by default.
    "**/*.py",
    "**/*.go",
    "**/*.rs",
    "**/*.java",
];
43
/// Glob patterns for paths that are skipped when the caller supplies no
/// exclude list of its own.
const DEFAULT_EXCLUDE: &[&str] = &[
    // Dependency, build-output and tooling directories.
    "**/node_modules/**",
    "**/dist/**",
    "**/build/**",
    "**/.git/**",
    "**/coverage/**",
    "**/__pycache__/**",
    "**/vendor/**",
    "**/target/**",
    // Minified / bundled JavaScript artifacts.
    "**/*.min.js",
    "**/*.bundle.js",
];
57
/// Code analyzer: discovers source files under a root directory and extracts
/// per-file import and function information.
#[derive(Debug, Clone)]
pub struct CodeMapAnalyzer {
    /// Directory that include patterns are joined onto and that module ids
    /// are made relative to.
    root_path: PathBuf,
    /// Glob patterns selecting the files to analyze.
    include: Vec<String>,
    /// Glob patterns for files that must be skipped.
    exclude: Vec<String>,
    /// Requested concurrency level. It is stored by the constructors, but no
    /// method visible in this file reads it.
    concurrency: usize,
}
65
66impl CodeMapAnalyzer {
67    /// 创建新的分析器
68    pub fn new(root_path: impl AsRef<Path>) -> Self {
69        Self {
70            root_path: root_path.as_ref().to_path_buf(),
71            include: DEFAULT_INCLUDE.iter().map(|s| s.to_string()).collect(),
72            exclude: DEFAULT_EXCLUDE.iter().map(|s| s.to_string()).collect(),
73            concurrency: 10,
74        }
75    }
76
77    /// 设置包含模式
78    pub fn with_include(mut self, patterns: Vec<String>) -> Self {
79        self.include = patterns;
80        self
81    }
82
83    /// 设置排除模式
84    pub fn with_exclude(mut self, patterns: Vec<String>) -> Self {
85        self.exclude = patterns;
86        self
87    }
88
89    /// 设置并发数
90    pub fn with_concurrency(mut self, concurrency: usize) -> Self {
91        self.concurrency = concurrency;
92        self
93    }
94
95    /// 从选项创建
96    pub fn from_options(root_path: impl AsRef<Path>, options: &GenerateOptions) -> Self {
97        let mut analyzer = Self::new(root_path);
98        if let Some(ref include) = options.include {
99            analyzer.include = include.clone();
100        }
101        if let Some(ref exclude) = options.exclude {
102            analyzer.exclude = exclude.clone();
103        }
104        if let Some(concurrency) = options.concurrency {
105            analyzer.concurrency = concurrency;
106        }
107        analyzer
108    }
109
110    /// 发现所有待分析的文件
111    pub fn discover_files(&self) -> Vec<PathBuf> {
112        let mut all_files = HashSet::new();
113
114        for pattern in &self.include {
115            let full_pattern = self.root_path.join(pattern);
116            if let Ok(entries) = glob::glob(full_pattern.to_str().unwrap_or("")) {
117                for entry in entries.flatten() {
118                    if entry.is_file() && !self.is_excluded(&entry) {
119                        all_files.insert(entry);
120                    }
121                }
122            }
123        }
124
125        let mut files: Vec<_> = all_files.into_iter().collect();
126        files.sort();
127        files
128    }
129
130    /// 检查文件是否被排除
131    fn is_excluded(&self, path: &Path) -> bool {
132        let path_str = path.to_string_lossy();
133        for pattern in &self.exclude {
134            if let Ok(glob_pattern) = glob::Pattern::new(pattern) {
135                if glob_pattern.matches(&path_str) {
136                    return true;
137                }
138            }
139            // 简单的包含检查
140            if path_str.contains(pattern.trim_matches('*')) {
141                return true;
142            }
143        }
144        false
145    }
146
147    /// 分析单个文件
148    pub fn analyze_file(&self, file_path: &Path) -> Option<ModuleNode> {
149        let content = std::fs::read_to_string(file_path).ok()?;
150        let metadata = std::fs::metadata(file_path).ok()?;
151        let language = detect_language(file_path);
152        let relative_path = file_path
153            .strip_prefix(&self.root_path)
154            .unwrap_or(file_path)
155            .to_string_lossy()
156            .replace('\\', "/");
157        let lines = content.lines().count();
158
159        Some(ModuleNode {
160            id: relative_path.clone(),
161            name: file_path.file_name()?.to_string_lossy().to_string(),
162            path: file_path.to_string_lossy().to_string(),
163            language: language.to_string(),
164            lines,
165            size: metadata.len() as usize,
166            imports: self.extract_imports(&content, &relative_path, language),
167            exports: Vec::new(),
168            classes: Vec::new(),
169            interfaces: Vec::new(),
170            types: Vec::new(),
171            enums: Vec::new(),
172            functions: self.extract_functions(&content, &relative_path, language),
173            variables: Vec::new(),
174        })
175    }
176
177    /// 批量分析文件
178    pub fn analyze_files(&self, files: Option<Vec<PathBuf>>) -> Vec<ModuleNode> {
179        let files_to_analyze = files.unwrap_or_else(|| self.discover_files());
180        files_to_analyze
181            .iter()
182            .filter_map(|f| self.analyze_file(f))
183            .collect()
184    }
185
186    /// 提取导入信息
187    fn extract_imports(&self, content: &str, module_id: &str, lang: &str) -> Vec<ImportInfo> {
188        let mut imports = Vec::new();
189
190        match lang {
191            "typescript" | "javascript" => {
192                self.extract_js_imports(content, module_id, &mut imports);
193            }
194            "python" => {
195                self.extract_python_imports(content, module_id, &mut imports);
196            }
197            "rust" => {
198                self.extract_rust_imports(content, module_id, &mut imports);
199            }
200            _ => {}
201        }
202
203        imports
204    }
205
206    /// 提取 JS/TS 导入
207    fn extract_js_imports(&self, content: &str, module_id: &str, imports: &mut Vec<ImportInfo>) {
208        let import_re = regex::Regex::new(
209            r#"import\s+(?:(?:\{([^}]*)\}|(\*\s+as\s+\w+)|(\w+))\s+from\s+)?['"]([^'"]+)['"]"#,
210        )
211        .unwrap();
212
213        for (line_num, line) in content.lines().enumerate() {
214            if let Some(caps) = import_re.captures(line) {
215                let source = caps
216                    .get(4)
217                    .map(|m| m.as_str().to_string())
218                    .unwrap_or_default();
219                let mut symbols = Vec::new();
220                let mut is_default = false;
221                let mut is_namespace = false;
222
223                if let Some(named) = caps.get(1) {
224                    symbols.extend(
225                        named
226                            .as_str()
227                            .split(',')
228                            .map(|s| s.trim().to_string())
229                            .filter(|s| !s.is_empty()),
230                    );
231                }
232                if caps.get(2).is_some() {
233                    is_namespace = true;
234                }
235                if let Some(default) = caps.get(3) {
236                    is_default = true;
237                    symbols.push(default.as_str().to_string());
238                }
239
240                imports.push(ImportInfo {
241                    source,
242                    symbols,
243                    is_default,
244                    is_namespace,
245                    is_dynamic: false,
246                    location: LocationInfo {
247                        file: module_id.to_string(),
248                        start_line: (line_num + 1) as u32,
249                        start_column: 0,
250                        end_line: (line_num + 1) as u32,
251                        end_column: line.len() as u32,
252                    },
253                });
254            }
255        }
256    }
257
258    /// 提取 Python 导入
259    fn extract_python_imports(
260        &self,
261        content: &str,
262        module_id: &str,
263        imports: &mut Vec<ImportInfo>,
264    ) {
265        let from_import_re = regex::Regex::new(r"^from\s+(\S+)\s+import\s+(.+)$").unwrap();
266        let import_re = regex::Regex::new(r"^import\s+(.+)$").unwrap();
267
268        for (line_num, line) in content.lines().enumerate() {
269            let trimmed = line.trim();
270
271            if let Some(caps) = from_import_re.captures(trimmed) {
272                let source = caps
273                    .get(1)
274                    .map(|m| m.as_str().to_string())
275                    .unwrap_or_default();
276                let import_part = caps.get(2).map(|m| m.as_str()).unwrap_or("");
277                let symbols: Vec<String> = import_part
278                    .split(',')
279                    .map(|s| {
280                        s.trim()
281                            .split(" as ")
282                            .next()
283                            .unwrap_or("")
284                            .trim()
285                            .to_string()
286                    })
287                    .filter(|s| !s.is_empty() && s != "*")
288                    .collect();
289
290                imports.push(ImportInfo {
291                    source,
292                    symbols,
293                    is_default: false,
294                    is_namespace: import_part.trim() == "*",
295                    is_dynamic: false,
296                    location: LocationInfo {
297                        file: module_id.to_string(),
298                        start_line: (line_num + 1) as u32,
299                        start_column: 0,
300                        end_line: (line_num + 1) as u32,
301                        end_column: line.len() as u32,
302                    },
303                });
304            } else if let Some(caps) = import_re.captures(trimmed) {
305                let import_part = caps.get(1).map(|m| m.as_str()).unwrap_or("");
306                let source = import_part
307                    .split(',')
308                    .next()
309                    .unwrap_or("")
310                    .trim()
311                    .to_string();
312
313                imports.push(ImportInfo {
314                    source,
315                    symbols: Vec::new(),
316                    is_default: false,
317                    is_namespace: false,
318                    is_dynamic: false,
319                    location: LocationInfo {
320                        file: module_id.to_string(),
321                        start_line: (line_num + 1) as u32,
322                        start_column: 0,
323                        end_line: (line_num + 1) as u32,
324                        end_column: line.len() as u32,
325                    },
326                });
327            }
328        }
329    }
330
331    /// 提取 Rust 导入
332    fn extract_rust_imports(&self, content: &str, module_id: &str, imports: &mut Vec<ImportInfo>) {
333        let use_re = regex::Regex::new(r"^use\s+([^;]+);").unwrap();
334
335        for (line_num, line) in content.lines().enumerate() {
336            let trimmed = line.trim();
337            if let Some(caps) = use_re.captures(trimmed) {
338                let use_path = caps.get(1).map(|m| m.as_str()).unwrap_or("");
339                let source = use_path.split("::").next().unwrap_or("").to_string();
340
341                imports.push(ImportInfo {
342                    source,
343                    symbols: vec![use_path.to_string()],
344                    is_default: false,
345                    is_namespace: use_path.contains('*'),
346                    is_dynamic: false,
347                    location: LocationInfo {
348                        file: module_id.to_string(),
349                        start_line: (line_num + 1) as u32,
350                        start_column: 0,
351                        end_line: (line_num + 1) as u32,
352                        end_column: line.len() as u32,
353                    },
354                });
355            }
356        }
357    }
358
359    /// 提取函数
360    fn extract_functions(&self, content: &str, module_id: &str, lang: &str) -> Vec<FunctionNode> {
361        let mut functions = Vec::new();
362
363        let fn_re = match lang {
364            "rust" => regex::Regex::new(r"(?m)^(?:pub\s+)?(?:async\s+)?fn\s+(\w+)").ok(),
365            "typescript" | "javascript" => {
366                regex::Regex::new(r"(?m)(?:export\s+)?(?:async\s+)?function\s+(\w+)").ok()
367            }
368            "python" => regex::Regex::new(r"(?m)^(?:async\s+)?def\s+(\w+)").ok(),
369            _ => None,
370        };
371
372        if let Some(re) = fn_re {
373            for (line_num, line) in content.lines().enumerate() {
374                if let Some(caps) = re.captures(line) {
375                    let name = caps
376                        .get(1)
377                        .map(|m| m.as_str().to_string())
378                        .unwrap_or_default();
379                    functions.push(FunctionNode {
380                        id: format!("{}::{}", module_id, name),
381                        name: name.clone(),
382                        signature: line.trim().to_string(),
383                        parameters: Vec::new(),
384                        return_type: None,
385                        is_async: line.contains("async"),
386                        is_generator: false,
387                        is_exported: line.contains("pub") || line.contains("export"),
388                        location: LocationInfo {
389                            file: module_id.to_string(),
390                            start_line: (line_num + 1) as u32,
391                            start_column: 0,
392                            end_line: (line_num + 1) as u32,
393                            end_column: line.len() as u32,
394                        },
395                        documentation: None,
396                        calls: Vec::new(),
397                        called_by: Vec::new(),
398                    });
399                }
400            }
401        }
402
403        functions
404    }
405}
406
407/// 创建分析器的便捷函数
408pub fn create_analyzer(root_path: impl AsRef<Path>) -> CodeMapAnalyzer {
409    CodeMapAnalyzer::new(root_path)
410}
411
412/// 生成代码本体图谱
413pub fn generate_ontology(
414    root_path: impl AsRef<Path>,
415    options: Option<GenerateOptions>,
416) -> CodeOntology {
417    let opts = options.unwrap_or_default();
418    let analyzer = CodeMapAnalyzer::from_options(&root_path, &opts);
419    let modules = analyzer.analyze_files(None);
420
421    let mut statistics = OntologyStatistics::default();
422    let mut language_breakdown: HashMap<String, usize> = HashMap::new();
423
424    for module in &modules {
425        statistics.total_modules += 1;
426        statistics.total_functions += module.functions.len();
427        statistics.total_classes += module.classes.len();
428        statistics.total_interfaces += module.interfaces.len();
429        statistics.total_variables += module.variables.len();
430        statistics.total_lines += module.lines;
431
432        *language_breakdown
433            .entry(module.language.clone())
434            .or_insert(0) += 1;
435
436        for class in &module.classes {
437            statistics.total_methods += class.methods.len();
438        }
439    }
440
441    statistics.language_breakdown = language_breakdown;
442
443    CodeOntology {
444        version: "1.0.0".to_string(),
445        generated_at: chrono::Utc::now().to_rfc3339(),
446        project: ProjectInfo {
447            name: root_path
448                .as_ref()
449                .file_name()
450                .map(|n| n.to_string_lossy().to_string())
451                .unwrap_or_default(),
452            root_path: root_path.as_ref().to_string_lossy().to_string(),
453            languages: statistics.language_breakdown.keys().cloned().collect(),
454            file_count: statistics.total_modules,
455            total_lines: statistics.total_lines,
456        },
457        modules,
458        call_graph: CallGraph::default(),
459        dependency_graph: DependencyGraph::default(),
460        statistics,
461    }
462}