1use std::collections::{HashMap, HashSet};
6use std::path::{Path, PathBuf};
7
8use crate::map::types::*;
9
/// Maps a file extension to a canonical language name.
///
/// Returns `"unknown"` for unrecognized extensions, for files with no
/// extension at all, and for non-UTF-8 extensions.
fn detect_language(file_path: &Path) -> &'static str {
    let ext = match file_path.extension().and_then(|e| e.to_str()) {
        Some(ext) => ext,
        None => return "unknown",
    };
    match ext {
        "ts" | "tsx" => "typescript",
        "js" | "jsx" | "mjs" | "cjs" => "javascript",
        "py" => "python",
        "go" => "go",
        "rs" => "rust",
        "java" => "java",
        "c" | "h" => "c",
        "cpp" | "hpp" | "cc" => "cpp",
        "rb" => "ruby",
        "php" => "php",
        "swift" => "swift",
        "kt" => "kotlin",
        "scala" => "scala",
        "cs" => "csharp",
        "sh" | "bash" => "bash",
        _ => "unknown",
    }
}
31
/// Glob patterns for the source files analyzed when the caller supplies
/// no explicit include list (see `CodeMapAnalyzer::new`). Covers the
/// languages this module can extract imports/functions from, plus Java.
const DEFAULT_INCLUDE: &[&str] = &[
    "**/*.ts",
    "**/*.tsx",
    "**/*.js",
    "**/*.jsx",
    "**/*.py",
    "**/*.go",
    "**/*.rs",
    "**/*.java",
];
43
/// Glob patterns always skipped by default: dependency directories,
/// build output, VCS metadata, caches, and generated/minified bundles.
const DEFAULT_EXCLUDE: &[&str] = &[
    "**/node_modules/**",
    "**/dist/**",
    "**/build/**",
    "**/.git/**",
    "**/coverage/**",
    "**/__pycache__/**",
    "**/vendor/**",
    "**/target/**",
    "**/*.min.js",
    "**/*.bundle.js",
];
57
/// Walks a project tree and produces lightweight, regex-based structural
/// information (`ModuleNode`s) about the source files it finds.
pub struct CodeMapAnalyzer {
    /// Project root that include patterns are resolved against.
    root_path: PathBuf,
    /// Glob patterns selecting candidate files.
    include: Vec<String>,
    /// Glob/substring patterns for paths to skip.
    exclude: Vec<String>,
    /// NOTE(review): stored via the builder but never read in this file —
    /// presumably consumed by an external (parallel) driver; confirm.
    concurrency: usize,
}
65
66impl CodeMapAnalyzer {
67 pub fn new(root_path: impl AsRef<Path>) -> Self {
69 Self {
70 root_path: root_path.as_ref().to_path_buf(),
71 include: DEFAULT_INCLUDE.iter().map(|s| s.to_string()).collect(),
72 exclude: DEFAULT_EXCLUDE.iter().map(|s| s.to_string()).collect(),
73 concurrency: 10,
74 }
75 }
76
77 pub fn with_include(mut self, patterns: Vec<String>) -> Self {
79 self.include = patterns;
80 self
81 }
82
83 pub fn with_exclude(mut self, patterns: Vec<String>) -> Self {
85 self.exclude = patterns;
86 self
87 }
88
89 pub fn with_concurrency(mut self, concurrency: usize) -> Self {
91 self.concurrency = concurrency;
92 self
93 }
94
95 pub fn from_options(root_path: impl AsRef<Path>, options: &GenerateOptions) -> Self {
97 let mut analyzer = Self::new(root_path);
98 if let Some(ref include) = options.include {
99 analyzer.include = include.clone();
100 }
101 if let Some(ref exclude) = options.exclude {
102 analyzer.exclude = exclude.clone();
103 }
104 if let Some(concurrency) = options.concurrency {
105 analyzer.concurrency = concurrency;
106 }
107 analyzer
108 }
109
110 pub fn discover_files(&self) -> Vec<PathBuf> {
112 let mut all_files = HashSet::new();
113
114 for pattern in &self.include {
115 let full_pattern = self.root_path.join(pattern);
116 if let Ok(entries) = glob::glob(full_pattern.to_str().unwrap_or("")) {
117 for entry in entries.flatten() {
118 if entry.is_file() && !self.is_excluded(&entry) {
119 all_files.insert(entry);
120 }
121 }
122 }
123 }
124
125 let mut files: Vec<_> = all_files.into_iter().collect();
126 files.sort();
127 files
128 }
129
130 fn is_excluded(&self, path: &Path) -> bool {
132 let path_str = path.to_string_lossy();
133 for pattern in &self.exclude {
134 if let Ok(glob_pattern) = glob::Pattern::new(pattern) {
135 if glob_pattern.matches(&path_str) {
136 return true;
137 }
138 }
139 if path_str.contains(pattern.trim_matches('*')) {
141 return true;
142 }
143 }
144 false
145 }
146
147 pub fn analyze_file(&self, file_path: &Path) -> Option<ModuleNode> {
149 let content = std::fs::read_to_string(file_path).ok()?;
150 let metadata = std::fs::metadata(file_path).ok()?;
151 let language = detect_language(file_path);
152 let relative_path = file_path
153 .strip_prefix(&self.root_path)
154 .unwrap_or(file_path)
155 .to_string_lossy()
156 .replace('\\', "/");
157 let lines = content.lines().count();
158
159 Some(ModuleNode {
160 id: relative_path.clone(),
161 name: file_path.file_name()?.to_string_lossy().to_string(),
162 path: file_path.to_string_lossy().to_string(),
163 language: language.to_string(),
164 lines,
165 size: metadata.len() as usize,
166 imports: self.extract_imports(&content, &relative_path, language),
167 exports: Vec::new(),
168 classes: Vec::new(),
169 interfaces: Vec::new(),
170 types: Vec::new(),
171 enums: Vec::new(),
172 functions: self.extract_functions(&content, &relative_path, language),
173 variables: Vec::new(),
174 })
175 }
176
177 pub fn analyze_files(&self, files: Option<Vec<PathBuf>>) -> Vec<ModuleNode> {
179 let files_to_analyze = files.unwrap_or_else(|| self.discover_files());
180 files_to_analyze
181 .iter()
182 .filter_map(|f| self.analyze_file(f))
183 .collect()
184 }
185
186 fn extract_imports(&self, content: &str, module_id: &str, lang: &str) -> Vec<ImportInfo> {
188 let mut imports = Vec::new();
189
190 match lang {
191 "typescript" | "javascript" => {
192 self.extract_js_imports(content, module_id, &mut imports);
193 }
194 "python" => {
195 self.extract_python_imports(content, module_id, &mut imports);
196 }
197 "rust" => {
198 self.extract_rust_imports(content, module_id, &mut imports);
199 }
200 _ => {}
201 }
202
203 imports
204 }
205
206 fn extract_js_imports(&self, content: &str, module_id: &str, imports: &mut Vec<ImportInfo>) {
208 let import_re = regex::Regex::new(
209 r#"import\s+(?:(?:\{([^}]*)\}|(\*\s+as\s+\w+)|(\w+))\s+from\s+)?['"]([^'"]+)['"]"#,
210 )
211 .unwrap();
212
213 for (line_num, line) in content.lines().enumerate() {
214 if let Some(caps) = import_re.captures(line) {
215 let source = caps
216 .get(4)
217 .map(|m| m.as_str().to_string())
218 .unwrap_or_default();
219 let mut symbols = Vec::new();
220 let mut is_default = false;
221 let mut is_namespace = false;
222
223 if let Some(named) = caps.get(1) {
224 symbols.extend(
225 named
226 .as_str()
227 .split(',')
228 .map(|s| s.trim().to_string())
229 .filter(|s| !s.is_empty()),
230 );
231 }
232 if caps.get(2).is_some() {
233 is_namespace = true;
234 }
235 if let Some(default) = caps.get(3) {
236 is_default = true;
237 symbols.push(default.as_str().to_string());
238 }
239
240 imports.push(ImportInfo {
241 source,
242 symbols,
243 is_default,
244 is_namespace,
245 is_dynamic: false,
246 location: LocationInfo {
247 file: module_id.to_string(),
248 start_line: (line_num + 1) as u32,
249 start_column: 0,
250 end_line: (line_num + 1) as u32,
251 end_column: line.len() as u32,
252 },
253 });
254 }
255 }
256 }
257
258 fn extract_python_imports(
260 &self,
261 content: &str,
262 module_id: &str,
263 imports: &mut Vec<ImportInfo>,
264 ) {
265 let from_import_re = regex::Regex::new(r"^from\s+(\S+)\s+import\s+(.+)$").unwrap();
266 let import_re = regex::Regex::new(r"^import\s+(.+)$").unwrap();
267
268 for (line_num, line) in content.lines().enumerate() {
269 let trimmed = line.trim();
270
271 if let Some(caps) = from_import_re.captures(trimmed) {
272 let source = caps
273 .get(1)
274 .map(|m| m.as_str().to_string())
275 .unwrap_or_default();
276 let import_part = caps.get(2).map(|m| m.as_str()).unwrap_or("");
277 let symbols: Vec<String> = import_part
278 .split(',')
279 .map(|s| {
280 s.trim()
281 .split(" as ")
282 .next()
283 .unwrap_or("")
284 .trim()
285 .to_string()
286 })
287 .filter(|s| !s.is_empty() && s != "*")
288 .collect();
289
290 imports.push(ImportInfo {
291 source,
292 symbols,
293 is_default: false,
294 is_namespace: import_part.trim() == "*",
295 is_dynamic: false,
296 location: LocationInfo {
297 file: module_id.to_string(),
298 start_line: (line_num + 1) as u32,
299 start_column: 0,
300 end_line: (line_num + 1) as u32,
301 end_column: line.len() as u32,
302 },
303 });
304 } else if let Some(caps) = import_re.captures(trimmed) {
305 let import_part = caps.get(1).map(|m| m.as_str()).unwrap_or("");
306 let source = import_part
307 .split(',')
308 .next()
309 .unwrap_or("")
310 .trim()
311 .to_string();
312
313 imports.push(ImportInfo {
314 source,
315 symbols: Vec::new(),
316 is_default: false,
317 is_namespace: false,
318 is_dynamic: false,
319 location: LocationInfo {
320 file: module_id.to_string(),
321 start_line: (line_num + 1) as u32,
322 start_column: 0,
323 end_line: (line_num + 1) as u32,
324 end_column: line.len() as u32,
325 },
326 });
327 }
328 }
329 }
330
331 fn extract_rust_imports(&self, content: &str, module_id: &str, imports: &mut Vec<ImportInfo>) {
333 let use_re = regex::Regex::new(r"^use\s+([^;]+);").unwrap();
334
335 for (line_num, line) in content.lines().enumerate() {
336 let trimmed = line.trim();
337 if let Some(caps) = use_re.captures(trimmed) {
338 let use_path = caps.get(1).map(|m| m.as_str()).unwrap_or("");
339 let source = use_path.split("::").next().unwrap_or("").to_string();
340
341 imports.push(ImportInfo {
342 source,
343 symbols: vec![use_path.to_string()],
344 is_default: false,
345 is_namespace: use_path.contains('*'),
346 is_dynamic: false,
347 location: LocationInfo {
348 file: module_id.to_string(),
349 start_line: (line_num + 1) as u32,
350 start_column: 0,
351 end_line: (line_num + 1) as u32,
352 end_column: line.len() as u32,
353 },
354 });
355 }
356 }
357 }
358
359 fn extract_functions(&self, content: &str, module_id: &str, lang: &str) -> Vec<FunctionNode> {
361 let mut functions = Vec::new();
362
363 let fn_re = match lang {
364 "rust" => regex::Regex::new(r"(?m)^(?:pub\s+)?(?:async\s+)?fn\s+(\w+)").ok(),
365 "typescript" | "javascript" => {
366 regex::Regex::new(r"(?m)(?:export\s+)?(?:async\s+)?function\s+(\w+)").ok()
367 }
368 "python" => regex::Regex::new(r"(?m)^(?:async\s+)?def\s+(\w+)").ok(),
369 _ => None,
370 };
371
372 if let Some(re) = fn_re {
373 for (line_num, line) in content.lines().enumerate() {
374 if let Some(caps) = re.captures(line) {
375 let name = caps
376 .get(1)
377 .map(|m| m.as_str().to_string())
378 .unwrap_or_default();
379 functions.push(FunctionNode {
380 id: format!("{}::{}", module_id, name),
381 name: name.clone(),
382 signature: line.trim().to_string(),
383 parameters: Vec::new(),
384 return_type: None,
385 is_async: line.contains("async"),
386 is_generator: false,
387 is_exported: line.contains("pub") || line.contains("export"),
388 location: LocationInfo {
389 file: module_id.to_string(),
390 start_line: (line_num + 1) as u32,
391 start_column: 0,
392 end_line: (line_num + 1) as u32,
393 end_column: line.len() as u32,
394 },
395 documentation: None,
396 calls: Vec::new(),
397 called_by: Vec::new(),
398 });
399 }
400 }
401 }
402
403 functions
404 }
405}
406
407pub fn create_analyzer(root_path: impl AsRef<Path>) -> CodeMapAnalyzer {
409 CodeMapAnalyzer::new(root_path)
410}
411
412pub fn generate_ontology(
414 root_path: impl AsRef<Path>,
415 options: Option<GenerateOptions>,
416) -> CodeOntology {
417 let opts = options.unwrap_or_default();
418 let analyzer = CodeMapAnalyzer::from_options(&root_path, &opts);
419 let modules = analyzer.analyze_files(None);
420
421 let mut statistics = OntologyStatistics::default();
422 let mut language_breakdown: HashMap<String, usize> = HashMap::new();
423
424 for module in &modules {
425 statistics.total_modules += 1;
426 statistics.total_functions += module.functions.len();
427 statistics.total_classes += module.classes.len();
428 statistics.total_interfaces += module.interfaces.len();
429 statistics.total_variables += module.variables.len();
430 statistics.total_lines += module.lines;
431
432 *language_breakdown
433 .entry(module.language.clone())
434 .or_insert(0) += 1;
435
436 for class in &module.classes {
437 statistics.total_methods += class.methods.len();
438 }
439 }
440
441 statistics.language_breakdown = language_breakdown;
442
443 CodeOntology {
444 version: "1.0.0".to_string(),
445 generated_at: chrono::Utc::now().to_rfc3339(),
446 project: ProjectInfo {
447 name: root_path
448 .as_ref()
449 .file_name()
450 .map(|n| n.to_string_lossy().to_string())
451 .unwrap_or_default(),
452 root_path: root_path.as_ref().to_string_lossy().to_string(),
453 languages: statistics.language_breakdown.keys().cloned().collect(),
454 file_count: statistics.total_modules,
455 total_lines: statistics.total_lines,
456 },
457 modules,
458 call_graph: CallGraph::default(),
459 dependency_graph: DependencyGraph::default(),
460 statistics,
461 }
462}