1use anyhow::Result;
5use regex::Regex;
6use std::collections::HashMap;
7use std::fs;
8use std::path::{Path, PathBuf};
9
/// The kind of relationship detected between two source files.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RelationType {
    /// The source file imports (or declares as a module) the target file.
    Imports,
    /// Source calls a function defined in target. NOTE(review): not yet
    /// produced anywhere in this file — presumably planned; confirm.
    FunctionCall,
    /// Source uses a type defined in target. NOTE(review): not yet produced
    /// anywhere in this file.
    TypeUsage,
    /// The source file is covered by tests living in the target file.
    TestedBy,
    /// Source exports items consumed by target. NOTE(review): not yet
    /// produced anywhere in this file.
    Exports,
    /// The two files import each other / are repeatedly linked (two or more
    /// import relations between them, in either direction).
    Coupled,
}
26
/// A single directed relationship between two files discovered by analysis.
#[derive(Debug, Clone)]
pub struct FileRelation {
    /// File where the relationship originates.
    pub source: PathBuf,
    /// File the relationship points at.
    pub target: PathBuf,
    /// What kind of relationship this is.
    pub relation_type: RelationType,
    /// Names involved (e.g. imported items); empty when not applicable.
    pub items: Vec<String>,
    /// Heuristic weight; producers in this file use values up to 10.
    pub strength: u8,
}
41
/// Scans a directory tree and records inter-file relationships.
pub struct RelationAnalyzer {
    /// All relations discovered so far.
    relations: Vec<FileRelation>,
    /// Language parsers keyed by file extension (e.g. "rs", "py").
    parsers: HashMap<String, Box<dyn LanguageParser>>,
    /// Contents of every collected file, keyed by path.
    file_cache: HashMap<PathBuf, String>,
}
51
/// Language-specific extraction of imports, definitions, and usages from raw
/// source text. Implementations in this file are regex-based and best-effort.
trait LanguageParser: Send + Sync {
    /// Returns `(module, imported_items)` pairs found in `content`.
    fn parse_imports(&self, content: &str, file_path: &Path) -> Vec<(String, Vec<String>)>;

    /// Names of functions defined in `content`.
    fn parse_functions(&self, content: &str) -> Vec<String>;

    /// Names that appear to be called as functions in `content`.
    fn parse_function_calls(&self, content: &str) -> Vec<String>;

    /// Names of types (structs/enums/traits/classes) defined in `content`.
    fn parse_types(&self, content: &str) -> Vec<String>;

    /// Type names referenced in annotation position in `content`.
    fn parse_type_usages(&self, content: &str) -> Vec<String>;
}
69
70struct RustParser;
72
73impl LanguageParser for RustParser {
74 fn parse_imports(&self, content: &str, _file_path: &Path) -> Vec<(String, Vec<String>)> {
75 let mut imports = Vec::new();
76
77 let mut cleaned_content = String::new();
79 let mut in_use = false;
80 let mut use_buffer = String::new();
81
82 for line in content.lines() {
83 if line.trim_start().starts_with("use ") {
84 in_use = true;
85 use_buffer.push_str(line);
86 use_buffer.push(' ');
87 } else if in_use {
88 if line.contains(';') {
89 use_buffer.push_str(line);
90 cleaned_content.push_str(&use_buffer.replace('\n', " "));
91 cleaned_content.push('\n');
92 use_buffer.clear();
93 in_use = false;
94 } else {
95 use_buffer.push_str(line);
96 use_buffer.push(' ');
97 }
98 } else {
99 cleaned_content.push_str(line);
100 cleaned_content.push('\n');
101 }
102 }
103
104 let simple_use_re = Regex::new(r"use\s+([a-zA-Z0-9_:]+)(?:::([a-zA-Z0-9_]+))?;").unwrap();
106 for cap in simple_use_re.captures_iter(&cleaned_content) {
107 let module = cap.get(1).map_or("", |m| m.as_str());
108 let item = cap.get(2).map_or(vec![], |m| vec![m.as_str().to_string()]);
109 imports.push((module.to_string(), item));
110 }
111
112 let complex_use_re = Regex::new(r"use\s+([a-zA-Z0-9_:]+)::\{([^}]+)\}").unwrap();
114 for cap in complex_use_re.captures_iter(&cleaned_content) {
115 let module = cap.get(1).map_or("", |m| m.as_str());
116 let items = cap.get(2).map_or(vec![], |m| {
117 m.as_str()
118 .split(',')
119 .map(|s| {
120 let parts: Vec<&str> = s.trim().split("::").collect();
122 if parts.len() > 1 {
123 imports.push((
125 format!("{}::{}", module, parts[0]),
126 vec![parts[1].to_string()],
127 ));
128 }
129 s.trim().to_string()
130 })
131 .collect()
132 });
133 if !items.is_empty() {
134 imports.push((module.to_string(), items));
135 }
136 }
137
138 let mod_re = Regex::new(r"^\s*(?:pub\s+)?mod\s+([a-zA-Z0-9_]+)").unwrap();
140 for cap in mod_re.captures_iter(content) {
141 let module = cap.get(1).map_or("", |m| m.as_str());
142 imports.push((module.to_string(), vec![]));
143 }
144
145 imports
146 }
147
148 fn parse_functions(&self, content: &str) -> Vec<String> {
149 let fn_re = Regex::new(r"(?:pub\s+)?fn\s+([a-zA-Z0-9_]+)").unwrap();
150 fn_re
151 .captures_iter(content)
152 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
153 .collect()
154 }
155
156 fn parse_function_calls(&self, content: &str) -> Vec<String> {
157 let call_re = Regex::new(r"([a-zA-Z0-9_]+)\s*\(").unwrap();
158 call_re
159 .captures_iter(content)
160 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
161 .collect()
162 }
163
164 fn parse_types(&self, content: &str) -> Vec<String> {
165 let mut types = Vec::new();
166
167 let struct_re = Regex::new(r"(?:pub\s+)?struct\s+([A-Z][a-zA-Z0-9_]*)").unwrap();
169 types.extend(
170 struct_re
171 .captures_iter(content)
172 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string())),
173 );
174
175 let enum_re = Regex::new(r"(?:pub\s+)?enum\s+([A-Z][a-zA-Z0-9_]*)").unwrap();
177 types.extend(
178 enum_re
179 .captures_iter(content)
180 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string())),
181 );
182
183 let trait_re = Regex::new(r"(?:pub\s+)?trait\s+([A-Z][a-zA-Z0-9_]*)").unwrap();
185 types.extend(
186 trait_re
187 .captures_iter(content)
188 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string())),
189 );
190
191 types
192 }
193
194 fn parse_type_usages(&self, content: &str) -> Vec<String> {
195 let type_re = Regex::new(r":\s*([A-Z][a-zA-Z0-9_]*)").unwrap();
196 type_re
197 .captures_iter(content)
198 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
199 .collect()
200 }
201}
202
203struct PythonParser;
205
206impl LanguageParser for PythonParser {
207 fn parse_imports(&self, content: &str, _file_path: &Path) -> Vec<(String, Vec<String>)> {
208 let mut imports = Vec::new();
209
210 let import_re = Regex::new(r"import\s+([a-zA-Z0-9_.]+)").unwrap();
212 for cap in import_re.captures_iter(content) {
213 let module = cap.get(1).map_or("", |m| m.as_str());
214 imports.push((module.to_string(), vec![]));
215 }
216
217 let from_re = Regex::new(r"from\s+([a-zA-Z0-9_.]+)\s+import\s+(.+)").unwrap();
219 for cap in from_re.captures_iter(content) {
220 let module = cap.get(1).map_or("", |m| m.as_str());
221 let items = cap.get(2).map_or(vec![], |m| {
222 m.as_str()
223 .split(',')
224 .map(|s| s.split_whitespace().next().unwrap_or("").to_string())
225 .collect()
226 });
227 imports.push((module.to_string(), items));
228 }
229
230 imports
231 }
232
233 fn parse_functions(&self, content: &str) -> Vec<String> {
234 let fn_re = Regex::new(r"def\s+([a-zA-Z0-9_]+)").unwrap();
235 fn_re
236 .captures_iter(content)
237 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
238 .collect()
239 }
240
241 fn parse_function_calls(&self, content: &str) -> Vec<String> {
242 let call_re = Regex::new(r"([a-zA-Z0-9_]+)\s*\(").unwrap();
243 call_re
244 .captures_iter(content)
245 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
246 .filter(|name| {
247 !["if", "while", "for", "print", "len", "str", "int"].contains(&name.as_str())
248 })
249 .collect()
250 }
251
252 fn parse_types(&self, content: &str) -> Vec<String> {
253 let class_re = Regex::new(r"class\s+([A-Z][a-zA-Z0-9_]*)").unwrap();
254 class_re
255 .captures_iter(content)
256 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
257 .collect()
258 }
259
260 fn parse_type_usages(&self, content: &str) -> Vec<String> {
261 let type_re = Regex::new(r":\s*([A-Z][a-zA-Z0-9_\[\]]*)").unwrap();
263 type_re
264 .captures_iter(content)
265 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
266 .collect()
267 }
268}
269
270impl Default for RelationAnalyzer {
271 fn default() -> Self {
272 Self::new()
273 }
274}
275
impl RelationAnalyzer {
    /// Creates an analyzer with the built-in Rust (`rs`) and Python (`py`)
    /// parsers registered and empty relation/file caches.
    pub fn new() -> Self {
        let mut parsers: HashMap<String, Box<dyn LanguageParser>> = HashMap::new();
        parsers.insert("rs".to_string(), Box::new(RustParser));
        parsers.insert("py".to_string(), Box::new(PythonParser));

        Self {
            relations: Vec::new(),
            parsers,
            file_cache: HashMap::new(),
        }
    }

    /// Walks `path` recursively, analyzes every supported file, then runs the
    /// cross-file heuristics (coupling and test-relationship detection).
    ///
    /// # Errors
    /// Propagates errors from per-file analysis.
    pub fn analyze_directory(&mut self, path: &Path) -> Result<()> {
        self.collect_files(path)?;

        // Snapshot the cached paths first: `analyze_file` needs `&mut self`,
        // so we cannot iterate `file_cache` while calling it.
        let files: Vec<PathBuf> = self.file_cache.keys().cloned().collect();
        for file in &files {
            self.analyze_file(file)?;
        }

        self.detect_coupling();
        self.detect_test_relationships();

        Ok(())
    }

    /// Reads every file with a supported extension under `path` into
    /// `file_cache`. Unreadable files (non-UTF-8, permissions, …) are skipped
    /// with a warning rather than aborting the walk.
    fn collect_files(&mut self, path: &Path) -> Result<()> {
        use walkdir::WalkDir;

        for entry in WalkDir::new(path)
            .follow_links(true)
            .into_iter()
            .filter_map(|e| e.ok())
            .filter(|e| e.file_type().is_file())
        {
            let path = entry.path();
            if let Some(ext) = path.extension() {
                // Only cache files we have a parser for (currently rs/py).
                if self.parsers.contains_key(ext.to_str().unwrap_or("")) {
                    match fs::read_to_string(path) {
                        Ok(content) => {
                            self.file_cache.insert(path.to_path_buf(), content);
                        }
                        Err(e) => {
                            eprintln!("⚠️ Skipping {}: {}", path.display(), e);
                        }
                    }
                }
            }
        }

        Ok(())
    }

    /// Parses one cached file and records an `Imports` relation for every
    /// import that resolves to another cached file.
    ///
    /// # Errors
    /// Fails if `file_path` was never loaded into `file_cache`.
    fn analyze_file(&mut self, file_path: &Path) -> Result<()> {
        // Clone the content out of the cache so `self` can be re-borrowed
        // (mutably for `relations.push`) below.
        let content = self
            .file_cache
            .get(file_path)
            .ok_or_else(|| anyhow::anyhow!("File not in cache"))?
            .clone();

        let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");

        if let Some(parser) = self.parsers.get(ext) {
            let imports = parser.parse_imports(&content, file_path);
            for (module, items) in imports {
                // Only imports that map to a file we actually scanned become
                // relations; external crates/stdlib are dropped silently.
                if let Some(target) = self.resolve_import(file_path, &module) {
                    self.relations.push(FileRelation {
                        source: file_path.to_path_buf(),
                        target,
                        relation_type: RelationType::Imports,
                        items,
                        strength: 8,
                    });
                }
            }

            // Parsed but currently unused — presumably groundwork for the
            // FunctionCall/TypeUsage/Exports relation kinds (TODO confirm).
            let _functions = parser.parse_functions(&content);
            let _types = parser.parse_types(&content);
            let _function_calls = parser.parse_function_calls(&content);
            let _type_usages = parser.parse_type_usages(&content);

        }

        Ok(())
    }

    /// Tries to map an import path (e.g. `crate::scanner`) onto a file in
    /// `file_cache`. Returns `None` when the path cannot be project-local or
    /// no candidate file was scanned.
    fn resolve_import(&self, from_file: &Path, module: &str) -> Option<PathBuf> {
        // Reject single-segment paths that are not crate-relative: those are
        // external crates. `st::` is kept — presumably this crate's own
        // published name (TODO confirm).
        if !module.starts_with("crate")
            && !module.starts_with("super")
            && !module.starts_with("self")
        {
            if !module.starts_with("st::") && !module.contains("::") {
                return None;
            }
        }

        // Walk upward from the importing file to the nearest `src` directory;
        // it serves as the base for `crate::`-style paths. If no `src` exists
        // the loop stops at the topmost ancestor.
        let mut src_dir = from_file.parent()?;
        while src_dir.file_name() != Some(std::ffi::OsStr::new("src")) && src_dir.parent().is_some()
        {
            src_dir = src_dir.parent()?;
        }

        // Path with the crate-relative prefix removed and `::` → `/`.
        let clean_module = module
            .trim_start_matches("crate::")
            .trim_start_matches("st::")
            .trim_start_matches("self::")
            .replace("::", "/");

        // `super::` resolves relative to the parent module's directory,
        // `self::` relative to the current one; everything else from `src`.
        let (base_dir, module_path) = if module.starts_with("super::") {
            let parent = from_file.parent()?.parent()?;
            let path = module.trim_start_matches("super::").replace("::", "/");
            (parent, path)
        } else if module.starts_with("self::") {
            let parent = from_file.parent()?;
            let path = module.trim_start_matches("self::").replace("::", "/");
            (parent, path)
        } else {
            (src_dir, clean_module)
        };

        // Candidate layouts, most specific first: `foo.rs`, `foo/mod.rs`,
        // then just the last path segment as a bare file.
        let patterns = vec![
            format!("{}.rs", module_path),
            format!("{}/mod.rs", module_path),
            format!(
                "{}.rs",
                module_path.split('/').next_back().unwrap_or(&module_path)
            ),
        ];

        for pattern in patterns {
            let path = base_dir.join(&pattern);
            // Only files that were scanned count as resolution targets.
            if self.file_cache.contains_key(&path) {
                return Some(path);
            }
        }

        None
    }

    /// Marks file pairs with two or more `Imports` relations between them
    /// (in either direction) as `Coupled`.
    fn detect_coupling(&mut self) {
        let mut import_pairs: HashMap<(PathBuf, PathBuf), u8> = HashMap::new();

        for rel in &self.relations {
            if rel.relation_type == RelationType::Imports {
                // Canonically order the pair so A→B and B→A count together.
                let pair = if rel.source < rel.target {
                    (rel.source.clone(), rel.target.clone())
                } else {
                    (rel.target.clone(), rel.source.clone())
                };
                *import_pairs.entry(pair).or_insert(0) += 1;
            }
        }

        for ((file1, file2), count) in import_pairs {
            if count >= 2 {
                self.relations.push(FileRelation {
                    source: file1,
                    target: file2,
                    relation_type: RelationType::Coupled,
                    items: vec![],
                    // Cap so the score stays in the 0–10 range used elsewhere.
                    strength: count.min(10),
                });
            }
        }
    }

    /// For each file whose path looks like a test, links every non-test file
    /// with a matching stem to it via `TestedBy` (source = tested file,
    /// target = test file).
    fn detect_test_relationships(&mut self) {
        for file in self.file_cache.keys() {
            let file_str = file.to_string_lossy();

            // NOTE(review): the second clause is redundant — any path that
            // contains "_test" also contains "test".
            if file_str.contains("test") || file_str.contains("_test") {
                // Strip common test markers from the stem to recover the name
                // of the file under test (e.g. `scanner_test` → `scanner`).
                let base_name = file
                    .file_stem()
                    .and_then(|s| s.to_str())
                    .unwrap_or("")
                    .replace("_test", "")
                    .replace("test_", "");

                for source in self.file_cache.keys() {
                    if source != file
                        && source
                            .file_stem()
                            .and_then(|s| s.to_str())
                            .is_some_and(|s| s == base_name)
                    {
                        self.relations.push(FileRelation {
                            source: source.clone(),
                            target: file.clone(),
                            relation_type: RelationType::TestedBy,
                            items: vec![],
                            strength: 10,
                        });
                    }
                }
            }
        }
    }

    /// All relations recorded so far, in discovery order.
    pub fn get_relations(&self) -> &[FileRelation] {
        &self.relations
    }

    /// Relations in which `file` participates as either endpoint.
    pub fn get_file_relations(&self, file: &Path) -> Vec<&FileRelation> {
        self.relations
            .iter()
            .filter(|r| r.source == file || r.target == file)
            .collect()
    }

    /// Sum of the strengths of every relation between `file1` and `file2`,
    /// in either direction. NOTE(review): the `u8` sum can overflow (panics
    /// in debug builds) once many relations accumulate — consider widening.
    pub fn get_coupling_score(&self, file1: &Path, file2: &Path) -> u8 {
        self.relations
            .iter()
            .filter(|r| {
                (r.source == file1 && r.target == file2) || (r.source == file2 && r.target == file1)
            })
            .map(|r| r.strength)
            .sum()
    }
}
528
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke-test of `RustParser` on a small representative snippet.
    #[test]
    fn test_rust_parser() {
        let parser = RustParser;
        let content = r#"
use std::collections::HashMap;
use crate::scanner::{Scanner, FileInfo};
mod formatters;

pub fn process_file() {
    let scanner = Scanner::new();
}
"#;

        // Two entries: one per `use` statement. NOTE(review): `mod
        // formatters;` is not counted because the `mod` regex's `^` is not in
        // multi-line mode and so only anchors at the very start of the input
        // — confirm whether that is the intended behavior.
        let imports = parser.parse_imports(content, Path::new("test.rs"));
        assert_eq!(imports.len(), 2);

        let functions = parser.parse_functions(content);
        assert_eq!(functions, vec!["process_file"]);
    }
}