Skip to main content

st/
relations.rs

1//! Code relationship analyzer - "Semantic X-ray vision for codebases" - Omni
2//! Tracks imports, function calls, type usage, and test relationships
3
4use anyhow::Result;
5use regex::Regex;
6use std::collections::HashMap;
7use std::fs;
8use std::path::{Path, PathBuf};
9
/// Types of relationships between files
///
/// The enum carries no data, so it is `Copy` and can be used directly as a
/// hash-map key or compared by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RelationType {
    /// Direct import/use/require
    Imports,
    /// Function defined here, called there
    FunctionCall,
    /// Type/struct/class defined here, used there
    TypeUsage,
    /// Test file testing this source
    TestedBy,
    /// Module exports this
    Exports,
    /// Tight coupling detected
    Coupled,
}
26
27/// A relationship between two files
28#[derive(Debug, Clone)]
29pub struct FileRelation {
30    /// Source file path
31    pub source: PathBuf,
32    /// Target file path
33    pub target: PathBuf,
34    /// Type of relationship
35    pub relation_type: RelationType,
36    /// Specific items involved (function names, types, etc.)
37    pub items: Vec<String>,
38    /// Strength of relationship (1-10)
39    pub strength: u8,
40}
41
42/// Analyzes code relationships in a project
43pub struct RelationAnalyzer {
44    /// All discovered relationships
45    relations: Vec<FileRelation>,
46    /// Language-specific parsers
47    parsers: HashMap<String, Box<dyn LanguageParser>>,
48    /// File cache to avoid re-reading
49    file_cache: HashMap<PathBuf, String>,
50}
51
/// Contract every per-language parser fulfils.
///
/// Implementations are stored as `Box<dyn LanguageParser>`, so the trait has
/// to stay object safe.
trait LanguageParser: Send + Sync {
    /// Extract the imports declared in `content`.
    ///
    /// Each entry pairs a module path with the item names imported from it;
    /// the item list may be empty.
    fn parse_imports(&self, content: &str, file_path: &Path) -> Vec<(String, Vec<String>)>;

    /// Names of the functions defined in `content`.
    fn parse_functions(&self, content: &str) -> Vec<String>;

    /// Names of the functions that `content` appears to call.
    fn parse_function_calls(&self, content: &str) -> Vec<String>;

    /// Names of the types defined in `content`.
    fn parse_types(&self, content: &str) -> Vec<String>;

    /// Names of the types that `content` refers to.
    fn parse_type_usages(&self, content: &str) -> Vec<String>;
}
69
70/// Rust language parser
71struct RustParser;
72
73impl LanguageParser for RustParser {
74    fn parse_imports(&self, content: &str, _file_path: &Path) -> Vec<(String, Vec<String>)> {
75        let mut imports = Vec::new();
76
77        // First, handle multi-line imports by joining them
78        let mut cleaned_content = String::new();
79        let mut in_use = false;
80        let mut use_buffer = String::new();
81
82        for line in content.lines() {
83            if line.trim_start().starts_with("use ") {
84                in_use = true;
85                use_buffer.push_str(line);
86                use_buffer.push(' ');
87            } else if in_use {
88                if line.contains(';') {
89                    use_buffer.push_str(line);
90                    cleaned_content.push_str(&use_buffer.replace('\n', " "));
91                    cleaned_content.push('\n');
92                    use_buffer.clear();
93                    in_use = false;
94                } else {
95                    use_buffer.push_str(line);
96                    use_buffer.push(' ');
97                }
98            } else {
99                cleaned_content.push_str(line);
100                cleaned_content.push('\n');
101            }
102        }
103
104        // Handle simple use statements: use module; or use module::item;
105        let simple_use_re = Regex::new(r"use\s+([a-zA-Z0-9_:]+)(?:::([a-zA-Z0-9_]+))?;").unwrap();
106        for cap in simple_use_re.captures_iter(&cleaned_content) {
107            let module = cap.get(1).map_or("", |m| m.as_str());
108            let item = cap.get(2).map_or(vec![], |m| vec![m.as_str().to_string()]);
109            imports.push((module.to_string(), item));
110        }
111
112        // Handle complex imports: use module::{item1, item2, ...}
113        let complex_use_re = Regex::new(r"use\s+([a-zA-Z0-9_:]+)::\{([^}]+)\}").unwrap();
114        for cap in complex_use_re.captures_iter(&cleaned_content) {
115            let module = cap.get(1).map_or("", |m| m.as_str());
116            let items = cap.get(2).map_or(vec![], |m| {
117                m.as_str()
118                    .split(',')
119                    .map(|s| {
120                        // Handle nested imports like ai::AiFormatter
121                        let parts: Vec<&str> = s.trim().split("::").collect();
122                        if parts.len() > 1 {
123                            // For ai::AiFormatter, we want to track both the submodule and item
124                            imports.push((
125                                format!("{}::{}", module, parts[0]),
126                                vec![parts[1].to_string()],
127                            ));
128                        }
129                        s.trim().to_string()
130                    })
131                    .collect()
132            });
133            if !items.is_empty() {
134                imports.push((module.to_string(), items));
135            }
136        }
137
138        // Match mod statements
139        let mod_re = Regex::new(r"^\s*(?:pub\s+)?mod\s+([a-zA-Z0-9_]+)").unwrap();
140        for cap in mod_re.captures_iter(content) {
141            let module = cap.get(1).map_or("", |m| m.as_str());
142            imports.push((module.to_string(), vec![]));
143        }
144
145        imports
146    }
147
148    fn parse_functions(&self, content: &str) -> Vec<String> {
149        let fn_re = Regex::new(r"(?:pub\s+)?fn\s+([a-zA-Z0-9_]+)").unwrap();
150        fn_re
151            .captures_iter(content)
152            .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
153            .collect()
154    }
155
156    fn parse_function_calls(&self, content: &str) -> Vec<String> {
157        let call_re = Regex::new(r"([a-zA-Z0-9_]+)\s*\(").unwrap();
158        call_re
159            .captures_iter(content)
160            .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
161            .collect()
162    }
163
164    fn parse_types(&self, content: &str) -> Vec<String> {
165        let mut types = Vec::new();
166
167        // Structs
168        let struct_re = Regex::new(r"(?:pub\s+)?struct\s+([A-Z][a-zA-Z0-9_]*)").unwrap();
169        types.extend(
170            struct_re
171                .captures_iter(content)
172                .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string())),
173        );
174
175        // Enums
176        let enum_re = Regex::new(r"(?:pub\s+)?enum\s+([A-Z][a-zA-Z0-9_]*)").unwrap();
177        types.extend(
178            enum_re
179                .captures_iter(content)
180                .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string())),
181        );
182
183        // Traits
184        let trait_re = Regex::new(r"(?:pub\s+)?trait\s+([A-Z][a-zA-Z0-9_]*)").unwrap();
185        types.extend(
186            trait_re
187                .captures_iter(content)
188                .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string())),
189        );
190
191        types
192    }
193
194    fn parse_type_usages(&self, content: &str) -> Vec<String> {
195        let type_re = Regex::new(r":\s*([A-Z][a-zA-Z0-9_]*)").unwrap();
196        type_re
197            .captures_iter(content)
198            .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
199            .collect()
200    }
201}
202
203/// Python language parser
204struct PythonParser;
205
206impl LanguageParser for PythonParser {
207    fn parse_imports(&self, content: &str, _file_path: &Path) -> Vec<(String, Vec<String>)> {
208        let mut imports = Vec::new();
209
210        // import module
211        let import_re = Regex::new(r"import\s+([a-zA-Z0-9_.]+)").unwrap();
212        for cap in import_re.captures_iter(content) {
213            let module = cap.get(1).map_or("", |m| m.as_str());
214            imports.push((module.to_string(), vec![]));
215        }
216
217        // from module import items
218        let from_re = Regex::new(r"from\s+([a-zA-Z0-9_.]+)\s+import\s+(.+)").unwrap();
219        for cap in from_re.captures_iter(content) {
220            let module = cap.get(1).map_or("", |m| m.as_str());
221            let items = cap.get(2).map_or(vec![], |m| {
222                m.as_str()
223                    .split(',')
224                    .map(|s| s.split_whitespace().next().unwrap_or("").to_string())
225                    .collect()
226            });
227            imports.push((module.to_string(), items));
228        }
229
230        imports
231    }
232
233    fn parse_functions(&self, content: &str) -> Vec<String> {
234        let fn_re = Regex::new(r"def\s+([a-zA-Z0-9_]+)").unwrap();
235        fn_re
236            .captures_iter(content)
237            .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
238            .collect()
239    }
240
241    fn parse_function_calls(&self, content: &str) -> Vec<String> {
242        let call_re = Regex::new(r"([a-zA-Z0-9_]+)\s*\(").unwrap();
243        call_re
244            .captures_iter(content)
245            .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
246            .filter(|name| {
247                !["if", "while", "for", "print", "len", "str", "int"].contains(&name.as_str())
248            })
249            .collect()
250    }
251
252    fn parse_types(&self, content: &str) -> Vec<String> {
253        let class_re = Regex::new(r"class\s+([A-Z][a-zA-Z0-9_]*)").unwrap();
254        class_re
255            .captures_iter(content)
256            .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
257            .collect()
258    }
259
260    fn parse_type_usages(&self, content: &str) -> Vec<String> {
261        // Python type hints
262        let type_re = Regex::new(r":\s*([A-Z][a-zA-Z0-9_\[\]]*)").unwrap();
263        type_re
264            .captures_iter(content)
265            .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
266            .collect()
267    }
268}
269
270impl Default for RelationAnalyzer {
271    fn default() -> Self {
272        Self::new()
273    }
274}
275
276impl RelationAnalyzer {
277    /// Create a new analyzer
278    pub fn new() -> Self {
279        let mut parsers: HashMap<String, Box<dyn LanguageParser>> = HashMap::new();
280        parsers.insert("rs".to_string(), Box::new(RustParser));
281        parsers.insert("py".to_string(), Box::new(PythonParser));
282
283        Self {
284            relations: Vec::new(),
285            parsers,
286            file_cache: HashMap::new(),
287        }
288    }
289
290    /// Analyze a directory for code relationships
291    pub fn analyze_directory(&mut self, path: &Path) -> Result<()> {
292        // First pass: collect all source files and their content
293        self.collect_files(path)?;
294
295        // Second pass: analyze relationships
296        let files: Vec<PathBuf> = self.file_cache.keys().cloned().collect();
297        for file in &files {
298            self.analyze_file(file)?;
299        }
300
301        // Third pass: detect coupling and test relationships
302        self.detect_coupling();
303        self.detect_test_relationships();
304
305        Ok(())
306    }
307
308    /// Collect all source files
309    fn collect_files(&mut self, path: &Path) -> Result<()> {
310        use walkdir::WalkDir;
311
312        for entry in WalkDir::new(path)
313            .follow_links(true)
314            .into_iter()
315            .filter_map(|e| e.ok())
316            .filter(|e| e.file_type().is_file())
317        {
318            let path = entry.path();
319            if let Some(ext) = path.extension() {
320                if self.parsers.contains_key(ext.to_str().unwrap_or("")) {
321                    // Skip files that can't be read as UTF-8
322                    match fs::read_to_string(path) {
323                        Ok(content) => {
324                            self.file_cache.insert(path.to_path_buf(), content);
325                        }
326                        Err(e) => {
327                            // Skip files with encoding errors or other read issues
328                            eprintln!("⚠️  Skipping {}: {}", path.display(), e);
329                        }
330                    }
331                }
332            }
333        }
334
335        Ok(())
336    }
337
338    /// Analyze a single file for relationships
339    fn analyze_file(&mut self, file_path: &Path) -> Result<()> {
340        let content = self
341            .file_cache
342            .get(file_path)
343            .ok_or_else(|| anyhow::anyhow!("File not in cache"))?
344            .clone();
345
346        let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
347
348        if let Some(parser) = self.parsers.get(ext) {
349            // Parse imports
350            let imports = parser.parse_imports(&content, file_path);
351            for (module, items) in imports {
352                if let Some(target) = self.resolve_import(file_path, &module) {
353                    self.relations.push(FileRelation {
354                        source: file_path.to_path_buf(),
355                        target,
356                        relation_type: RelationType::Imports,
357                        items,
358                        strength: 8,
359                    });
360                }
361            }
362
363            // Parse functions and types for cross-referencing
364            let _functions = parser.parse_functions(&content);
365            let _types = parser.parse_types(&content);
366            let _function_calls = parser.parse_function_calls(&content);
367            let _type_usages = parser.parse_type_usages(&content);
368
369            // Store for later cross-referencing
370            // (In a real implementation, we'd build an index here to track
371            // where functions are called and types are used, enabling deeper
372            // analysis like call graphs and type dependency chains)
373        }
374
375        Ok(())
376    }
377
378    /// Resolve an import to a file path
379    fn resolve_import(&self, from_file: &Path, module: &str) -> Option<PathBuf> {
380        // Skip external crates
381        if !module.starts_with("crate")
382            && !module.starts_with("super")
383            && !module.starts_with("self")
384        {
385            // Check if it's an internal module by looking for st:: or our crate name
386            if !module.starts_with("st::") && !module.contains("::") {
387                return None; // External crate
388            }
389        }
390
391        // Find the src directory (project root)
392        let mut src_dir = from_file.parent()?;
393        while src_dir.file_name() != Some(std::ffi::OsStr::new("src")) && src_dir.parent().is_some()
394        {
395            src_dir = src_dir.parent()?;
396        }
397
398        // Clean up the module path
399        let clean_module = module
400            .trim_start_matches("crate::")
401            .trim_start_matches("st::")
402            .trim_start_matches("self::")
403            .replace("::", "/");
404
405        // Handle super:: imports
406        let (base_dir, module_path) = if module.starts_with("super::") {
407            let parent = from_file.parent()?.parent()?;
408            let path = module.trim_start_matches("super::").replace("::", "/");
409            (parent, path)
410        } else if module.starts_with("self::") {
411            let parent = from_file.parent()?;
412            let path = module.trim_start_matches("self::").replace("::", "/");
413            (parent, path)
414        } else {
415            (src_dir, clean_module)
416        };
417
418        // Try different file patterns
419        let patterns = vec![
420            format!("{}.rs", module_path),
421            format!("{}/mod.rs", module_path),
422            format!(
423                "{}.rs",
424                module_path.split('/').next_back().unwrap_or(&module_path)
425            ),
426        ];
427
428        for pattern in patterns {
429            let path = base_dir.join(&pattern);
430            if self.file_cache.contains_key(&path) {
431                return Some(path);
432            }
433        }
434
435        None
436    }
437
438    /// Detect tightly coupled files
439    fn detect_coupling(&mut self) {
440        // Count bidirectional imports
441        let mut import_pairs: HashMap<(PathBuf, PathBuf), u8> = HashMap::new();
442
443        for rel in &self.relations {
444            if rel.relation_type == RelationType::Imports {
445                let pair = if rel.source < rel.target {
446                    (rel.source.clone(), rel.target.clone())
447                } else {
448                    (rel.target.clone(), rel.source.clone())
449                };
450                *import_pairs.entry(pair).or_insert(0) += 1;
451            }
452        }
453
454        // Mark bidirectional imports as coupled
455        for ((file1, file2), count) in import_pairs {
456            if count >= 2 {
457                self.relations.push(FileRelation {
458                    source: file1,
459                    target: file2,
460                    relation_type: RelationType::Coupled,
461                    items: vec![],
462                    strength: count.min(10),
463                });
464            }
465        }
466    }
467
468    /// Detect test relationships
469    fn detect_test_relationships(&mut self) {
470        for file in self.file_cache.keys() {
471            let file_str = file.to_string_lossy();
472
473            // Is this a test file?
474            if file_str.contains("test") || file_str.contains("_test") {
475                // Find what it's testing
476                let base_name = file
477                    .file_stem()
478                    .and_then(|s| s.to_str())
479                    .unwrap_or("")
480                    .replace("_test", "")
481                    .replace("test_", "");
482
483                // Look for matching source file
484                for source in self.file_cache.keys() {
485                    if source != file
486                        && source
487                            .file_stem()
488                            .and_then(|s| s.to_str())
489                            .is_some_and(|s| s == base_name)
490                    {
491                        self.relations.push(FileRelation {
492                            source: source.clone(),
493                            target: file.clone(),
494                            relation_type: RelationType::TestedBy,
495                            items: vec![],
496                            strength: 10,
497                        });
498                    }
499                }
500            }
501        }
502    }
503
504    /// Get all relationships
505    pub fn get_relations(&self) -> &[FileRelation] {
506        &self.relations
507    }
508
509    /// Get relationships for a specific file
510    pub fn get_file_relations(&self, file: &Path) -> Vec<&FileRelation> {
511        self.relations
512            .iter()
513            .filter(|r| r.source == file || r.target == file)
514            .collect()
515    }
516
517    /// Get coupling score between two files
518    pub fn get_coupling_score(&self, file1: &Path, file2: &Path) -> u8 {
519        self.relations
520            .iter()
521            .filter(|r| {
522                (r.source == file1 && r.target == file2) || (r.source == file2 && r.target == file1)
523            })
524            .map(|r| r.strength)
525            .sum()
526    }
527}
528
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks what the Rust parser extracts from a small source snippet.
    ///
    /// The parsed entries themselves are asserted rather than only their
    /// number, so a wrong module path or item list is caught. Nothing is
    /// asserted about the `mod formatters;` line.
    #[test]
    fn test_rust_parser() {
        let parser = RustParser;
        let content = r#"
use std::collections::HashMap;
use crate::scanner::{Scanner, FileInfo};
mod formatters;

pub fn process_file() {
    let scanner = Scanner::new();
}
"#;

        let imports = parser.parse_imports(content, Path::new("test.rs"));

        // A plain `use` reports the whole path as the module, without items.
        assert!(imports.contains(&("std::collections::HashMap".to_string(), Vec::new())));

        // A braced `use` reports the prefix as the module plus every item.
        assert!(imports.contains(&(
            "crate::scanner".to_string(),
            vec!["Scanner".to_string(), "FileInfo".to_string()],
        )));

        let functions = parser.parse_functions(content);
        assert_eq!(functions, vec!["process_file"]);
    }
}
552}