1use crate::types::{RepoFile, Repository, SymbolKind};
7use once_cell::sync::Lazy;
8use petgraph::algo::tarjan_scc;
9use petgraph::graph::{DiGraph, NodeIndex};
10use regex::Regex;
11use std::collections::{HashMap, HashSet};
12use std::path::Path;
13
14static JS_REQUIRE_RE: Lazy<Regex> = Lazy::new(|| {
16 Regex::new(r#"require\s*\(\s*['"]([^'"]+)['"]\s*\)"#).expect("Invalid require regex")
17});
18static JS_IMPORT_RE: Lazy<Regex> = Lazy::new(|| {
19 Regex::new(r#"(?:from|import)\s*\(\s*['"]([^'"]+)['"]\s*\)|from\s+['"]([^'"]+)['"]"#)
20 .expect("Invalid import regex")
21});
22
23#[derive(Debug, Clone)]
25pub struct DependencyNode {
26 pub path: String,
28 pub module_name: String,
30 pub exports: Vec<String>,
32 pub tokens: u32,
34 pub importance: f64,
36}
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
40pub enum DependencyType {
41 Import,
43 Reexport,
45 TypeImport,
47 DynamicImport,
49 Inheritance,
51}
52
53#[derive(Debug, Clone)]
55pub struct DependencyEdge {
56 pub dep_type: DependencyType,
58 pub symbols: Vec<String>,
60 pub line: u32,
62 pub weight: f64,
64}
65
66#[derive(Debug, Clone)]
68pub struct ResolvedImport {
69 pub from_path: String,
71 pub to_path: Option<String>,
73 pub specifier: String,
75 pub symbols: Vec<String>,
77 pub import_type: DependencyType,
79 pub line: u32,
81 pub is_external: bool,
83}
84
85#[derive(Debug, Clone)]
87pub struct SymbolReference {
88 pub symbol_name: String,
90 pub file_path: String,
92 pub line: u32,
94 pub context: ReferenceContext,
96}
97
98#[derive(Debug, Clone, Copy, PartialEq, Eq)]
100pub enum ReferenceContext {
101 Call,
103 Type,
105 Assignment,
107 Parameter,
109 Return,
111 Reference,
113}
114
115pub struct DependencyGraph {
117 graph: DiGraph<DependencyNode, DependencyEdge>,
119 path_to_node: HashMap<String, NodeIndex>,
121 module_to_path: HashMap<String, String>,
123 symbol_to_file: HashMap<String, String>,
125 imports: Vec<ResolvedImport>,
127 external_deps: HashSet<String>,
129 circular_deps: Vec<Vec<String>>,
131}
132
133impl DependencyGraph {
134 pub fn new() -> Self {
136 Self {
137 graph: DiGraph::new(),
138 path_to_node: HashMap::new(),
139 module_to_path: HashMap::new(),
140 symbol_to_file: HashMap::new(),
141 imports: Vec::new(),
142 external_deps: HashSet::new(),
143 circular_deps: Vec::new(),
144 }
145 }
146
147 pub fn build(repo: &Repository) -> Self {
149 let mut graph = Self::new();
150
151 for file in &repo.files {
153 graph.add_file(file);
154 }
155
156 for file in &repo.files {
158 graph.extract_imports(file, repo);
159 }
160
161 graph.detect_cycles();
163
164 graph.compute_importance();
166
167 graph
168 }
169
170 fn add_file(&mut self, file: &RepoFile) {
172 let module_name = Self::path_to_module(&file.relative_path);
173
174 let exports: Vec<String> = file
176 .symbols
177 .iter()
178 .filter(|s| s.kind != SymbolKind::Import)
179 .map(|s| s.name.clone())
180 .collect();
181
182 for export in &exports {
184 let key = format!("{}::{}", module_name, export);
185 self.symbol_to_file.insert(key, file.relative_path.clone());
186 self.symbol_to_file
188 .entry(export.clone())
189 .or_insert_with(|| file.relative_path.clone());
190 }
191
192 let node = DependencyNode {
193 path: file.relative_path.clone(),
194 module_name: module_name.clone(),
195 exports,
196 tokens: file.token_count.claude,
197 importance: file.importance as f64,
198 };
199
200 let idx = self.graph.add_node(node);
201 self.path_to_node.insert(file.relative_path.clone(), idx);
202 self.module_to_path
203 .insert(module_name, file.relative_path.clone());
204 }
205
206 fn extract_imports(&mut self, file: &RepoFile, repo: &Repository) {
208 for symbol in &file.symbols {
210 if symbol.kind != SymbolKind::Import {
211 continue;
212 }
213
214 let parsed = self.parse_import_statement(&symbol.name);
216
217 for import in parsed {
218 let resolved = self.resolve_import(&import, file, repo);
220
221 if let Some(target_path) = &resolved.to_path {
222 if let (Some(&from_idx), Some(&to_idx)) = (
224 self.path_to_node.get(&file.relative_path),
225 self.path_to_node.get(target_path),
226 ) {
227 let edge = DependencyEdge {
228 dep_type: resolved.import_type,
229 symbols: resolved.symbols.clone(),
230 line: resolved.line,
231 weight: 1.0,
232 };
233 self.graph.add_edge(from_idx, to_idx, edge);
234 }
235 } else if resolved.is_external {
236 self.external_deps.insert(resolved.specifier.clone());
237 }
238
239 self.imports.push(resolved);
240 }
241 }
242
243 if let Some(content) = &file.content {
246 self.extract_imports_from_content(content, file);
247 }
248 }
249
250 fn extract_imports_from_content(&mut self, content: &str, file: &RepoFile) {
253 let is_js = file
254 .language
255 .as_deref()
256 .is_some_and(|l| matches!(l, "javascript" | "typescript" | "jsx" | "tsx"));
257
258 if !is_js {
259 return;
260 }
261
262 let mut found_packages = HashSet::new();
263
264 for cap in JS_REQUIRE_RE.captures_iter(content) {
266 if let Some(pkg) = cap.get(1) {
267 let specifier = pkg.as_str();
268 if Self::is_external_specifier(specifier) {
269 let pkg_name = Self::extract_package_name(specifier);
270 found_packages.insert(pkg_name);
271 }
272 }
273 }
274
275 for cap in JS_IMPORT_RE.captures_iter(content) {
277 let specifier = cap.get(1).or_else(|| cap.get(2));
279 if let Some(pkg) = specifier {
280 let spec = pkg.as_str();
281 if Self::is_external_specifier(spec) {
282 let pkg_name = Self::extract_package_name(spec);
283 found_packages.insert(pkg_name);
284 }
285 }
286 }
287
288 for pkg in found_packages {
290 if Self::is_valid_package_name(&pkg) {
291 self.external_deps.insert(pkg);
292 }
293 }
294 }
295
296 fn is_external_specifier(spec: &str) -> bool {
298 !spec.starts_with('.') && !spec.starts_with('/')
299 }
300
301 fn extract_package_name(spec: &str) -> String {
303 if spec.starts_with('@') {
304 let parts: Vec<&str> = spec.splitn(3, '/').collect();
306 if parts.len() >= 2 {
307 format!("{}/{}", parts[0], parts[1])
308 } else {
309 spec.to_owned()
310 }
311 } else {
312 spec.split('/').next().unwrap_or(spec).to_owned()
314 }
315 }
316
317 fn is_valid_package_name(name: &str) -> bool {
319 if name.is_empty() || name.contains(' ') || name.contains('+') {
321 return false;
322 }
323 if name.contains("://") || name.starts_with('/') {
325 return false;
326 }
327 if name.len() < 2 {
329 return false;
330 }
331 let Some(first) = name.chars().next() else {
334 return false;
335 };
336 if !first.is_ascii_alphabetic() && first != '@' && first != '_' {
337 return false;
338 }
339 true
340 }
341
342 fn parse_import_statement(&self, import_text: &str) -> Vec<ParsedImport> {
344 let mut imports = Vec::new();
345 let text = import_text.trim();
346
347 let js_specifier = if text.starts_with("import ") || text.contains("require(") {
349 Self::extract_string_literal(text)
350 } else {
351 None
352 };
353
354 if text.contains("require(") || (text.starts_with("import ") && js_specifier.is_some()) {
356 if let Some(spec) = js_specifier {
357 let symbols = Self::extract_import_symbols(text);
358 let import_type = if text.contains("type ") {
359 DependencyType::TypeImport
360 } else if text.contains("import(") {
361 DependencyType::DynamicImport
362 } else {
363 DependencyType::Import
364 };
365 imports.push(ParsedImport { specifier: spec, symbols, import_type });
366 }
367 }
368 else if text.starts_with("import ") {
370 let module = text.trim_start_matches("import ").trim();
371 let module = module.split(" as ").next().unwrap_or(module);
373 for m in module.split(',') {
375 imports.push(ParsedImport {
376 specifier: m.trim().to_owned(),
377 symbols: vec![],
378 import_type: DependencyType::Import,
379 });
380 }
381 } else if text.starts_with("from ") {
382 if let Some(rest) = text.strip_prefix("from ") {
384 let parts: Vec<&str> = rest.splitn(2, " import ").collect();
385 if parts.len() == 2 {
386 let module = parts[0].trim();
387 let symbols: Vec<String> = parts[1]
388 .split(',')
389 .map(|s| s.split(" as ").next().unwrap_or(s).trim().to_owned())
390 .filter(|s| !s.is_empty())
391 .collect();
392 imports.push(ParsedImport {
393 specifier: module.to_owned(),
394 symbols,
395 import_type: DependencyType::Import,
396 });
397 }
398 }
399 }
400 else if text.starts_with("use ") {
402 let path = text.trim_start_matches("use ").trim_end_matches(';').trim();
403 if path.contains("::") {
405 let parts: Vec<&str> = path.rsplitn(2, "::").collect();
406 let base = if parts.len() == 2 { parts[1] } else { "" };
407 let symbols_part = parts[0].trim_matches(|c| c == '{' || c == '}');
408 let symbols: Vec<String> = symbols_part
409 .split(',')
410 .map(|s| s.split(" as ").next().unwrap_or(s).trim().to_owned())
411 .filter(|s| !s.is_empty())
412 .collect();
413 imports.push(ParsedImport {
414 specifier: base.to_owned(),
415 symbols,
416 import_type: DependencyType::Import,
417 });
418 } else {
419 imports.push(ParsedImport {
420 specifier: path.to_owned(),
421 symbols: vec![],
422 import_type: DependencyType::Import,
423 });
424 }
425 }
426 else if text.contains("import") {
428 let mut i = 0;
430 let chars: Vec<char> = text.chars().collect();
431 while i < chars.len() {
432 if chars[i] == '"' {
433 let start = i + 1;
434 i += 1;
435 while i < chars.len() && chars[i] != '"' {
436 i += 1;
437 }
438 if i < chars.len() {
439 let spec: String = chars[start..i].iter().collect();
440 imports.push(ParsedImport {
441 specifier: spec,
442 symbols: vec![],
443 import_type: DependencyType::Import,
444 });
445 }
446 }
447 i += 1;
448 }
449 }
450
451 imports
452 }
453
454 fn extract_string_literal(text: &str) -> Option<String> {
456 let chars: Vec<char> = text.chars().collect();
458 let mut i = 0;
459 while i < chars.len() {
460 if chars[i] == '"' || chars[i] == '\'' {
461 let quote = chars[i];
462 let start = i + 1;
463 i += 1;
464 while i < chars.len() && chars[i] != quote {
465 i += 1;
466 }
467 if i < chars.len() {
468 return Some(chars[start..i].iter().collect());
469 }
470 }
471 i += 1;
472 }
473 None
474 }
475
476 fn extract_import_symbols(text: &str) -> Vec<String> {
478 let mut symbols = Vec::new();
479
480 if let Some(start) = text.find('{') {
482 if let Some(end) = text.find('}') {
483 let inner = &text[start + 1..end];
484 for sym in inner.split(',') {
485 let sym = sym.split(" as ").next().unwrap_or(sym).trim();
486 if !sym.is_empty() && sym != "type" {
487 symbols.push(sym.to_owned());
488 }
489 }
490 }
491 }
492 else if text.starts_with("import ") {
494 let after_import = text.trim_start_matches("import ");
495 if let Some(default_name) = after_import.split_whitespace().next() {
496 if default_name != "type" && default_name != "*" && !default_name.starts_with('{') {
497 symbols.push(default_name.to_owned());
498 }
499 }
500 }
501
502 symbols
503 }
504
505 fn resolve_import(
507 &self,
508 import: &ParsedImport,
509 from_file: &RepoFile,
510 repo: &Repository,
511 ) -> ResolvedImport {
512 let specifier = &import.specifier;
513
514 if self.is_external_import(specifier) {
516 return ResolvedImport {
517 from_path: from_file.relative_path.clone(),
518 to_path: None,
519 specifier: specifier.clone(),
520 symbols: import.symbols.clone(),
521 import_type: import.import_type,
522 line: 0,
523 is_external: true,
524 };
525 }
526
527 let base_dir = Path::new(&from_file.relative_path)
529 .parent()
530 .map(|p| p.to_string_lossy().to_string())
531 .unwrap_or_default();
532
533 let candidates =
535 self.generate_resolution_candidates(specifier, &base_dir, &from_file.language);
536
537 for candidate in candidates {
539 if repo.files.iter().any(|f| f.relative_path == candidate) {
540 return ResolvedImport {
541 from_path: from_file.relative_path.clone(),
542 to_path: Some(candidate),
543 specifier: specifier.clone(),
544 symbols: import.symbols.clone(),
545 import_type: import.import_type,
546 line: 0,
547 is_external: false,
548 };
549 }
550 }
551
552 if let Some(path) = self.module_to_path.get(specifier) {
554 return ResolvedImport {
555 from_path: from_file.relative_path.clone(),
556 to_path: Some(path.clone()),
557 specifier: specifier.clone(),
558 symbols: import.symbols.clone(),
559 import_type: import.import_type,
560 line: 0,
561 is_external: false,
562 };
563 }
564
565 ResolvedImport {
567 from_path: from_file.relative_path.clone(),
568 to_path: None,
569 specifier: specifier.clone(),
570 symbols: import.symbols.clone(),
571 import_type: import.import_type,
572 line: 0,
573 is_external: false,
574 }
575 }
576
577 fn is_external_import(&self, specifier: &str) -> bool {
579 if specifier.starts_with('.') || specifier.starts_with('/') {
581 return false;
582 }
583
584 let external_prefixes = [
586 "react",
587 "vue",
588 "angular",
589 "express",
590 "lodash",
591 "axios",
592 "std",
593 "core",
594 "alloc",
595 "collections", "fmt",
597 "os",
598 "io",
599 "net",
600 "http",
601 "sync",
602 "context", "java.",
604 "javax.",
605 "org.apache",
606 "com.google", "numpy",
608 "pandas",
609 "torch",
610 "tensorflow",
611 "sklearn", ];
613
614 for prefix in external_prefixes {
615 if specifier.starts_with(prefix) {
616 return true;
617 }
618 }
619
620 if specifier.starts_with('@') {
622 return true;
623 }
624
625 !specifier.contains('/') && !specifier.contains('\\')
627 }
628
629 fn generate_resolution_candidates(
631 &self,
632 specifier: &str,
633 base_dir: &str,
634 language: &Option<String>,
635 ) -> Vec<String> {
636 let mut candidates = Vec::new();
637
638 let extensions = match language.as_deref() {
640 Some("python") => vec!["py", "pyi"],
641 Some("javascript") | Some("jsx") => vec!["js", "jsx", "mjs", "cjs", "ts", "tsx"],
642 Some("typescript") | Some("tsx") => vec!["ts", "tsx", "js", "jsx"],
643 Some("rust") => vec!["rs"],
644 Some("go") => vec!["go"],
645 Some("java") => vec!["java"],
646 _ => vec![""],
647 };
648
649 if specifier.starts_with('.') {
651 let resolved = normalize_path(&format!("{}/{}", base_dir, specifier));
652
653 for ext in &extensions {
655 if ext.is_empty() {
656 candidates.push(resolved.clone());
657 } else {
658 candidates.push(format!("{}.{}", resolved, ext));
659 }
660 }
661
662 for ext in &extensions {
664 if !ext.is_empty() {
665 candidates.push(format!("{}/index.{}", resolved, ext));
666 candidates.push(format!("{}/mod.{}", resolved, ext)); candidates.push(format!("{}/__init__.{}", resolved, ext)); }
669 }
670 } else {
671 let prefixes = ["src", "lib", "app", "pkg", "internal", ""];
674
675 for prefix in prefixes {
676 let base = if prefix.is_empty() {
677 specifier.to_owned()
678 } else {
679 format!("{}/{}", prefix, specifier)
680 };
681
682 for ext in &extensions {
683 if ext.is_empty() {
684 candidates.push(base.clone());
685 } else {
686 candidates.push(format!("{}.{}", base, ext));
687 candidates.push(format!("{}/index.{}", base, ext));
688 candidates.push(format!("{}/mod.{}", base, ext));
689 }
690 }
691 }
692 }
693
694 candidates
695 }
696
697 fn detect_cycles(&mut self) {
699 let sccs = tarjan_scc(&self.graph);
700
701 for scc in sccs {
702 if scc.len() > 1 {
703 let cycle: Vec<String> = scc
705 .iter()
706 .filter_map(|&idx| self.graph.node_weight(idx))
707 .map(|n| n.path.clone())
708 .collect();
709 self.circular_deps.push(cycle);
710 }
711 }
712 }
713
714 fn compute_importance(&mut self) {
716 let node_count = self.graph.node_count();
717 if node_count == 0 {
718 return;
719 }
720
721 let initial = 1.0 / node_count as f64;
723 let mut importance: Vec<f64> = vec![initial; node_count];
724 let mut new_importance: Vec<f64> = vec![0.0; node_count];
725
726 let damping = 0.85;
727 let iterations = 30;
728
729 for _ in 0..iterations {
731 let teleport = (1.0 - damping) / node_count as f64;
732 new_importance.fill(teleport);
733
734 for node_idx in self.graph.node_indices() {
735 let out_degree = self.graph.neighbors(node_idx).count();
736 if out_degree > 0 {
737 let contribution = damping * importance[node_idx.index()] / out_degree as f64;
738 for neighbor in self.graph.neighbors(node_idx) {
739 new_importance[neighbor.index()] += contribution;
740 }
741 }
742 }
743
744 std::mem::swap(&mut importance, &mut new_importance);
745 }
746
747 let max_importance = importance.iter().cloned().fold(0.0_f64, f64::max);
749 if max_importance > 0.0 {
750 for (idx, node) in self.graph.node_weights_mut().enumerate() {
751 node.importance = importance[idx] / max_importance;
752 }
753 }
754 }
755
756 fn path_to_module(path: &str) -> String {
758 let path = path
759 .trim_start_matches("src/")
760 .trim_start_matches("lib/")
761 .trim_start_matches("app/");
762
763 let path = if let Some(pos) = path.rfind('.') {
765 &path[..pos]
766 } else {
767 path
768 };
769
770 path.chars()
772 .map(|c| if c == '/' || c == '\\' { '.' } else { c })
773 .collect::<String>()
774 .trim_matches('.')
775 .to_owned()
776 }
777
778 pub fn get_importers(&self, file_path: &str) -> Vec<&str> {
782 if let Some(&node_idx) = self.path_to_node.get(file_path) {
783 self.graph
784 .neighbors_directed(node_idx, petgraph::Direction::Incoming)
785 .filter_map(|idx| self.graph.node_weight(idx))
786 .map(|n| n.path.as_str())
787 .collect()
788 } else {
789 vec![]
790 }
791 }
792
793 pub fn get_imports(&self, file_path: &str) -> Vec<&str> {
795 if let Some(&node_idx) = self.path_to_node.get(file_path) {
796 self.graph
797 .neighbors(node_idx)
798 .filter_map(|idx| self.graph.node_weight(idx))
799 .map(|n| n.path.as_str())
800 .collect()
801 } else {
802 vec![]
803 }
804 }
805
806 pub fn get_circular_deps(&self) -> &[Vec<String>] {
808 &self.circular_deps
809 }
810
811 pub fn get_external_deps(&self) -> &HashSet<String> {
813 &self.external_deps
814 }
815
816 pub fn get_most_important(&self, n: usize) -> Vec<(&str, f64)> {
818 let mut nodes: Vec<_> = self
819 .graph
820 .node_weights()
821 .map(|n| (n.path.as_str(), n.importance))
822 .collect();
823
824 nodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
825 nodes.truncate(n);
826 nodes
827 }
828
829 pub fn get_all_imports(&self) -> &[ResolvedImport] {
831 &self.imports
832 }
833
834 pub fn get_unresolved_imports(&self) -> Vec<&ResolvedImport> {
836 self.imports
837 .iter()
838 .filter(|i| i.to_path.is_none() && !i.is_external)
839 .collect()
840 }
841
842 pub fn stats(&self) -> DependencyStats {
844 DependencyStats {
845 total_files: self.graph.node_count(),
846 total_edges: self.graph.edge_count(),
847 external_deps: self.external_deps.len(),
848 circular_dep_groups: self.circular_deps.len(),
849 unresolved_imports: self
850 .imports
851 .iter()
852 .filter(|i| i.to_path.is_none() && !i.is_external)
853 .count(),
854 }
855 }
856}
857
858impl Default for DependencyGraph {
859 fn default() -> Self {
860 Self::new()
861 }
862}
863
864#[derive(Debug, Clone)]
866pub struct DependencyStats {
867 pub total_files: usize,
868 pub total_edges: usize,
869 pub external_deps: usize,
870 pub circular_dep_groups: usize,
871 pub unresolved_imports: usize,
872}
873
874struct ParsedImport {
876 specifier: String,
877 symbols: Vec<String>,
878 import_type: DependencyType,
879}
880
881fn normalize_path(path: &str) -> String {
883 let mut parts: Vec<&str> = Vec::new();
884
885 for part in path.split('/') {
886 match part {
887 "." | "" => continue,
888 ".." => {
889 parts.pop();
890 },
891 _ => parts.push(part),
892 }
893 }
894
895 parts.join("/")
896}
897
898#[cfg(test)]
899#[allow(clippy::str_to_string)]
900mod tests {
901 use super::*;
902
903 #[test]
904 fn test_path_to_module() {
905 assert_eq!(DependencyGraph::path_to_module("src/foo/bar.py"), "foo.bar");
906 assert_eq!(DependencyGraph::path_to_module("lib/utils.rs"), "utils");
907 assert_eq!(DependencyGraph::path_to_module("app/main.ts"), "main");
908 }
909
910 #[test]
911 fn test_normalize_path() {
912 assert_eq!(normalize_path("foo/./bar"), "foo/bar");
913 assert_eq!(normalize_path("foo/bar/../baz"), "foo/baz");
914 assert_eq!(normalize_path("./foo/bar"), "foo/bar");
915 }
916
917 #[test]
918 fn test_is_external_import() {
919 let graph = DependencyGraph::new();
920
921 assert!(graph.is_external_import("react"));
922 assert!(graph.is_external_import("numpy"));
923 assert!(graph.is_external_import("@types/node"));
924
925 assert!(!graph.is_external_import("./utils"));
926 assert!(!graph.is_external_import("../lib/foo"));
927 }
928
929 #[test]
930 fn test_build_graph() {
931 let repo = Repository::new("test", "/tmp/test");
932 let graph = DependencyGraph::build(&repo);
934
935 assert_eq!(graph.stats().total_files, 0);
936 }
937
938 #[test]
939 fn test_extract_string_literal() {
940 assert_eq!(
941 DependencyGraph::extract_string_literal("import 'react'"),
942 Some("react".to_string())
943 );
944 assert_eq!(
945 DependencyGraph::extract_string_literal("require(\"lodash\")"),
946 Some("lodash".to_string())
947 );
948 }
949}