1use crate::types::{RepoFile, Repository, SymbolKind};
7use once_cell::sync::Lazy;
8use petgraph::algo::tarjan_scc;
9use petgraph::graph::{DiGraph, NodeIndex};
10use regex::Regex;
11use std::collections::{HashMap, HashSet};
12use std::path::Path;
13
14static JS_REQUIRE_RE: Lazy<Regex> = Lazy::new(|| {
16 Regex::new(r#"require\s*\(\s*['"]([^'"]+)['"]\s*\)"#).expect("Invalid require regex")
17});
18static JS_IMPORT_RE: Lazy<Regex> = Lazy::new(|| {
19 Regex::new(r#"(?:from|import)\s*\(\s*['"]([^'"]+)['"]\s*\)|from\s+['"]([^'"]+)['"]"#)
20 .expect("Invalid import regex")
21});
22
23#[derive(Debug, Clone)]
25pub struct DependencyNode {
26 pub path: String,
28 pub module_name: String,
30 pub exports: Vec<String>,
32 pub tokens: u32,
34 pub importance: f64,
36}
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
40pub enum DependencyType {
41 Import,
43 Reexport,
45 TypeImport,
47 DynamicImport,
49 Inheritance,
51}
52
53#[derive(Debug, Clone)]
55pub struct DependencyEdge {
56 pub dep_type: DependencyType,
58 pub symbols: Vec<String>,
60 pub line: u32,
62 pub weight: f64,
64}
65
66#[derive(Debug, Clone)]
68pub struct ResolvedImport {
69 pub from_path: String,
71 pub to_path: Option<String>,
73 pub specifier: String,
75 pub symbols: Vec<String>,
77 pub import_type: DependencyType,
79 pub line: u32,
81 pub is_external: bool,
83}
84
85#[derive(Debug, Clone)]
87pub struct SymbolReference {
88 pub symbol_name: String,
90 pub file_path: String,
92 pub line: u32,
94 pub context: ReferenceContext,
96}
97
98#[derive(Debug, Clone, Copy, PartialEq, Eq)]
100pub enum ReferenceContext {
101 Call,
103 Type,
105 Assignment,
107 Parameter,
109 Return,
111 Reference,
113}
114
115pub struct DependencyGraph {
117 graph: DiGraph<DependencyNode, DependencyEdge>,
119 path_to_node: HashMap<String, NodeIndex>,
121 module_to_path: HashMap<String, String>,
123 symbol_to_file: HashMap<String, String>,
125 imports: Vec<ResolvedImport>,
127 external_deps: HashSet<String>,
129 circular_deps: Vec<Vec<String>>,
131}
132
133impl DependencyGraph {
134 pub fn new() -> Self {
136 Self {
137 graph: DiGraph::new(),
138 path_to_node: HashMap::new(),
139 module_to_path: HashMap::new(),
140 symbol_to_file: HashMap::new(),
141 imports: Vec::new(),
142 external_deps: HashSet::new(),
143 circular_deps: Vec::new(),
144 }
145 }
146
147 pub fn build(repo: &Repository) -> Self {
149 let mut graph = Self::new();
150
151 for file in &repo.files {
153 graph.add_file(file);
154 }
155
156 for file in &repo.files {
158 graph.extract_imports(file, repo);
159 }
160
161 graph.detect_cycles();
163
164 graph.compute_importance();
166
167 graph
168 }
169
170 fn add_file(&mut self, file: &RepoFile) {
172 let module_name = Self::path_to_module(&file.relative_path);
173
174 let exports: Vec<String> = file
176 .symbols
177 .iter()
178 .filter(|s| s.kind != SymbolKind::Import)
179 .map(|s| s.name.clone())
180 .collect();
181
182 for export in &exports {
184 let key = format!("{}::{}", module_name, export);
185 self.symbol_to_file.insert(key, file.relative_path.clone());
186 self.symbol_to_file
188 .entry(export.clone())
189 .or_insert_with(|| file.relative_path.clone());
190 }
191
192 let node = DependencyNode {
193 path: file.relative_path.clone(),
194 module_name: module_name.clone(),
195 exports,
196 tokens: file.token_count.claude,
197 importance: file.importance as f64,
198 };
199
200 let idx = self.graph.add_node(node);
201 self.path_to_node.insert(file.relative_path.clone(), idx);
202 self.module_to_path
203 .insert(module_name, file.relative_path.clone());
204 }
205
206 fn extract_imports(&mut self, file: &RepoFile, repo: &Repository) {
208 for symbol in &file.symbols {
210 if symbol.kind != SymbolKind::Import {
211 continue;
212 }
213
214 let parsed = self.parse_import_statement(&symbol.name);
216
217 for import in parsed {
218 let resolved = self.resolve_import(&import, file, repo);
220
221 if let Some(target_path) = &resolved.to_path {
222 if let (Some(&from_idx), Some(&to_idx)) = (
224 self.path_to_node.get(&file.relative_path),
225 self.path_to_node.get(target_path),
226 ) {
227 let edge = DependencyEdge {
228 dep_type: resolved.import_type,
229 symbols: resolved.symbols.clone(),
230 line: resolved.line,
231 weight: 1.0,
232 };
233 self.graph.add_edge(from_idx, to_idx, edge);
234 }
235 } else if resolved.is_external {
236 self.external_deps.insert(resolved.specifier.clone());
237 }
238
239 self.imports.push(resolved);
240 }
241 }
242
243 if let Some(content) = &file.content {
246 self.extract_imports_from_content(content, file);
247 }
248 }
249
250 fn extract_imports_from_content(&mut self, content: &str, file: &RepoFile) {
253 let is_js = file
254 .language
255 .as_deref()
256 .map(|l| matches!(l, "javascript" | "typescript" | "jsx" | "tsx"))
257 .unwrap_or(false);
258
259 if !is_js {
260 return;
261 }
262
263 let mut found_packages = HashSet::new();
264
265 for cap in JS_REQUIRE_RE.captures_iter(content) {
267 if let Some(pkg) = cap.get(1) {
268 let specifier = pkg.as_str();
269 if Self::is_external_specifier(specifier) {
270 let pkg_name = Self::extract_package_name(specifier);
271 found_packages.insert(pkg_name);
272 }
273 }
274 }
275
276 for cap in JS_IMPORT_RE.captures_iter(content) {
278 let specifier = cap.get(1).or_else(|| cap.get(2));
280 if let Some(pkg) = specifier {
281 let spec = pkg.as_str();
282 if Self::is_external_specifier(spec) {
283 let pkg_name = Self::extract_package_name(spec);
284 found_packages.insert(pkg_name);
285 }
286 }
287 }
288
289 for pkg in found_packages {
291 if Self::is_valid_package_name(&pkg) {
292 self.external_deps.insert(pkg);
293 }
294 }
295 }
296
297 fn is_external_specifier(spec: &str) -> bool {
299 !spec.starts_with('.') && !spec.starts_with('/')
300 }
301
302 fn extract_package_name(spec: &str) -> String {
304 if spec.starts_with('@') {
305 let parts: Vec<&str> = spec.splitn(3, '/').collect();
307 if parts.len() >= 2 {
308 format!("{}/{}", parts[0], parts[1])
309 } else {
310 spec.to_owned()
311 }
312 } else {
313 spec.split('/').next().unwrap_or(spec).to_owned()
315 }
316 }
317
318 fn is_valid_package_name(name: &str) -> bool {
320 if name.is_empty() || name.contains(' ') || name.contains('+') {
322 return false;
323 }
324 if name.contains("://") || name.starts_with('/') {
326 return false;
327 }
328 if name.len() < 2 {
330 return false;
331 }
332 let Some(first) = name.chars().next() else {
335 return false;
336 };
337 if !first.is_ascii_alphabetic() && first != '@' && first != '_' {
338 return false;
339 }
340 true
341 }
342
343 fn parse_import_statement(&self, import_text: &str) -> Vec<ParsedImport> {
345 let mut imports = Vec::new();
346 let text = import_text.trim();
347
348 let js_specifier = if text.starts_with("import ") || text.contains("require(") {
350 Self::extract_string_literal(text)
351 } else {
352 None
353 };
354
355 if text.contains("require(") || (text.starts_with("import ") && js_specifier.is_some()) {
357 if let Some(spec) = js_specifier {
358 let symbols = Self::extract_import_symbols(text);
359 let import_type = if text.contains("type ") {
360 DependencyType::TypeImport
361 } else if text.contains("import(") {
362 DependencyType::DynamicImport
363 } else {
364 DependencyType::Import
365 };
366 imports.push(ParsedImport { specifier: spec, symbols, import_type });
367 }
368 }
369 else if text.starts_with("import ") {
371 let module = text.trim_start_matches("import ").trim();
372 let module = module.split(" as ").next().unwrap_or(module);
374 for m in module.split(',') {
376 imports.push(ParsedImport {
377 specifier: m.trim().to_owned(),
378 symbols: vec![],
379 import_type: DependencyType::Import,
380 });
381 }
382 } else if text.starts_with("from ") {
383 if let Some(rest) = text.strip_prefix("from ") {
385 let parts: Vec<&str> = rest.splitn(2, " import ").collect();
386 if parts.len() == 2 {
387 let module = parts[0].trim();
388 let symbols: Vec<String> = parts[1]
389 .split(',')
390 .map(|s| s.split(" as ").next().unwrap_or(s).trim().to_owned())
391 .filter(|s| !s.is_empty())
392 .collect();
393 imports.push(ParsedImport {
394 specifier: module.to_owned(),
395 symbols,
396 import_type: DependencyType::Import,
397 });
398 }
399 }
400 }
401 else if text.starts_with("use ") {
403 let path = text.trim_start_matches("use ").trim_end_matches(';').trim();
404 if path.contains("::") {
406 let parts: Vec<&str> = path.rsplitn(2, "::").collect();
407 let base = if parts.len() == 2 { parts[1] } else { "" };
408 let symbols_part = parts[0].trim_matches(|c| c == '{' || c == '}');
409 let symbols: Vec<String> = symbols_part
410 .split(',')
411 .map(|s| s.split(" as ").next().unwrap_or(s).trim().to_owned())
412 .filter(|s| !s.is_empty())
413 .collect();
414 imports.push(ParsedImport {
415 specifier: base.to_owned(),
416 symbols,
417 import_type: DependencyType::Import,
418 });
419 } else {
420 imports.push(ParsedImport {
421 specifier: path.to_owned(),
422 symbols: vec![],
423 import_type: DependencyType::Import,
424 });
425 }
426 }
427 else if text.contains("import") {
429 let mut i = 0;
431 let chars: Vec<char> = text.chars().collect();
432 while i < chars.len() {
433 if chars[i] == '"' {
434 let start = i + 1;
435 i += 1;
436 while i < chars.len() && chars[i] != '"' {
437 i += 1;
438 }
439 if i < chars.len() {
440 let spec: String = chars[start..i].iter().collect();
441 imports.push(ParsedImport {
442 specifier: spec,
443 symbols: vec![],
444 import_type: DependencyType::Import,
445 });
446 }
447 }
448 i += 1;
449 }
450 }
451
452 imports
453 }
454
455 fn extract_string_literal(text: &str) -> Option<String> {
457 let chars: Vec<char> = text.chars().collect();
459 let mut i = 0;
460 while i < chars.len() {
461 if chars[i] == '"' || chars[i] == '\'' {
462 let quote = chars[i];
463 let start = i + 1;
464 i += 1;
465 while i < chars.len() && chars[i] != quote {
466 i += 1;
467 }
468 if i < chars.len() {
469 return Some(chars[start..i].iter().collect());
470 }
471 }
472 i += 1;
473 }
474 None
475 }
476
477 fn extract_import_symbols(text: &str) -> Vec<String> {
479 let mut symbols = Vec::new();
480
481 if let Some(start) = text.find('{') {
483 if let Some(end) = text.find('}') {
484 let inner = &text[start + 1..end];
485 for sym in inner.split(',') {
486 let sym = sym.split(" as ").next().unwrap_or(sym).trim();
487 if !sym.is_empty() && sym != "type" {
488 symbols.push(sym.to_owned());
489 }
490 }
491 }
492 }
493 else if text.starts_with("import ") {
495 let after_import = text.trim_start_matches("import ");
496 if let Some(default_name) = after_import.split_whitespace().next() {
497 if default_name != "type" && default_name != "*" && !default_name.starts_with('{') {
498 symbols.push(default_name.to_owned());
499 }
500 }
501 }
502
503 symbols
504 }
505
506 fn resolve_import(
508 &self,
509 import: &ParsedImport,
510 from_file: &RepoFile,
511 repo: &Repository,
512 ) -> ResolvedImport {
513 let specifier = &import.specifier;
514
515 if self.is_external_import(specifier) {
517 return ResolvedImport {
518 from_path: from_file.relative_path.clone(),
519 to_path: None,
520 specifier: specifier.clone(),
521 symbols: import.symbols.clone(),
522 import_type: import.import_type,
523 line: 0,
524 is_external: true,
525 };
526 }
527
528 let base_dir = Path::new(&from_file.relative_path)
530 .parent()
531 .map(|p| p.to_string_lossy().to_string())
532 .unwrap_or_default();
533
534 let candidates =
536 self.generate_resolution_candidates(specifier, &base_dir, &from_file.language);
537
538 for candidate in candidates {
540 if repo.files.iter().any(|f| f.relative_path == candidate) {
541 return ResolvedImport {
542 from_path: from_file.relative_path.clone(),
543 to_path: Some(candidate),
544 specifier: specifier.clone(),
545 symbols: import.symbols.clone(),
546 import_type: import.import_type,
547 line: 0,
548 is_external: false,
549 };
550 }
551 }
552
553 if let Some(path) = self.module_to_path.get(specifier) {
555 return ResolvedImport {
556 from_path: from_file.relative_path.clone(),
557 to_path: Some(path.clone()),
558 specifier: specifier.clone(),
559 symbols: import.symbols.clone(),
560 import_type: import.import_type,
561 line: 0,
562 is_external: false,
563 };
564 }
565
566 ResolvedImport {
568 from_path: from_file.relative_path.clone(),
569 to_path: None,
570 specifier: specifier.clone(),
571 symbols: import.symbols.clone(),
572 import_type: import.import_type,
573 line: 0,
574 is_external: false,
575 }
576 }
577
578 fn is_external_import(&self, specifier: &str) -> bool {
580 if specifier.starts_with('.') || specifier.starts_with('/') {
582 return false;
583 }
584
585 let external_prefixes = [
587 "react",
588 "vue",
589 "angular",
590 "express",
591 "lodash",
592 "axios",
593 "std",
594 "core",
595 "alloc",
596 "collections", "fmt",
598 "os",
599 "io",
600 "net",
601 "http",
602 "sync",
603 "context", "java.",
605 "javax.",
606 "org.apache",
607 "com.google", "numpy",
609 "pandas",
610 "torch",
611 "tensorflow",
612 "sklearn", ];
614
615 for prefix in external_prefixes {
616 if specifier.starts_with(prefix) {
617 return true;
618 }
619 }
620
621 if specifier.starts_with('@') {
623 return true;
624 }
625
626 !specifier.contains('/') && !specifier.contains('\\')
628 }
629
630 fn generate_resolution_candidates(
632 &self,
633 specifier: &str,
634 base_dir: &str,
635 language: &Option<String>,
636 ) -> Vec<String> {
637 let mut candidates = Vec::new();
638
639 let extensions = match language.as_deref() {
641 Some("python") => vec!["py", "pyi"],
642 Some("javascript") | Some("jsx") => vec!["js", "jsx", "mjs", "cjs", "ts", "tsx"],
643 Some("typescript") | Some("tsx") => vec!["ts", "tsx", "js", "jsx"],
644 Some("rust") => vec!["rs"],
645 Some("go") => vec!["go"],
646 Some("java") => vec!["java"],
647 _ => vec![""],
648 };
649
650 if specifier.starts_with('.') {
652 let resolved = normalize_path(&format!("{}/{}", base_dir, specifier));
653
654 for ext in &extensions {
656 if ext.is_empty() {
657 candidates.push(resolved.clone());
658 } else {
659 candidates.push(format!("{}.{}", resolved, ext));
660 }
661 }
662
663 for ext in &extensions {
665 if !ext.is_empty() {
666 candidates.push(format!("{}/index.{}", resolved, ext));
667 candidates.push(format!("{}/mod.{}", resolved, ext)); candidates.push(format!("{}/__init__.{}", resolved, ext)); }
670 }
671 } else {
672 let prefixes = ["src", "lib", "app", "pkg", "internal", ""];
675
676 for prefix in prefixes {
677 let base = if prefix.is_empty() {
678 specifier.to_owned()
679 } else {
680 format!("{}/{}", prefix, specifier)
681 };
682
683 for ext in &extensions {
684 if ext.is_empty() {
685 candidates.push(base.clone());
686 } else {
687 candidates.push(format!("{}.{}", base, ext));
688 candidates.push(format!("{}/index.{}", base, ext));
689 candidates.push(format!("{}/mod.{}", base, ext));
690 }
691 }
692 }
693 }
694
695 candidates
696 }
697
698 fn detect_cycles(&mut self) {
700 let sccs = tarjan_scc(&self.graph);
701
702 for scc in sccs {
703 if scc.len() > 1 {
704 let cycle: Vec<String> = scc
706 .iter()
707 .filter_map(|&idx| self.graph.node_weight(idx))
708 .map(|n| n.path.clone())
709 .collect();
710 self.circular_deps.push(cycle);
711 }
712 }
713 }
714
715 fn compute_importance(&mut self) {
717 let node_count = self.graph.node_count();
718 if node_count == 0 {
719 return;
720 }
721
722 let initial = 1.0 / node_count as f64;
724 let mut importance: Vec<f64> = vec![initial; node_count];
725 let mut new_importance: Vec<f64> = vec![0.0; node_count];
726
727 let damping = 0.85;
728 let iterations = 30;
729
730 for _ in 0..iterations {
732 let teleport = (1.0 - damping) / node_count as f64;
733 new_importance.fill(teleport);
734
735 for node_idx in self.graph.node_indices() {
736 let out_degree = self.graph.neighbors(node_idx).count();
737 if out_degree > 0 {
738 let contribution = damping * importance[node_idx.index()] / out_degree as f64;
739 for neighbor in self.graph.neighbors(node_idx) {
740 new_importance[neighbor.index()] += contribution;
741 }
742 }
743 }
744
745 std::mem::swap(&mut importance, &mut new_importance);
746 }
747
748 let max_importance = importance.iter().cloned().fold(0.0_f64, f64::max);
750 if max_importance > 0.0 {
751 for (idx, node) in self.graph.node_weights_mut().enumerate() {
752 node.importance = importance[idx] / max_importance;
753 }
754 }
755 }
756
757 fn path_to_module(path: &str) -> String {
759 let path = path
760 .trim_start_matches("src/")
761 .trim_start_matches("lib/")
762 .trim_start_matches("app/");
763
764 let path = if let Some(pos) = path.rfind('.') {
766 &path[..pos]
767 } else {
768 path
769 };
770
771 path.chars()
773 .map(|c| if c == '/' || c == '\\' { '.' } else { c })
774 .collect::<String>()
775 .trim_matches('.')
776 .to_owned()
777 }
778
779 pub fn get_importers(&self, file_path: &str) -> Vec<&str> {
783 if let Some(&node_idx) = self.path_to_node.get(file_path) {
784 self.graph
785 .neighbors_directed(node_idx, petgraph::Direction::Incoming)
786 .filter_map(|idx| self.graph.node_weight(idx))
787 .map(|n| n.path.as_str())
788 .collect()
789 } else {
790 vec![]
791 }
792 }
793
794 pub fn get_imports(&self, file_path: &str) -> Vec<&str> {
796 if let Some(&node_idx) = self.path_to_node.get(file_path) {
797 self.graph
798 .neighbors(node_idx)
799 .filter_map(|idx| self.graph.node_weight(idx))
800 .map(|n| n.path.as_str())
801 .collect()
802 } else {
803 vec![]
804 }
805 }
806
807 pub fn get_circular_deps(&self) -> &[Vec<String>] {
809 &self.circular_deps
810 }
811
812 pub fn get_external_deps(&self) -> &HashSet<String> {
814 &self.external_deps
815 }
816
817 pub fn get_most_important(&self, n: usize) -> Vec<(&str, f64)> {
819 let mut nodes: Vec<_> = self
820 .graph
821 .node_weights()
822 .map(|n| (n.path.as_str(), n.importance))
823 .collect();
824
825 nodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
826 nodes.truncate(n);
827 nodes
828 }
829
830 pub fn get_all_imports(&self) -> &[ResolvedImport] {
832 &self.imports
833 }
834
835 pub fn get_unresolved_imports(&self) -> Vec<&ResolvedImport> {
837 self.imports
838 .iter()
839 .filter(|i| i.to_path.is_none() && !i.is_external)
840 .collect()
841 }
842
843 pub fn stats(&self) -> DependencyStats {
845 DependencyStats {
846 total_files: self.graph.node_count(),
847 total_edges: self.graph.edge_count(),
848 external_deps: self.external_deps.len(),
849 circular_dep_groups: self.circular_deps.len(),
850 unresolved_imports: self
851 .imports
852 .iter()
853 .filter(|i| i.to_path.is_none() && !i.is_external)
854 .count(),
855 }
856 }
857}
858
859impl Default for DependencyGraph {
860 fn default() -> Self {
861 Self::new()
862 }
863}
864
865#[derive(Debug, Clone)]
867pub struct DependencyStats {
868 pub total_files: usize,
869 pub total_edges: usize,
870 pub external_deps: usize,
871 pub circular_dep_groups: usize,
872 pub unresolved_imports: usize,
873}
874
875struct ParsedImport {
877 specifier: String,
878 symbols: Vec<String>,
879 import_type: DependencyType,
880}
881
882fn normalize_path(path: &str) -> String {
884 let mut parts: Vec<&str> = Vec::new();
885
886 for part in path.split('/') {
887 match part {
888 "." | "" => continue,
889 ".." => {
890 parts.pop();
891 },
892 _ => parts.push(part),
893 }
894 }
895
896 parts.join("/")
897}
898
899#[cfg(test)]
900#[allow(clippy::str_to_string)]
901mod tests {
902 use super::*;
903
904 #[test]
905 fn test_path_to_module() {
906 assert_eq!(DependencyGraph::path_to_module("src/foo/bar.py"), "foo.bar");
907 assert_eq!(DependencyGraph::path_to_module("lib/utils.rs"), "utils");
908 assert_eq!(DependencyGraph::path_to_module("app/main.ts"), "main");
909 }
910
911 #[test]
912 fn test_normalize_path() {
913 assert_eq!(normalize_path("foo/./bar"), "foo/bar");
914 assert_eq!(normalize_path("foo/bar/../baz"), "foo/baz");
915 assert_eq!(normalize_path("./foo/bar"), "foo/bar");
916 }
917
918 #[test]
919 fn test_is_external_import() {
920 let graph = DependencyGraph::new();
921
922 assert!(graph.is_external_import("react"));
923 assert!(graph.is_external_import("numpy"));
924 assert!(graph.is_external_import("@types/node"));
925
926 assert!(!graph.is_external_import("./utils"));
927 assert!(!graph.is_external_import("../lib/foo"));
928 }
929
930 #[test]
931 fn test_build_graph() {
932 let repo = Repository::new("test", "/tmp/test");
933 let graph = DependencyGraph::build(&repo);
935
936 assert_eq!(graph.stats().total_files, 0);
937 }
938
939 #[test]
940 fn test_extract_string_literal() {
941 assert_eq!(
942 DependencyGraph::extract_string_literal("import 'react'"),
943 Some("react".to_string())
944 );
945 assert_eq!(
946 DependencyGraph::extract_string_literal("require(\"lodash\")"),
947 Some("lodash".to_string())
948 );
949 }
950}