scribe_analysis/heuristics/
import_analysis.rs1use super::ScanResult;
15use crate::ast_import_parser::{ImportLanguage, SimpleAstParser, SimpleImport};
16use scribe_core::Result;
17use std::collections::HashMap;
18use std::path::Path;
19
20#[derive(Debug, Clone)]
22pub struct ImportGraph {
23 nodes: Vec<String>,
25 path_to_index: HashMap<String, usize>,
27 dependencies: Vec<Vec<usize>>,
29 dependents: Vec<Vec<usize>>,
31 pagerank_scores: Option<Vec<f64>>,
33}
34
35#[derive(Debug)]
37struct ImportKeyMap {
38 exact: HashMap<String, usize>,
40 suffix: HashMap<String, usize>,
42}
43
44impl ImportKeyMap {
45 fn new<T>(files: &[T]) -> Self
46 where
47 T: ScanResult,
48 {
49 let mut exact = HashMap::with_capacity(files.len());
50 let mut suffix = HashMap::with_capacity(files.len() * 2);
51
52 for (i, file) in files.iter().enumerate() {
53 let path = file.path();
54
55 for key in Self::normalized_keys(path) {
57 exact.insert(key, i);
58 }
59
60 for key in Self::suffix_keys(path) {
62 suffix.insert(key, i);
63 }
64 }
65
66 Self { exact, suffix }
67 }
68
69 fn resolve_import(&self, normalized_import: &str) -> Option<usize> {
70 if let Some(&idx) = self.exact.get(normalized_import) {
72 return Some(idx);
73 }
74
75 self.suffix.get(normalized_import).copied()
77 }
78
79 fn normalized_keys(path: &str) -> Vec<String> {
80 let mut keys = Vec::new();
81
82 keys.push(path.to_string());
84
85 if path.starts_with("./") {
87 keys.push(path[2..].to_string());
88 }
89
90 if let Some(stem) = Path::new(path).file_stem().and_then(|s| s.to_str()) {
92 if let Some(parent) = Path::new(path).parent().and_then(|p| p.to_str()) {
93 if parent != "" {
94 keys.push(format!("{}/{}", parent, stem));
95 } else {
96 keys.push(stem.to_string());
97 }
98 }
99 }
100
101 keys
102 }
103
104 fn suffix_keys(path: &str) -> Vec<String> {
105 let mut keys = Vec::new();
106
107 if let Some(stem) = Path::new(path).file_stem().and_then(|s| s.to_str()) {
109 keys.push(stem.to_string());
110 }
111
112 if let Some(filename) = Path::new(path).file_name().and_then(|s| s.to_str()) {
114 keys.push(filename.to_string());
115 }
116
117 keys
118 }
119}
120
121#[derive(Debug)]
123pub struct ImportGraphBuilder {
124 normalization_cache: HashMap<String, String>,
126 ast_parser: SimpleAstParser,
128}
129
130#[derive(Debug)]
132pub struct CentralityCalculator {
133 damping_factor: f64,
135 max_iterations: usize,
137 tolerance: f64,
139}
140
141impl Default for ImportGraphBuilder {
142 fn default() -> Self {
143 Self::new().expect("Failed to create ImportGraphBuilder")
144 }
145}
146
147impl ImportGraphBuilder {
148 pub fn new() -> Result<Self> {
150 Ok(Self {
151 normalization_cache: HashMap::new(),
152 ast_parser: SimpleAstParser::new()?,
153 })
154 }
155
156 pub fn build_graph<T>(&mut self, files: &[T]) -> Result<ImportGraph>
158 where
159 T: ScanResult,
160 {
161 let mut graph = ImportGraph::new();
162
163 for file in files {
165 graph.add_node(file.path().to_string());
166 }
167
168 let keymap = ImportKeyMap::new(files);
170
171 for (file_idx, file) in files.iter().enumerate() {
173 if let Some(imports) = file.imports() {
174 for import_path in imports {
175 let normalized_import = self.normalize_import_path(import_path);
176 if let Some(target_idx) = keymap.resolve_import(&normalized_import) {
177 graph.add_edge(file_idx, target_idx);
178 }
179 }
180 }
181 }
182
183 Ok(graph)
184 }
185
186 fn normalize_import_path(&mut self, import_path: &str) -> String {
190 if let Some(cached) = self.normalization_cache.get(import_path) {
192 return cached.clone();
193 }
194
195 let normalized = self.perform_normalization(import_path);
196 self.normalization_cache
197 .insert(import_path.to_string(), normalized.clone());
198 normalized
199 }
200
201 fn perform_normalization(&self, import_path: &str) -> String {
203 let mut normalized = import_path.to_string();
204
205 normalized = normalized
207 .trim_matches(|c| c == '"' || c == '\'' || c == ' ')
208 .to_string();
209
210 normalized = normalized.replace('\\', "/");
212
213 if let Some(dot_pos) = normalized.rfind('.') {
215 let extension = &normalized[dot_pos..];
217 match extension {
218 ".json" | ".xml" | ".yaml" | ".toml" | ".css" | ".scss" => {
219 }
221 _ => {
222 normalized.truncate(dot_pos);
224 }
225 }
226 }
227
228 if normalized.starts_with("./") || normalized.starts_with("../") {
230 normalized = self.normalize_relative_path(&normalized);
232 }
233
234 normalized = normalized.replace("::", "/").replace('.', "/");
236
237 normalized = self.resolve_import_aliases(&normalized);
239
240 normalized
241 }
242
243 fn normalize_relative_path(&self, path: &str) -> String {
245 let components: Vec<&str> = path.split('/').collect();
247 let mut normalized = Vec::new();
248
249 for component in components {
250 match component {
251 "" | "." => continue,
252 ".." => {
253 normalized.pop();
254 }
255 _ => normalized.push(component),
256 }
257 }
258
259 normalized.join("/")
260 }
261
262 fn resolve_import_aliases(&self, import_path: &str) -> String {
264 match import_path {
266 "fs" | "path" | "http" | "https" | "url" | "crypto" | "os" => {
268 return import_path.to_string(); }
270 path if !path.contains('/') && !path.starts_with('.') => {
272 return import_path.to_string(); }
274 _ => {}
275 }
276
277 let mut resolved = import_path.to_string();
278
279 if resolved.starts_with("@/") {
281 resolved = resolved.replacen("@/", "src/", 1);
282 }
283
284 if resolved.starts_with("~") {
285 resolved = resolved.replacen("~", "src", 1);
286 }
287
288 resolved
289 }
290
291 fn detect_language_from_path(&self, file_path: &str) -> Option<ImportLanguage> {
293 let extension = Path::new(file_path)
294 .extension()
295 .and_then(|ext| ext.to_str())
296 .unwrap_or("");
297
298 ImportLanguage::from_extension(extension)
299 }
300
301 pub fn extract_imports(&self, content: &str, file_path: &str) -> Vec<String> {
303 let language = match self.detect_language_from_path(file_path) {
305 Some(lang) => lang,
306 None => return Vec::new(),
307 };
308
309 match self.ast_parser.extract_imports(content, language) {
311 Ok(simple_imports) => simple_imports
312 .into_iter()
313 .map(|import| import.module)
314 .collect(),
315 Err(_) => {
316 Vec::new()
318 }
319 }
320 }
321}
322
323impl ImportGraph {
324 pub fn new() -> Self {
326 Self {
327 nodes: Vec::new(),
328 path_to_index: HashMap::new(),
329 dependencies: Vec::new(),
330 dependents: Vec::new(),
331 pagerank_scores: None,
332 }
333 }
334
335 pub fn add_node(&mut self, path: String) -> usize {
337 if let Some(&existing_idx) = self.path_to_index.get(&path) {
338 return existing_idx;
339 }
340
341 let index = self.nodes.len();
342 self.nodes.push(path.clone());
343 self.path_to_index.insert(path, index);
344 self.dependencies.push(Vec::new());
345 self.dependents.push(Vec::new());
346
347 self.pagerank_scores = None;
349
350 index
351 }
352
353 pub fn add_edge(&mut self, source: usize, target: usize) {
355 if source < self.dependencies.len() && target < self.dependents.len() && source != target {
356 if !self.dependencies[source].contains(&target) {
358 self.dependencies[source].push(target);
359 self.dependents[target].push(source);
360
361 self.pagerank_scores = None;
363 }
364 }
365 }
366
367 pub fn get_node_degrees(&self, path: &str) -> Option<(usize, usize)> {
369 let index = *self.path_to_index.get(path)?;
370 let in_degree = self.dependents[index].len();
371 let out_degree = self.dependencies[index].len();
372 Some((in_degree, out_degree))
373 }
374
375 pub fn get_pagerank_scores(&mut self) -> Result<&[f64]> {
377 if self.pagerank_scores.is_none() {
378 let calculator = CentralityCalculator::default();
379 let scores = calculator.calculate_pagerank(self)?;
380 self.pagerank_scores = Some(scores);
381 }
382
383 Ok(self.pagerank_scores.as_ref().unwrap())
384 }
385
386 pub fn get_pagerank_score(&mut self, path: &str) -> Result<f64> {
388 let index = *self.path_to_index.get(path).unwrap_or(&0);
389 let scores = self.get_pagerank_scores()?;
390 Ok(*scores.get(index).unwrap_or(&0.0))
391 }
392
393 pub fn stats(&self) -> GraphStats {
395 let node_count = self.nodes.len();
396 let edge_count: usize = self.dependencies.iter().map(|deps| deps.len()).sum();
397
398 let in_degrees: Vec<usize> = self.dependents.iter().map(|deps| deps.len()).collect();
399 let out_degrees: Vec<usize> = self.dependencies.iter().map(|deps| deps.len()).collect();
400
401 let avg_in_degree = if node_count > 0 {
402 in_degrees.iter().sum::<usize>() as f64 / node_count as f64
403 } else {
404 0.0
405 };
406
407 let avg_out_degree = if node_count > 0 {
408 out_degrees.iter().sum::<usize>() as f64 / node_count as f64
409 } else {
410 0.0
411 };
412
413 let max_in_degree = in_degrees.iter().max().cloned().unwrap_or(0);
414 let max_out_degree = out_degrees.iter().max().cloned().unwrap_or(0);
415
416 let density = if node_count > 1 {
417 edge_count as f64 / (node_count * (node_count - 1)) as f64
418 } else {
419 0.0
420 };
421
422 GraphStats {
423 node_count,
424 edge_count,
425 avg_in_degree,
426 avg_out_degree,
427 max_in_degree,
428 max_out_degree,
429 density,
430 }
431 }
432}
433
434impl Default for ImportGraph {
435 fn default() -> Self {
436 Self::new()
437 }
438}
439
440impl CentralityCalculator {
441 pub fn new() -> Self {
443 Self {
444 damping_factor: 0.85,
445 max_iterations: 100,
446 tolerance: 1e-6,
447 }
448 }
449
450 pub fn with_params(damping_factor: f64, max_iterations: usize, tolerance: f64) -> Self {
452 Self {
453 damping_factor,
454 max_iterations,
455 tolerance,
456 }
457 }
458
459 pub fn calculate_pagerank(&self, graph: &ImportGraph) -> Result<Vec<f64>> {
461 let n = graph.nodes.len();
462 if n == 0 {
463 return Ok(Vec::new());
464 }
465
466 let mut scores = vec![1.0 / n as f64; n];
468 let mut new_scores = vec![0.0; n];
469
470 for _iteration in 0..self.max_iterations {
471 for (i, new_score) in new_scores.iter_mut().enumerate() {
473 let mut rank_sum = 0.0;
474
475 for &source in &graph.dependents[i] {
477 let out_degree = graph.dependencies[source].len();
478 if out_degree > 0 {
479 rank_sum += scores[source] / out_degree as f64;
480 }
481 }
482
483 *new_score =
485 (1.0 - self.damping_factor) / n as f64 + self.damping_factor * rank_sum;
486 }
487
488 let mut max_diff = 0.0;
490 for i in 0..n {
491 let diff = (new_scores[i] - scores[i]).abs();
492 if diff > max_diff {
493 max_diff = diff;
494 }
495 }
496
497 std::mem::swap(&mut scores, &mut new_scores);
499
500 if max_diff < self.tolerance {
501 break;
502 }
503 }
504
505 Ok(scores)
506 }
507}
508
509impl Default for CentralityCalculator {
510 fn default() -> Self {
511 Self::new()
512 }
513}
514
515#[derive(Debug, Clone)]
517pub struct GraphStats {
518 pub node_count: usize,
519 pub edge_count: usize,
520 pub avg_in_degree: f64,
521 pub avg_out_degree: f64,
522 pub max_in_degree: usize,
523 pub max_out_degree: usize,
524 pub density: f64,
525}
526
527pub fn import_matches_file(import_path: &str, file_path: &str) -> bool {
529 let normalized_import = normalize_for_matching(import_path);
531 let normalized_file = normalize_for_matching(file_path);
532
533 if normalized_import == normalized_file {
535 return true;
536 }
537
538 if normalized_file.contains(&normalized_import) {
540 return true;
541 }
542
543 if normalized_file.ends_with(&normalized_import) {
545 return true;
546 }
547
548 let module_import = normalized_import.replace("::", "/");
550 if normalized_file.contains(&module_import) || module_import.contains(&normalized_file) {
551 return true;
552 }
553
554 if normalized_import.starts_with("std::") {
556 let std_path = normalized_import.replace("::", "/");
557 if normalized_file
558 .replace("_", "")
559 .contains(&std_path.replace("_", ""))
560 {
561 return true;
562 }
563 }
564
565 if is_index_file(file_path) {
567 let directory = std::path::Path::new(file_path)
568 .parent()
569 .and_then(|p| p.to_str())
570 .unwrap_or("");
571 let normalized_dir = normalize_for_matching(directory);
572 if normalized_import == normalized_dir {
573 return true;
574 }
575 }
576
577 false
578}
579
580fn normalize_for_matching(path: &str) -> String {
582 let mut normalized = path.to_lowercase();
583
584 if normalized.starts_with("./") {
586 normalized = normalized[2..].to_string();
587 }
588
589 if normalized.starts_with("src/") {
590 normalized = normalized[4..].to_string();
591 }
592
593 if let Some(dot_pos) = normalized.rfind('.') {
595 normalized.truncate(dot_pos);
596 }
597
598 normalized = normalized.replace('\\', "/");
600
601 normalized.trim_end_matches('/').to_string()
603}
604
605fn is_index_file(file_path: &str) -> bool {
607 let file_name = std::path::Path::new(file_path)
608 .file_stem()
609 .and_then(|stem| stem.to_str())
610 .unwrap_or("");
611
612 matches!(file_name, "index" | "__init__" | "mod" | "main")
613}
614
615#[cfg(test)]
616mod tests {
617 use super::super::DocumentAnalysis;
618 use super::*;
619
620 #[derive(Debug)]
622 struct MockScanResult {
623 path: String,
624 relative_path: String,
625 depth: usize,
626 imports: Option<Vec<String>>,
627 }
628
629 impl MockScanResult {
630 fn new(path: &str, imports: Vec<&str>) -> Self {
631 Self {
632 path: path.to_string(),
633 relative_path: path.to_string(),
634 depth: path.matches('/').count(),
635 imports: Some(imports.iter().map(|s| s.to_string()).collect()),
636 }
637 }
638 }
639
640 impl ScanResult for MockScanResult {
641 fn path(&self) -> &str {
642 &self.path
643 }
644 fn relative_path(&self) -> &str {
645 &self.relative_path
646 }
647 fn depth(&self) -> usize {
648 self.depth
649 }
650 fn is_docs(&self) -> bool {
651 false
652 }
653 fn is_readme(&self) -> bool {
654 false
655 }
656 fn is_test(&self) -> bool {
657 false
658 }
659 fn is_entrypoint(&self) -> bool {
660 false
661 }
662 fn has_examples(&self) -> bool {
663 false
664 }
665 fn priority_boost(&self) -> f64 {
666 0.0
667 }
668 fn churn_score(&self) -> f64 {
669 0.0
670 }
671 fn centrality_in(&self) -> f64 {
672 0.0
673 }
674 fn imports(&self) -> Option<&[String]> {
675 self.imports.as_deref()
676 }
677 fn doc_analysis(&self) -> Option<&DocumentAnalysis> {
678 None
679 }
680 }
681
682 #[test]
683 fn test_import_graph_creation() {
684 let mut graph = ImportGraph::new();
685
686 let idx1 = graph.add_node("file1.rs".to_string());
687 let idx2 = graph.add_node("file2.rs".to_string());
688
689 assert_eq!(idx1, 0);
690 assert_eq!(idx2, 1);
691 assert_eq!(graph.nodes.len(), 2);
692
693 graph.add_edge(idx1, idx2);
694 assert_eq!(graph.dependencies[idx1].len(), 1);
695 assert_eq!(graph.dependents[idx2].len(), 1);
696 }
697
698 #[test]
699 fn test_graph_builder() {
700 let files = vec![
701 MockScanResult::new("src/main.rs", vec!["src/lib.rs", "src/utils.rs"]),
702 MockScanResult::new("src/lib.rs", vec!["src/utils.rs"]),
703 MockScanResult::new("src/utils.rs", vec![]),
704 ];
705
706 let mut builder = ImportGraphBuilder::new().unwrap();
707 let result = builder.build_graph(&files);
708 assert!(result.is_ok());
709
710 let graph = result.unwrap();
711 assert_eq!(graph.nodes.len(), 3);
712
713 let stats = graph.stats();
714 assert_eq!(stats.node_count, 3);
715 assert!(stats.edge_count > 0);
716 }
717
718 #[test]
719 fn test_import_matching() {
720 assert!(import_matches_file("src/utils", "src/utils.rs"));
722 assert!(import_matches_file("./lib", "src/lib.js"));
723
724 assert!(import_matches_file(
726 "std::collections::HashMap",
727 "std/collections/hash_map.rs"
728 ));
729
730 assert!(import_matches_file(
732 "src/components",
733 "src/components/index.js"
734 ));
735
736 assert!(!import_matches_file("completely_different", "src/utils.rs"));
738 }
739
740 #[test]
741 fn test_path_normalization() {
742 let mut builder = ImportGraphBuilder::new().unwrap();
743
744 let normalized1 = builder.normalize_import_path("\"./utils.js\"");
746 assert_eq!(normalized1, "utils");
747
748 let normalized2 = builder.normalize_import_path("../lib/helper.ts");
749 assert!(normalized2.contains("helper"));
750
751 let normalized3 = builder.normalize_import_path("@/components/Button");
752 assert!(normalized3.contains("src/components/Button"));
753 }
754
755 #[test]
756 fn test_pagerank_calculation() {
757 let mut graph = ImportGraph::new();
758
759 let idx_a = graph.add_node("A".to_string());
761 let idx_b = graph.add_node("B".to_string());
762 let idx_c = graph.add_node("C".to_string());
763
764 graph.add_edge(idx_a, idx_b);
765 graph.add_edge(idx_b, idx_c);
766 graph.add_edge(idx_a, idx_c);
767
768 let scores = graph.get_pagerank_scores();
769 assert!(scores.is_ok());
770
771 let scores = scores.unwrap();
772 assert_eq!(scores.len(), 3);
773
774 assert!(scores[idx_c] > scores[idx_a]);
776 assert!(scores[idx_c] > scores[idx_b]);
777 }
778
779 #[test]
780 fn test_import_extraction() {
781 let builder = ImportGraphBuilder::new().unwrap();
782
783 let js_content = r#"
785 import { Component } from 'react';
786 import utils from './utils.js';
787 const fs = require('fs');
788 "#;
789
790 let imports = builder.extract_imports(js_content, "test.js");
791 assert!(imports.len() >= 2);
792 assert!(imports.contains(&"react".to_string()));
793 assert!(imports.contains(&"./utils.js".to_string()));
794
795 let rust_content = r#"
797 use std::collections::HashMap;
798 use crate::utils::helper;
799 mod tests;
800 "#;
801
802 let imports = builder.extract_imports(rust_content, "test.rs");
803 assert!(imports.len() >= 2);
804 assert!(imports.contains(&"std::collections::HashMap".to_string()));
805 assert!(imports.contains(&"crate::utils::helper".to_string()));
806 }
807
808 #[test]
809 fn test_centrality_calculator() {
810 let mut graph = ImportGraph::new();
811
812 let center = graph.add_node("center.rs".to_string());
814 for i in 1..=5 {
815 let node = graph.add_node(format!("node{}.rs", i));
816 graph.add_edge(node, center); }
818
819 let calculator = CentralityCalculator::default();
820 let scores = calculator.calculate_pagerank(&graph);
821 assert!(scores.is_ok());
822
823 let scores = scores.unwrap();
824 assert_eq!(scores.len(), 6);
825
826 assert!(scores[center] > scores[1]); }
829
830 #[test]
831 fn test_graph_statistics() {
832 let mut graph = ImportGraph::new();
833
834 for i in 0..5 {
835 graph.add_node(format!("file{}.rs", i));
836 }
837
838 graph.add_edge(0, 1);
840 graph.add_edge(1, 2);
841 graph.add_edge(2, 3);
842 graph.add_edge(0, 4);
843
844 let stats = graph.stats();
845 assert_eq!(stats.node_count, 5);
846 assert_eq!(stats.edge_count, 4);
847 assert!(stats.avg_out_degree > 0.0);
848 assert!(stats.density > 0.0);
849 assert!(stats.density < 1.0);
850 }
851}