scribe_analysis/heuristics/
import_analysis.rs1use std::collections::HashMap;
15use std::path::Path;
16use scribe_core::Result;
17use super::ScanResult;
18use crate::ast_import_parser::{SimpleAstParser, ImportLanguage, SimpleImport};
19
20#[derive(Debug, Clone)]
22pub struct ImportGraph {
23 nodes: Vec<String>,
25 path_to_index: HashMap<String, usize>,
27 dependencies: Vec<Vec<usize>>,
29 dependents: Vec<Vec<usize>>,
31 pagerank_scores: Option<Vec<f64>>,
33}
34
35#[derive(Debug)]
37struct ImportKeyMap {
38 exact: HashMap<String, usize>,
40 suffix: HashMap<String, usize>,
42}
43
44impl ImportKeyMap {
45 fn new<T>(files: &[T]) -> Self
46 where T: ScanResult
47 {
48 let mut exact = HashMap::with_capacity(files.len());
49 let mut suffix = HashMap::with_capacity(files.len() * 2);
50
51 for (i, file) in files.iter().enumerate() {
52 let path = file.path();
53
54 for key in Self::normalized_keys(path) {
56 exact.insert(key, i);
57 }
58
59 for key in Self::suffix_keys(path) {
61 suffix.insert(key, i);
62 }
63 }
64
65 Self { exact, suffix }
66 }
67
68 fn resolve_import(&self, normalized_import: &str) -> Option<usize> {
69 if let Some(&idx) = self.exact.get(normalized_import) {
71 return Some(idx);
72 }
73
74 self.suffix.get(normalized_import).copied()
76 }
77
78 fn normalized_keys(path: &str) -> Vec<String> {
79 let mut keys = Vec::new();
80
81 keys.push(path.to_string());
83
84 if path.starts_with("./") {
86 keys.push(path[2..].to_string());
87 }
88
89 if let Some(stem) = Path::new(path).file_stem().and_then(|s| s.to_str()) {
91 if let Some(parent) = Path::new(path).parent().and_then(|p| p.to_str()) {
92 if parent != "" {
93 keys.push(format!("{}/{}", parent, stem));
94 } else {
95 keys.push(stem.to_string());
96 }
97 }
98 }
99
100 keys
101 }
102
103 fn suffix_keys(path: &str) -> Vec<String> {
104 let mut keys = Vec::new();
105
106 if let Some(stem) = Path::new(path).file_stem().and_then(|s| s.to_str()) {
108 keys.push(stem.to_string());
109 }
110
111 if let Some(filename) = Path::new(path).file_name().and_then(|s| s.to_str()) {
113 keys.push(filename.to_string());
114 }
115
116 keys
117 }
118}
119
120#[derive(Debug)]
122pub struct ImportGraphBuilder {
123 normalization_cache: HashMap<String, String>,
125 ast_parser: SimpleAstParser,
127}
128
129
130#[derive(Debug)]
132pub struct CentralityCalculator {
133 damping_factor: f64,
135 max_iterations: usize,
137 tolerance: f64,
139}
140
141impl Default for ImportGraphBuilder {
142 fn default() -> Self {
143 Self::new().expect("Failed to create ImportGraphBuilder")
144 }
145}
146
147impl ImportGraphBuilder {
148 pub fn new() -> Result<Self> {
150 Ok(Self {
151 normalization_cache: HashMap::new(),
152 ast_parser: SimpleAstParser::new()?,
153 })
154 }
155
156 pub fn build_graph<T>(&mut self, files: &[T]) -> Result<ImportGraph>
158 where
159 T: ScanResult,
160 {
161 let mut graph = ImportGraph::new();
162
163 for file in files {
165 graph.add_node(file.path().to_string());
166 }
167
168 let keymap = ImportKeyMap::new(files);
170
171 for (file_idx, file) in files.iter().enumerate() {
173 if let Some(imports) = file.imports() {
174 for import_path in imports {
175 let normalized_import = self.normalize_import_path(import_path);
176 if let Some(target_idx) = keymap.resolve_import(&normalized_import) {
177 graph.add_edge(file_idx, target_idx);
178 }
179 }
180 }
181 }
182
183 Ok(graph)
184 }
185
186 fn find_matching_file<T>(&mut self, import_path: &str, files: &[T]) -> Option<usize>
188 where
189 T: ScanResult,
190 {
191 let normalized_import = self.normalize_import_path(import_path);
192
193 for (idx, file) in files.iter().enumerate() {
194 if import_matches_file(&normalized_import, file.path()) {
195 return Some(idx);
196 }
197 }
198
199 None
200 }
201
202 fn normalize_import_path(&mut self, import_path: &str) -> String {
204 if let Some(cached) = self.normalization_cache.get(import_path) {
206 return cached.clone();
207 }
208
209 let normalized = self.perform_normalization(import_path);
210 self.normalization_cache.insert(import_path.to_string(), normalized.clone());
211 normalized
212 }
213
214 fn perform_normalization(&self, import_path: &str) -> String {
216 let mut normalized = import_path.to_string();
217
218 normalized = normalized.trim_matches(|c| c == '"' || c == '\'' || c == ' ').to_string();
220
221 normalized = normalized.replace('\\', "/");
223
224 if let Some(dot_pos) = normalized.rfind('.') {
226 let extension = &normalized[dot_pos..];
228 match extension {
229 ".json" | ".xml" | ".yaml" | ".toml" | ".css" | ".scss" => {
230 }
232 _ => {
233 normalized.truncate(dot_pos);
235 }
236 }
237 }
238
239 if normalized.starts_with("./") || normalized.starts_with("../") {
241 normalized = self.normalize_relative_path(&normalized);
243 }
244
245 normalized = normalized.replace("::", "/").replace('.', "/");
247
248 normalized = self.resolve_import_aliases(&normalized);
250
251 normalized
252 }
253
254 fn normalize_relative_path(&self, path: &str) -> String {
256 let components: Vec<&str> = path.split('/').collect();
258 let mut normalized = Vec::new();
259
260 for component in components {
261 match component {
262 "" | "." => continue,
263 ".." => {
264 normalized.pop();
265 }
266 _ => normalized.push(component),
267 }
268 }
269
270 normalized.join("/")
271 }
272
273 fn resolve_import_aliases(&self, import_path: &str) -> String {
275 match import_path {
277 "fs" | "path" | "http" | "https" | "url" | "crypto" | "os" => {
279 return import_path.to_string(); }
281 path if !path.contains('/') && !path.starts_with('.') => {
283 return import_path.to_string(); }
285 _ => {}
286 }
287
288 let mut resolved = import_path.to_string();
289
290 if resolved.starts_with("@/") {
292 resolved = resolved.replacen("@/", "src/", 1);
293 }
294
295 if resolved.starts_with("~") {
296 resolved = resolved.replacen("~", "src", 1);
297 }
298
299 resolved
300 }
301
302 fn detect_language_from_path(&self, file_path: &str) -> Option<ImportLanguage> {
304 let extension = Path::new(file_path)
305 .extension()
306 .and_then(|ext| ext.to_str())
307 .unwrap_or("");
308
309 ImportLanguage::from_extension(extension)
310 }
311
312 pub fn extract_imports(&self, content: &str, file_path: &str) -> Vec<String> {
314 let language = match self.detect_language_from_path(file_path) {
316 Some(lang) => lang,
317 None => return Vec::new(),
318 };
319
320 match self.ast_parser.extract_imports(content, language) {
322 Ok(simple_imports) => {
323 simple_imports.into_iter()
324 .map(|import| import.module)
325 .collect()
326 }
327 Err(_) => {
328 Vec::new()
330 }
331 }
332 }
333}
334
335impl ImportGraph {
336 pub fn new() -> Self {
338 Self {
339 nodes: Vec::new(),
340 path_to_index: HashMap::new(),
341 dependencies: Vec::new(),
342 dependents: Vec::new(),
343 pagerank_scores: None,
344 }
345 }
346
347 pub fn add_node(&mut self, path: String) -> usize {
349 if let Some(&existing_idx) = self.path_to_index.get(&path) {
350 return existing_idx;
351 }
352
353 let index = self.nodes.len();
354 self.nodes.push(path.clone());
355 self.path_to_index.insert(path, index);
356 self.dependencies.push(Vec::new());
357 self.dependents.push(Vec::new());
358
359 self.pagerank_scores = None;
361
362 index
363 }
364
365 pub fn add_edge(&mut self, source: usize, target: usize) {
367 if source < self.dependencies.len() && target < self.dependents.len() && source != target {
368 if !self.dependencies[source].contains(&target) {
370 self.dependencies[source].push(target);
371 self.dependents[target].push(source);
372
373 self.pagerank_scores = None;
375 }
376 }
377 }
378
379 pub fn get_node_degrees(&self, path: &str) -> Option<(usize, usize)> {
381 let index = *self.path_to_index.get(path)?;
382 let in_degree = self.dependents[index].len();
383 let out_degree = self.dependencies[index].len();
384 Some((in_degree, out_degree))
385 }
386
387 pub fn get_pagerank_scores(&mut self) -> Result<&[f64]> {
389 if self.pagerank_scores.is_none() {
390 let calculator = CentralityCalculator::default();
391 let scores = calculator.calculate_pagerank(self)?;
392 self.pagerank_scores = Some(scores);
393 }
394
395 Ok(self.pagerank_scores.as_ref().unwrap())
396 }
397
398 pub fn get_pagerank_score(&mut self, path: &str) -> Result<f64> {
400 let index = *self.path_to_index.get(path).unwrap_or(&0);
401 let scores = self.get_pagerank_scores()?;
402 Ok(*scores.get(index).unwrap_or(&0.0))
403 }
404
405 pub fn stats(&self) -> GraphStats {
407 let node_count = self.nodes.len();
408 let edge_count: usize = self.dependencies.iter().map(|deps| deps.len()).sum();
409
410 let in_degrees: Vec<usize> = self.dependents.iter().map(|deps| deps.len()).collect();
411 let out_degrees: Vec<usize> = self.dependencies.iter().map(|deps| deps.len()).collect();
412
413 let avg_in_degree = if node_count > 0 {
414 in_degrees.iter().sum::<usize>() as f64 / node_count as f64
415 } else {
416 0.0
417 };
418
419 let avg_out_degree = if node_count > 0 {
420 out_degrees.iter().sum::<usize>() as f64 / node_count as f64
421 } else {
422 0.0
423 };
424
425 let max_in_degree = in_degrees.iter().max().cloned().unwrap_or(0);
426 let max_out_degree = out_degrees.iter().max().cloned().unwrap_or(0);
427
428 let density = if node_count > 1 {
429 edge_count as f64 / (node_count * (node_count - 1)) as f64
430 } else {
431 0.0
432 };
433
434 GraphStats {
435 node_count,
436 edge_count,
437 avg_in_degree,
438 avg_out_degree,
439 max_in_degree,
440 max_out_degree,
441 density,
442 }
443 }
444}
445
446impl Default for ImportGraph {
447 fn default() -> Self {
448 Self::new()
449 }
450}
451
452impl CentralityCalculator {
453 pub fn new() -> Self {
455 Self {
456 damping_factor: 0.85,
457 max_iterations: 100,
458 tolerance: 1e-6,
459 }
460 }
461
462 pub fn with_params(damping_factor: f64, max_iterations: usize, tolerance: f64) -> Self {
464 Self {
465 damping_factor,
466 max_iterations,
467 tolerance,
468 }
469 }
470
471 pub fn calculate_pagerank(&self, graph: &ImportGraph) -> Result<Vec<f64>> {
473 let n = graph.nodes.len();
474 if n == 0 {
475 return Ok(Vec::new());
476 }
477
478 let mut scores = vec![1.0 / n as f64; n];
480 let mut new_scores = vec![0.0; n];
481
482 for _iteration in 0..self.max_iterations {
483 for (i, new_score) in new_scores.iter_mut().enumerate() {
485 let mut rank_sum = 0.0;
486
487 for &source in &graph.dependents[i] {
489 let out_degree = graph.dependencies[source].len();
490 if out_degree > 0 {
491 rank_sum += scores[source] / out_degree as f64;
492 }
493 }
494
495 *new_score = (1.0 - self.damping_factor) / n as f64 + self.damping_factor * rank_sum;
497 }
498
499 let mut max_diff = 0.0;
501 for i in 0..n {
502 let diff = (new_scores[i] - scores[i]).abs();
503 if diff > max_diff {
504 max_diff = diff;
505 }
506 }
507
508 std::mem::swap(&mut scores, &mut new_scores);
510
511 if max_diff < self.tolerance {
512 break;
513 }
514 }
515
516 Ok(scores)
517 }
518}
519
520impl Default for CentralityCalculator {
521 fn default() -> Self {
522 Self::new()
523 }
524}
525
526#[derive(Debug, Clone)]
528pub struct GraphStats {
529 pub node_count: usize,
530 pub edge_count: usize,
531 pub avg_in_degree: f64,
532 pub avg_out_degree: f64,
533 pub max_in_degree: usize,
534 pub max_out_degree: usize,
535 pub density: f64,
536}
537
538pub fn import_matches_file(import_path: &str, file_path: &str) -> bool {
540 let normalized_import = normalize_for_matching(import_path);
542 let normalized_file = normalize_for_matching(file_path);
543
544 if normalized_import == normalized_file {
546 return true;
547 }
548
549 if normalized_file.contains(&normalized_import) {
551 return true;
552 }
553
554 if normalized_file.ends_with(&normalized_import) {
556 return true;
557 }
558
559 let module_import = normalized_import.replace("::", "/");
561 if normalized_file.contains(&module_import) ||
562 module_import.contains(&normalized_file) {
563 return true;
564 }
565
566 if normalized_import.starts_with("std::") {
568 let std_path = normalized_import.replace("::", "/");
569 if normalized_file.replace("_", "").contains(&std_path.replace("_", "")) {
570 return true;
571 }
572 }
573
574 if is_index_file(file_path) {
576 let directory = std::path::Path::new(file_path)
577 .parent()
578 .and_then(|p| p.to_str())
579 .unwrap_or("");
580 let normalized_dir = normalize_for_matching(directory);
581 if normalized_import == normalized_dir {
582 return true;
583 }
584 }
585
586 false
587}
588
589fn normalize_for_matching(path: &str) -> String {
591 let mut normalized = path.to_lowercase();
592
593 if normalized.starts_with("./") {
595 normalized = normalized[2..].to_string();
596 }
597
598 if normalized.starts_with("src/") {
599 normalized = normalized[4..].to_string();
600 }
601
602 if let Some(dot_pos) = normalized.rfind('.') {
604 normalized.truncate(dot_pos);
605 }
606
607 normalized = normalized.replace('\\', "/");
609
610 normalized.trim_end_matches('/').to_string()
612}
613
614fn is_index_file(file_path: &str) -> bool {
616 let file_name = std::path::Path::new(file_path)
617 .file_stem()
618 .and_then(|stem| stem.to_str())
619 .unwrap_or("");
620
621 matches!(file_name, "index" | "__init__" | "mod" | "main")
622}
623
624#[cfg(test)]
625mod tests {
626 use super::*;
627 use super::super::DocumentAnalysis;
628
629 #[derive(Debug)]
631 struct MockScanResult {
632 path: String,
633 relative_path: String,
634 depth: usize,
635 imports: Option<Vec<String>>,
636 }
637
638 impl MockScanResult {
639 fn new(path: &str, imports: Vec<&str>) -> Self {
640 Self {
641 path: path.to_string(),
642 relative_path: path.to_string(),
643 depth: path.matches('/').count(),
644 imports: Some(imports.iter().map(|s| s.to_string()).collect()),
645 }
646 }
647 }
648
649 impl ScanResult for MockScanResult {
650 fn path(&self) -> &str { &self.path }
651 fn relative_path(&self) -> &str { &self.relative_path }
652 fn depth(&self) -> usize { self.depth }
653 fn is_docs(&self) -> bool { false }
654 fn is_readme(&self) -> bool { false }
655 fn is_test(&self) -> bool { false }
656 fn is_entrypoint(&self) -> bool { false }
657 fn has_examples(&self) -> bool { false }
658 fn priority_boost(&self) -> f64 { 0.0 }
659 fn churn_score(&self) -> f64 { 0.0 }
660 fn centrality_in(&self) -> f64 { 0.0 }
661 fn imports(&self) -> Option<&[String]> { self.imports.as_deref() }
662 fn doc_analysis(&self) -> Option<&DocumentAnalysis> { None }
663 }
664
665 #[test]
666 fn test_import_graph_creation() {
667 let mut graph = ImportGraph::new();
668
669 let idx1 = graph.add_node("file1.rs".to_string());
670 let idx2 = graph.add_node("file2.rs".to_string());
671
672 assert_eq!(idx1, 0);
673 assert_eq!(idx2, 1);
674 assert_eq!(graph.nodes.len(), 2);
675
676 graph.add_edge(idx1, idx2);
677 assert_eq!(graph.dependencies[idx1].len(), 1);
678 assert_eq!(graph.dependents[idx2].len(), 1);
679 }
680
681 #[test]
682 fn test_graph_builder() {
683 let files = vec![
684 MockScanResult::new("src/main.rs", vec!["src/lib.rs", "src/utils.rs"]),
685 MockScanResult::new("src/lib.rs", vec!["src/utils.rs"]),
686 MockScanResult::new("src/utils.rs", vec![]),
687 ];
688
689 let mut builder = ImportGraphBuilder::new().unwrap();
690 let result = builder.build_graph(&files);
691 assert!(result.is_ok());
692
693 let graph = result.unwrap();
694 assert_eq!(graph.nodes.len(), 3);
695
696 let stats = graph.stats();
697 assert_eq!(stats.node_count, 3);
698 assert!(stats.edge_count > 0);
699 }
700
701 #[test]
702 fn test_import_matching() {
703 assert!(import_matches_file("src/utils", "src/utils.rs"));
705 assert!(import_matches_file("./lib", "src/lib.js"));
706
707 assert!(import_matches_file("std::collections::HashMap", "std/collections/hash_map.rs"));
709
710 assert!(import_matches_file("src/components", "src/components/index.js"));
712
713 assert!(!import_matches_file("completely_different", "src/utils.rs"));
715 }
716
717 #[test]
718 fn test_path_normalization() {
719 let mut builder = ImportGraphBuilder::new().unwrap();
720
721 let normalized1 = builder.normalize_import_path("\"./utils.js\"");
723 assert_eq!(normalized1, "utils");
724
725 let normalized2 = builder.normalize_import_path("../lib/helper.ts");
726 assert!(normalized2.contains("helper"));
727
728 let normalized3 = builder.normalize_import_path("@/components/Button");
729 assert!(normalized3.contains("src/components/Button"));
730 }
731
732 #[test]
733 fn test_pagerank_calculation() {
734 let mut graph = ImportGraph::new();
735
736 let idx_a = graph.add_node("A".to_string());
738 let idx_b = graph.add_node("B".to_string());
739 let idx_c = graph.add_node("C".to_string());
740
741 graph.add_edge(idx_a, idx_b);
742 graph.add_edge(idx_b, idx_c);
743 graph.add_edge(idx_a, idx_c);
744
745 let scores = graph.get_pagerank_scores();
746 assert!(scores.is_ok());
747
748 let scores = scores.unwrap();
749 assert_eq!(scores.len(), 3);
750
751 assert!(scores[idx_c] > scores[idx_a]);
753 assert!(scores[idx_c] > scores[idx_b]);
754 }
755
756 #[test]
757 fn test_import_extraction() {
758 let builder = ImportGraphBuilder::new().unwrap();
759
760 let js_content = r#"
762 import { Component } from 'react';
763 import utils from './utils.js';
764 const fs = require('fs');
765 "#;
766
767 let imports = builder.extract_imports(js_content, "test.js");
768 assert!(imports.len() >= 2);
769 assert!(imports.contains(&"react".to_string()));
770 assert!(imports.contains(&"./utils.js".to_string()));
771
772 let rust_content = r#"
774 use std::collections::HashMap;
775 use crate::utils::helper;
776 mod tests;
777 "#;
778
779 let imports = builder.extract_imports(rust_content, "test.rs");
780 assert!(imports.len() >= 2);
781 assert!(imports.contains(&"std::collections::HashMap".to_string()));
782 assert!(imports.contains(&"crate::utils::helper".to_string()));
783 }
784
785 #[test]
786 fn test_centrality_calculator() {
787 let mut graph = ImportGraph::new();
788
789 let center = graph.add_node("center.rs".to_string());
791 for i in 1..=5 {
792 let node = graph.add_node(format!("node{}.rs", i));
793 graph.add_edge(node, center); }
795
796 let calculator = CentralityCalculator::default();
797 let scores = calculator.calculate_pagerank(&graph);
798 assert!(scores.is_ok());
799
800 let scores = scores.unwrap();
801 assert_eq!(scores.len(), 6);
802
803 assert!(scores[center] > scores[1]); }
806
807 #[test]
808 fn test_graph_statistics() {
809 let mut graph = ImportGraph::new();
810
811 for i in 0..5 {
812 graph.add_node(format!("file{}.rs", i));
813 }
814
815 graph.add_edge(0, 1);
817 graph.add_edge(1, 2);
818 graph.add_edge(2, 3);
819 graph.add_edge(0, 4);
820
821 let stats = graph.stats();
822 assert_eq!(stats.node_count, 5);
823 assert_eq!(stats.edge_count, 4);
824 assert!(stats.avg_out_degree > 0.0);
825 assert!(stats.density > 0.0);
826 assert!(stats.density < 1.0);
827 }
828}