scribe_analysis/heuristics/
import_analysis.rs1use std::collections::HashMap;
15use std::path::Path;
16use scribe_core::Result;
17use super::ScanResult;
18use crate::ast_import_parser::{SimpleAstParser, ImportLanguage, SimpleImport};
19
20#[derive(Debug, Clone)]
22pub struct ImportGraph {
23 nodes: Vec<String>,
25 path_to_index: HashMap<String, usize>,
27 dependencies: Vec<Vec<usize>>,
29 dependents: Vec<Vec<usize>>,
31 pagerank_scores: Option<Vec<f64>>,
33}
34
35#[derive(Debug)]
37pub struct ImportGraphBuilder {
38 normalization_cache: HashMap<String, String>,
40 ast_parser: SimpleAstParser,
42}
43
44
45#[derive(Debug)]
47pub struct CentralityCalculator {
48 damping_factor: f64,
50 max_iterations: usize,
52 tolerance: f64,
54}
55
56impl Default for ImportGraphBuilder {
57 fn default() -> Self {
58 Self::new().expect("Failed to create ImportGraphBuilder")
59 }
60}
61
62impl ImportGraphBuilder {
63 pub fn new() -> Result<Self> {
65 Ok(Self {
66 normalization_cache: HashMap::new(),
67 ast_parser: SimpleAstParser::new()?,
68 })
69 }
70
71 pub fn build_graph<T>(&mut self, files: &[T]) -> Result<ImportGraph>
73 where
74 T: ScanResult,
75 {
76 let mut graph = ImportGraph::new();
77
78 for file in files {
80 graph.add_node(file.path().to_string());
81 }
82
83 for (file_idx, file) in files.iter().enumerate() {
85 if let Some(imports) = file.imports() {
86 for import_path in imports {
87 if let Some(target_idx) = self.find_matching_file(import_path, files) {
88 graph.add_edge(file_idx, target_idx);
89 }
90 }
91 }
92 }
93
94 Ok(graph)
95 }
96
97 fn find_matching_file<T>(&mut self, import_path: &str, files: &[T]) -> Option<usize>
99 where
100 T: ScanResult,
101 {
102 let normalized_import = self.normalize_import_path(import_path);
103
104 for (idx, file) in files.iter().enumerate() {
105 if import_matches_file(&normalized_import, file.path()) {
106 return Some(idx);
107 }
108 }
109
110 None
111 }
112
113 fn normalize_import_path(&mut self, import_path: &str) -> String {
115 if let Some(cached) = self.normalization_cache.get(import_path) {
117 return cached.clone();
118 }
119
120 let normalized = self.perform_normalization(import_path);
121 self.normalization_cache.insert(import_path.to_string(), normalized.clone());
122 normalized
123 }
124
125 fn perform_normalization(&self, import_path: &str) -> String {
127 let mut normalized = import_path.to_string();
128
129 normalized = normalized.trim_matches(|c| c == '"' || c == '\'' || c == ' ').to_string();
131
132 normalized = normalized.replace('\\', "/");
134
135 if let Some(dot_pos) = normalized.rfind('.') {
137 let extension = &normalized[dot_pos..];
139 match extension {
140 ".json" | ".xml" | ".yaml" | ".toml" | ".css" | ".scss" => {
141 }
143 _ => {
144 normalized.truncate(dot_pos);
146 }
147 }
148 }
149
150 if normalized.starts_with("./") || normalized.starts_with("../") {
152 normalized = self.normalize_relative_path(&normalized);
154 }
155
156 normalized = normalized.replace("::", "/").replace('.', "/");
158
159 normalized = self.resolve_import_aliases(&normalized);
161
162 normalized
163 }
164
165 fn normalize_relative_path(&self, path: &str) -> String {
167 let components: Vec<&str> = path.split('/').collect();
169 let mut normalized = Vec::new();
170
171 for component in components {
172 match component {
173 "" | "." => continue,
174 ".." => {
175 normalized.pop();
176 }
177 _ => normalized.push(component),
178 }
179 }
180
181 normalized.join("/")
182 }
183
184 fn resolve_import_aliases(&self, import_path: &str) -> String {
186 match import_path {
188 "fs" | "path" | "http" | "https" | "url" | "crypto" | "os" => {
190 return import_path.to_string(); }
192 path if !path.contains('/') && !path.starts_with('.') => {
194 return import_path.to_string(); }
196 _ => {}
197 }
198
199 let mut resolved = import_path.to_string();
200
201 if resolved.starts_with("@/") {
203 resolved = resolved.replacen("@/", "src/", 1);
204 }
205
206 if resolved.starts_with("~") {
207 resolved = resolved.replacen("~", "src", 1);
208 }
209
210 resolved
211 }
212
213 fn detect_language_from_path(&self, file_path: &str) -> Option<ImportLanguage> {
215 let extension = Path::new(file_path)
216 .extension()
217 .and_then(|ext| ext.to_str())
218 .unwrap_or("");
219
220 ImportLanguage::from_extension(extension)
221 }
222
223 pub fn extract_imports(&self, content: &str, file_path: &str) -> Vec<String> {
225 let language = match self.detect_language_from_path(file_path) {
227 Some(lang) => lang,
228 None => return Vec::new(),
229 };
230
231 match self.ast_parser.extract_imports(content, language) {
233 Ok(simple_imports) => {
234 simple_imports.into_iter()
235 .map(|import| import.module)
236 .collect()
237 }
238 Err(_) => {
239 Vec::new()
241 }
242 }
243 }
244}
245
246impl ImportGraph {
247 pub fn new() -> Self {
249 Self {
250 nodes: Vec::new(),
251 path_to_index: HashMap::new(),
252 dependencies: Vec::new(),
253 dependents: Vec::new(),
254 pagerank_scores: None,
255 }
256 }
257
258 pub fn add_node(&mut self, path: String) -> usize {
260 if let Some(&existing_idx) = self.path_to_index.get(&path) {
261 return existing_idx;
262 }
263
264 let index = self.nodes.len();
265 self.nodes.push(path.clone());
266 self.path_to_index.insert(path, index);
267 self.dependencies.push(Vec::new());
268 self.dependents.push(Vec::new());
269
270 self.pagerank_scores = None;
272
273 index
274 }
275
276 pub fn add_edge(&mut self, source: usize, target: usize) {
278 if source < self.dependencies.len() && target < self.dependents.len() && source != target {
279 if !self.dependencies[source].contains(&target) {
281 self.dependencies[source].push(target);
282 self.dependents[target].push(source);
283
284 self.pagerank_scores = None;
286 }
287 }
288 }
289
290 pub fn get_node_degrees(&self, path: &str) -> Option<(usize, usize)> {
292 let index = *self.path_to_index.get(path)?;
293 let in_degree = self.dependents[index].len();
294 let out_degree = self.dependencies[index].len();
295 Some((in_degree, out_degree))
296 }
297
298 pub fn get_pagerank_scores(&mut self) -> Result<&[f64]> {
300 if self.pagerank_scores.is_none() {
301 let calculator = CentralityCalculator::default();
302 let scores = calculator.calculate_pagerank(self)?;
303 self.pagerank_scores = Some(scores);
304 }
305
306 Ok(self.pagerank_scores.as_ref().unwrap())
307 }
308
309 pub fn get_pagerank_score(&mut self, path: &str) -> Result<f64> {
311 let index = *self.path_to_index.get(path).unwrap_or(&0);
312 let scores = self.get_pagerank_scores()?;
313 Ok(*scores.get(index).unwrap_or(&0.0))
314 }
315
316 pub fn stats(&self) -> GraphStats {
318 let node_count = self.nodes.len();
319 let edge_count: usize = self.dependencies.iter().map(|deps| deps.len()).sum();
320
321 let in_degrees: Vec<usize> = self.dependents.iter().map(|deps| deps.len()).collect();
322 let out_degrees: Vec<usize> = self.dependencies.iter().map(|deps| deps.len()).collect();
323
324 let avg_in_degree = if node_count > 0 {
325 in_degrees.iter().sum::<usize>() as f64 / node_count as f64
326 } else {
327 0.0
328 };
329
330 let avg_out_degree = if node_count > 0 {
331 out_degrees.iter().sum::<usize>() as f64 / node_count as f64
332 } else {
333 0.0
334 };
335
336 let max_in_degree = in_degrees.iter().max().cloned().unwrap_or(0);
337 let max_out_degree = out_degrees.iter().max().cloned().unwrap_or(0);
338
339 let density = if node_count > 1 {
340 edge_count as f64 / (node_count * (node_count - 1)) as f64
341 } else {
342 0.0
343 };
344
345 GraphStats {
346 node_count,
347 edge_count,
348 avg_in_degree,
349 avg_out_degree,
350 max_in_degree,
351 max_out_degree,
352 density,
353 }
354 }
355}
356
357impl Default for ImportGraph {
358 fn default() -> Self {
359 Self::new()
360 }
361}
362
363impl CentralityCalculator {
364 pub fn new() -> Self {
366 Self {
367 damping_factor: 0.85,
368 max_iterations: 100,
369 tolerance: 1e-6,
370 }
371 }
372
373 pub fn with_params(damping_factor: f64, max_iterations: usize, tolerance: f64) -> Self {
375 Self {
376 damping_factor,
377 max_iterations,
378 tolerance,
379 }
380 }
381
382 pub fn calculate_pagerank(&self, graph: &ImportGraph) -> Result<Vec<f64>> {
384 let n = graph.nodes.len();
385 if n == 0 {
386 return Ok(Vec::new());
387 }
388
389 let mut scores = vec![1.0 / n as f64; n];
391 let mut new_scores = vec![0.0; n];
392
393 for _iteration in 0..self.max_iterations {
394 for (i, new_score) in new_scores.iter_mut().enumerate() {
396 let mut rank_sum = 0.0;
397
398 for &source in &graph.dependents[i] {
400 let out_degree = graph.dependencies[source].len();
401 if out_degree > 0 {
402 rank_sum += scores[source] / out_degree as f64;
403 }
404 }
405
406 *new_score = (1.0 - self.damping_factor) / n as f64 + self.damping_factor * rank_sum;
408 }
409
410 let mut max_diff = 0.0;
412 for i in 0..n {
413 let diff = (new_scores[i] - scores[i]).abs();
414 if diff > max_diff {
415 max_diff = diff;
416 }
417 }
418
419 std::mem::swap(&mut scores, &mut new_scores);
421
422 if max_diff < self.tolerance {
423 break;
424 }
425 }
426
427 Ok(scores)
428 }
429}
430
431impl Default for CentralityCalculator {
432 fn default() -> Self {
433 Self::new()
434 }
435}
436
437#[derive(Debug, Clone)]
439pub struct GraphStats {
440 pub node_count: usize,
441 pub edge_count: usize,
442 pub avg_in_degree: f64,
443 pub avg_out_degree: f64,
444 pub max_in_degree: usize,
445 pub max_out_degree: usize,
446 pub density: f64,
447}
448
449pub fn import_matches_file(import_path: &str, file_path: &str) -> bool {
451 let normalized_import = normalize_for_matching(import_path);
453 let normalized_file = normalize_for_matching(file_path);
454
455 if normalized_import == normalized_file {
457 return true;
458 }
459
460 if normalized_file.contains(&normalized_import) {
462 return true;
463 }
464
465 if normalized_file.ends_with(&normalized_import) {
467 return true;
468 }
469
470 let module_import = normalized_import.replace("::", "/");
472 if normalized_file.contains(&module_import) ||
473 module_import.contains(&normalized_file) {
474 return true;
475 }
476
477 if normalized_import.starts_with("std::") {
479 let std_path = normalized_import.replace("::", "/");
480 if normalized_file.replace("_", "").contains(&std_path.replace("_", "")) {
481 return true;
482 }
483 }
484
485 if is_index_file(file_path) {
487 let directory = std::path::Path::new(file_path)
488 .parent()
489 .and_then(|p| p.to_str())
490 .unwrap_or("");
491 let normalized_dir = normalize_for_matching(directory);
492 if normalized_import == normalized_dir {
493 return true;
494 }
495 }
496
497 false
498}
499
500fn normalize_for_matching(path: &str) -> String {
502 let mut normalized = path.to_lowercase();
503
504 if normalized.starts_with("./") {
506 normalized = normalized[2..].to_string();
507 }
508
509 if normalized.starts_with("src/") {
510 normalized = normalized[4..].to_string();
511 }
512
513 if let Some(dot_pos) = normalized.rfind('.') {
515 normalized.truncate(dot_pos);
516 }
517
518 normalized = normalized.replace('\\', "/");
520
521 normalized.trim_end_matches('/').to_string()
523}
524
525fn is_index_file(file_path: &str) -> bool {
527 let file_name = std::path::Path::new(file_path)
528 .file_stem()
529 .and_then(|stem| stem.to_str())
530 .unwrap_or("");
531
532 matches!(file_name, "index" | "__init__" | "mod" | "main")
533}
534
535#[cfg(test)]
536mod tests {
537 use super::*;
538 use super::super::DocumentAnalysis;
539
540 #[derive(Debug)]
542 struct MockScanResult {
543 path: String,
544 relative_path: String,
545 depth: usize,
546 imports: Option<Vec<String>>,
547 }
548
549 impl MockScanResult {
550 fn new(path: &str, imports: Vec<&str>) -> Self {
551 Self {
552 path: path.to_string(),
553 relative_path: path.to_string(),
554 depth: path.matches('/').count(),
555 imports: Some(imports.iter().map(|s| s.to_string()).collect()),
556 }
557 }
558 }
559
560 impl ScanResult for MockScanResult {
561 fn path(&self) -> &str { &self.path }
562 fn relative_path(&self) -> &str { &self.relative_path }
563 fn depth(&self) -> usize { self.depth }
564 fn is_docs(&self) -> bool { false }
565 fn is_readme(&self) -> bool { false }
566 fn is_test(&self) -> bool { false }
567 fn is_entrypoint(&self) -> bool { false }
568 fn has_examples(&self) -> bool { false }
569 fn priority_boost(&self) -> f64 { 0.0 }
570 fn churn_score(&self) -> f64 { 0.0 }
571 fn centrality_in(&self) -> f64 { 0.0 }
572 fn imports(&self) -> Option<&[String]> { self.imports.as_deref() }
573 fn doc_analysis(&self) -> Option<&DocumentAnalysis> { None }
574 }
575
576 #[test]
577 fn test_import_graph_creation() {
578 let mut graph = ImportGraph::new();
579
580 let idx1 = graph.add_node("file1.rs".to_string());
581 let idx2 = graph.add_node("file2.rs".to_string());
582
583 assert_eq!(idx1, 0);
584 assert_eq!(idx2, 1);
585 assert_eq!(graph.nodes.len(), 2);
586
587 graph.add_edge(idx1, idx2);
588 assert_eq!(graph.dependencies[idx1].len(), 1);
589 assert_eq!(graph.dependents[idx2].len(), 1);
590 }
591
592 #[test]
593 fn test_graph_builder() {
594 let files = vec![
595 MockScanResult::new("src/main.rs", vec!["src/lib.rs", "src/utils.rs"]),
596 MockScanResult::new("src/lib.rs", vec!["src/utils.rs"]),
597 MockScanResult::new("src/utils.rs", vec![]),
598 ];
599
600 let mut builder = ImportGraphBuilder::new().unwrap();
601 let result = builder.build_graph(&files);
602 assert!(result.is_ok());
603
604 let graph = result.unwrap();
605 assert_eq!(graph.nodes.len(), 3);
606
607 let stats = graph.stats();
608 assert_eq!(stats.node_count, 3);
609 assert!(stats.edge_count > 0);
610 }
611
612 #[test]
613 fn test_import_matching() {
614 assert!(import_matches_file("src/utils", "src/utils.rs"));
616 assert!(import_matches_file("./lib", "src/lib.js"));
617
618 assert!(import_matches_file("std::collections::HashMap", "std/collections/hash_map.rs"));
620
621 assert!(import_matches_file("src/components", "src/components/index.js"));
623
624 assert!(!import_matches_file("completely_different", "src/utils.rs"));
626 }
627
628 #[test]
629 fn test_path_normalization() {
630 let mut builder = ImportGraphBuilder::new().unwrap();
631
632 let normalized1 = builder.normalize_import_path("\"./utils.js\"");
634 assert_eq!(normalized1, "utils");
635
636 let normalized2 = builder.normalize_import_path("../lib/helper.ts");
637 assert!(normalized2.contains("helper"));
638
639 let normalized3 = builder.normalize_import_path("@/components/Button");
640 assert!(normalized3.contains("src/components/Button"));
641 }
642
643 #[test]
644 fn test_pagerank_calculation() {
645 let mut graph = ImportGraph::new();
646
647 let idx_a = graph.add_node("A".to_string());
649 let idx_b = graph.add_node("B".to_string());
650 let idx_c = graph.add_node("C".to_string());
651
652 graph.add_edge(idx_a, idx_b);
653 graph.add_edge(idx_b, idx_c);
654 graph.add_edge(idx_a, idx_c);
655
656 let scores = graph.get_pagerank_scores();
657 assert!(scores.is_ok());
658
659 let scores = scores.unwrap();
660 assert_eq!(scores.len(), 3);
661
662 assert!(scores[idx_c] > scores[idx_a]);
664 assert!(scores[idx_c] > scores[idx_b]);
665 }
666
667 #[test]
668 fn test_import_extraction() {
669 let builder = ImportGraphBuilder::new().unwrap();
670
671 let js_content = r#"
673 import { Component } from 'react';
674 import utils from './utils.js';
675 const fs = require('fs');
676 "#;
677
678 let imports = builder.extract_imports(js_content, "test.js");
679 assert!(imports.len() >= 2);
680 assert!(imports.contains(&"react".to_string()));
681 assert!(imports.contains(&"./utils.js".to_string()));
682
683 let rust_content = r#"
685 use std::collections::HashMap;
686 use crate::utils::helper;
687 mod tests;
688 "#;
689
690 let imports = builder.extract_imports(rust_content, "test.rs");
691 assert!(imports.len() >= 2);
692 assert!(imports.contains(&"std::collections::HashMap".to_string()));
693 assert!(imports.contains(&"crate::utils::helper".to_string()));
694 }
695
696 #[test]
697 fn test_centrality_calculator() {
698 let mut graph = ImportGraph::new();
699
700 let center = graph.add_node("center.rs".to_string());
702 for i in 1..=5 {
703 let node = graph.add_node(format!("node{}.rs", i));
704 graph.add_edge(node, center); }
706
707 let calculator = CentralityCalculator::default();
708 let scores = calculator.calculate_pagerank(&graph);
709 assert!(scores.is_ok());
710
711 let scores = scores.unwrap();
712 assert_eq!(scores.len(), 6);
713
714 assert!(scores[center] > scores[1]); }
717
718 #[test]
719 fn test_graph_statistics() {
720 let mut graph = ImportGraph::new();
721
722 for i in 0..5 {
723 graph.add_node(format!("file{}.rs", i));
724 }
725
726 graph.add_edge(0, 1);
728 graph.add_edge(1, 2);
729 graph.add_edge(2, 3);
730 graph.add_edge(0, 4);
731
732 let stats = graph.stats();
733 assert_eq!(stats.node_count, 5);
734 assert_eq!(stats.edge_count, 4);
735 assert!(stats.avg_out_degree > 0.0);
736 assert!(stats.density > 0.0);
737 assert!(stats.density < 1.0);
738 }
739}