use super::ScanResult;
use crate::ast_import_parser::{ImportLanguage, SimpleAstParser, SimpleImport};
use scribe_core::Result;
use std::collections::HashMap;
use std::path::Path;
/// Directed import graph over a set of scanned files.
///
/// Nodes are file paths; an edge `source -> target` means "source imports
/// target". Adjacency is stored in both directions so in-degree and
/// out-degree lookups are O(1), and the PageRank vector is cached until the
/// topology changes.
#[derive(Debug, Clone)]
pub struct ImportGraph {
    // Node index -> file path.
    nodes: Vec<String>,
    // Reverse lookup: file path -> node index.
    path_to_index: HashMap<String, usize>,
    // dependencies[i]: nodes that node i imports (out-edges).
    dependencies: Vec<Vec<usize>>,
    // dependents[i]: nodes that import node i (in-edges).
    dependents: Vec<Vec<usize>>,
    // Cached PageRank scores; reset to None by add_node/add_edge.
    pagerank_scores: Option<Vec<f64>>,
}
/// Lookup tables used to resolve a normalized import string to a file index.
#[derive(Debug)]
struct ImportKeyMap {
    // Full/normalized path keys (e.g. "src/lib.rs", "src/lib") -> file index.
    exact: HashMap<String, usize>,
    // Loose fallback keys (bare file stem / file name) -> file index.
    suffix: HashMap<String, usize>,
}
impl ImportKeyMap {
    /// Builds the lookup tables from the scanned files.
    ///
    /// When two files share a stem or file name, the later file in `files`
    /// silently wins in the `suffix` table — suffix matches are best-effort
    /// by design, while `exact` keys are expected to be unique.
    fn new<T>(files: &[T]) -> Self
    where
        T: ScanResult,
    {
        let mut exact = HashMap::with_capacity(files.len());
        let mut suffix = HashMap::with_capacity(files.len() * 2);
        for (i, file) in files.iter().enumerate() {
            let path = file.path();
            for key in Self::normalized_keys(path) {
                exact.insert(key, i);
            }
            for key in Self::suffix_keys(path) {
                suffix.insert(key, i);
            }
        }
        Self { exact, suffix }
    }
    /// Resolves a normalized import to a file index, preferring an exact
    /// path match over a bare stem/filename (suffix) match.
    fn resolve_import(&self, normalized_import: &str) -> Option<usize> {
        if let Some(&idx) = self.exact.get(normalized_import) {
            return Some(idx);
        }
        self.suffix.get(normalized_import).copied()
    }
    /// Keys that identify `path` precisely: the raw path, the path without a
    /// leading "./", and the extension-less "parent/stem" form that matches
    /// normalized import strings.
    fn normalized_keys(path: &str) -> Vec<String> {
        let mut keys = Vec::new();
        keys.push(path.to_string());
        if let Some(stripped) = path.strip_prefix("./") {
            keys.push(stripped.to_string());
        }
        if let Some(stem) = Path::new(path).file_stem().and_then(|s| s.to_str()) {
            if let Some(parent) = Path::new(path).parent().and_then(|p| p.to_str()) {
                if parent.is_empty() {
                    keys.push(stem.to_string());
                } else {
                    keys.push(format!("{}/{}", parent, stem));
                }
            }
        }
        keys
    }
    /// Loose keys for `path`: the bare file stem and the full file name,
    /// used only when no exact key matches.
    fn suffix_keys(path: &str) -> Vec<String> {
        let mut keys = Vec::new();
        if let Some(stem) = Path::new(path).file_stem().and_then(|s| s.to_str()) {
            keys.push(stem.to_string());
        }
        if let Some(filename) = Path::new(path).file_name().and_then(|s| s.to_str()) {
            keys.push(filename.to_string());
        }
        keys
    }
}
/// Builds an [`ImportGraph`] from scanned files, normalizing import strings
/// so they can be matched against file paths.
#[derive(Debug)]
pub struct ImportGraphBuilder {
    // Memoizes perform_normalization results, keyed by the raw import string.
    normalization_cache: HashMap<String, String>,
    // Language-aware parser used to extract import statements from source text.
    ast_parser: SimpleAstParser,
}
/// Iterative PageRank calculator over an [`ImportGraph`].
#[derive(Debug)]
pub struct CentralityCalculator {
    // PageRank damping factor (probability of following an edge); 0.85 by default.
    damping_factor: f64,
    // Upper bound on power iterations if convergence is not reached.
    max_iterations: usize,
    // Convergence threshold on the max per-node score change between iterations.
    tolerance: f64,
}
// Convenience Default for contexts that cannot thread a Result.
impl Default for ImportGraphBuilder {
    fn default() -> Self {
        // NOTE(review): panics if SimpleAstParser::new() fails; callers that
        // need to handle that error should use ImportGraphBuilder::new().
        Self::new().expect("Failed to create ImportGraphBuilder")
    }
}
impl ImportGraphBuilder {
    /// Creates a builder with an empty normalization cache and a fresh AST parser.
    ///
    /// # Errors
    /// Propagates any error from `SimpleAstParser::new()`.
    pub fn new() -> Result<Self> {
        Ok(Self {
            normalization_cache: HashMap::new(),
            ast_parser: SimpleAstParser::new()?,
        })
    }
    /// Builds the import graph: one node per file, one edge per import that
    /// resolves to another file in `files`.
    pub fn build_graph<T>(&mut self, files: &[T]) -> Result<ImportGraph>
    where
        T: ScanResult,
    {
        let mut graph = ImportGraph::new();
        for file in files {
            graph.add_node(file.path().to_string());
        }
        let keymap = ImportKeyMap::new(files);
        for (file_idx, file) in files.iter().enumerate() {
            if let Some(imports) = file.imports() {
                for import_path in imports {
                    let normalized_import = self.normalize_import_path(import_path);
                    // Imports that resolve to nothing (external packages,
                    // stdlib modules) are silently skipped.
                    if let Some(target_idx) = keymap.resolve_import(&normalized_import) {
                        graph.add_edge(file_idx, target_idx);
                    }
                }
            }
        }
        Ok(graph)
    }
    /// Memoizing wrapper around `perform_normalization`.
    fn normalize_import_path(&mut self, import_path: &str) -> String {
        if let Some(cached) = self.normalization_cache.get(import_path) {
            return cached.clone();
        }
        let normalized = self.perform_normalization(import_path);
        self.normalization_cache
            .insert(import_path.to_string(), normalized.clone());
        normalized
    }
    /// Canonicalizes an import string so it can be matched against file paths:
    /// strips surrounding quotes, converts `\` to `/`, removes code-file
    /// extensions (keeping data-file ones like `.json`), collapses `./` and
    /// `../` segments, rewrites module separators (`::`, `.`) to `/`, and
    /// expands project aliases.
    fn perform_normalization(&self, import_path: &str) -> String {
        let mut normalized = import_path
            .trim_matches(|c| c == '"' || c == '\'' || c == ' ')
            .replace('\\', "/");
        // Only a dot inside the LAST path segment — and not its first byte —
        // is an extension. A plain rfind('.') would truncate "./utils" to ""
        // and "../config" to "." by hitting their leading dots.
        let seg_start = normalized.rfind('/').map_or(0, |pos| pos + 1);
        let mut kept_extension: Option<String> = None;
        if let Some(dot) = normalized[seg_start..].rfind('.').filter(|&d| d > 0) {
            let dot_pos = seg_start + dot;
            let extension = normalized[dot_pos..].to_string();
            // Data-file extensions stay meaningful for matching. They are
            // split off here and re-appended at the end so the `.` -> `/`
            // rewrite below cannot mangle them (e.g. "config.json" must not
            // become "config/json").
            if matches!(
                extension.as_str(),
                ".json" | ".xml" | ".yaml" | ".toml" | ".css" | ".scss"
            ) {
                kept_extension = Some(extension);
            }
            normalized.truncate(dot_pos);
        }
        if normalized.starts_with("./") || normalized.starts_with("../") {
            normalized = self.normalize_relative_path(&normalized);
        }
        // Rust "a::b" and Python "a.b" module paths become slash paths.
        normalized = normalized.replace("::", "/").replace('.', "/");
        normalized = self.resolve_import_aliases(&normalized);
        if let Some(extension) = kept_extension {
            normalized.push_str(&extension);
        }
        normalized
    }
    /// Collapses "." and ".." components of a relative path. ".." above the
    /// root is simply dropped (lexical resolution only; the filesystem is
    /// never consulted).
    fn normalize_relative_path(&self, path: &str) -> String {
        let components: Vec<&str> = path.split('/').collect();
        let mut normalized = Vec::new();
        for component in components {
            match component {
                "" | "." => continue,
                ".." => {
                    normalized.pop();
                }
                _ => normalized.push(component),
            }
        }
        normalized.join("/")
    }
    /// Expands common project-path aliases ("@/", "~") to "src/", leaving
    /// well-known Node builtins and bare package names untouched.
    fn resolve_import_aliases(&self, import_path: &str) -> String {
        match import_path {
            // Node.js built-in modules live outside the project tree.
            "fs" | "path" | "http" | "https" | "url" | "crypto" | "os" => {
                return import_path.to_string();
            }
            // Bare names (no separator, not relative) are external packages.
            name if !name.contains('/') && !name.starts_with('.') => {
                return import_path.to_string();
            }
            _ => {}
        }
        let mut resolved = import_path.to_string();
        if resolved.starts_with("@/") {
            resolved = resolved.replacen("@/", "src/", 1);
        }
        if resolved.starts_with("~") {
            // NOTE(review): positional replace — "~/x" -> "src/x", but a bare
            // "~name" becomes "srcname"; confirm that edge case is intended.
            resolved = resolved.replacen("~", "src", 1);
        }
        resolved
    }
    /// Maps a file extension to an `ImportLanguage`, if recognized.
    fn detect_language_from_path(&self, file_path: &str) -> Option<ImportLanguage> {
        let extension = Path::new(file_path)
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("");
        ImportLanguage::from_extension(extension)
    }
    /// Extracts the module names of `content`'s import statements.
    ///
    /// Returns an empty list when the language is unrecognized or the parser
    /// fails — extraction is deliberately best-effort and never errors out.
    pub fn extract_imports(&self, content: &str, file_path: &str) -> Vec<String> {
        let language = match self.detect_language_from_path(file_path) {
            Some(lang) => lang,
            None => return Vec::new(),
        };
        match self.ast_parser.extract_imports(content, language) {
            Ok(simple_imports) => simple_imports
                .into_iter()
                .map(|import| import.module)
                .collect(),
            // Parse failures are swallowed on purpose: a file we cannot parse
            // simply contributes no edges.
            Err(_) => Vec::new(),
        }
    }
}
impl ImportGraph {
    /// Creates an empty graph.
    pub fn new() -> Self {
        Self {
            nodes: Vec::new(),
            path_to_index: HashMap::new(),
            dependencies: Vec::new(),
            dependents: Vec::new(),
            pagerank_scores: None,
        }
    }
    /// Adds a node for `path` and returns its index; idempotent for paths
    /// already present. Invalidates any cached PageRank scores.
    pub fn add_node(&mut self, path: String) -> usize {
        if let Some(&existing_idx) = self.path_to_index.get(&path) {
            return existing_idx;
        }
        let index = self.nodes.len();
        self.nodes.push(path.clone());
        self.path_to_index.insert(path, index);
        self.dependencies.push(Vec::new());
        self.dependents.push(Vec::new());
        self.pagerank_scores = None;
        index
    }
    /// Adds the edge `source -> target` ("source imports target").
    /// Out-of-range indices, self-loops, and duplicate edges are ignored.
    pub fn add_edge(&mut self, source: usize, target: usize) {
        if source < self.dependencies.len() && target < self.dependents.len() && source != target {
            if !self.dependencies[source].contains(&target) {
                self.dependencies[source].push(target);
                self.dependents[target].push(source);
                self.pagerank_scores = None;
            }
        }
    }
    /// Returns `(in_degree, out_degree)` for `path`, or `None` if unknown.
    pub fn get_node_degrees(&self, path: &str) -> Option<(usize, usize)> {
        let index = *self.path_to_index.get(path)?;
        let in_degree = self.dependents[index].len();
        let out_degree = self.dependencies[index].len();
        Some((in_degree, out_degree))
    }
    /// Returns the PageRank score vector (indexed by node), computing and
    /// caching it on first use.
    pub fn get_pagerank_scores(&mut self) -> Result<&[f64]> {
        if self.pagerank_scores.is_none() {
            let calculator = CentralityCalculator::default();
            let scores = calculator.calculate_pagerank(self)?;
            self.pagerank_scores = Some(scores);
        }
        Ok(self.pagerank_scores.as_ref().unwrap())
    }
    /// Returns the PageRank score for `path`, or 0.0 if the path is unknown.
    pub fn get_pagerank_score(&mut self, path: &str) -> Result<f64> {
        // BUGFIX: previously an unknown path fell back to index 0 via
        // `unwrap_or(&0)` and silently returned node 0's score.
        let index = match self.path_to_index.get(path) {
            Some(&idx) => idx,
            None => return Ok(0.0),
        };
        let scores = self.get_pagerank_scores()?;
        Ok(*scores.get(index).unwrap_or(&0.0))
    }
    /// Computes summary statistics (counts, degree averages/maxima, density).
    /// Density is edges / (n * (n - 1)), the directed-graph maximum.
    pub fn stats(&self) -> GraphStats {
        let node_count = self.nodes.len();
        let edge_count: usize = self.dependencies.iter().map(|deps| deps.len()).sum();
        let in_degrees: Vec<usize> = self.dependents.iter().map(|deps| deps.len()).collect();
        let out_degrees: Vec<usize> = self.dependencies.iter().map(|deps| deps.len()).collect();
        let avg_in_degree = if node_count > 0 {
            in_degrees.iter().sum::<usize>() as f64 / node_count as f64
        } else {
            0.0
        };
        let avg_out_degree = if node_count > 0 {
            out_degrees.iter().sum::<usize>() as f64 / node_count as f64
        } else {
            0.0
        };
        let max_in_degree = in_degrees.iter().max().cloned().unwrap_or(0);
        let max_out_degree = out_degrees.iter().max().cloned().unwrap_or(0);
        let density = if node_count > 1 {
            edge_count as f64 / (node_count * (node_count - 1)) as f64
        } else {
            0.0
        };
        GraphStats {
            node_count,
            edge_count,
            avg_in_degree,
            avg_out_degree,
            max_in_degree,
            max_out_degree,
            density,
        }
    }
}
// Default is just the empty graph.
impl Default for ImportGraph {
    fn default() -> Self {
        Self::new()
    }
}
impl CentralityCalculator {
    /// Standard PageRank parameters: damping 0.85, at most 100 power
    /// iterations, convergence tolerance 1e-6.
    pub fn new() -> Self {
        Self::with_params(0.85, 100, 1e-6)
    }
    /// Builds a calculator with explicit parameters.
    pub fn with_params(damping_factor: f64, max_iterations: usize, tolerance: f64) -> Self {
        Self {
            damping_factor,
            max_iterations,
            tolerance,
        }
    }
    /// Runs power iteration over `graph` and returns one score per node.
    ///
    /// Each node starts at 1/n; iteration stops early once the largest
    /// per-node change drops below the tolerance.
    pub fn calculate_pagerank(&self, graph: &ImportGraph) -> Result<Vec<f64>> {
        let node_count = graph.nodes.len();
        if node_count == 0 {
            return Ok(Vec::new());
        }
        let n = node_count as f64;
        let teleport = (1.0 - self.damping_factor) / n;
        let mut current = vec![1.0 / n; node_count];
        let mut next = vec![0.0; node_count];
        for _ in 0..self.max_iterations {
            for (node, slot) in next.iter_mut().enumerate() {
                // Each in-neighbor contributes its score split evenly across
                // its own out-edges.
                let incoming: f64 = graph.dependents[node]
                    .iter()
                    .filter(|&&source| !graph.dependencies[source].is_empty())
                    .map(|&source| current[source] / graph.dependencies[source].len() as f64)
                    .sum();
                *slot = teleport + self.damping_factor * incoming;
            }
            let delta = current
                .iter()
                .zip(next.iter())
                .map(|(old, new)| (new - old).abs())
                .fold(0.0_f64, f64::max);
            std::mem::swap(&mut current, &mut next);
            if delta < self.tolerance {
                break;
            }
        }
        Ok(current)
    }
}
// Default uses the standard PageRank parameters from new().
impl Default for CentralityCalculator {
    fn default() -> Self {
        Self::new()
    }
}
/// Summary statistics for an [`ImportGraph`], as produced by `ImportGraph::stats`.
#[derive(Debug, Clone)]
pub struct GraphStats {
    // Total number of nodes (files).
    pub node_count: usize,
    // Total number of directed edges (resolved imports).
    pub edge_count: usize,
    // Mean in-degree across all nodes (0.0 for an empty graph).
    pub avg_in_degree: f64,
    // Mean out-degree across all nodes (0.0 for an empty graph).
    pub avg_out_degree: f64,
    // Largest in-degree of any single node.
    pub max_in_degree: usize,
    // Largest out-degree of any single node.
    pub max_out_degree: usize,
    // edge_count / (n * (n - 1)): fraction of possible directed edges present.
    pub density: f64,
}
/// Heuristically decides whether `import_path` could refer to `file_path`.
///
/// Matching is intentionally loose — exact, substring, module-path (`::` as
/// `/`), Rust `std` underscore-insensitive comparison, and directory imports
/// resolved against index-style files — so false positives are possible;
/// callers should treat `true` as "plausible match".
pub fn import_matches_file(import_path: &str, file_path: &str) -> bool {
    let normalized_import = normalize_for_matching(import_path);
    let normalized_file = normalize_for_matching(file_path);
    // BUGFIX: an empty normalized import is a substring of every file path,
    // so `contains` below would match everything. Require exact equality
    // (empty == empty) instead.
    if normalized_import.is_empty() {
        return normalized_import == normalized_file;
    }
    if normalized_import == normalized_file {
        return true;
    }
    // Substring match; this also subsumes the suffix (ends_with) case.
    if normalized_file.contains(&normalized_import) {
        return true;
    }
    let module_import = normalized_import.replace("::", "/");
    if normalized_file.contains(&module_import) || module_import.contains(&normalized_file) {
        return true;
    }
    // Rust std paths: "std::collections::HashMap" vs "std/collections/hash_map";
    // underscores are stripped so snake_case files line up with CamelCase names.
    if normalized_import.starts_with("std::") {
        let std_path = normalized_import.replace("::", "/");
        if normalized_file
            .replace("_", "")
            .contains(&std_path.replace("_", ""))
        {
            return true;
        }
    }
    // Directory import hitting an index-style file:
    // "src/components" matches "src/components/index.js".
    if is_index_file(file_path) {
        let directory = std::path::Path::new(file_path)
            .parent()
            .and_then(|p| p.to_str())
            .unwrap_or("");
        let normalized_dir = normalize_for_matching(directory);
        if normalized_import == normalized_dir {
            return true;
        }
    }
    false
}
/// Lowercases a path and strips the parts that don't matter for matching:
/// Windows separators, a leading "./" or "src/", a trailing file extension,
/// and trailing slashes.
fn normalize_for_matching(path: &str) -> String {
    // BUGFIX: normalize separators FIRST so the prefix checks below also work
    // on Windows-style input like ".\\lib" (previously '\\' was replaced only
    // after the "./" / "src/" strips, which therefore never matched).
    let mut normalized = path.to_lowercase().replace('\\', "/");
    if let Some(rest) = normalized.strip_prefix("./") {
        normalized = rest.to_string();
    }
    if let Some(rest) = normalized.strip_prefix("src/") {
        normalized = rest.to_string();
    }
    // BUGFIX: treat a dot as an extension only inside the last segment and
    // not as its first character, so "jquery.min/mod" is no longer truncated
    // to "jquery" and ".hidden" no longer collapses to "".
    let seg_start = normalized.rfind('/').map_or(0, |pos| pos + 1);
    if let Some(dot) = normalized[seg_start..].rfind('.').filter(|&d| d > 0) {
        normalized.truncate(seg_start + dot);
    }
    normalized.trim_end_matches('/').to_string()
}
/// True when `file_path`'s stem names a conventional "entry" module — the
/// kind of file a bare directory import resolves to.
fn is_index_file(file_path: &str) -> bool {
    std::path::Path::new(file_path)
        .file_stem()
        .and_then(|stem| stem.to_str())
        .map_or(false, |stem| {
            matches!(stem, "index" | "__init__" | "mod" | "main")
        })
}
// Unit tests: exercise graph construction, import normalization/matching,
// PageRank, and statistics with an in-memory mock of the ScanResult trait.
#[cfg(test)]
mod tests {
    use super::super::DocumentAnalysis;
    use super::*;
    // Minimal in-memory file record used to drive ImportGraphBuilder.
    #[derive(Debug)]
    struct MockScanResult {
        path: String,
        relative_path: String,
        depth: usize,
        imports: Option<Vec<String>>,
    }
    impl MockScanResult {
        // Builds a mock whose relative path equals its path and whose depth
        // is the number of '/' separators.
        fn new(path: &str, imports: Vec<&str>) -> Self {
            Self {
                path: path.to_string(),
                relative_path: path.to_string(),
                depth: path.matches('/').count(),
                imports: Some(imports.iter().map(|s| s.to_string()).collect()),
            }
        }
    }
    // All classification flags and scores are fixed/neutral; only path,
    // depth, and imports carry real data for these tests.
    impl ScanResult for MockScanResult {
        fn path(&self) -> &str {
            &self.path
        }
        fn relative_path(&self) -> &str {
            &self.relative_path
        }
        fn depth(&self) -> usize {
            self.depth
        }
        fn is_docs(&self) -> bool {
            false
        }
        fn is_readme(&self) -> bool {
            false
        }
        fn is_test(&self) -> bool {
            false
        }
        fn is_entrypoint(&self) -> bool {
            false
        }
        fn has_examples(&self) -> bool {
            false
        }
        fn priority_boost(&self) -> f64 {
            0.0
        }
        fn churn_score(&self) -> f64 {
            0.0
        }
        fn centrality_in(&self) -> f64 {
            0.0
        }
        fn imports(&self) -> Option<&[String]> {
            self.imports.as_deref()
        }
        fn doc_analysis(&self) -> Option<&DocumentAnalysis> {
            None
        }
    }
    // Nodes get sequential indices and edges update both adjacency lists.
    #[test]
    fn test_import_graph_creation() {
        let mut graph = ImportGraph::new();
        let idx1 = graph.add_node("file1.rs".to_string());
        let idx2 = graph.add_node("file2.rs".to_string());
        assert_eq!(idx1, 0);
        assert_eq!(idx2, 1);
        assert_eq!(graph.nodes.len(), 2);
        graph.add_edge(idx1, idx2);
        assert_eq!(graph.dependencies[idx1].len(), 1);
        assert_eq!(graph.dependents[idx2].len(), 1);
    }
    // End-to-end: builder resolves intra-project imports into edges.
    #[test]
    fn test_graph_builder() {
        let files = vec![
            MockScanResult::new("src/main.rs", vec!["src/lib.rs", "src/utils.rs"]),
            MockScanResult::new("src/lib.rs", vec!["src/utils.rs"]),
            MockScanResult::new("src/utils.rs", vec![]),
        ];
        let mut builder = ImportGraphBuilder::new().unwrap();
        let result = builder.build_graph(&files);
        assert!(result.is_ok());
        let graph = result.unwrap();
        assert_eq!(graph.nodes.len(), 3);
        let stats = graph.stats();
        assert_eq!(stats.node_count, 3);
        assert!(stats.edge_count > 0);
    }
    // Covers exact, relative, Rust std, and directory/index-file matching.
    #[test]
    fn test_import_matching() {
        assert!(import_matches_file("src/utils", "src/utils.rs"));
        assert!(import_matches_file("./lib", "src/lib.js"));
        assert!(import_matches_file(
            "std::collections::HashMap",
            "std/collections/hash_map.rs"
        ));
        assert!(import_matches_file(
            "src/components",
            "src/components/index.js"
        ));
        assert!(!import_matches_file("completely_different", "src/utils.rs"));
    }
    // Quote stripping, relative-path collapsing, and "@/" alias expansion.
    #[test]
    fn test_path_normalization() {
        let mut builder = ImportGraphBuilder::new().unwrap();
        let normalized1 = builder.normalize_import_path("\"./utils.js\"");
        assert_eq!(normalized1, "utils");
        let normalized2 = builder.normalize_import_path("../lib/helper.ts");
        assert!(normalized2.contains("helper"));
        let normalized3 = builder.normalize_import_path("@/components/Button");
        assert!(normalized3.contains("src/components/Button"));
    }
    // In A -> B -> C with A -> C, the sink C must outrank its ancestors.
    #[test]
    fn test_pagerank_calculation() {
        let mut graph = ImportGraph::new();
        let idx_a = graph.add_node("A".to_string());
        let idx_b = graph.add_node("B".to_string());
        let idx_c = graph.add_node("C".to_string());
        graph.add_edge(idx_a, idx_b);
        graph.add_edge(idx_b, idx_c);
        graph.add_edge(idx_a, idx_c);
        let scores = graph.get_pagerank_scores();
        assert!(scores.is_ok());
        let scores = scores.unwrap();
        assert_eq!(scores.len(), 3);
        assert!(scores[idx_c] > scores[idx_a]);
        assert!(scores[idx_c] > scores[idx_b]);
    }
    // AST-based extraction for both JavaScript and Rust sources.
    #[test]
    fn test_import_extraction() {
        let builder = ImportGraphBuilder::new().unwrap();
        let js_content = r#"
import { Component } from 'react';
import utils from './utils.js';
const fs = require('fs');
"#;
        let imports = builder.extract_imports(js_content, "test.js");
        assert!(imports.len() >= 2);
        assert!(imports.contains(&"react".to_string()));
        assert!(imports.contains(&"./utils.js".to_string()));
        let rust_content = r#"
use std::collections::HashMap;
use crate::utils::helper;
mod tests;
"#;
        let imports = builder.extract_imports(rust_content, "test.rs");
        assert!(imports.len() >= 2);
        assert!(imports.contains(&"std::collections::HashMap".to_string()));
        assert!(imports.contains(&"crate::utils::helper".to_string()));
    }
    // A hub imported by five nodes must outrank any of its importers.
    #[test]
    fn test_centrality_calculator() {
        let mut graph = ImportGraph::new();
        let center = graph.add_node("center.rs".to_string());
        for i in 1..=5 {
            let node = graph.add_node(format!("node{}.rs", i));
            graph.add_edge(node, center);
        }
        let calculator = CentralityCalculator::default();
        let scores = calculator.calculate_pagerank(&graph);
        assert!(scores.is_ok());
        let scores = scores.unwrap();
        assert_eq!(scores.len(), 6);
        assert!(scores[center] > scores[1]);
    }
    // Counts, averages, and density on a small hand-built graph.
    #[test]
    fn test_graph_statistics() {
        let mut graph = ImportGraph::new();
        for i in 0..5 {
            graph.add_node(format!("file{}.rs", i));
        }
        graph.add_edge(0, 1);
        graph.add_edge(1, 2);
        graph.add_edge(2, 3);
        graph.add_edge(0, 4);
        let stats = graph.stats();
        assert_eq!(stats.node_count, 5);
        assert_eq!(stats.edge_count, 4);
        assert!(stats.avg_out_degree > 0.0);
        assert!(stats.density > 0.0);
        assert!(stats.density < 1.0);
    }
}