scribe_graph/lib.rs
1//! # Scribe Graph - Advanced Code Dependency Analysis
2//!
3//! High-performance graph-based code analysis with PageRank centrality computation.
4//! This crate provides sophisticated tools for understanding code structure, dependency
5//! relationships, and file importance through research-grade graph algorithms.
6//!
7//! ## Key Features
8//!
9//! ### PageRank Centrality Analysis
10//! - **Research-grade PageRank implementation** optimized for code dependency graphs
11//! - **Reverse edge emphasis** (importance flows to imported files)
12//! - **Convergence detection** with configurable precision
13//! - **Multi-language import detection** (Python, JavaScript, TypeScript, Rust, Go, Java)
14//!
15//! ### Graph Construction and Analysis
16//! - **Efficient dependency graph** representation with adjacency lists
17//! - **Comprehensive statistics** (degree distribution, connectivity, structural patterns)
18//! - **Performance optimized** for large codebases (10k+ files)
19//! - **Concurrent processing** support for multi-core systems
20//!
21//! ### Integration with FastPath Heuristics
22//! - **Seamless V2 integration** with existing heuristic scoring system
23//! - **Configurable centrality weighting** in final importance scores
24//! - **Multiple normalization methods** (min-max, z-score, rank-based)
25//! - **Entrypoint boosting** for main/index files
26//!
27//! ## Quick Start
28//!
29//! ```ignore
30//! use scribe_graph::{CentralityCalculator, PageRankConfig};
31//! # use scribe_analysis::heuristics::ScanResult;
32//! # use std::collections::HashMap;
33//! #
34//! # // Mock implementation for documentation
35//! # #[derive(Debug)]
36//! # struct MockScanResult {
37//! # path: String,
38//! # relative_path: String,
39//! # }
40//! #
41//! # impl ScanResult for MockScanResult {
42//! # fn path(&self) -> &str { &self.path }
43//! # fn relative_path(&self) -> &str { &self.relative_path }
44//! # fn depth(&self) -> usize { 1 }
45//! # fn is_docs(&self) -> bool { false }
46//! # fn is_readme(&self) -> bool { false }
47//! # fn is_entrypoint(&self) -> bool { false }
48//! # fn is_examples(&self) -> bool { false }
49//! # fn is_tests(&self) -> bool { false }
50//! # fn priority_boost(&self) -> f64 { 0.0 }
51//! # fn get_documentation_score(&self) -> f64 { 0.0 }
52//! # fn get_file_size(&self) -> usize { 1000 }
53//! # fn get_imports(&self) -> Vec<String> { vec![] }
54//! # fn get_git_churn(&self) -> usize { 0 }
55//! # }
56//! #
57//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
58//! // Create centrality calculator optimized for code analysis
59//! let calculator = CentralityCalculator::for_large_codebases()?;
60//!
61//! // Example scan results (replace with actual scan results)
62//! let scan_results = vec![
63//! MockScanResult { path: "main.rs".to_string(), relative_path: "main.rs".to_string() },
64//! MockScanResult { path: "lib.rs".to_string(), relative_path: "lib.rs".to_string() },
65//! ];
66//! let heuristic_scores = HashMap::new();
67//!
68//! // Calculate PageRank centrality for scan results
69//! let centrality_results = calculator.calculate_centrality(&scan_results)?;
70//!
71//! // Get top files by centrality
72//! let top_files = centrality_results.top_files_by_centrality(10);
73//!
74//! // Integrate with existing heuristic scores
75//! let integrated_scores = calculator.integrate_with_heuristics(
//!     &centrality_results,
77//! &heuristic_scores
78//! )?;
79//! # Ok(())
80//! # }
81//! ```
82//!
83//! ## Performance Characteristics
84//!
85//! - **Memory usage**: ~2MB for 1000-file codebases, ~20MB for 10k+ files
86//! - **Computation time**: ~10ms for small projects, ~100ms for large codebases
87//! - **Convergence**: Typically 8-15 iterations for most dependency graphs
88//! - **Parallel efficiency**: Near-linear speedup on multi-core systems
89
90// Core modules
91pub mod centrality;
92pub mod graph;
93pub mod pagerank;
94pub mod statistics;
95
96// Legacy modules (maintained for compatibility)
97pub mod algorithms;
98pub mod builder;
99pub mod traversal;
100pub mod visualization;
101
102// Primary API exports - PageRank centrality system
103pub use centrality::{
104 CentralityCalculator, CentralityConfig, CentralityResults, ImportDetectionStats,
105 ImportResolutionConfig, IntegrationConfig, IntegrationMetadata, NormalizationMethod,
106};
107
108pub use pagerank::{
109 PageRankComputer, PageRankConfig, PageRankResults, PerformanceMetrics, ScoreStatistics,
110};
111
112pub use graph::{
113 ConcurrentDependencyGraph, DegreeInfo, DependencyGraph, GraphStatistics, NodeMetadata,
114};
115
116pub use statistics::{
117 ConnectivityAnalysis, DegreeDistribution, GraphAnalysisResults, GraphStatisticsAnalyzer,
118 ImportInsights, PerformanceProfile, StatisticsConfig, StructuralPatterns,
119};
120
121// Legacy exports (for backward compatibility)
122pub use algorithms::{GraphAlgorithms, PathFinder};
123pub use builder::{BuildOptions, GraphBuilder};
124pub use graph::DependencyGraph as CodeGraph; // Alias for compatibility
125pub use traversal::{GraphTraversal, TraversalOrder};
126
127use scribe_analysis::{heuristics::ScanResult, AnalysisResult};
128use scribe_core::Result;
129use std::collections::HashMap;
130
131/// Main entry point for PageRank centrality analysis
132///
133/// This is the primary interface for computing PageRank centrality scores
134/// and integrating them with the FastPath heuristic system.
135///
136/// # Examples
137///
138/// ```ignore
139/// use scribe_graph::PageRankAnalysis;
140/// # use scribe_analysis::heuristics::ScanResult;
141/// #
142/// # // Mock implementation for documentation
143/// # #[derive(Debug)]
144/// # struct MockScanResult {
145/// # path: String,
146/// # relative_path: String,
147/// # }
148/// #
149/// # impl ScanResult for MockScanResult {
150/// # fn path(&self) -> &str { &self.path }
151/// # fn relative_path(&self) -> &str { &self.relative_path }
152/// # fn depth(&self) -> usize { 1 }
153/// # fn is_docs(&self) -> bool { false }
154/// # fn is_readme(&self) -> bool { false }
155/// # fn is_entrypoint(&self) -> bool { false }
156/// # fn is_examples(&self) -> bool { false }
157/// # fn is_tests(&self) -> bool { false }
158/// # fn priority_boost(&self) -> f64 { 0.0 }
159/// # fn get_documentation_score(&self) -> f64 { 0.0 }
160/// # fn get_file_size(&self) -> usize { 1000 }
161/// # fn get_imports(&self) -> Vec<String> { vec![] }
162/// # fn get_git_churn(&self) -> usize { 0 }
163/// # }
164/// #
165/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
166/// let analysis = PageRankAnalysis::new()?;
167/// let scan_results = vec![
168/// MockScanResult { path: "main.rs".to_string(), relative_path: "main.rs".to_string() },
169/// MockScanResult { path: "lib.rs".to_string(), relative_path: "lib.rs".to_string() },
170/// ];
171/// let centrality_results = analysis.compute_centrality(&scan_results)?;
172///
173/// // Get files ranked by importance
174/// let top_files = centrality_results.top_files_by_centrality(10);
175///
176/// println!("Top 10 most important files:");
177/// for (file, score) in top_files {
178/// println!(" {}: {:.4}", file, score);
179/// }
180/// # Ok(())
181/// # }
182/// ```
183pub struct PageRankAnalysis {
184 calculator: CentralityCalculator,
185}
186
187impl PageRankAnalysis {
188 /// Create a new PageRank analysis instance with default configuration
189 pub fn new() -> Result<Self> {
190 Ok(Self {
191 calculator: CentralityCalculator::new()?,
192 })
193 }
194
195 /// Create with custom centrality configuration
196 pub fn with_config(config: CentralityConfig) -> Result<Self> {
197 Ok(Self {
198 calculator: CentralityCalculator::with_config(config)?,
199 })
200 }
201
202 /// Create optimized for code dependency analysis
203 pub fn for_code_analysis() -> Result<Self> {
204 Ok(Self {
205 calculator: CentralityCalculator::new()?,
206 })
207 }
208
209 /// Create optimized for large codebases (>5k files)
210 pub fn for_large_codebases() -> Result<Self> {
211 Ok(Self {
212 calculator: CentralityCalculator::for_large_codebases()?,
213 })
214 }
215
216 /// Compute PageRank centrality scores for a collection of files
217 pub fn compute_centrality<T>(&self, scan_results: &[T]) -> Result<CentralityResults>
218 where
219 T: ScanResult + Sync,
220 {
221 self.calculator.calculate_centrality(scan_results)
222 }
223
224 /// Integrate centrality scores with existing heuristic scores
225 ///
226 /// This combines PageRank centrality with FastPath heuristic scores using
227 /// configurable weights. The default configuration uses 15% centrality weight
228 /// and 85% heuristic weight.
229 pub fn integrate_with_heuristics(
230 &self,
231 centrality_results: &CentralityResults,
232 heuristic_scores: &HashMap<String, f64>,
233 ) -> Result<HashMap<String, f64>> {
234 self.calculator
235 .integrate_with_heuristics(centrality_results, heuristic_scores)
236 }
237
238 /// Get a summary of centrality computation results
239 pub fn summarize_results(&self, results: &CentralityResults) -> String {
240 results.summary()
241 }
242}
243
244impl Default for PageRankAnalysis {
245 fn default() -> Self {
246 Self::new().expect("Failed to create PageRankAnalysis")
247 }
248}
249
250/// Legacy GraphAnalysis maintained for backward compatibility
251///
252/// **Note**: For new projects, use `PageRankAnalysis` instead, which provides
253/// the complete PageRank centrality system with performance optimizations.
254pub struct GraphAnalysis {
255 builder: GraphBuilder,
256 algorithms: GraphAlgorithms,
257}
258
259impl GraphAnalysis {
260 /// Create a new graph analysis instance
261 pub fn new() -> Self {
262 Self {
263 builder: GraphBuilder::new(),
264 algorithms: GraphAlgorithms::new(),
265 }
266 }
267
268 /// Build a code graph from analysis results
269 pub async fn build_graph(&self, analysis: &AnalysisResult) -> Result<CodeGraph> {
270 self.builder.build_from_analysis(analysis).await
271 }
272
273 /// Analyze relationships in a code graph
274 pub fn analyze_relationships(&self, graph: &CodeGraph) -> Result<Vec<String>> {
275 self.algorithms.find_dependencies(graph)
276 }
277}
278
279impl Default for GraphAnalysis {
280 fn default() -> Self {
281 Self::new()
282 }
283}
284
285/// Utility functions for PageRank analysis
286pub mod utils {
287 use super::*;
288
289 /// Quick function to compute centrality scores for scan results
290 ///
291 /// This is a convenience function for simple use cases. For more control
292 /// over configuration, use `PageRankAnalysis` directly.
293 pub fn compute_file_centrality<T>(scan_results: &[T]) -> Result<HashMap<String, f64>>
294 where
295 T: ScanResult + Sync,
296 {
297 let analysis = PageRankAnalysis::for_code_analysis()?;
298 let results = analysis.compute_centrality(scan_results)?;
299 Ok(results.pagerank_scores)
300 }
301
302 /// Quick function to get top-K most important files
303 pub fn get_top_important_files<T>(
304 scan_results: &[T],
305 top_k: usize,
306 ) -> Result<Vec<(String, f64)>>
307 where
308 T: ScanResult + Sync,
309 {
310 let analysis = PageRankAnalysis::for_code_analysis()?;
311 let results = analysis.compute_centrality(scan_results)?;
312 Ok(results.top_files_by_centrality(top_k))
313 }
314
315 /// Combine centrality and heuristic scores with default configuration
316 pub fn combine_scores<T>(
317 scan_results: &[T],
318 heuristic_scores: &HashMap<String, f64>,
319 ) -> Result<HashMap<String, f64>>
320 where
321 T: ScanResult + Sync,
322 {
323 let analysis = PageRankAnalysis::for_code_analysis()?;
324 let centrality_results = analysis.compute_centrality(scan_results)?;
325 analysis.integrate_with_heuristics(¢rality_results, heuristic_scores)
326 }
327}
328
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: both construction paths for `GraphAnalysis` complete
    /// without panicking.
    #[test]
    fn test_graph_analysis_creation() {
        // Underscore-prefixed bindings avoid unused-variable warnings; reaching
        // the end of the test without a panic is the success condition.
        let _from_new = GraphAnalysis::new();
        let _from_default = GraphAnalysis::default();
    }
}