// scribe_scaling/lib.rs

1//! # Scribe Scaling
2//! 
3//! Advanced scaling optimizations for handling large repositories (10k-100k+ files) efficiently.
4//! This crate implements progressive loading, intelligent caching, parallel processing, and
5//! adaptive threshold management for optimal performance at scale.
6//!
7//! ## Core Features
8//!
9//! - **Progressive Loading**: Metadata-first streaming architecture that avoids loading all files into memory
10//! - **Intelligent Caching**: Persistent caching with signature-based invalidation
11//! - **Parallel Processing**: Async/parallel pipeline with backpressure management
12//! - **Dynamic Thresholds**: Repository-aware adaptive configuration
13//! - **Advanced Signatures**: Multi-level signature extraction with budget pressure adaptation
14//! - **Repository Profiling**: Automatic detection of repo type and optimal configuration
15//!
16//! ## Performance Targets
17//!
18//! - Small repos (≤1k files): <1s selection, <50MB memory
19//! - Medium repos (1k-10k files): <5s selection, <200MB memory  
20//! - Large repos (10k-100k files): <15s selection, <1GB memory
21//! - Enterprise repos (100k+ files): <30s selection, <2GB memory
22//!
23//! ## Architecture
24//!
25//! The scaling system is built around a streaming, metadata-first approach:
26//!
27//! ```text
28//! Repository Discovery → Metadata Stream → Filtered Stream → Analysis Pipeline → Selection
29//!       ↓                     ↓                ↓                   ↓             ↓
30//!   Fast scanning      Lightweight load    Smart filtering   Parallel work   Optimized result
31//! ```
32
33pub mod error;
34pub mod streaming;
35pub mod caching;
36pub mod parallel;
37pub mod adaptive;
38pub mod signatures;
39pub mod profiling;
40pub mod memory;
41pub mod metrics;
42
43// Context positioning optimization
44pub mod positioning;
45
46// Core scaling engine
47pub mod engine;
48
49// Intelligent scaling selector
50pub mod selector;
51
52// Re-export main types
53pub use engine::{ScalingEngine, ScalingConfig, ProcessingResult};
54pub use selector::{ScalingSelector, ScalingSelectionConfig, ScalingSelectionResult, SelectionAlgorithm};
55pub use positioning::{ContextPositioner, ContextPositioningConfig, PositionedSelection, ContextPositioning};
56pub use streaming::{StreamingConfig, FileMetadata, FileChunk};
57pub use caching::CacheConfig;
58pub use parallel::ParallelConfig;
59pub use adaptive::AdaptiveConfig;
60pub use signatures::{SignatureLevel, SignatureConfig};
61pub use profiling::{RepositoryProfiler, RepositoryProfile, RepositoryType};
62pub use memory::{MemoryConfig, MemoryStats};
63pub use metrics::{ScalingMetrics, BenchmarkResult};
64
65// Re-export error types
66pub use error::{ScalingError, ScalingResult};
67
/// Current version of the scaling crate, captured at compile time from
/// `Cargo.toml` via the `CARGO_PKG_VERSION` environment variable.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
70
71/// Default scaling configuration optimized for most repositories
72pub fn default_scaling_config() -> ScalingConfig {
73    ScalingConfig::default()
74}
75
76/// Create a scaling engine with automatic repository profiling
77pub async fn create_scaling_engine<P: AsRef<std::path::Path>>(
78    repo_path: P,
79) -> ScalingResult<ScalingEngine> {
80    let profiler = RepositoryProfiler::new();
81    let profile = profiler.profile_repository(repo_path.as_ref()).await?;
82    let config = profile.to_scaling_config();
83    
84    Ok(ScalingEngine::with_config(config))
85}
86
87/// Quick scaling analysis for immediate performance estimates
88pub async fn quick_scale_estimate<P: AsRef<std::path::Path>>(
89    repo_path: P,
90) -> ScalingResult<(usize, std::time::Duration, usize)> {
91    let profiler = RepositoryProfiler::new();
92    let (file_count, estimated_duration, memory_usage) = profiler.quick_estimate(repo_path.as_ref()).await?;
93    Ok((file_count, estimated_duration, memory_usage))
94}
95
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Engine creation via automatic profiling should succeed on a small repo.
    #[tokio::test]
    async fn test_scaling_engine_creation() {
        let dir = TempDir::new().unwrap();

        // Seed the repository with a couple of Rust source files.
        fs::write(dir.path().join("test.rs"), "fn main() {}").unwrap();
        fs::write(dir.path().join("lib.rs"), "pub fn test() {}").unwrap();

        let engine = create_scaling_engine(dir.path()).await.unwrap();
        assert!(engine.is_ready());
    }

    /// The quick estimate should count the files we created and report
    /// non-zero duration/memory figures.
    #[tokio::test]
    async fn test_quick_scale_estimate() {
        let dir = TempDir::new().unwrap();

        // Seed ten small files so the estimate has something to count.
        for idx in 0..10 {
            let name = format!("file_{}.rs", idx);
            fs::write(dir.path().join(name), "// test file").unwrap();
        }

        let (files, elapsed, mem) = quick_scale_estimate(dir.path()).await.unwrap();
        assert!(files >= 10);
        assert!(elapsed.as_millis() > 0);
        assert!(mem > 0);
    }

    /// Sanity-check a few fields of the out-of-the-box configuration.
    #[test]
    fn test_default_config() {
        let cfg = default_scaling_config();
        assert!(cfg.streaming.chunk_size > 0);
        assert!(cfg.caching.enable_persistent_cache);
        assert!(cfg.parallel.max_concurrent_tasks > 0);
    }
}