scribe_scaling/
lib.rs

1//! # Scribe Scaling
2//!
3//! Advanced scaling optimizations for handling large repositories (10k-100k+ files) efficiently.
4//! This crate implements progressive loading, intelligent caching, parallel processing, and
5//! adaptive threshold management for optimal performance at scale.
6//!
7//! ## Core Features
8//!
9//! - **Progressive Loading**: Metadata-first streaming architecture that avoids loading all files into memory
10//! - **Intelligent Caching**: Persistent caching with signature-based invalidation
11//! - **Parallel Processing**: Async/parallel pipeline with backpressure management
12//! - **Dynamic Thresholds**: Repository-aware adaptive configuration
13//! - **Advanced Signatures**: Multi-level signature extraction with budget pressure adaptation
14//! - **Repository Profiling**: Automatic detection of repo type and optimal configuration
15//!
16//! ## Performance Targets
17//!
18//! - Small repos (≤1k files): <1s selection, <50MB memory
19//! - Medium repos (1k-10k files): <5s selection, <200MB memory  
20//! - Large repos (10k-100k files): <15s selection, <1GB memory
21//! - Enterprise repos (100k+ files): <30s selection, <2GB memory
22//!
23//! ## Architecture
24//!
25//! The scaling system is built around a streaming, metadata-first approach:
26//!
27//! ```text
28//! Repository Discovery → Metadata Stream → Filtered Stream → Analysis Pipeline → Selection
29//!       ↓                     ↓                ↓                   ↓             ↓
30//!   Fast scanning      Lightweight load    Smart filtering   Parallel work   Optimized result
31//! ```
32
33pub mod adaptive;
34pub mod caching;
35pub mod error;
36pub mod memory;
37pub mod metrics;
38pub mod parallel;
39pub mod profiling;
40pub mod signatures;
41pub mod streaming;
42
43// Context positioning optimization
44pub mod positioning;
45
46// Core scaling engine
47pub mod engine;
48
49// Intelligent scaling selector
50pub mod selector;
51
52// Re-export main types
53pub use adaptive::AdaptiveConfig;
54pub use caching::CacheConfig;
55pub use engine::{ProcessingResult, ScalingConfig, ScalingEngine};
56pub use memory::{MemoryConfig, MemoryStats};
57pub use metrics::{BenchmarkResult, ScalingMetrics};
58pub use parallel::ParallelConfig;
59pub use positioning::{
60    ContextPositioner, ContextPositioning, ContextPositioningConfig, PositionedSelection,
61};
62pub use profiling::{RepositoryProfile, RepositoryProfiler, RepositoryType};
63pub use selector::{
64    ScalingSelectionConfig, ScalingSelectionResult, ScalingSelector, SelectionAlgorithm,
65};
66pub use signatures::{SignatureConfig, SignatureLevel};
67pub use streaming::{FileChunk, FileMetadata, StreamingConfig};
68
69// Re-export error types
70pub use error::{ScalingError, ScalingResult};
71
/// Current version of the scaling crate, taken from the crate's
/// `CARGO_PKG_VERSION` (i.e. the version field in `Cargo.toml`) at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
74
75/// Default scaling configuration optimized for most repositories
76pub fn default_scaling_config() -> ScalingConfig {
77    ScalingConfig::default()
78}
79
80/// Create a scaling engine with automatic repository profiling
81pub async fn create_scaling_engine<P: AsRef<std::path::Path>>(
82    repo_path: P,
83) -> ScalingResult<ScalingEngine> {
84    let profiler = RepositoryProfiler::new();
85    let profile = profiler.profile_repository(repo_path.as_ref()).await?;
86    let config = profile.to_scaling_config();
87
88    Ok(ScalingEngine::with_config(config))
89}
90
91/// Quick scaling analysis for immediate performance estimates
92pub async fn quick_scale_estimate<P: AsRef<std::path::Path>>(
93    repo_path: P,
94) -> ScalingResult<(usize, std::time::Duration, usize)> {
95    let profiler = RepositoryProfiler::new();
96    let (file_count, estimated_duration, memory_usage) =
97        profiler.quick_estimate(repo_path.as_ref()).await?;
98    Ok((file_count, estimated_duration, memory_usage))
99}
100
101#[cfg(test)]
102mod tests {
103    use super::*;
104    use std::fs;
105    use tempfile::TempDir;
106
107    #[tokio::test]
108    async fn test_scaling_engine_creation() {
109        let temp_dir = TempDir::new().unwrap();
110
111        // Create some test files
112        fs::write(temp_dir.path().join("test.rs"), "fn main() {}").unwrap();
113        fs::write(temp_dir.path().join("lib.rs"), "pub fn test() {}").unwrap();
114
115        let engine = create_scaling_engine(temp_dir.path()).await.unwrap();
116        assert!(engine.is_ready());
117    }
118
119    #[tokio::test]
120    async fn test_quick_scale_estimate() {
121        let temp_dir = TempDir::new().unwrap();
122
123        // Create test files
124        for i in 0..10 {
125            fs::write(
126                temp_dir.path().join(format!("file_{}.rs", i)),
127                "// test file",
128            )
129            .unwrap();
130        }
131
132        let (file_count, duration, memory) = quick_scale_estimate(temp_dir.path()).await.unwrap();
133        assert!(file_count >= 10);
134        assert!(duration.as_millis() > 0);
135        assert!(memory > 0);
136    }
137
138    #[test]
139    fn test_default_config() {
140        let config = default_scaling_config();
141        assert!(config.streaming.concurrency_limit > 0);
142        assert!(config.streaming.selection_heap_size > 0);
143        assert!(config.caching.enable_persistent_cache);
144        assert!(config.parallel.max_concurrent_tasks > 0);
145    }
146}