// scribe_scaling/lib.rs
1#![allow(dead_code, unused_imports, unused_variables, unused_mut)]
2
3//! # Scribe Scaling
4//!
5//! Advanced scaling optimizations for handling large repositories (10k-100k+ files) efficiently.
6//! This crate implements progressive loading, intelligent caching, parallel processing, and
7//! adaptive threshold management for optimal performance at scale.
8//!
9//! ## Core Features
10//!
11//! - **Progressive Loading**: Metadata-first streaming architecture that avoids loading all files into memory
12//! - **Intelligent Caching**: Persistent caching with signature-based invalidation
13//! - **Parallel Processing**: Async/parallel pipeline with backpressure management
14//! - **Dynamic Thresholds**: Repository-aware adaptive configuration
15//! - **Advanced Signatures**: Multi-level signature extraction with budget pressure adaptation
16//! - **Repository Profiling**: Automatic detection of repo type and optimal configuration
17//!
18//! ## Performance Targets
19//!
20//! - Small repos (≤1k files): <1s selection, <50MB memory
21//! - Medium repos (1k-10k files): <5s selection, <200MB memory  
22//! - Large repos (10k-100k files): <15s selection, <1GB memory
23//! - Enterprise repos (100k+ files): <30s selection, <2GB memory
24//!
25//! ## Architecture
26//!
27//! The scaling system is built around a streaming, metadata-first approach:
28//!
29//! ```text
30//! Repository Discovery → Metadata Stream → Filtered Stream → Analysis Pipeline → Selection
31//!       ↓                     ↓                ↓                   ↓             ↓
32//!   Fast scanning      Lightweight load    Smart filtering   Parallel work   Optimized result
33//! ```
34
35pub mod adaptive;
36pub mod caching;
37pub mod error;
38pub mod memory;
39pub mod metrics;
40pub mod parallel;
41pub mod profiling;
42pub mod signatures;
43pub mod streaming;
44
45// Context positioning optimization
46pub mod positioning;
47
48// Core scaling engine
49pub mod engine;
50
51// Intelligent scaling selector
52pub mod selector;
53
54// Re-export main types
55pub use adaptive::AdaptiveConfig;
56pub use caching::CacheConfig;
57pub use engine::{ProcessingResult, ScalingConfig, ScalingEngine};
58pub use memory::{MemoryConfig, MemoryStats};
59pub use metrics::{BenchmarkResult, ScalingMetrics};
60pub use parallel::ParallelConfig;
61pub use positioning::{
62    ContextPositioner, ContextPositioning, ContextPositioningConfig, PositionedSelection,
63};
64pub use profiling::{RepositoryProfile, RepositoryProfiler, RepositoryType};
65pub use selector::{
66    ScalingSelectionConfig, ScalingSelectionResult, ScalingSelector, SelectionAlgorithm,
67};
68pub use signatures::{SignatureConfig, SignatureLevel};
69pub use streaming::{FileChunk, FileMetadata, StreamingConfig};
70
71// Re-export error types
72pub use error::{ScalingError, ScalingResult};
73
74/// Current version of the scaling crate
75pub const VERSION: &str = env!("CARGO_PKG_VERSION");
76
77/// Default scaling configuration optimized for most repositories
78pub fn default_scaling_config() -> ScalingConfig {
79    ScalingConfig::default()
80}
81
82/// Create a scaling engine with automatic repository profiling
83pub async fn create_scaling_engine<P: AsRef<std::path::Path>>(
84    repo_path: P,
85) -> ScalingResult<ScalingEngine> {
86    let profiler = RepositoryProfiler::new();
87    let profile = profiler.profile_repository(repo_path.as_ref()).await?;
88    let config = profile.to_scaling_config();
89
90    Ok(ScalingEngine::with_config(config))
91}
92
93/// Quick scaling analysis for immediate performance estimates
94pub async fn quick_scale_estimate<P: AsRef<std::path::Path>>(
95    repo_path: P,
96) -> ScalingResult<(usize, std::time::Duration, usize)> {
97    let profiler = RepositoryProfiler::new();
98    let (file_count, estimated_duration, memory_usage) =
99        profiler.quick_estimate(repo_path.as_ref()).await?;
100    Ok((file_count, estimated_duration, memory_usage))
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Engine creation via automatic profiling should succeed on a small,
    /// real directory and yield a ready engine.
    #[tokio::test]
    async fn test_scaling_engine_creation() {
        let temp_dir = TempDir::new().unwrap();

        // A couple of source files so the profiler has something to scan.
        fs::write(temp_dir.path().join("test.rs"), "fn main() {}").unwrap();
        fs::write(temp_dir.path().join("lib.rs"), "pub fn test() {}").unwrap();

        let engine = create_scaling_engine(temp_dir.path()).await.unwrap();
        assert!(engine.is_ready());
    }

    /// The quick estimate must count every file we created and report
    /// non-trivial duration/memory figures.
    #[tokio::test]
    async fn test_quick_scale_estimate() {
        let temp_dir = TempDir::new().unwrap();

        for i in 0..10 {
            fs::write(
                temp_dir.path().join(format!("file_{}.rs", i)),
                "// test file",
            )
            .unwrap();
        }

        let (file_count, duration, memory) = quick_scale_estimate(temp_dir.path()).await.unwrap();
        assert!(file_count >= 10);
        // NOTE(review): assumes the estimated duration is always >= 1ms for a
        // 10-file repo; could be flaky if the estimator reports sub-millisecond
        // values — confirm against RepositoryProfiler::quick_estimate.
        assert!(duration.as_millis() > 0);
        assert!(memory > 0);
    }

    /// The default configuration must be internally sane: non-zero
    /// concurrency/heap/task limits and the persistent cache enabled.
    #[test]
    fn test_default_config() {
        let config = default_scaling_config();
        assert!(config.streaming.concurrency_limit > 0);
        assert!(config.streaming.selection_heap_size > 0);
        assert!(config.caching.enable_persistent_cache);
        assert!(config.parallel.max_concurrent_tasks > 0);
    }
}