scribe_scaling/
engine.rs

1//! Main scaling engine that integrates all optimization components.
2//!
3//! This module provides the primary interface for the scaling system,
4//! coordinating streaming, caching, parallel processing, adaptive configuration,
5//! and signature extraction for optimal performance at scale.
6
7use std::path::Path;
8use std::sync::Arc;
9use std::time::{Duration, Instant};
10use std::collections::HashMap;
11
12use serde::{Deserialize, Serialize};
13use tracing::{debug, info, warn, error};
14
15use crate::error::{ScalingResult, ScalingError};
16use crate::streaming::{StreamingConfig, FileMetadata};
17use crate::caching::CacheConfig;
18use crate::parallel::ParallelConfig;
19use crate::adaptive::AdaptiveConfig;
20use crate::signatures::SignatureConfig;
21use crate::memory::MemoryConfig;
22use crate::metrics::{ScalingMetrics, BenchmarkResult};
23
24/// Complete scaling configuration combining all subsystems
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct ScalingConfig {
27    pub streaming: StreamingConfig,
28    pub caching: CacheConfig,
29    pub parallel: ParallelConfig,
30    pub adaptive: AdaptiveConfig,
31    pub signatures: SignatureConfig,
32    pub memory: MemoryConfig,
33    
34    /// Token budget for intelligent selection (0 = unlimited)
35    pub token_budget: Option<usize>,
36    
37    /// Enable intelligent file selection before processing
38    pub enable_intelligent_selection: bool,
39    
40    /// Selection algorithm to use when intelligent selection is enabled
41    pub selection_algorithm: Option<String>,
42    
43    /// Enable context positioning optimization (HEAD/MIDDLE/TAIL)
44    pub enable_context_positioning: bool,
45    
46    /// Query for context positioning (affects HEAD section)
47    pub positioning_query: Option<String>,
48}
49
50impl Default for ScalingConfig {
51    fn default() -> Self {
52        Self {
53            streaming: StreamingConfig::default(),
54            caching: CacheConfig::default(),
55            parallel: ParallelConfig::default(),
56            adaptive: AdaptiveConfig::default(),
57            signatures: SignatureConfig::default(),
58            memory: MemoryConfig::default(),
59            token_budget: None, // Unlimited by default
60            enable_intelligent_selection: false, // Off by default for backward compatibility
61            selection_algorithm: None, // Will use V5Integrated when enabled
62            enable_context_positioning: false, // Off by default for backward compatibility
63            positioning_query: None, // No query by default
64        }
65    }
66}
67
68impl ScalingConfig {
69    /// Create configuration optimized for small repositories
70    pub fn small_repository() -> Self {
71        Self {
72            streaming: StreamingConfig {
73                enable_streaming: false,
74                chunk_size: 1000,
75                memory_limit: 50 * 1024 * 1024, // 50MB
76            },
77            parallel: ParallelConfig {
78                max_concurrent_tasks: 2,
79                async_worker_count: 1,
80                cpu_worker_count: 1,
81                task_timeout: Duration::from_secs(10),
82                enable_work_stealing: false,
83            },
84            token_budget: Some(8000), // Reasonable default for small repos
85            enable_intelligent_selection: true,
86            selection_algorithm: Some("v2_quotas".to_string()),
87            ..Default::default()
88        }
89    }
90
91    /// Create configuration optimized for large repositories
92    pub fn large_repository() -> Self {
93        Self {
94            streaming: StreamingConfig {
95                enable_streaming: true,
96                chunk_size: 100,
97                memory_limit: 500 * 1024 * 1024, // 500MB
98            },
99            parallel: ParallelConfig {
100                max_concurrent_tasks: 16,
101                async_worker_count: 8,
102                cpu_worker_count: 8,
103                task_timeout: Duration::from_secs(60),
104                enable_work_stealing: true,
105            },
106            token_budget: Some(15000), // Larger budget for complex repositories
107            enable_intelligent_selection: true,
108            selection_algorithm: Some("v5_integrated".to_string()),
109            ..Default::default()
110        }
111    }
112    
113    /// Create configuration with specific token budget
114    pub fn with_token_budget(token_budget: usize) -> Self {
115        Self {
116            token_budget: Some(token_budget),
117            enable_intelligent_selection: true,
118            selection_algorithm: Some(match token_budget {
119                0..=2000 => "v2_quotas",
120                2001..=15000 => "v5_integrated",
121                _ => "v5_integrated"
122            }.to_string()),
123            ..Self::default()
124        }
125    }
126}
127
128/// Repository processing results
129#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct ProcessingResult {
131    /// List of processed files with metadata
132    pub files: Vec<FileMetadata>,
133    
134    /// Total number of files processed
135    pub total_files: usize,
136    
137    /// Processing time
138    pub processing_time: Duration,
139    
140    /// Peak memory usage in bytes
141    pub memory_peak: usize,
142    
143    /// Cache hit count
144    pub cache_hits: u64,
145    
146    /// Cache miss count
147    pub cache_misses: u64,
148    
149    /// Additional metrics
150    pub metrics: ScalingMetrics,
151}
152
153/// Main scaling engine
154pub struct ScalingEngine {
155    config: ScalingConfig,
156    started_at: Option<Instant>,
157}
158
159impl ScalingEngine {
160    /// Create a new scaling engine with the given configuration
161    pub async fn new(config: ScalingConfig) -> ScalingResult<Self> {
162        info!("Initializing scaling engine with configuration: {:?}", config);
163        
164        Ok(Self {
165            config,
166            started_at: None,
167        })
168    }
169
170    /// Create a scaling engine with default configuration
171    pub async fn with_defaults() -> ScalingResult<Self> {
172        Self::new(ScalingConfig::default()).await
173    }
174    
175    /// Create a scaling engine with the given configuration
176    pub fn with_config(config: ScalingConfig) -> Self {
177        Self {
178            config,
179            started_at: None,
180        }
181    }
182
183    /// Process a repository with scaling optimizations
184    pub async fn process_repository(&mut self, path: &Path) -> ScalingResult<ProcessingResult> {
185        let start_time = Instant::now();
186        self.started_at = Some(start_time);
187        
188        info!("Processing repository: {:?}", path);
189        
190        if !path.exists() {
191            return Err(ScalingError::path("Repository path does not exist", path));
192        }
193
194        if !path.is_dir() {
195            return Err(ScalingError::path("Repository path is not a directory", path));
196        }
197
198        // Check if intelligent selection is enabled
199        if self.config.enable_intelligent_selection && self.config.token_budget.is_some() {
200            return self.process_with_intelligent_selection(path).await;
201        }
202
203        // Basic file discovery using walkdir
204        let mut files = Vec::new();
205        let mut total_size = 0u64;
206        
207        for entry in walkdir::WalkDir::new(path)
208            .follow_links(false)
209            .max_depth(10) 
210        {
211            match entry {
212                Ok(entry) => {
213                    if entry.file_type().is_file() {
214                        let (size, modified) = match entry.metadata() {
215                            Ok(metadata) => {
216                                let size = metadata.len();
217                                let modified = metadata.modified().unwrap_or_else(|_| std::time::SystemTime::now());
218                                (size, modified)
219                            }
220                            Err(_) => (0, std::time::SystemTime::now()),
221                        };
222                        
223                        let metadata = FileMetadata {
224                            path: entry.path().to_path_buf(),
225                            size,
226                            modified,
227                            language: detect_language(&entry.path()),
228                            file_type: classify_file_type(&entry.path()),
229                        };
230                        
231                        total_size += metadata.size;
232                        files.push(metadata);
233                    }
234                }
235                Err(e) => {
236                    warn!("Skipping file due to error: {}", e);
237                }
238            }
239        }
240
241        let processing_time = start_time.elapsed();
242        
243        info!("Processed {} files in {:?}", files.len(), processing_time);
244        
245        Ok(ProcessingResult {
246            total_files: files.len(),
247            processing_time,
248            memory_peak: estimate_memory_usage(files.len()),
249            cache_hits: 0, // Placeholder
250            cache_misses: files.len() as u64, // All cache misses for now
251            metrics: ScalingMetrics {
252                files_processed: files.len() as u64,
253                total_processing_time: processing_time,
254                memory_peak: estimate_memory_usage(files.len()),
255                cache_hits: 0,
256                cache_misses: files.len() as u64,
257                parallel_efficiency: 1.0, // No parallelism yet
258                streaming_overhead: Duration::from_millis(0),
259            },
260            files,
261        })
262    }
263
264    /// Run performance benchmarks
265    pub async fn benchmark(&mut self, path: &Path, iterations: usize) -> ScalingResult<Vec<BenchmarkResult>> {
266        let mut results = Vec::with_capacity(iterations);
267        
268        for i in 0..iterations {
269            info!("Running benchmark iteration {}/{}", i + 1, iterations);
270            
271            let start = Instant::now();
272            let result = self.process_repository(path).await?;
273            let duration = start.elapsed();
274            
275            let benchmark_result = BenchmarkResult::new(
276                format!("iteration_{}", i + 1),
277                duration,
278                result.memory_peak,
279                result.total_files as f64 / duration.as_secs_f64(),
280                1.0, // 100% success rate
281            );
282            
283            results.push(benchmark_result);
284        }
285        
286        Ok(results)
287    }
288
289    /// Get current configuration
290    pub fn config(&self) -> &ScalingConfig {
291        &self.config
292    }
293    
294    /// Process repository with intelligent selection enabled
295    async fn process_with_intelligent_selection(&self, path: &Path) -> ScalingResult<ProcessingResult> {
296        info!("Processing repository with intelligent selection enabled");
297        
298        // Create ScalingSelector with the configured token budget
299        let token_budget = self.config.token_budget.unwrap_or(8000);
300        let mut selector = crate::selector::ScalingSelector::with_token_budget(token_budget);
301        
302        // Execute intelligent selection and processing
303        let selection_result = selector.select_and_process(path).await?;
304        
305        info!("Intelligent selection completed: {} files selected, {:.1}% token utilization", 
306              selection_result.selected_files.len(), 
307              selection_result.token_utilization * 100.0);
308        
309        // Return the processing result from the selector
310        Ok(selection_result.processing_result)
311    }
312    
313    /// Check if engine is ready for processing
314    pub fn is_ready(&self) -> bool {
315        true // Always ready in this simple implementation
316    }
317}
318
/// Maps a path's file extension to a human-readable language name.
///
/// Matching is exact and case-sensitive. Paths with no extension, a
/// non-UTF-8 extension, or an unrecognized one yield `"Unknown"`.
fn detect_language(path: &Path) -> String {
    let extension = path.extension().and_then(|ext| ext.to_str());

    let language = match extension {
        Some("rs") => "Rust",
        Some("py") => "Python",
        Some("js") => "JavaScript",
        Some("ts") => "TypeScript",
        Some("go") => "Go",
        Some("java") => "Java",
        Some("cpp" | "cc" | "cxx") => "C++",
        Some("c") => "C",
        Some("h") => "Header",
        Some("md") => "Markdown",
        Some("json") => "JSON",
        Some("yaml" | "yml") => "YAML",
        Some("toml") => "TOML",
        _ => "Unknown",
    };

    language.to_string()
}
338
/// Classifies a path into a coarse file category based on its extension.
///
/// Returns one of `"Source"`, `"Header"`, `"Documentation"`,
/// `"Configuration"`, `"Image"`, or `"Other"`. The extension is compared
/// ASCII case-insensitively, so `photo.JPG` and `README.MD` are classified the
/// same as their lowercase forms. Paths with no extension, a non-UTF-8
/// extension, or an unrecognized one yield `"Other"`.
fn classify_file_type(path: &Path) -> String {
    // Normalize case once so the match arms can stay lowercase.
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_ascii_lowercase);

    let category = match extension.as_deref() {
        Some("rs" | "py" | "js" | "ts" | "go" | "java" | "cpp" | "cc" | "cxx" | "c") => "Source",
        Some("h" | "hpp" | "hxx") => "Header",
        Some("md" | "txt" | "rst") => "Documentation",
        Some("json" | "yaml" | "yml" | "toml" | "ini" | "cfg") => "Configuration",
        Some("png" | "jpg" | "jpeg" | "gif" | "svg") => "Image",
        _ => "Other",
    };

    category.to_string()
}
350
/// Estimates the memory, in bytes, needed to hold metadata for `file_count`
/// files.
///
/// Uses a rough heuristic of about 1 KiB per file. The multiplication
/// saturates at `usize::MAX` instead of overflowing for very large counts.
fn estimate_memory_usage(file_count: usize) -> usize {
    // Rough per-file metadata footprint, in bytes.
    const BYTES_PER_FILE: usize = 1024;

    file_count.saturating_mul(BYTES_PER_FILE)
}
356
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// The default constructor succeeds.
    #[tokio::test]
    async fn test_scaling_engine_creation() {
        let engine = ScalingEngine::with_defaults().await;
        assert!(engine.is_ok());
    }

    /// Basic discovery finds the files written to a temporary repository and
    /// reports consistent counts.
    #[tokio::test]
    async fn test_repository_processing() {
        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        // Create test files
        fs::write(repo_path.join("main.rs"), "fn main() {}").unwrap();
        fs::write(repo_path.join("lib.rs"), "pub fn test() {}").unwrap();

        let mut engine = ScalingEngine::with_defaults().await.unwrap();
        let result = engine.process_repository(repo_path).await.unwrap();

        assert!(result.total_files >= 2);
        assert_eq!(result.files.len(), result.total_files);
        assert_eq!(result.cache_misses, result.total_files as u64);
        assert!(result.processing_time.as_nanos() > 0);
        assert!(result.memory_peak > 0);
    }

    /// The size presets set the streaming and parallelism knobs as documented.
    /// No awaiting is involved, so this is a plain synchronous test.
    #[test]
    fn test_configuration_presets() {
        let small_config = ScalingConfig::small_repository();
        assert!(!small_config.streaming.enable_streaming);
        assert_eq!(small_config.parallel.max_concurrent_tasks, 2);

        let large_config = ScalingConfig::large_repository();
        assert!(large_config.streaming.enable_streaming);
        assert!(large_config.parallel.max_concurrent_tasks >= 16);
    }

    /// `with_token_budget` switches algorithm at the 2000-token boundary.
    #[test]
    fn test_token_budget_algorithm_selection() {
        let small = ScalingConfig::with_token_budget(2000);
        assert_eq!(small.token_budget, Some(2000));
        assert!(small.enable_intelligent_selection);
        assert_eq!(small.selection_algorithm.as_deref(), Some("v2_quotas"));

        let larger = ScalingConfig::with_token_budget(2001);
        assert_eq!(larger.token_budget, Some(2001));
        assert_eq!(larger.selection_algorithm.as_deref(), Some("v5_integrated"));
    }

    /// A path that does not exist is rejected.
    #[tokio::test]
    async fn test_error_handling() {
        let mut engine = ScalingEngine::with_defaults().await.unwrap();
        let non_existent_path = Path::new("/non/existent/path");

        let result = engine.process_repository(non_existent_path).await;
        assert!(result.is_err());
    }

    /// A path that exists but is a regular file is rejected.
    #[tokio::test]
    async fn test_rejects_non_directory_path() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("not_a_dir.rs");
        fs::write(&file_path, "fn main() {}").unwrap();

        let mut engine = ScalingEngine::with_defaults().await.unwrap();
        let result = engine.process_repository(&file_path).await;
        assert!(result.is_err());
    }

    /// Benchmarking yields one result per iteration with sane values.
    #[tokio::test]
    async fn test_benchmarking() {
        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        // Create test file
        fs::write(repo_path.join("test.rs"), "fn test() {}").unwrap();

        let mut engine = ScalingEngine::with_defaults().await.unwrap();
        let results = engine.benchmark(repo_path, 3).await.unwrap();

        assert_eq!(results.len(), 3);
        for result in results {
            assert!(result.duration.as_nanos() > 0);
            assert!(result.throughput > 0.0);
            assert_eq!(result.success_rate, 1.0);
        }
    }

    /// Known extensions map to language names; everything else is "Unknown".
    #[test]
    fn test_language_detection() {
        assert_eq!(detect_language(Path::new("test.rs")), "Rust");
        assert_eq!(detect_language(Path::new("test.py")), "Python");
        assert_eq!(detect_language(Path::new("test.yml")), "YAML");
        assert_eq!(detect_language(Path::new("test.unknown")), "Unknown");
        assert_eq!(detect_language(Path::new("Makefile")), "Unknown");
    }

    /// Known extensions map to file categories; everything else is "Other".
    #[test]
    fn test_file_type_classification() {
        assert_eq!(classify_file_type(Path::new("main.rs")), "Source");
        assert_eq!(classify_file_type(Path::new("README.md")), "Documentation");
        assert_eq!(classify_file_type(Path::new("config.json")), "Configuration");
        assert_eq!(classify_file_type(Path::new("logo.png")), "Image");
        assert_eq!(classify_file_type(Path::new("Makefile")), "Other");
    }
}