use std::path::Path;
use std::sync::Arc;
use std::time::{Duration, Instant};
use std::collections::HashMap;

use serde::{Deserialize, Serialize};
use tracing::{debug, info, warn, error};

use crate::error::{ScalingResult, ScalingError};
use crate::streaming::{StreamingConfig, FileMetadata};
use crate::caching::CacheConfig;
use crate::parallel::ParallelConfig;
use crate::adaptive::AdaptiveConfig;
use crate::signatures::SignatureConfig;
use crate::memory::MemoryConfig;
use crate::metrics::{ScalingMetrics, BenchmarkResult};

#[derive(Debug, Clone, Serialize, Deserialize)]
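/// Top-level configuration for the scaling engine, combining the per-subsystem
/// configs with the settings that control intelligent selection and context
/// positioning.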
pub struct ScalingConfig {
    pub streaming: StreamingConfig,
    pub caching: CacheConfig,
    pub parallel: ParallelConfig,
    pub adaptive: AdaptiveConfig,
    pub signatures: SignatureConfig,
    pub memory: MemoryConfig,

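    /// Optional token budget; when set together with
    /// `enable_intelligent_selection`, repository processing goes through the
    /// intelligent selection path.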
    pub token_budget: Option<usize>,

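    /// Whether intelligent file selection is enabled.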
    pub enable_intelligent_selection: bool,

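    /// Selection algorithm to use, e.g. `"v2_quotas"` or `"v5_integrated"`.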
    pub selection_algorithm: Option<String>,

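    /// Whether context positioning of selected content is enabled.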
    pub enable_context_positioning: bool,

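    /// Optional query used to guide context positioning.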
    pub positioning_query: Option<String>,
}

impl Default for ScalingConfig {
    fn default() -> Self {
        Self {
            streaming: StreamingConfig::default(),
            caching: CacheConfig::default(),
            parallel: ParallelConfig::default(),
            adaptive: AdaptiveConfig::default(),
            signatures: SignatureConfig::default(),
            memory: MemoryConfig::default(),
            token_budget: None,
            enable_intelligent_selection: false,
            selection_algorithm: None,
            enable_context_positioning: false,
            positioning_query: None,
        }
    }
}

impl ScalingConfig {
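    /// Preset tuned for small repositories: streaming disabled, minimal
    /// parallelism, and a conservative token budget.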
    pub fn small_repository() -> Self {
        Self {
            streaming: StreamingConfig {
                enable_streaming: false,
                chunk_size: 1000,
                memory_limit: 50 * 1024 * 1024,
            },
            parallel: ParallelConfig {
                max_concurrent_tasks: 2,
                async_worker_count: 1,
                cpu_worker_count: 1,
                task_timeout: Duration::from_secs(10),
                enable_work_stealing: false,
            },
            token_budget: Some(8000),
            enable_intelligent_selection: true,
            selection_algorithm: Some("v2_quotas".to_string()),
            ..Default::default()
        }
    }

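    /// Preset tuned for large repositories: streaming enabled, aggressive
    /// parallelism with work stealing, and a larger token budget.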
    pub fn large_repository() -> Self {
        Self {
            streaming: StreamingConfig {
                enable_streaming: true,
                chunk_size: 100,
                memory_limit: 500 * 1024 * 1024,
            },
            parallel: ParallelConfig {
                max_concurrent_tasks: 16,
                async_worker_count: 8,
                cpu_worker_count: 8,
                task_timeout: Duration::from_secs(60),
                enable_work_stealing: true,
            },
            token_budget: Some(15000),
            enable_intelligent_selection: true,
            selection_algorithm: Some("v5_integrated".to_string()),
            ..Default::default()
        }
    }

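    /// Builds a configuration around a specific token budget, picking a
    /// selection algorithm based on the budget size.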
    pub fn with_token_budget(token_budget: usize) -> Self {
        Self {
            token_budget: Some(token_budget),
            enable_intelligent_selection: true,
            selection_algorithm: Some(match token_budget {
                0..=2000 => "v2_quotas",
                _ => "v5_integrated",
            }.to_string()),
            ..Self::default()
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
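/// Summary of a repository processing run: the discovered files plus timing,
/// memory, cache, and scaling metrics.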
pub struct ProcessingResult {
    pub files: Vec<FileMetadata>,

    pub total_files: usize,

    pub processing_time: Duration,

    pub memory_peak: usize,

    pub cache_hits: u64,

    pub cache_misses: u64,

    pub metrics: ScalingMetrics,
}

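/// Engine that processes repositories according to a [`ScalingConfig`].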
pub struct ScalingEngine {
    config: ScalingConfig,
    started_at: Option<Instant>,
}

impl ScalingEngine {
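    /// Creates a new engine with the given configuration.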
    pub async fn new(config: ScalingConfig) -> ScalingResult<Self> {
        info!("Initializing scaling engine with configuration: {:?}", config);

        Ok(Self {
            config,
            started_at: None,
        })
    }

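    /// Creates a new engine with the default configuration.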
    pub async fn with_defaults() -> ScalingResult<Self> {
        Self::new(ScalingConfig::default()).await
    }

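    /// Creates an engine synchronously from an existing configuration.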
    pub fn with_config(config: ScalingConfig) -> Self {
        Self {
            config,
            started_at: None,
        }
    }

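    /// Processes the repository at `path`, walking its files and collecting
    /// metadata. Delegates to intelligent selection when it is enabled and a
    /// token budget is configured.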
    pub async fn process_repository(&mut self, path: &Path) -> ScalingResult<ProcessingResult> {
        let start_time = Instant::now();
        self.started_at = Some(start_time);

        info!("Processing repository: {:?}", path);

        if !path.exists() {
            return Err(ScalingError::path("Repository path does not exist", path));
        }

        if !path.is_dir() {
            return Err(ScalingError::path("Repository path is not a directory", path));
        }

        if self.config.enable_intelligent_selection && self.config.token_budget.is_some() {
            return self.process_with_intelligent_selection(path).await;
        }

        let mut files = Vec::new();
        let mut total_size = 0u64;

        for entry in walkdir::WalkDir::new(path)
            .follow_links(false)
            .max_depth(10)
        {
            match entry {
                Ok(entry) => {
                    if entry.file_type().is_file() {
                        let (size, modified) = match entry.metadata() {
                            Ok(metadata) => {
                                let size = metadata.len();
                                let modified = metadata
                                    .modified()
                                    .unwrap_or_else(|_| std::time::SystemTime::now());
                                (size, modified)
                            }
                            Err(_) => (0, std::time::SystemTime::now()),
                        };

                        let metadata = FileMetadata {
                            path: entry.path().to_path_buf(),
                            size,
                            modified,
                            language: detect_language(entry.path()),
                            file_type: classify_file_type(entry.path()),
                        };

                        total_size += metadata.size;
                        files.push(metadata);
                    }
                }
                Err(e) => {
                    warn!("Skipping file due to error: {}", e);
                }
            }
        }

        let processing_time = start_time.elapsed();

        info!("Processed {} files in {:?}", files.len(), processing_time);

        Ok(ProcessingResult {
            total_files: files.len(),
            processing_time,
            memory_peak: estimate_memory_usage(files.len()),
            cache_hits: 0,
            cache_misses: files.len() as u64,
            metrics: ScalingMetrics {
                files_processed: files.len() as u64,
                total_processing_time: processing_time,
                memory_peak: estimate_memory_usage(files.len()),
                cache_hits: 0,
                cache_misses: files.len() as u64,
                parallel_efficiency: 1.0,
                streaming_overhead: Duration::from_millis(0),
            },
            files,
        })
    }

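    /// Runs `iterations` processing passes over `path` and returns one
    /// [`BenchmarkResult`] per iteration.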
    pub async fn benchmark(&mut self, path: &Path, iterations: usize) -> ScalingResult<Vec<BenchmarkResult>> {
        let mut results = Vec::with_capacity(iterations);

        for i in 0..iterations {
            info!("Running benchmark iteration {}/{}", i + 1, iterations);

            let start = Instant::now();
            let result = self.process_repository(path).await?;
            let duration = start.elapsed();

            let benchmark_result = BenchmarkResult::new(
                format!("iteration_{}", i + 1),
                duration,
                result.memory_peak,
                result.total_files as f64 / duration.as_secs_f64(),
                1.0,
            );

            results.push(benchmark_result);
        }

        Ok(results)
    }

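    /// Returns the engine's configuration.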
    pub fn config(&self) -> &ScalingConfig {
        &self.config
    }

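    /// Processes the repository through the intelligent selector, using the
    /// configured token budget (defaulting to 8000 tokens).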
    async fn process_with_intelligent_selection(&self, path: &Path) -> ScalingResult<ProcessingResult> {
        info!("Processing repository with intelligent selection enabled");

        let token_budget = self.config.token_budget.unwrap_or(8000);
        let mut selector = crate::selector::ScalingSelector::with_token_budget(token_budget);

        let selection_result = selector.select_and_process(path).await?;

        info!(
            "Intelligent selection completed: {} files selected, {:.1}% token utilization",
            selection_result.selected_files.len(),
            selection_result.token_utilization * 100.0
        );

        Ok(selection_result.processing_result)
    }

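    /// Reports whether the engine is ready to process repositories.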
    pub fn is_ready(&self) -> bool {
        true
    }
}

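/// Maps a file extension to a language name, falling back to "Unknown".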
fn detect_language(path: &Path) -> String {
    match path.extension().and_then(|s| s.to_str()) {
        Some("rs") => "Rust".to_string(),
        Some("py") => "Python".to_string(),
        Some("js") => "JavaScript".to_string(),
        Some("ts") => "TypeScript".to_string(),
        Some("go") => "Go".to_string(),
        Some("java") => "Java".to_string(),
        Some("cpp" | "cc" | "cxx") => "C++".to_string(),
        Some("c") => "C".to_string(),
        Some("h") => "Header".to_string(),
        Some("md") => "Markdown".to_string(),
        Some("json") => "JSON".to_string(),
        Some("yaml" | "yml") => "YAML".to_string(),
        Some("toml") => "TOML".to_string(),
        _ => "Unknown".to_string(),
    }
}

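/// Classifies a file as source, header, documentation, configuration, image,
/// or other, based on its extension.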
fn classify_file_type(path: &Path) -> String {
    match path.extension().and_then(|s| s.to_str()) {
        Some("rs" | "py" | "js" | "ts" | "go" | "java" | "cpp" | "cc" | "cxx" | "c") => "Source".to_string(),
        Some("h" | "hpp" | "hxx") => "Header".to_string(),
        Some("md" | "txt" | "rst") => "Documentation".to_string(),
        Some("json" | "yaml" | "yml" | "toml" | "ini" | "cfg") => "Configuration".to_string(),
        Some("png" | "jpg" | "jpeg" | "gif" | "svg") => "Image".to_string(),
        _ => "Other".to_string(),
    }
}

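/// Rough memory estimate: assumes about 1 KiB of bookkeeping per file.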
fn estimate_memory_usage(file_count: usize) -> usize {
    file_count * 1024
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    use std::fs;

    #[tokio::test]
    async fn test_scaling_engine_creation() {
        let engine = ScalingEngine::with_defaults().await;
        assert!(engine.is_ok());
    }

    #[tokio::test]
    async fn test_repository_processing() {
        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        fs::write(repo_path.join("main.rs"), "fn main() {}").unwrap();
        fs::write(repo_path.join("lib.rs"), "pub fn test() {}").unwrap();

        let mut engine = ScalingEngine::with_defaults().await.unwrap();
        let result = engine.process_repository(repo_path).await.unwrap();

        assert!(result.total_files >= 2);
        assert!(result.processing_time.as_nanos() > 0);
        assert!(result.memory_peak > 0);
    }

    #[tokio::test]
    async fn test_configuration_presets() {
        let small_config = ScalingConfig::small_repository();
        assert!(!small_config.streaming.enable_streaming);
        assert_eq!(small_config.parallel.max_concurrent_tasks, 2);

        let large_config = ScalingConfig::large_repository();
        assert!(large_config.streaming.enable_streaming);
        assert!(large_config.parallel.max_concurrent_tasks >= 16);
    }

    #[tokio::test]
    async fn test_error_handling() {
        let mut engine = ScalingEngine::with_defaults().await.unwrap();
        let non_existent_path = Path::new("/non/existent/path");

        let result = engine.process_repository(non_existent_path).await;
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_benchmarking() {
        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        fs::write(repo_path.join("test.rs"), "fn test() {}").unwrap();

        let mut engine = ScalingEngine::with_defaults().await.unwrap();
        let results = engine.benchmark(repo_path, 3).await.unwrap();

        assert_eq!(results.len(), 3);
        for result in results {
            assert!(result.duration.as_nanos() > 0);
            assert!(result.throughput > 0.0);
            assert_eq!(result.success_rate, 1.0);
        }
    }

    #[test]
    fn test_language_detection() {
        assert_eq!(detect_language(Path::new("test.rs")), "Rust");
        assert_eq!(detect_language(Path::new("test.py")), "Python");
        assert_eq!(detect_language(Path::new("test.unknown")), "Unknown");
    }

    #[test]
    fn test_file_type_classification() {
        assert_eq!(classify_file_type(Path::new("main.rs")), "Source");
        assert_eq!(classify_file_type(Path::new("README.md")), "Documentation");
        assert_eq!(classify_file_type(Path::new("config.json")), "Configuration");
    }
}