1use std::collections::HashMap;
8use std::path::Path;
9use std::sync::Arc;
10use std::time::{Duration, Instant};
11
12use serde::{Deserialize, Serialize};
13use tracing::{debug, error, info, warn};
14
15use crate::adaptive::AdaptiveConfig;
16use crate::caching::CacheConfig;
17use crate::error::{ScalingError, ScalingResult};
18use crate::memory::MemoryConfig;
19use crate::metrics::{BenchmarkResult, ScalingMetrics};
20use crate::parallel::ParallelConfig;
21use crate::signatures::SignatureConfig;
22use crate::streaming::{FileMetadata, StreamingConfig};
23
/// Top-level configuration for the scaling engine, aggregating the
/// per-subsystem configurations plus optional selection/positioning knobs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalingConfig {
    // Streaming file-discovery settings.
    pub streaming: StreamingConfig,
    // File/result caching settings.
    pub caching: CacheConfig,
    // Parallel execution settings (worker counts, timeouts).
    pub parallel: ParallelConfig,
    // Adaptive behavior settings.
    pub adaptive: AdaptiveConfig,
    // Signature extraction settings.
    pub signatures: SignatureConfig,
    // Memory management settings.
    pub memory: MemoryConfig,

    // Optional token budget; together with `enable_intelligent_selection`
    // it routes processing through intelligent selection.
    pub token_budget: Option<usize>,

    // When true (and `token_budget` is `Some`), `process_repository`
    // delegates to the intelligent selection path.
    pub enable_intelligent_selection: bool,

    // Selection algorithm name (e.g. "v2_quotas", "v5_integrated").
    pub selection_algorithm: Option<String>,

    // NOTE(review): not read by any code visible in this file — presumably
    // consumed by the positioning subsystem; confirm usage elsewhere.
    pub enable_context_positioning: bool,

    // Optional query for context positioning; also not read here.
    pub positioning_query: Option<String>,
}
49
50impl Default for ScalingConfig {
51 fn default() -> Self {
52 Self {
53 streaming: StreamingConfig::default(),
54 caching: CacheConfig::default(),
55 parallel: ParallelConfig::default(),
56 adaptive: AdaptiveConfig::default(),
57 signatures: SignatureConfig::default(),
58 memory: MemoryConfig::default(),
59 token_budget: None, enable_intelligent_selection: false, selection_algorithm: None, enable_context_positioning: false, positioning_query: None, }
65 }
66}
67
68impl ScalingConfig {
69 pub fn small_repository() -> Self {
71 Self {
72 streaming: StreamingConfig {
73 enable_streaming: false,
74 concurrency_limit: 2,
75 memory_limit: 50 * 1024 * 1024, selection_heap_size: 1000,
77 },
78 parallel: ParallelConfig {
79 max_concurrent_tasks: 2,
80 async_worker_count: 1,
81 cpu_worker_count: 1,
82 task_timeout: Duration::from_secs(10),
83 enable_work_stealing: false,
84 },
85 token_budget: Some(8000), enable_intelligent_selection: true,
87 selection_algorithm: Some("v2_quotas".to_string()),
88 ..Default::default()
89 }
90 }
91
92 pub fn large_repository() -> Self {
94 Self {
95 streaming: StreamingConfig {
96 enable_streaming: true,
97 concurrency_limit: 8,
98 memory_limit: 500 * 1024 * 1024, selection_heap_size: 10000,
100 },
101 parallel: ParallelConfig {
102 max_concurrent_tasks: 16,
103 async_worker_count: 8,
104 cpu_worker_count: 8,
105 task_timeout: Duration::from_secs(60),
106 enable_work_stealing: true,
107 },
108 token_budget: Some(15000), enable_intelligent_selection: true,
110 selection_algorithm: Some("v5_integrated".to_string()),
111 ..Default::default()
112 }
113 }
114
115 pub fn with_token_budget(token_budget: usize) -> Self {
117 Self {
118 token_budget: Some(token_budget),
119 enable_intelligent_selection: true,
120 selection_algorithm: Some(
121 match token_budget {
122 0..=2000 => "v2_quotas",
123 2001..=15000 => "v5_integrated",
124 _ => "v5_integrated",
125 }
126 .to_string(),
127 ),
128 ..Self::default()
129 }
130 }
131}
132
/// Outcome of a repository processing run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessingResult {
    // Metadata for each processed file.
    pub files: Vec<FileMetadata>,

    // Number of files processed (equals `files.len()` in the paths
    // visible in this module).
    pub total_files: usize,

    // Wall-clock time for the whole run.
    pub processing_time: Duration,

    // Estimated peak memory usage in bytes (see `estimate_memory_usage`).
    pub memory_peak: usize,

    // Cache hit count (always 0 in the basic, cache-less path here).
    pub cache_hits: u64,

    // Cache miss count (one per file in the basic path here).
    pub cache_misses: u64,

    // Aggregated metrics mirroring the fields above.
    pub metrics: ScalingMetrics,
}
157
/// Engine that drives repository processing according to a `ScalingConfig`.
pub struct ScalingEngine {
    // Active configuration; read-only after construction.
    config: ScalingConfig,
    // Set at the start of each `process_repository` call; `None` until
    // the first run.
    started_at: Option<Instant>,
}
163
164impl ScalingEngine {
165 pub async fn new(config: ScalingConfig) -> ScalingResult<Self> {
167 info!(
168 "Initializing scaling engine with configuration: {:?}",
169 config
170 );
171
172 Ok(Self {
173 config,
174 started_at: None,
175 })
176 }
177
178 pub async fn with_defaults() -> ScalingResult<Self> {
180 Self::new(ScalingConfig::default()).await
181 }
182
183 pub fn with_config(config: ScalingConfig) -> Self {
185 Self {
186 config,
187 started_at: None,
188 }
189 }
190
191 pub async fn process_repository(&mut self, path: &Path) -> ScalingResult<ProcessingResult> {
193 let start_time = Instant::now();
194 self.started_at = Some(start_time);
195
196 info!("Processing repository: {:?}", path);
197
198 if !path.exists() {
199 return Err(ScalingError::path("Repository path does not exist", path));
200 }
201
202 if !path.is_dir() {
203 return Err(ScalingError::path(
204 "Repository path is not a directory",
205 path,
206 ));
207 }
208
209 if self.config.enable_intelligent_selection && self.config.token_budget.is_some() {
211 return self.process_with_intelligent_selection(path).await;
212 }
213
214 info!("Using optimized streaming file discovery for basic processing");
216
217 let streaming_config = crate::streaming::StreamingConfig {
219 enable_streaming: true,
220 concurrency_limit: self.config.parallel.max_concurrent_tasks,
221 memory_limit: self.config.streaming.memory_limit,
222 selection_heap_size: 50000, };
224
225 let streaming_selector = crate::streaming::StreamingSelector::new(streaming_config);
226
227 let target_count = 50000; let token_budget = 1_000_000; let score_fn = |_file: &FileMetadata| -> f64 { 1.0 };
233 let token_fn = |file: &FileMetadata| -> usize { (file.size / 4) as usize };
234
235 let scored_files = streaming_selector
236 .select_files_streaming(path, target_count, token_budget, score_fn, token_fn)
237 .await?;
238
239 let files: Vec<FileMetadata> = scored_files
240 .into_iter()
241 .map(|scored| scored.metadata)
242 .collect();
243
244 let total_size: u64 = files.iter().map(|f| f.size).sum();
245
246 let processing_time = start_time.elapsed();
247
248 info!("Processed {} files in {:?}", files.len(), processing_time);
249
250 Ok(ProcessingResult {
251 total_files: files.len(),
252 processing_time,
253 memory_peak: estimate_memory_usage(files.len()),
254 cache_hits: 0, cache_misses: files.len() as u64, metrics: ScalingMetrics {
257 files_processed: files.len() as u64,
258 total_processing_time: processing_time,
259 memory_peak: estimate_memory_usage(files.len()),
260 cache_hits: 0,
261 cache_misses: files.len() as u64,
262 parallel_efficiency: 1.0, streaming_overhead: Duration::from_millis(0),
264 },
265 files,
266 })
267 }
268
269 pub async fn benchmark(
271 &mut self,
272 path: &Path,
273 iterations: usize,
274 ) -> ScalingResult<Vec<BenchmarkResult>> {
275 let mut results = Vec::with_capacity(iterations);
276
277 for i in 0..iterations {
278 info!("Running benchmark iteration {}/{}", i + 1, iterations);
279
280 let start = Instant::now();
281 let result = self.process_repository(path).await?;
282 let duration = start.elapsed();
283
284 let benchmark_result = BenchmarkResult::new(
285 format!("iteration_{}", i + 1),
286 duration,
287 result.memory_peak,
288 result.total_files as f64 / duration.as_secs_f64(),
289 1.0, );
291
292 results.push(benchmark_result);
293 }
294
295 Ok(results)
296 }
297
298 pub fn config(&self) -> &ScalingConfig {
300 &self.config
301 }
302
303 async fn process_with_intelligent_selection(
305 &self,
306 path: &Path,
307 ) -> ScalingResult<ProcessingResult> {
308 info!("Processing repository with intelligent selection enabled");
309
310 let token_budget = self.config.token_budget.unwrap_or(8000);
312 let mut selector = crate::selector::ScalingSelector::with_token_budget(token_budget);
313
314 let selection_result = selector.select_and_process(path).await?;
316
317 info!(
318 "Intelligent selection completed: {} files selected, {:.1}% token utilization",
319 selection_result.selected_files.len(),
320 selection_result.token_utilization * 100.0
321 );
322
323 Ok(selection_result.processing_result)
325 }
326
327 pub fn is_ready(&self) -> bool {
329 true }
331}
332
/// Maps a file extension to a human-readable language name.
///
/// Matching is case-sensitive; missing or unrecognized extensions
/// yield `"Unknown"`.
fn detect_language(path: &Path) -> String {
    let label = match path.extension().and_then(|ext| ext.to_str()) {
        Some("rs") => "Rust",
        Some("py") => "Python",
        Some("js") => "JavaScript",
        Some("ts") => "TypeScript",
        Some("go") => "Go",
        Some("java") => "Java",
        Some("cpp" | "cc" | "cxx") => "C++",
        Some("c") => "C",
        Some("h") => "Header",
        Some("md") => "Markdown",
        Some("json") => "JSON",
        Some("yaml" | "yml") => "YAML",
        Some("toml") => "TOML",
        _ => "Unknown",
    };
    label.to_string()
}
352
/// Coarsely classifies a file by extension into Source / Header /
/// Documentation / Configuration / Image / Other.
///
/// Matching is case-sensitive; missing extensions yield `"Other"`.
fn classify_file_type(path: &Path) -> String {
    let category = match path.extension().and_then(|ext| ext.to_str()) {
        Some("rs" | "py" | "js" | "ts" | "go" | "java" | "cpp" | "cc" | "cxx" | "c") => "Source",
        Some("h" | "hpp" | "hxx") => "Header",
        Some("md" | "txt" | "rst") => "Documentation",
        Some("json" | "yaml" | "yml" | "toml" | "ini" | "cfg") => "Configuration",
        Some("png" | "jpg" | "jpeg" | "gif" | "svg") => "Image",
        _ => "Other",
    };
    category.to_string()
}
366
/// Rough peak-memory estimate: ~1 KiB of bookkeeping per file.
///
/// Uses `saturating_mul` so an absurdly large count clamps to
/// `usize::MAX` instead of overflowing (which would panic in debug
/// builds and silently wrap in release).
fn estimate_memory_usage(file_count: usize) -> usize {
    file_count.saturating_mul(1024)
}
372
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Engine construction with default config must succeed.
    #[tokio::test]
    async fn test_scaling_engine_creation() {
        assert!(ScalingEngine::with_defaults().await.is_ok());
    }

    /// Basic processing of a tiny repository reports files, time, memory.
    #[tokio::test]
    async fn test_repository_processing() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        fs::write(root.join("main.rs"), "fn main() {}").unwrap();
        fs::write(root.join("lib.rs"), "pub fn test() {}").unwrap();

        let mut engine = ScalingEngine::with_defaults().await.unwrap();
        let outcome = engine.process_repository(root).await.unwrap();

        assert!(outcome.total_files >= 2);
        assert!(outcome.processing_time.as_nanos() > 0);
        assert!(outcome.memory_peak > 0);
    }

    /// Presets expose the expected streaming/concurrency characteristics.
    #[tokio::test]
    async fn test_configuration_presets() {
        let small = ScalingConfig::small_repository();
        assert!(!small.streaming.enable_streaming);
        assert_eq!(small.parallel.max_concurrent_tasks, 2);

        let large = ScalingConfig::large_repository();
        assert!(large.streaming.enable_streaming);
        assert!(large.parallel.max_concurrent_tasks >= 16);
    }

    /// Processing a non-existent path must fail, not panic.
    #[tokio::test]
    async fn test_error_handling() {
        let mut engine = ScalingEngine::with_defaults().await.unwrap();
        let missing = Path::new("/non/existent/path");
        assert!(engine.process_repository(missing).await.is_err());
    }

    /// Benchmarking returns one well-formed result per iteration.
    #[tokio::test]
    async fn test_benchmarking() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        fs::write(root.join("test.rs"), "fn test() {}").unwrap();

        let mut engine = ScalingEngine::with_defaults().await.unwrap();
        let runs = engine.benchmark(root, 3).await.unwrap();

        assert_eq!(runs.len(), 3);
        for run in runs {
            assert!(run.duration.as_nanos() > 0);
            assert!(run.throughput > 0.0);
            assert_eq!(run.success_rate, 1.0);
        }
    }

    /// Extension-based language detection, including the unknown fallback.
    #[test]
    fn test_language_detection() {
        assert_eq!(detect_language(Path::new("test.rs")), "Rust");
        assert_eq!(detect_language(Path::new("test.py")), "Python");
        assert_eq!(detect_language(Path::new("test.unknown")), "Unknown");
    }

    /// Extension-based file-type classification.
    #[test]
    fn test_file_type_classification() {
        assert_eq!(classify_file_type(Path::new("main.rs")), "Source");
        assert_eq!(classify_file_type(Path::new("README.md")), "Documentation");
        assert_eq!(
            classify_file_type(Path::new("config.json")),
            "Configuration"
        );
    }
}