1use std::collections::HashMap;
7use std::path::Path;
8use std::time::{Duration, Instant};
9
10use serde::{Deserialize, Serialize};
11use tracing::{debug, info, warn};
12
13use crate::engine::{ProcessingResult, ScalingConfig};
14use crate::error::{ScalingError, ScalingResult};
15use crate::positioning::{ContextPositioner, ContextPositioningConfig, PositionedSelection};
16use crate::streaming::{FileMetadata, ScoredFile, StreamingSelector};
17use scribe_core::{file, FileInfo, FileType};
18
/// Coarse bucket assigned to a candidate file by `ScalingSelector::classify_file`
/// and used by `ScalingSelector::apply_integrated_selection` to decide the
/// order in which the token budget is handed out.
///
/// The classifier checks the categories in the order `Config`, `Entry`,
/// `Examples`, and falls back to `General`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FileCategory {
    /// Files whose type is "Configuration", or whose name contains "config"
    /// or ends in `.toml`, `.json`, or `.yaml`.
    Config,
    /// Files whose name contains "main" or "index", or is exactly `lib.rs`
    /// or `__init__.py`.
    Entry,
    /// Files whose path contains "example", "test", "demo", or "sample".
    Examples,
    /// Files that matched none of the other categories.
    General,
}
27
/// Names the selection strategy stored in `ScalingSelectionConfig` and echoed
/// back in `ScalingSelectionResult::algorithm_used`.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum SelectionAlgorithm {
    /// The only strategy defined in this file; every preset configuration uses it.
    V5Integrated,
}
34
/// Settings that drive a `ScalingSelector` run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalingSelectionConfig {
    /// Upper bound, in estimated tokens, passed to the streaming selector and
    /// used as the denominator of the reported utilization.
    pub token_budget: usize,

    /// Strategy label; logged at the start of a run and copied into the result.
    pub selection_algorithm: SelectionAlgorithm,

    /// Quota toggle. Every preset sets it to `true`.
    /// NOTE(review): no code in this file reads this flag — confirm where it is consumed.
    pub enable_quotas: bool,

    /// Settings handed to `ContextPositioner`; its `enable_positioning` flag
    /// decides whether positioning runs at all.
    pub positioning_config: ContextPositioningConfig,

    /// Engine-level scaling settings.
    /// NOTE(review): stored but not read by the selector code in this file.
    pub scaling_config: ScalingConfig,
}
53
54impl Default for ScalingSelectionConfig {
55 fn default() -> Self {
56 Self {
57 token_budget: 8000,
58 selection_algorithm: SelectionAlgorithm::V5Integrated,
59 enable_quotas: true,
60 positioning_config: ContextPositioningConfig::default(),
61 scaling_config: ScalingConfig::default(),
62 }
63 }
64}
65
66impl ScalingSelectionConfig {
67 pub fn small_budget() -> Self {
69 Self {
70 token_budget: 1000,
71 selection_algorithm: SelectionAlgorithm::V5Integrated,
72 enable_quotas: true,
73 positioning_config: ContextPositioningConfig::default(),
74 scaling_config: ScalingConfig::small_repository(),
75 }
76 }
77
78 pub fn with_test_exclusion(mut self) -> Self {
80 self.positioning_config.auto_exclude_tests = true;
81 self
82 }
83
84 pub fn medium_budget() -> Self {
86 Self {
87 token_budget: 10000,
88 selection_algorithm: SelectionAlgorithm::V5Integrated,
89 enable_quotas: true,
90 positioning_config: ContextPositioningConfig::default(),
91 scaling_config: ScalingConfig::default(),
92 }
93 }
94
95 pub fn large_budget() -> Self {
97 Self {
98 token_budget: 100000,
99 selection_algorithm: SelectionAlgorithm::V5Integrated,
100 enable_quotas: true,
101 positioning_config: ContextPositioningConfig::default(),
102 scaling_config: ScalingConfig::large_repository(),
103 }
104 }
105}
106
/// Outcome of `ScalingSelector::select_and_process_with_query`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalingSelectionResult {
    /// Files returned by the streaming selection step, in the order it produced them.
    pub selected_files: Vec<FileMetadata>,

    /// Positioned view of the selection; `Some` only when positioning is
    /// enabled in the configuration.
    pub positioned_selection: Option<PositionedSelection>,

    /// Populated from the number of files the streaming selection returned.
    /// NOTE(review): this equals the selected count, not the number of files scanned.
    pub total_files_considered: usize,

    /// `tokens_used` divided by the configured token budget.
    pub token_utilization: f64,

    /// Token total: taken from the positioner when positioning ran, otherwise
    /// the sum of the selector's per-file estimates.
    pub tokens_used: usize,

    /// Copy of the configured `SelectionAlgorithm`.
    pub algorithm_used: SelectionAlgorithm,

    /// Wall-clock time spent in the discovery/selection step only.
    pub selection_time: Duration,

    /// Value produced by `ScalingSelector::apply_scaling_optimizations`.
    pub processing_result: ProcessingResult,
}
134
/// Internal record pairing a file with the values computed for it during
/// `ScalingSelector::apply_integrated_selection`.
#[derive(Debug, Clone)]
struct SelectorScoredFile {
    /// Clone of the candidate file's metadata.
    metadata: FileMetadata,
    /// Estimated token cost from `ScalingSelector::estimate_tokens`.
    tokens: usize,
    /// Priority from `ScalingSelector::calculate_file_score` (higher sorts first).
    score: f64,
    /// Bucket from `ScalingSelector::classify_file`.
    category: FileCategory,
}
143
/// Selects files from a repository under a token budget, optionally positions
/// them, and reports the outcome as a `ScalingSelectionResult`.
pub struct ScalingSelector {
    /// Settings used for every selection run made through this selector.
    config: ScalingSelectionConfig,
}
148
149impl ScalingSelector {
150 pub fn new(config: ScalingSelectionConfig) -> Self {
152 Self { config }
153 }
154
155 pub fn with_defaults() -> Self {
157 Self::new(ScalingSelectionConfig::default())
158 }
159
160 pub fn with_token_budget(token_budget: usize) -> Self {
162 let config = match token_budget {
163 0..=2000 => ScalingSelectionConfig::small_budget(),
164 2001..=15000 => ScalingSelectionConfig::medium_budget(),
165 _ => ScalingSelectionConfig::large_budget(),
166 };
167
168 Self::new(ScalingSelectionConfig {
169 token_budget,
170 ..config
171 })
172 }
173
174 pub async fn select_and_process(
176 &mut self,
177 repo_path: &Path,
178 ) -> ScalingResult<ScalingSelectionResult> {
179 self.select_and_process_with_query(repo_path, None).await
180 }
181
182 pub async fn select_and_process_with_query(
184 &mut self,
185 repo_path: &Path,
186 query_hint: Option<&str>,
187 ) -> ScalingResult<ScalingSelectionResult> {
188 let start_time = Instant::now();
189
190 info!(
191 "Starting intelligent scaling selection for: {:?}",
192 repo_path
193 );
194 info!(
195 "Token budget: {}, Algorithm: {:?}",
196 self.config.token_budget, self.config.selection_algorithm
197 );
198 if let Some(query) = query_hint {
199 info!("Query hint for positioning: '{}'", query);
200 }
201
202 let discovery_start = Instant::now();
204 let selected_files = self.discover_and_select_files_streaming(repo_path).await?;
205 let discovery_time = discovery_start.elapsed();
206
207 info!(
208 "Selected {} files in {:?}",
209 selected_files.len(),
210 discovery_time
211 );
212
213 let total_files_considered = selected_files.len();
215 let (positioned_selection, final_files, final_tokens) =
216 if self.config.positioning_config.enable_positioning {
217 let positioner = ContextPositioner::new(self.config.positioning_config.clone());
218 let positioned = positioner
219 .position_files(selected_files.clone(), query_hint)
220 .await?;
221
222 info!(
223 "Context positioning applied: HEAD={}, MIDDLE={}, TAIL={}",
224 positioned.positioning.head_files.len(),
225 positioned.positioning.middle_files.len(),
226 positioned.positioning.tail_files.len()
227 );
228
229 let tokens = positioned.total_tokens;
230 (Some(positioned), selected_files, tokens)
231 } else {
232 let tokens = self.calculate_tokens_used(&selected_files);
233 (None, selected_files, tokens)
234 };
235
236 let processing_result = self.apply_scaling_optimizations(&final_files).await?;
238
239 let token_utilization = final_tokens as f64 / self.config.token_budget as f64;
241
242 let total_time = start_time.elapsed();
243 info!("Total selection and processing time: {:?}", total_time);
244 info!(
245 "Token utilization: {:.1}% ({}/{})",
246 token_utilization * 100.0,
247 final_tokens,
248 self.config.token_budget
249 );
250
251 Ok(ScalingSelectionResult {
252 selected_files: final_files,
253 positioned_selection,
254 total_files_considered, token_utilization,
256 tokens_used: final_tokens,
257 algorithm_used: self.config.selection_algorithm,
258 selection_time: discovery_time, processing_result,
260 })
261 }
262
263 async fn discover_and_select_files_streaming(
265 &self,
266 repo_path: &Path,
267 ) -> ScalingResult<Vec<FileMetadata>> {
268 info!("Using optimized streaming file discovery");
269
270 let streaming_config = crate::streaming::StreamingConfig {
272 enable_streaming: true,
273 concurrency_limit: num_cpus::get() * 2,
274 memory_limit: 100 * 1024 * 1024, selection_heap_size: self.config.token_budget * 2, };
277
278 let streaming_selector = StreamingSelector::new(streaming_config);
279
280 let target_count = self.estimate_target_file_count();
282
283 let score_fn = {
285 let token_budget = self.config.token_budget;
286 move |file: &FileMetadata| -> f64 {
287 Self::calculate_file_score_static(file, token_budget)
288 }
289 };
290
291 let token_fn = {
292 let token_budget = self.config.token_budget;
293 move |file: &FileMetadata| -> usize { Self::estimate_tokens_static(file, token_budget) }
294 };
295
296 let scored_files = streaming_selector
298 .select_files_streaming(
299 repo_path,
300 target_count,
301 self.config.token_budget,
302 score_fn,
303 token_fn,
304 )
305 .await?;
306
307 let selected_files: Vec<FileMetadata> = scored_files
309 .into_iter()
310 .map(|scored| scored.metadata)
311 .collect();
312
313 info!(
314 "Streaming selection completed: {} files selected",
315 selected_files.len()
316 );
317 Ok(selected_files)
318 }
319
320 fn estimate_target_file_count(&self) -> usize {
322 let estimated_files = self.config.token_budget / 300;
325
326 estimated_files.clamp(5, 200)
328 }
329
330 fn detect_language(&self, path: &Path) -> String {
332 let ext = path
333 .extension()
334 .and_then(|s| s.to_str())
335 .map(|s| s.to_lowercase());
336
337 if matches!(ext.as_deref(), Some("h" | "hpp" | "hxx")) {
338 return "Header".to_string();
339 }
340
341 if path
342 .file_name()
343 .and_then(|s| s.to_str())
344 .map(|s| s.eq_ignore_ascii_case("dockerfile"))
345 .unwrap_or(false)
346 {
347 return "Dockerfile".to_string();
348 }
349
350 let language = file::detect_language_from_path(path);
351 file::language_display_name(&language).to_string()
352 }
353
354 fn classify_file_type(&self, path: &Path) -> String {
356 let extension = path
357 .extension()
358 .and_then(|s| s.to_str())
359 .map(|s| s.to_lowercase())
360 .unwrap_or_default();
361
362 let language = file::detect_language_from_path(path);
363 let file_type =
364 FileInfo::classify_file_type(path.to_string_lossy().as_ref(), &language, &extension);
365
366 match file_type {
367 FileType::Test { .. } => "Test".to_string(),
368 FileType::Documentation { .. } => "Documentation".to_string(),
369 FileType::Configuration { .. } => "Configuration".to_string(),
370 FileType::Binary => "Binary".to_string(),
371 FileType::Generated => "Generated".to_string(),
372 FileType::Source { .. } => match extension.as_str() {
373 "h" | "hpp" | "hxx" => "Header".to_string(),
374 _ => "Source".to_string(),
375 },
376 FileType::Unknown => match extension.as_str() {
377 "md" | "txt" | "rst" => "Documentation".to_string(),
378 "json" | "yaml" | "yml" | "toml" | "ini" | "cfg" => "Configuration".to_string(),
379 "png" | "jpg" | "jpeg" | "gif" | "svg" => "Image".to_string(),
380 _ => "Other".to_string(),
381 },
382 }
383 }
384
385 async fn apply_intelligent_selection(
387 &self,
388 files: &[FileMetadata],
389 ) -> ScalingResult<Vec<FileMetadata>> {
390 self.apply_integrated_selection(files)
392 }
393
394 fn apply_integrated_selection(
396 &self,
397 files: &[FileMetadata],
398 ) -> ScalingResult<Vec<FileMetadata>> {
399 let mut scored_files: Vec<SelectorScoredFile> = files
401 .iter()
402 .map(|file| {
403 let tokens = self.estimate_tokens(file);
404 let score = self.calculate_file_score(file);
405 let category = self.classify_file(file);
406
407 SelectorScoredFile {
408 metadata: file.clone(),
409 tokens,
410 score,
411 category,
412 }
413 })
414 .collect();
415
416 let mut categorized: HashMap<FileCategory, Vec<SelectorScoredFile>> = HashMap::new();
418 for scored_file in scored_files {
419 categorized
420 .entry(scored_file.category)
421 .or_insert_with(Vec::new)
422 .push(scored_file);
423 }
424
425 for files in categorized.values_mut() {
427 files.sort_by(|a, b| {
428 b.score
429 .partial_cmp(&a.score)
430 .unwrap_or(std::cmp::Ordering::Equal)
431 });
432 }
433
434 let mut selected = Vec::new();
436 let mut remaining_budget = self.config.token_budget;
437
438 let tier1_order = [FileCategory::Entry, FileCategory::Config];
440 for category in tier1_order.iter() {
441 if let Some(files) = categorized.get(category) {
442 let tier_budget = match category {
443 FileCategory::Entry => (self.config.token_budget as f64 * 0.35) as usize, FileCategory::Config => (self.config.token_budget as f64 * 0.25) as usize, _ => 0,
446 };
447
448 let mut used_budget = 0;
449 for scored_file in files {
450 if used_budget + scored_file.tokens <= tier_budget
451 && scored_file.tokens <= remaining_budget
452 {
453 selected.push(scored_file.metadata.clone());
454 used_budget += scored_file.tokens;
455 remaining_budget = remaining_budget.saturating_sub(scored_file.tokens);
456 }
457 }
458 }
459 }
460
461 if let Some(general_files) = categorized.get(&FileCategory::General) {
463 for scored_file in general_files {
464 if scored_file.tokens <= remaining_budget {
465 selected.push(scored_file.metadata.clone());
466 remaining_budget = remaining_budget.saturating_sub(scored_file.tokens);
467 }
468 }
469 }
470
471 if let Some(example_files) = categorized.get(&FileCategory::Examples) {
473 for scored_file in example_files {
474 if scored_file.tokens <= remaining_budget {
475 selected.push(scored_file.metadata.clone());
476 remaining_budget = remaining_budget.saturating_sub(scored_file.tokens);
477 }
478 }
479 }
480
481 Ok(selected)
482 }
483
484 async fn apply_scaling_optimizations(
486 &self,
487 selected_files: &[FileMetadata],
488 ) -> ScalingResult<ProcessingResult> {
489 let total_size: u64 = selected_files.iter().map(|f| f.size).sum();
491 let processing_time = Duration::from_millis((selected_files.len() as u64 * 2).max(10)); let memory_peak = (selected_files.len() * 1024).max(1024); Ok(ProcessingResult {
495 files: selected_files.to_vec(),
496 total_files: selected_files.len(),
497 processing_time,
498 memory_peak,
499 cache_hits: 0,
500 cache_misses: selected_files.len() as u64,
501 metrics: crate::metrics::ScalingMetrics {
502 files_processed: selected_files.len() as u64,
503 total_processing_time: processing_time,
504 memory_peak,
505 cache_hits: 0,
506 cache_misses: selected_files.len() as u64,
507 parallel_efficiency: 1.0,
508 streaming_overhead: Duration::from_millis(0),
509 },
510 })
511 }
512
513 fn calculate_tokens_used(&self, selected_files: &[FileMetadata]) -> usize {
515 selected_files
516 .iter()
517 .map(|file| self.estimate_tokens(file))
518 .sum()
519 }
520
521 fn estimate_tokens(&self, file: &FileMetadata) -> usize {
523 let base_tokens = ((file.size as f64) / 3.5) as usize;
526
527 let min_tokens = if self.config.token_budget < 5000 {
530 100 } else {
532 50 };
534 let base_tokens = base_tokens.max(min_tokens);
535
536 let multiplier = match file.file_type.as_str() {
538 "Source" => 1.2, "Documentation" => 1.0, "Configuration" => 0.8, _ => 1.1, };
543
544 let language_multiplier = match file.language.as_str() {
546 "Rust" => 1.3, "JavaScript" | "TypeScript" => 1.2, "Python" => 1.1, "C" | "Go" => 1.0, "HTML" | "CSS" => 0.9, "JSON" | "YAML" | "TOML" => 0.7, _ => 1.0, };
554
555 let final_tokens = (base_tokens as f64 * multiplier * language_multiplier) as usize;
557
558 final_tokens.min(self.config.token_budget / 4) }
561
562 fn calculate_file_score(&self, file: &FileMetadata) -> f64 {
564 let mut score: f64 = 0.1; let path_str = file.path.to_string_lossy().to_lowercase();
567
568 if path_str.contains("main") || path_str.contains("index") {
570 score += 2.0; }
572 if path_str.contains("lib.rs") || path_str.contains("mod.rs") {
573 score += 1.5; }
575 if path_str.contains("__init__.py") {
576 score += 1.3; }
578
579 let path_components = file.path.components().count();
581 if path_components <= 2 {
582 score += 1.0;
584
585 if path_str.contains("readme")
587 || path_str.contains("license")
588 || path_str.contains("cargo.toml")
589 || path_str.contains("package.json")
590 || path_str.contains("pyproject.toml")
591 || path_str.contains("setup.py")
592 {
593 score += 1.5;
594 }
595 }
596
597 match file.language.as_str() {
599 "Rust" | "Python" | "JavaScript" | "TypeScript" => score += 0.8,
600 "C" | "C++" | "Go" | "Java" => score += 0.6,
601 "Shell" | "Makefile" => score += 0.4, _ => {}
603 }
604
605 match file.file_type.as_str() {
607 "Source" => score += 0.6,
608 "Configuration" => score += 0.5, "Documentation" => score += 0.3,
610 _ => {}
611 }
612
613 if file.size > 50_000 {
615 score -= 0.5;
616 }
617 if file.size > 100_000 {
618 score -= 1.0;
619 }
620
621 if path_str.contains("test") && !path_str.contains("tests/") {
623 score += 0.2; }
625
626 if path_components > 4 {
628 score -= 0.3 * (path_components - 4) as f64;
629 }
630
631 if file.size < 10_000 && (path_str.contains("config") || path_str.contains("env")) {
633 score += 0.4;
634 }
635
636 score.clamp(0.0, 5.0) }
638
639 fn classify_file(&self, file: &FileMetadata) -> FileCategory {
641 let path_str = file.path.to_string_lossy().to_lowercase();
642 let filename = file
643 .path
644 .file_name()
645 .and_then(|n| n.to_str())
646 .unwrap_or("")
647 .to_lowercase();
648
649 if matches!(file.file_type.as_str(), "Configuration")
651 || filename.contains("config")
652 || filename.ends_with(".toml")
653 || filename.ends_with(".json")
654 || filename.ends_with(".yaml")
655 {
656 return FileCategory::Config;
657 }
658
659 if filename.contains("main")
661 || filename.contains("index")
662 || filename == "lib.rs"
663 || filename == "__init__.py"
664 {
665 return FileCategory::Entry;
666 }
667
668 if path_str.contains("example")
670 || path_str.contains("test")
671 || path_str.contains("demo")
672 || path_str.contains("sample")
673 {
674 return FileCategory::Examples;
675 }
676
677 FileCategory::General
678 }
679
680 fn calculate_file_score_static(file: &FileMetadata, token_budget: usize) -> f64 {
682 let mut score: f64 = 0.1; let path_str = file.path.to_string_lossy().to_lowercase();
685
686 if path_str.contains("main") || path_str.contains("index") {
688 score += 2.0; }
690 if path_str.contains("lib.rs") || path_str.contains("mod.rs") {
691 score += 1.5; }
693 if path_str.contains("__init__.py") {
694 score += 1.3; }
696
697 let path_components = file.path.components().count();
699 if path_components <= 2 {
700 score += 1.0;
702
703 if path_str.contains("readme")
705 || path_str.contains("license")
706 || path_str.contains("cargo.toml")
707 || path_str.contains("package.json")
708 || path_str.contains("pyproject.toml")
709 || path_str.contains("setup.py")
710 {
711 score += 1.5;
712 }
713 }
714
715 match file.language.as_str() {
717 "Rust" | "Python" | "JavaScript" | "TypeScript" => score += 0.8,
718 "C" | "C++" | "Go" | "Java" => score += 0.6,
719 "Shell" => score += 0.4, _ => {}
721 }
722
723 match file.file_type.as_str() {
725 "Source" => score += 0.6,
726 "Configuration" => score += 0.5, "Documentation" => score += 0.3,
728 _ => {}
729 }
730
731 if file.size > 50_000 {
733 score -= 0.5;
734 }
735 if file.size > 100_000 {
736 score -= 1.0;
737 }
738
739 if path_str.contains("test") && !path_str.contains("tests/") {
741 score += 0.2; }
743
744 if path_components > 4 {
746 score -= 0.3 * (path_components - 4) as f64;
747 }
748
749 if file.size < 10_000 && (path_str.contains("config") || path_str.contains("env")) {
751 score += 0.4;
752 }
753
754 score.clamp(0.0, 5.0) }
756
757 fn estimate_tokens_static(file: &FileMetadata, token_budget: usize) -> usize {
759 let base_tokens = ((file.size as f64) / 3.5) as usize;
762
763 let min_tokens = if token_budget < 5000 {
766 100 } else {
768 50 };
770 let base_tokens = base_tokens.max(min_tokens);
771
772 let multiplier = match file.file_type.as_str() {
774 "Source" => 1.2, "Documentation" => 1.0, "Configuration" => 0.8, _ => 1.1, };
779
780 let language_multiplier = match file.language.as_str() {
782 "Rust" => 1.3, "JavaScript" | "TypeScript" => 1.2, "Python" => 1.1, "C" | "Go" => 1.0, "HTML" | "CSS" => 0.9, "JSON" | "YAML" | "TOML" => 0.7, _ => 1.0, };
790
791 let final_tokens = (base_tokens as f64 * multiplier * language_multiplier) as usize;
793
794 final_tokens.min(token_budget / 4) }
797}
798
799impl ScalingSelectionResult {
800 pub fn get_optimally_ordered_files(&self) -> Vec<&FileMetadata> {
802 if let Some(positioned) = &self.positioned_selection {
803 let mut files = Vec::new();
804
805 for file in &positioned.positioning.head_files {
807 files.push(&file.metadata);
808 }
809
810 for file in &positioned.positioning.middle_files {
812 files.push(&file.metadata);
813 }
814
815 for file in &positioned.positioning.tail_files {
817 files.push(&file.metadata);
818 }
819
820 files
821 } else {
822 self.selected_files.iter().collect()
823 }
824 }
825
826 pub fn get_positioning_stats(&self) -> Option<(usize, usize, usize)> {
828 self.positioned_selection.as_ref().map(|p| {
829 (
830 p.positioning.head_files.len(),
831 p.positioning.middle_files.len(),
832 p.positioning.tail_files.len(),
833 )
834 })
835 }
836
837 pub fn get_positioning_reasoning(&self) -> Option<&str> {
839 self.positioned_selection
840 .as_ref()
841 .map(|p| p.positioning_reasoning.as_str())
842 }
843
844 pub fn has_context_positioning(&self) -> bool {
846 self.positioned_selection.is_some()
847 }
848}
849
850#[cfg(test)]
851mod tests {
852 use super::*;
853 use std::fs;
854 use tempfile::TempDir;
855
856 #[tokio::test]
857 async fn test_scaling_selector_creation() {
858 let selector = ScalingSelector::with_defaults();
859 assert_eq!(selector.config.token_budget, 8000);
860 }
861
862 #[tokio::test]
863 async fn test_small_budget_selection() {
864 let selector = ScalingSelector::with_token_budget(1000);
865 assert_eq!(selector.config.token_budget, 1000);
866 assert!(matches!(
867 selector.config.selection_algorithm,
868 SelectionAlgorithm::V5Integrated
869 ));
870 }
871
872 #[tokio::test]
873 async fn test_medium_budget_selection() {
874 let selector = ScalingSelector::with_token_budget(10000);
875 assert_eq!(selector.config.token_budget, 10000);
876 assert!(matches!(
877 selector.config.selection_algorithm,
878 SelectionAlgorithm::V5Integrated
879 ));
880 }
881
882 #[tokio::test]
883 async fn test_file_selection_process() {
884 let temp_dir = TempDir::new().unwrap();
885 let repo_path = temp_dir.path();
886
887 fs::create_dir_all(repo_path.join("src")).unwrap();
889 fs::write(
890 repo_path.join("src/main.rs"),
891 "fn main() { println!(\"Hello, world!\"); }",
892 )
893 .unwrap();
894 fs::write(
895 repo_path.join("src/lib.rs"),
896 "pub fn hello() -> String { \"Hello\".to_string() }",
897 )
898 .unwrap();
899 fs::write(
900 repo_path.join("Cargo.toml"),
901 "[package]\nname = \"test\"\nversion = \"0.1.0\"",
902 )
903 .unwrap();
904 fs::write(
905 repo_path.join("README.md"),
906 "# Test Project\n\nThis is a test project.",
907 )
908 .unwrap();
909
910 let mut selector = ScalingSelector::with_token_budget(5000);
911 let result = selector.select_and_process(repo_path).await.unwrap();
912
913 assert!(result.selected_files.len() > 0);
915 assert!(result.selected_files.len() <= 4); assert!(result.tokens_used <= 5000); assert!(result.token_utilization <= 1.0); }
919
920 #[test]
921 fn test_token_estimation() {
922 let selector = ScalingSelector::with_defaults();
923
924 let rust_file = FileMetadata {
925 path: std::path::PathBuf::from("src/main.rs"),
926 size: 1000,
927 modified: std::time::SystemTime::now(),
928 language: "Rust".to_string(),
929 file_type: "Source".to_string(),
930 };
931
932 let tokens = selector.estimate_tokens(&rust_file);
933 assert!(tokens > 200); let config_file = FileMetadata {
936 path: std::path::PathBuf::from("Cargo.toml"),
937 size: 500,
938 modified: std::time::SystemTime::now(),
939 language: "TOML".to_string(),
940 file_type: "Configuration".to_string(),
941 };
942
943 let config_tokens = selector.estimate_tokens(&config_file);
944 assert!(config_tokens < tokens); }
946
947 #[test]
948 fn test_file_scoring() {
949 let selector = ScalingSelector::with_defaults();
950
951 let main_file = FileMetadata {
952 path: std::path::PathBuf::from("src/main.rs"),
953 size: 1000,
954 modified: std::time::SystemTime::now(),
955 language: "Rust".to_string(),
956 file_type: "Source".to_string(),
957 };
958
959 let score = selector.calculate_file_score(&main_file);
960 assert!(score > 0.7); let readme = FileMetadata {
963 path: std::path::PathBuf::from("README.md"),
964 size: 500,
965 modified: std::time::SystemTime::now(),
966 language: "Markdown".to_string(),
967 file_type: "Documentation".to_string(),
968 };
969
970 let readme_score = selector.calculate_file_score(&readme);
971 assert!(readme_score < score); }
973
974 #[tokio::test]
975 async fn test_context_positioning_integration() {
976 let temp_dir = TempDir::new().unwrap();
977 let repo_path = temp_dir.path();
978
979 fs::create_dir_all(repo_path.join("src")).unwrap();
981 fs::write(
982 repo_path.join("src/main.rs"),
983 "fn main() { println!(\"Hello, world!\"); }",
984 )
985 .unwrap();
986 fs::write(
987 repo_path.join("src/lib.rs"),
988 "pub fn hello() -> String { \"Hello\".to_string() }",
989 )
990 .unwrap();
991 fs::write(repo_path.join("src/utils.rs"), "pub fn utility() {}").unwrap();
992 fs::write(
993 repo_path.join("Cargo.toml"),
994 "[package]\nname = \"test\"\nversion = \"0.1.0\"",
995 )
996 .unwrap();
997
998 let mut config = ScalingSelectionConfig::medium_budget();
1000 config.positioning_config.enable_positioning = true;
1001 let mut selector = ScalingSelector::new(config);
1002
1003 let result = selector
1004 .select_and_process_with_query(repo_path, Some("main"))
1005 .await
1006 .unwrap();
1007
1008 assert!(result.has_context_positioning());
1010
1011 let (head, middle, tail) = result.get_positioning_stats().unwrap();
1013 assert!(head > 0);
1014 assert!(head + middle + tail == result.selected_files.len());
1015
1016 assert!(result.get_positioning_reasoning().is_some());
1018 let reasoning = result.get_positioning_reasoning().unwrap();
1019 assert!(reasoning.contains("HEAD"));
1020 assert!(reasoning.contains("TAIL"));
1021
1022 let ordered_files = result.get_optimally_ordered_files();
1024 assert_eq!(ordered_files.len(), result.selected_files.len());
1025 }
1026
1027 #[tokio::test]
1028 async fn test_positioning_disabled() {
1029 let temp_dir = TempDir::new().unwrap();
1030 let repo_path = temp_dir.path();
1031
1032 fs::create_dir_all(repo_path.join("src")).unwrap();
1034 fs::write(repo_path.join("src/main.rs"), "fn main() {}").unwrap();
1035
1036 let mut config = ScalingSelectionConfig::small_budget();
1038 config.positioning_config.enable_positioning = false;
1039 let mut selector = ScalingSelector::new(config);
1040
1041 let result = selector
1042 .select_and_process_with_query(repo_path, Some("main"))
1043 .await
1044 .unwrap();
1045
1046 assert!(!result.has_context_positioning());
1048 assert!(result.positioned_selection.is_none());
1049
1050 let ordered_files = result.get_optimally_ordered_files();
1052 assert_eq!(ordered_files.len(), result.selected_files.len());
1053 }
1054
1055 #[test]
1056 fn test_configuration_builder_positioning() {
1057 let config = ScalingSelectionConfig::default();
1058 assert!(config.positioning_config.enable_positioning);
1059 assert_eq!(config.positioning_config.head_percentage, 0.20);
1060 assert_eq!(config.positioning_config.tail_percentage, 0.20);
1061
1062 let small_config = ScalingSelectionConfig::small_budget();
1063 assert!(small_config.positioning_config.enable_positioning);
1064
1065 let large_config = ScalingSelectionConfig::large_budget();
1066 assert!(large_config.positioning_config.enable_positioning);
1067 }
1068
1069 #[test]
1070 fn test_with_test_exclusion_convenience_method() {
1071 let config = ScalingSelectionConfig::default().with_test_exclusion();
1072
1073 assert!(config.positioning_config.auto_exclude_tests);
1075
1076 let config_chained = ScalingSelectionConfig::medium_budget().with_test_exclusion();
1078
1079 assert!(config_chained.positioning_config.auto_exclude_tests);
1080 assert_eq!(config_chained.token_budget, 10000); }
1082}