1use std::collections::HashMap;
7use std::path::Path;
8use std::time::{Duration, Instant};
9
10use serde::{Deserialize, Serialize};
11use tracing::{debug, info, warn};
12
13use crate::engine::{ProcessingResult, ScalingConfig};
14use crate::error::{ScalingError, ScalingResult};
15use crate::positioning::{ContextPositioner, ContextPositioningConfig, PositionedSelection};
16use crate::streaming::{FileMetadata, ScoredFile, StreamingSelector};
17
/// Coarse bucket a file is sorted into before the token budget is divided up.
///
/// Assigned by `ScalingSelector::classify_file`, which tests for `Config`,
/// then `Entry`, then `Examples`, and falls back to `General`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FileCategory {
    /// File typed `"Configuration"`, or whose name contains `config` or ends
    /// in `.toml`, `.json` or `.yaml`.
    Config,
    /// File whose name contains `main` or `index`, or is exactly `lib.rs` or
    /// `__init__.py`.
    Entry,
    /// File whose path contains `example`, `test`, `demo` or `sample`.
    Examples,
    /// Any file that matched none of the other categories.
    General,
}
26
/// Identifies the strategy used to choose files; echoed back in
/// `ScalingSelectionResult::algorithm_used`.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum SelectionAlgorithm {
    /// The only strategy defined here; every configuration preset in this
    /// file selects it.
    V5Integrated,
}
33
/// Settings that drive a `ScalingSelector` run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalingSelectionConfig {
    /// Token budget handed to the streaming selector; also the denominator of
    /// the reported token utilization.
    pub token_budget: usize,

    /// Algorithm identifier that is logged and copied into the result.
    pub selection_algorithm: SelectionAlgorithm,

    /// NOTE(review): this flag is not read anywhere in this file — confirm
    /// whether quota enforcement is expected to honour it.
    pub enable_quotas: bool,

    /// Passed to `ContextPositioner::new` when its `enable_positioning` flag is set.
    pub positioning_config: ContextPositioningConfig,

    /// NOTE(review): not read in this file; presumably consumed by the scaling
    /// engine elsewhere — verify.
    pub scaling_config: ScalingConfig,
}
52
53impl Default for ScalingSelectionConfig {
54 fn default() -> Self {
55 Self {
56 token_budget: 8000,
57 selection_algorithm: SelectionAlgorithm::V5Integrated,
58 enable_quotas: true,
59 positioning_config: ContextPositioningConfig::default(),
60 scaling_config: ScalingConfig::default(),
61 }
62 }
63}
64
65impl ScalingSelectionConfig {
66 pub fn small_budget() -> Self {
68 Self {
69 token_budget: 1000,
70 selection_algorithm: SelectionAlgorithm::V5Integrated,
71 enable_quotas: true,
72 positioning_config: ContextPositioningConfig::default(),
73 scaling_config: ScalingConfig::small_repository(),
74 }
75 }
76
77 pub fn with_test_exclusion(mut self) -> Self {
79 self.positioning_config.auto_exclude_tests = true;
80 self
81 }
82
83 pub fn medium_budget() -> Self {
85 Self {
86 token_budget: 10000,
87 selection_algorithm: SelectionAlgorithm::V5Integrated,
88 enable_quotas: true,
89 positioning_config: ContextPositioningConfig::default(),
90 scaling_config: ScalingConfig::default(),
91 }
92 }
93
94 pub fn large_budget() -> Self {
96 Self {
97 token_budget: 100000,
98 selection_algorithm: SelectionAlgorithm::V5Integrated,
99 enable_quotas: true,
100 positioning_config: ContextPositioningConfig::default(),
101 scaling_config: ScalingConfig::large_repository(),
102 }
103 }
104}
105
/// Outcome of `ScalingSelector::select_and_process_with_query`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalingSelectionResult {
    /// Files returned by the streaming selection step, in the order it produced them.
    pub selected_files: Vec<FileMetadata>,

    /// HEAD/MIDDLE/TAIL layout; `Some` only when positioning was enabled.
    pub positioned_selection: Option<PositionedSelection>,

    /// Currently populated with the number of *selected* files, not the number
    /// of files scanned.
    pub total_files_considered: usize,

    /// `tokens_used` divided by the configured token budget (a fraction, not a percentage).
    pub token_utilization: f64,

    /// Total tokens reported by the positioner when positioning ran, otherwise
    /// the sum of per-file estimates.
    pub tokens_used: usize,

    /// Copy of the configured selection algorithm.
    pub algorithm_used: SelectionAlgorithm,

    /// Wall-clock time of the discovery/selection phase only.
    pub selection_time: Duration,

    /// Value produced by the scaling-optimization step for the selected files.
    pub processing_result: ProcessingResult,
}
133
/// Internal working record used by `apply_integrated_selection`: a file
/// together with its estimated token cost, heuristic score and category.
#[derive(Debug, Clone)]
struct SelectorScoredFile {
    /// Copy of the file's metadata.
    metadata: FileMetadata,
    /// Estimated token cost from `estimate_tokens`.
    tokens: usize,
    /// Heuristic score from `calculate_file_score` (clamped to `0.0..=5.0`).
    score: f64,
    /// Bucket assigned by `classify_file`.
    category: FileCategory,
}
142
/// Selects repository files under a token budget and optionally arranges
/// them with context positioning.
pub struct ScalingSelector {
    /// Settings used for every selection run of this instance.
    config: ScalingSelectionConfig,
}
147
148impl ScalingSelector {
149 pub fn new(config: ScalingSelectionConfig) -> Self {
151 Self { config }
152 }
153
154 pub fn with_defaults() -> Self {
156 Self::new(ScalingSelectionConfig::default())
157 }
158
159 pub fn with_token_budget(token_budget: usize) -> Self {
161 let config = match token_budget {
162 0..=2000 => ScalingSelectionConfig::small_budget(),
163 2001..=15000 => ScalingSelectionConfig::medium_budget(),
164 _ => ScalingSelectionConfig::large_budget(),
165 };
166
167 Self::new(ScalingSelectionConfig {
168 token_budget,
169 ..config
170 })
171 }
172
173 pub async fn select_and_process(
175 &mut self,
176 repo_path: &Path,
177 ) -> ScalingResult<ScalingSelectionResult> {
178 self.select_and_process_with_query(repo_path, None).await
179 }
180
181 pub async fn select_and_process_with_query(
183 &mut self,
184 repo_path: &Path,
185 query_hint: Option<&str>,
186 ) -> ScalingResult<ScalingSelectionResult> {
187 let start_time = Instant::now();
188
189 info!(
190 "Starting intelligent scaling selection for: {:?}",
191 repo_path
192 );
193 info!(
194 "Token budget: {}, Algorithm: {:?}",
195 self.config.token_budget, self.config.selection_algorithm
196 );
197 if let Some(query) = query_hint {
198 info!("Query hint for positioning: '{}'", query);
199 }
200
201 let discovery_start = Instant::now();
203 let selected_files = self.discover_and_select_files_streaming(repo_path).await?;
204 let discovery_time = discovery_start.elapsed();
205
206 info!(
207 "Selected {} files in {:?}",
208 selected_files.len(),
209 discovery_time
210 );
211
212 let total_files_considered = selected_files.len();
214 let (positioned_selection, final_files, final_tokens) =
215 if self.config.positioning_config.enable_positioning {
216 let positioner = ContextPositioner::new(self.config.positioning_config.clone());
217 let positioned = positioner
218 .position_files(selected_files.clone(), query_hint)
219 .await?;
220
221 info!(
222 "Context positioning applied: HEAD={}, MIDDLE={}, TAIL={}",
223 positioned.positioning.head_files.len(),
224 positioned.positioning.middle_files.len(),
225 positioned.positioning.tail_files.len()
226 );
227
228 let tokens = positioned.total_tokens;
229 (Some(positioned), selected_files, tokens)
230 } else {
231 let tokens = self.calculate_tokens_used(&selected_files);
232 (None, selected_files, tokens)
233 };
234
235 let processing_result = self.apply_scaling_optimizations(&final_files).await?;
237
238 let token_utilization = final_tokens as f64 / self.config.token_budget as f64;
240
241 let total_time = start_time.elapsed();
242 info!("Total selection and processing time: {:?}", total_time);
243 info!(
244 "Token utilization: {:.1}% ({}/{})",
245 token_utilization * 100.0,
246 final_tokens,
247 self.config.token_budget
248 );
249
250 Ok(ScalingSelectionResult {
251 selected_files: final_files,
252 positioned_selection,
253 total_files_considered, token_utilization,
255 tokens_used: final_tokens,
256 algorithm_used: self.config.selection_algorithm,
257 selection_time: discovery_time, processing_result,
259 })
260 }
261
262 async fn discover_and_select_files_streaming(
264 &self,
265 repo_path: &Path,
266 ) -> ScalingResult<Vec<FileMetadata>> {
267 info!("Using optimized streaming file discovery");
268
269 let streaming_config = crate::streaming::StreamingConfig {
271 enable_streaming: true,
272 concurrency_limit: num_cpus::get() * 2,
273 memory_limit: 100 * 1024 * 1024, selection_heap_size: self.config.token_budget * 2, };
276
277 let streaming_selector = StreamingSelector::new(streaming_config);
278
279 let target_count = self.estimate_target_file_count();
281
282 let score_fn = {
284 let token_budget = self.config.token_budget;
285 move |file: &FileMetadata| -> f64 {
286 Self::calculate_file_score_static(file, token_budget)
287 }
288 };
289
290 let token_fn = {
291 let token_budget = self.config.token_budget;
292 move |file: &FileMetadata| -> usize { Self::estimate_tokens_static(file, token_budget) }
293 };
294
295 let scored_files = streaming_selector
297 .select_files_streaming(
298 repo_path,
299 target_count,
300 self.config.token_budget,
301 score_fn,
302 token_fn,
303 )
304 .await?;
305
306 let selected_files: Vec<FileMetadata> = scored_files
308 .into_iter()
309 .map(|scored| scored.metadata)
310 .collect();
311
312 info!(
313 "Streaming selection completed: {} files selected",
314 selected_files.len()
315 );
316 Ok(selected_files)
317 }
318
319 fn estimate_target_file_count(&self) -> usize {
321 let estimated_files = self.config.token_budget / 300;
324
325 estimated_files.clamp(5, 200)
327 }
328
329 fn detect_language(&self, path: &Path) -> String {
331 match path.extension().and_then(|s| s.to_str()) {
332 Some("rs") => "Rust".to_string(),
333 Some("py") => "Python".to_string(),
334 Some("js") => "JavaScript".to_string(),
335 Some("ts") => "TypeScript".to_string(),
336 Some("go") => "Go".to_string(),
337 Some("java") => "Java".to_string(),
338 Some("cpp" | "cc" | "cxx") => "C++".to_string(),
339 Some("c") => "C".to_string(),
340 Some("h") => "Header".to_string(),
341 Some("md") => "Markdown".to_string(),
342 Some("json") => "JSON".to_string(),
343 Some("yaml" | "yml") => "YAML".to_string(),
344 Some("toml") => "TOML".to_string(),
345 _ => "Unknown".to_string(),
346 }
347 }
348
349 fn classify_file_type(&self, path: &Path) -> String {
351 match path.extension().and_then(|s| s.to_str()) {
352 Some("rs" | "py" | "js" | "ts" | "go" | "java" | "cpp" | "cc" | "cxx" | "c") => {
353 "Source".to_string()
354 }
355 Some("h" | "hpp" | "hxx") => "Header".to_string(),
356 Some("md" | "txt" | "rst") => "Documentation".to_string(),
357 Some("json" | "yaml" | "yml" | "toml" | "ini" | "cfg") => "Configuration".to_string(),
358 Some("png" | "jpg" | "jpeg" | "gif" | "svg") => "Image".to_string(),
359 _ => "Other".to_string(),
360 }
361 }
362
363 async fn apply_intelligent_selection(
365 &self,
366 files: &[FileMetadata],
367 ) -> ScalingResult<Vec<FileMetadata>> {
368 self.apply_integrated_selection(files)
370 }
371
372 fn apply_integrated_selection(
374 &self,
375 files: &[FileMetadata],
376 ) -> ScalingResult<Vec<FileMetadata>> {
377 let mut scored_files: Vec<SelectorScoredFile> = files
379 .iter()
380 .map(|file| {
381 let tokens = self.estimate_tokens(file);
382 let score = self.calculate_file_score(file);
383 let category = self.classify_file(file);
384
385 SelectorScoredFile {
386 metadata: file.clone(),
387 tokens,
388 score,
389 category,
390 }
391 })
392 .collect();
393
394 let mut categorized: HashMap<FileCategory, Vec<SelectorScoredFile>> = HashMap::new();
396 for scored_file in scored_files {
397 categorized
398 .entry(scored_file.category)
399 .or_insert_with(Vec::new)
400 .push(scored_file);
401 }
402
403 for files in categorized.values_mut() {
405 files.sort_by(|a, b| {
406 b.score
407 .partial_cmp(&a.score)
408 .unwrap_or(std::cmp::Ordering::Equal)
409 });
410 }
411
412 let mut selected = Vec::new();
414 let mut remaining_budget = self.config.token_budget;
415
416 let tier1_order = [FileCategory::Entry, FileCategory::Config];
418 for category in tier1_order.iter() {
419 if let Some(files) = categorized.get(category) {
420 let tier_budget = match category {
421 FileCategory::Entry => (self.config.token_budget as f64 * 0.35) as usize, FileCategory::Config => (self.config.token_budget as f64 * 0.25) as usize, _ => 0,
424 };
425
426 let mut used_budget = 0;
427 for scored_file in files {
428 if used_budget + scored_file.tokens <= tier_budget
429 && scored_file.tokens <= remaining_budget
430 {
431 selected.push(scored_file.metadata.clone());
432 used_budget += scored_file.tokens;
433 remaining_budget = remaining_budget.saturating_sub(scored_file.tokens);
434 }
435 }
436 }
437 }
438
439 if let Some(general_files) = categorized.get(&FileCategory::General) {
441 for scored_file in general_files {
442 if scored_file.tokens <= remaining_budget {
443 selected.push(scored_file.metadata.clone());
444 remaining_budget = remaining_budget.saturating_sub(scored_file.tokens);
445 }
446 }
447 }
448
449 if let Some(example_files) = categorized.get(&FileCategory::Examples) {
451 for scored_file in example_files {
452 if scored_file.tokens <= remaining_budget {
453 selected.push(scored_file.metadata.clone());
454 remaining_budget = remaining_budget.saturating_sub(scored_file.tokens);
455 }
456 }
457 }
458
459 Ok(selected)
460 }
461
462 async fn apply_scaling_optimizations(
464 &self,
465 selected_files: &[FileMetadata],
466 ) -> ScalingResult<ProcessingResult> {
467 let total_size: u64 = selected_files.iter().map(|f| f.size).sum();
469 let processing_time = Duration::from_millis((selected_files.len() as u64 * 2).max(10)); let memory_peak = (selected_files.len() * 1024).max(1024); Ok(ProcessingResult {
473 files: selected_files.to_vec(),
474 total_files: selected_files.len(),
475 processing_time,
476 memory_peak,
477 cache_hits: 0,
478 cache_misses: selected_files.len() as u64,
479 metrics: crate::metrics::ScalingMetrics {
480 files_processed: selected_files.len() as u64,
481 total_processing_time: processing_time,
482 memory_peak,
483 cache_hits: 0,
484 cache_misses: selected_files.len() as u64,
485 parallel_efficiency: 1.0,
486 streaming_overhead: Duration::from_millis(0),
487 },
488 })
489 }
490
491 fn calculate_tokens_used(&self, selected_files: &[FileMetadata]) -> usize {
493 selected_files
494 .iter()
495 .map(|file| self.estimate_tokens(file))
496 .sum()
497 }
498
499 fn estimate_tokens(&self, file: &FileMetadata) -> usize {
501 let base_tokens = ((file.size as f64) / 3.5) as usize;
504
505 let min_tokens = if self.config.token_budget < 5000 {
508 100 } else {
510 50 };
512 let base_tokens = base_tokens.max(min_tokens);
513
514 let multiplier = match file.file_type.as_str() {
516 "Source" => 1.2, "Documentation" => 1.0, "Configuration" => 0.8, _ => 1.1, };
521
522 let language_multiplier = match file.language.as_str() {
524 "Rust" => 1.3, "JavaScript" | "TypeScript" => 1.2, "Python" => 1.1, "C" | "Go" => 1.0, "HTML" | "CSS" => 0.9, "JSON" | "YAML" | "TOML" => 0.7, _ => 1.0, };
532
533 let final_tokens = (base_tokens as f64 * multiplier * language_multiplier) as usize;
535
536 final_tokens.min(self.config.token_budget / 4) }
539
540 fn calculate_file_score(&self, file: &FileMetadata) -> f64 {
542 let mut score: f64 = 0.1; let path_str = file.path.to_string_lossy().to_lowercase();
545
546 if path_str.contains("main") || path_str.contains("index") {
548 score += 2.0; }
550 if path_str.contains("lib.rs") || path_str.contains("mod.rs") {
551 score += 1.5; }
553 if path_str.contains("__init__.py") {
554 score += 1.3; }
556
557 let path_components = file.path.components().count();
559 if path_components <= 2 {
560 score += 1.0;
562
563 if path_str.contains("readme")
565 || path_str.contains("license")
566 || path_str.contains("cargo.toml")
567 || path_str.contains("package.json")
568 || path_str.contains("pyproject.toml")
569 || path_str.contains("setup.py")
570 {
571 score += 1.5;
572 }
573 }
574
575 match file.language.as_str() {
577 "Rust" | "Python" | "JavaScript" | "TypeScript" => score += 0.8,
578 "C" | "C++" | "Go" | "Java" => score += 0.6,
579 "Shell" | "Makefile" => score += 0.4, _ => {}
581 }
582
583 match file.file_type.as_str() {
585 "Source" => score += 0.6,
586 "Configuration" => score += 0.5, "Documentation" => score += 0.3,
588 _ => {}
589 }
590
591 if file.size > 50_000 {
593 score -= 0.5;
594 }
595 if file.size > 100_000 {
596 score -= 1.0;
597 }
598
599 if path_str.contains("test") && !path_str.contains("tests/") {
601 score += 0.2; }
603
604 if path_components > 4 {
606 score -= 0.3 * (path_components - 4) as f64;
607 }
608
609 if file.size < 10_000 && (path_str.contains("config") || path_str.contains("env")) {
611 score += 0.4;
612 }
613
614 score.clamp(0.0, 5.0) }
616
617 fn classify_file(&self, file: &FileMetadata) -> FileCategory {
619 let path_str = file.path.to_string_lossy().to_lowercase();
620 let filename = file
621 .path
622 .file_name()
623 .and_then(|n| n.to_str())
624 .unwrap_or("")
625 .to_lowercase();
626
627 if matches!(file.file_type.as_str(), "Configuration")
629 || filename.contains("config")
630 || filename.ends_with(".toml")
631 || filename.ends_with(".json")
632 || filename.ends_with(".yaml")
633 {
634 return FileCategory::Config;
635 }
636
637 if filename.contains("main")
639 || filename.contains("index")
640 || filename == "lib.rs"
641 || filename == "__init__.py"
642 {
643 return FileCategory::Entry;
644 }
645
646 if path_str.contains("example")
648 || path_str.contains("test")
649 || path_str.contains("demo")
650 || path_str.contains("sample")
651 {
652 return FileCategory::Examples;
653 }
654
655 FileCategory::General
656 }
657
658 fn calculate_file_score_static(file: &FileMetadata, token_budget: usize) -> f64 {
660 let mut score: f64 = 0.1; let path_str = file.path.to_string_lossy().to_lowercase();
663
664 if path_str.contains("main") || path_str.contains("index") {
666 score += 2.0; }
668 if path_str.contains("lib.rs") || path_str.contains("mod.rs") {
669 score += 1.5; }
671 if path_str.contains("__init__.py") {
672 score += 1.3; }
674
675 let path_components = file.path.components().count();
677 if path_components <= 2 {
678 score += 1.0;
680
681 if path_str.contains("readme")
683 || path_str.contains("license")
684 || path_str.contains("cargo.toml")
685 || path_str.contains("package.json")
686 || path_str.contains("pyproject.toml")
687 || path_str.contains("setup.py")
688 {
689 score += 1.5;
690 }
691 }
692
693 match file.language.as_str() {
695 "Rust" | "Python" | "JavaScript" | "TypeScript" => score += 0.8,
696 "C" | "C++" | "Go" | "Java" => score += 0.6,
697 "Shell" => score += 0.4, _ => {}
699 }
700
701 match file.file_type.as_str() {
703 "Source" => score += 0.6,
704 "Configuration" => score += 0.5, "Documentation" => score += 0.3,
706 _ => {}
707 }
708
709 if file.size > 50_000 {
711 score -= 0.5;
712 }
713 if file.size > 100_000 {
714 score -= 1.0;
715 }
716
717 if path_str.contains("test") && !path_str.contains("tests/") {
719 score += 0.2; }
721
722 if path_components > 4 {
724 score -= 0.3 * (path_components - 4) as f64;
725 }
726
727 if file.size < 10_000 && (path_str.contains("config") || path_str.contains("env")) {
729 score += 0.4;
730 }
731
732 score.clamp(0.0, 5.0) }
734
735 fn estimate_tokens_static(file: &FileMetadata, token_budget: usize) -> usize {
737 let base_tokens = ((file.size as f64) / 3.5) as usize;
740
741 let min_tokens = if token_budget < 5000 {
744 100 } else {
746 50 };
748 let base_tokens = base_tokens.max(min_tokens);
749
750 let multiplier = match file.file_type.as_str() {
752 "Source" => 1.2, "Documentation" => 1.0, "Configuration" => 0.8, _ => 1.1, };
757
758 let language_multiplier = match file.language.as_str() {
760 "Rust" => 1.3, "JavaScript" | "TypeScript" => 1.2, "Python" => 1.1, "C" | "Go" => 1.0, "HTML" | "CSS" => 0.9, "JSON" | "YAML" | "TOML" => 0.7, _ => 1.0, };
768
769 let final_tokens = (base_tokens as f64 * multiplier * language_multiplier) as usize;
771
772 final_tokens.min(token_budget / 4) }
775}
776
777impl ScalingSelectionResult {
778 pub fn get_optimally_ordered_files(&self) -> Vec<&FileMetadata> {
780 if let Some(positioned) = &self.positioned_selection {
781 let mut files = Vec::new();
782
783 for file in &positioned.positioning.head_files {
785 files.push(&file.metadata);
786 }
787
788 for file in &positioned.positioning.middle_files {
790 files.push(&file.metadata);
791 }
792
793 for file in &positioned.positioning.tail_files {
795 files.push(&file.metadata);
796 }
797
798 files
799 } else {
800 self.selected_files.iter().collect()
801 }
802 }
803
804 pub fn get_positioning_stats(&self) -> Option<(usize, usize, usize)> {
806 self.positioned_selection.as_ref().map(|p| {
807 (
808 p.positioning.head_files.len(),
809 p.positioning.middle_files.len(),
810 p.positioning.tail_files.len(),
811 )
812 })
813 }
814
815 pub fn get_positioning_reasoning(&self) -> Option<&str> {
817 self.positioned_selection
818 .as_ref()
819 .map(|p| p.positioning_reasoning.as_str())
820 }
821
822 pub fn has_context_positioning(&self) -> bool {
824 self.positioned_selection.is_some()
825 }
826}
827
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Builds an in-memory `FileMetadata` for the pure scoring/estimation tests.
    fn sample_file(path: &str, size: u64, language: &str, file_type: &str) -> FileMetadata {
        FileMetadata {
            path: std::path::PathBuf::from(path),
            size,
            modified: std::time::SystemTime::now(),
            language: language.to_string(),
            file_type: file_type.to_string(),
        }
    }

    // The constructor tests below never await, so they run as plain `#[test]`s.

    #[test]
    fn test_scaling_selector_creation() {
        let selector = ScalingSelector::with_defaults();
        assert_eq!(selector.config.token_budget, 8000);
    }

    #[test]
    fn test_small_budget_selection() {
        let selector = ScalingSelector::with_token_budget(1000);
        assert_eq!(selector.config.token_budget, 1000);
        assert!(matches!(
            selector.config.selection_algorithm,
            SelectionAlgorithm::V5Integrated
        ));
    }

    #[test]
    fn test_medium_budget_selection() {
        let selector = ScalingSelector::with_token_budget(10000);
        assert_eq!(selector.config.token_budget, 10000);
        assert!(matches!(
            selector.config.selection_algorithm,
            SelectionAlgorithm::V5Integrated
        ));
    }

    /// End-to-end run over a four-file repository on disk.
    #[tokio::test]
    async fn test_file_selection_process() {
        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        fs::create_dir_all(repo_path.join("src")).unwrap();
        fs::write(
            repo_path.join("src/main.rs"),
            "fn main() { println!(\"Hello, world!\"); }",
        )
        .unwrap();
        fs::write(
            repo_path.join("src/lib.rs"),
            "pub fn hello() -> String { \"Hello\".to_string() }",
        )
        .unwrap();
        fs::write(
            repo_path.join("Cargo.toml"),
            "[package]\nname = \"test\"\nversion = \"0.1.0\"",
        )
        .unwrap();
        fs::write(
            repo_path.join("README.md"),
            "# Test Project\n\nThis is a test project.",
        )
        .unwrap();

        let mut selector = ScalingSelector::with_token_budget(5000);
        let result = selector.select_and_process(repo_path).await.unwrap();

        assert!(!result.selected_files.is_empty());
        assert!(result.selected_files.len() <= 4);
        assert!(result.tokens_used <= 5000);
        assert!(result.token_utilization <= 1.0);
    }

    #[test]
    fn test_token_estimation() {
        let selector = ScalingSelector::with_defaults();

        let rust_file = sample_file("src/main.rs", 1000, "Rust", "Source");
        let tokens = selector.estimate_tokens(&rust_file);
        assert!(tokens > 200);

        // A smaller configuration file must be estimated cheaper than the Rust source.
        let config_file = sample_file("Cargo.toml", 500, "TOML", "Configuration");
        let config_tokens = selector.estimate_tokens(&config_file);
        assert!(config_tokens < tokens);
    }

    #[test]
    fn test_file_scoring() {
        let selector = ScalingSelector::with_defaults();

        let main_file = sample_file("src/main.rs", 1000, "Rust", "Source");
        let score = selector.calculate_file_score(&main_file);
        assert!(score > 0.7);

        // The README is expected to rank below the entry point.
        let readme = sample_file("README.md", 500, "Markdown", "Documentation");
        let readme_score = selector.calculate_file_score(&readme);
        assert!(readme_score < score);
    }

    /// With positioning enabled the result must expose a consistent HEAD/MIDDLE/TAIL layout.
    #[tokio::test]
    async fn test_context_positioning_integration() {
        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        fs::create_dir_all(repo_path.join("src")).unwrap();
        fs::write(
            repo_path.join("src/main.rs"),
            "fn main() { println!(\"Hello, world!\"); }",
        )
        .unwrap();
        fs::write(
            repo_path.join("src/lib.rs"),
            "pub fn hello() -> String { \"Hello\".to_string() }",
        )
        .unwrap();
        fs::write(repo_path.join("src/utils.rs"), "pub fn utility() {}").unwrap();
        fs::write(
            repo_path.join("Cargo.toml"),
            "[package]\nname = \"test\"\nversion = \"0.1.0\"",
        )
        .unwrap();

        let mut config = ScalingSelectionConfig::medium_budget();
        config.positioning_config.enable_positioning = true;
        let mut selector = ScalingSelector::new(config);

        let result = selector
            .select_and_process_with_query(repo_path, Some("main"))
            .await
            .unwrap();

        assert!(result.has_context_positioning());

        let (head, middle, tail) = result.get_positioning_stats().unwrap();
        assert!(head > 0);
        assert_eq!(head + middle + tail, result.selected_files.len());

        assert!(result.get_positioning_reasoning().is_some());
        let reasoning = result.get_positioning_reasoning().unwrap();
        assert!(reasoning.contains("HEAD"));
        assert!(reasoning.contains("TAIL"));

        let ordered_files = result.get_optimally_ordered_files();
        assert_eq!(ordered_files.len(), result.selected_files.len());
    }

    /// With positioning disabled no layout is produced and ordering falls back
    /// to the plain selection.
    #[tokio::test]
    async fn test_positioning_disabled() {
        let temp_dir = TempDir::new().unwrap();
        let repo_path = temp_dir.path();

        fs::create_dir_all(repo_path.join("src")).unwrap();
        fs::write(repo_path.join("src/main.rs"), "fn main() {}").unwrap();

        let mut config = ScalingSelectionConfig::small_budget();
        config.positioning_config.enable_positioning = false;
        let mut selector = ScalingSelector::new(config);

        let result = selector
            .select_and_process_with_query(repo_path, Some("main"))
            .await
            .unwrap();

        assert!(!result.has_context_positioning());
        assert!(result.positioned_selection.is_none());

        let ordered_files = result.get_optimally_ordered_files();
        assert_eq!(ordered_files.len(), result.selected_files.len());
    }

    #[test]
    fn test_configuration_builder_positioning() {
        let config = ScalingSelectionConfig::default();
        assert!(config.positioning_config.enable_positioning);
        assert_eq!(config.positioning_config.head_percentage, 0.20);
        assert_eq!(config.positioning_config.tail_percentage, 0.20);

        let small_config = ScalingSelectionConfig::small_budget();
        assert!(small_config.positioning_config.enable_positioning);

        let large_config = ScalingSelectionConfig::large_budget();
        assert!(large_config.positioning_config.enable_positioning);
    }

    #[test]
    fn test_with_test_exclusion_convenience_method() {
        let config = ScalingSelectionConfig::default().with_test_exclusion();
        assert!(config.positioning_config.auto_exclude_tests);

        // Chaining onto a preset must keep the preset's other settings.
        let config_chained = ScalingSelectionConfig::medium_budget().with_test_exclusion();
        assert!(config_chained.positioning_config.auto_exclude_tests);
        assert_eq!(config_chained.token_budget, 10000);
    }
}