1use petgraph::algo::kosaraju_scc;
10use petgraph::visit::EdgeRef;
11use petgraph::{graph::NodeIndex, Directed, Graph};
12use rayon::prelude::*;
13use serde::{Deserialize, Serialize};
14use std::collections::{HashMap, HashSet};
15use std::path::Path;
16use tracing::{debug, info, warn};
17
18use crate::error::{ScalingError, ScalingResult};
19use crate::streaming::FileMetadata;
20
/// Configuration for the HEAD/MIDDLE/TAIL context-positioning strategy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextPositioningConfig {
    /// Master switch; when false, files are passed through in original order.
    pub enable_positioning: bool,

    /// Fraction of files placed in the HEAD tier (default 0.20).
    pub head_percentage: f64,

    /// Fraction of files placed in the TAIL tier (default 0.20).
    pub tail_percentage: f64,

    /// Weight of graph centrality when ranking HEAD candidates (default 0.4).
    pub centrality_weight: f64,

    /// Weight of relatedness grouping (default 0.3).
    // NOTE(review): not read anywhere in this module's positioning code —
    // confirm whether it is consumed elsewhere or is vestigial.
    pub relatedness_weight: f64,

    /// Weight of query-hint relevance when ranking HEAD candidates (default 0.3).
    pub query_relevance_weight: f64,

    /// When true, files detected as tests are dropped before positioning.
    pub auto_exclude_tests: bool,
}
45
impl Default for ContextPositioningConfig {
    /// Balanced defaults: positioning enabled, 20% HEAD / 20% TAIL,
    /// ranking weights summing to 1.0, and test files kept.
    fn default() -> Self {
        Self {
            enable_positioning: true,
            head_percentage: 0.20,
            tail_percentage: 0.20,
            centrality_weight: 0.4,
            relatedness_weight: 0.3,
            query_relevance_weight: 0.3,
            auto_exclude_tests: false,
        }
    }
}
59
/// Per-file graph-centrality measurements, each normalized to roughly [0, 1].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CentralityScores {
    /// SCC-based betweenness approximation (cross-component connectivity).
    pub betweenness: f64,

    /// Simplified power-iteration PageRank score.
    pub pagerank: f64,

    /// Degree centrality: (in-degree + out-degree) / (node_count - 1).
    pub degree: f64,

    /// Weighted blend: 0.3·degree + 0.5·pagerank + 0.2·betweenness.
    pub combined: f64,
}
75
/// A file plus the scores used to decide its position in the context window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileWithCentrality {
    pub metadata: FileMetadata,
    /// Graph-centrality measurements for this file.
    pub centrality: CentralityScores,
    /// How well the file matches the query hint (0.0 when no hint given).
    pub query_relevance: f64,
    /// Grouping key (leading path components + language) used to keep
    /// related files adjacent within a tier.
    pub relatedness_group: String,
}
84
/// Files split into the three context tiers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextPositioning {
    /// Start of context: query-relevant, high-centrality files.
    pub head_files: Vec<FileWithCentrality>,

    /// Middle of context: supporting / lower-priority files.
    pub middle_files: Vec<FileWithCentrality>,

    /// End of context: core, high-centrality files.
    pub tail_files: Vec<FileWithCentrality>,
}
97
/// Final positioning result: the tiers, a token estimate, and a
/// human-readable explanation of the placement decisions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PositionedSelection {
    pub positioning: ContextPositioning,
    /// Estimated total token count across all three tiers.
    pub total_tokens: usize,
    pub positioning_reasoning: String,
}
105
/// Arranges selected files into HEAD/MIDDLE/TAIL tiers so the most
/// important content sits at the edges of the LLM context window.
pub struct ContextPositioner {
    config: ContextPositioningConfig,
}
110
111impl ContextPositioner {
/// Create a positioner with an explicit configuration.
pub fn new(config: ContextPositioningConfig) -> Self {
    Self { config }
}
116
/// Create a positioner using `ContextPositioningConfig::default()`.
pub fn with_defaults() -> Self {
    Self::new(ContextPositioningConfig::default())
}
121
/// Arrange `files` into a HEAD/MIDDLE/TAIL ordering for the context window.
///
/// Pipeline: optional test-file exclusion → dependency-graph centrality →
/// query-relevance scoring → relatedness grouping → tier assignment.
/// When positioning is disabled (or there are no files), the input order is
/// kept and everything lands in MIDDLE.
pub async fn position_files(
    &self,
    files: Vec<FileMetadata>,
    query_hint: Option<&str>,
) -> ScalingResult<PositionedSelection> {
    // Short-circuit: disabled or nothing to do — keep original order.
    if !self.config.enable_positioning || files.is_empty() {
        return Ok(self.create_simple_positioning(files));
    }

    // Optionally drop files that look like tests before any scoring.
    let filtered_files = if self.config.auto_exclude_tests {
        let original_count = files.len();
        let non_test_files: Vec<FileMetadata> = files
            .into_iter()
            .filter(|file| !self.is_test_file(&file.path))
            .collect();
        let filtered_count = non_test_files.len();

        if original_count != filtered_count {
            info!(
                "Auto-excluded {} test files, {} files remaining",
                original_count - filtered_count,
                filtered_count
            );
        }

        non_test_files
    } else {
        files
    };

    info!(
        "Starting context positioning for {} files",
        filtered_files.len()
    );

    // Score each file by its structural importance in the dependency graph.
    let files_with_centrality = self.calculate_centrality_scores(filtered_files).await?;

    // Fold in how well each file matches the user's query hint, if any.
    let files_with_relevance = self
        .calculate_query_relevance(files_with_centrality, query_hint)
        .await?;

    // Tag files with a relatedness key so tiers keep related files adjacent.
    let files_with_groups = self.group_by_relatedness(files_with_relevance).await?;

    let positioning = self.apply_positioning_strategy(files_with_groups).await?;

    let total_tokens = self.calculate_total_tokens(&positioning);
    let reasoning = self.generate_positioning_reasoning(&positioning, query_hint);

    info!(
        "Context positioning complete: HEAD={}, MIDDLE={}, TAIL={}",
        positioning.head_files.len(),
        positioning.middle_files.len(),
        positioning.tail_files.len()
    );

    Ok(PositionedSelection {
        positioning,
        total_tokens,
        positioning_reasoning: reasoning,
    })
}
190
/// Build the dependency graph for `files` and attach centrality scores to
/// each file. Query relevance and the relatedness group are left at their
/// defaults here; later pipeline stages fill them in.
async fn calculate_centrality_scores(
    &self,
    files: Vec<FileMetadata>,
) -> ScalingResult<Vec<FileWithCentrality>> {
    debug!("Calculating centrality scores for {} files", files.len());

    if files.is_empty() {
        return Ok(Vec::new());
    }

    let (graph, node_map) = self.build_dependency_graph(&files).await?;

    let centrality_scores = self.calculate_all_centralities(&graph, &node_map).await?;

    // Attach scores in parallel; a file missing from the score map falls
    // back to zeroed scores (should not happen — every file became a node).
    let files_with_centrality: Vec<FileWithCentrality> = files
        .into_par_iter()
        .map(|file| {
            let file_key = self.file_to_key(&file.path);
            let centrality = centrality_scores
                .get(&file_key)
                .cloned()
                .unwrap_or_default();

            FileWithCentrality {
                metadata: file,
                centrality,
                query_relevance: 0.0,
                relatedness_group: String::new(),
            }
        })
        .collect();

    debug!(
        "Calculated centrality for {} files",
        files_with_centrality.len()
    );
    Ok(files_with_centrality)
}
233
/// Construct a directed graph with one node per file and an edge from each
/// file to every heuristic dependency that is also in the file set.
/// Returns the graph plus a path-key → node-index map.
async fn build_dependency_graph(
    &self,
    files: &[FileMetadata],
) -> ScalingResult<(Graph<String, (), Directed>, HashMap<String, NodeIndex>)> {
    let mut graph = Graph::new();
    let mut node_map = HashMap::new();

    // First pass: one node per file so edges can be resolved in pass two.
    for file in files {
        let file_key = self.file_to_key(&file.path);
        let node_idx = graph.add_node(file_key.clone());
        node_map.insert(file_key, node_idx);
    }

    // Second pass: add edges for dependency guesses that resolve to known
    // nodes; guesses pointing outside the file set are silently dropped.
    for file in files {
        let file_key = self.file_to_key(&file.path);
        let dependencies = self.extract_dependencies(file).await?;

        if let Some(&from_idx) = node_map.get(&file_key) {
            for dep in dependencies {
                if let Some(&to_idx) = node_map.get(&dep) {
                    graph.add_edge(from_idx, to_idx, ());
                }
            }
        }
    }

    debug!(
        "Built dependency graph: {} nodes, {} edges",
        graph.node_count(),
        graph.edge_count()
    );

    Ok((graph, node_map))
}
271
272 async fn extract_dependencies(&self, file: &FileMetadata) -> ScalingResult<Vec<String>> {
274 let mut dependencies = Vec::new();
276
277 let path_str = file.path.to_string_lossy();
278 let dir_path = file
279 .path
280 .parent()
281 .map(|p| p.to_string_lossy().to_string())
282 .unwrap_or_default();
283
284 if file.language == "Rust" {
286 let filename = file.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
287
288 if filename == "mod.rs" || filename == "lib.rs" {
289 } else {
291 dependencies.push(format!("{}/lib.rs", dir_path));
293 dependencies.push(format!("{}/mod.rs", dir_path));
294 }
295 }
296
297 if file.language == "Python" {
299 let filename = file.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
300
301 if filename != "__init__.py" {
302 dependencies.push(format!("{}/__init__.py", dir_path));
303 }
304 }
305
306 if file.language == "JavaScript" || file.language == "TypeScript" {
308 dependencies.push(format!("{}/index.js", dir_path));
309 dependencies.push(format!("{}/index.ts", dir_path));
310 }
311
312 if file.file_type == "Configuration" {
314 dependencies.push("package.json".to_string());
315 dependencies.push("Cargo.toml".to_string());
316 dependencies.push("pyproject.toml".to_string());
317 }
318
319 Ok(dependencies)
320 }
321
322 async fn calculate_all_centralities(
324 &self,
325 graph: &Graph<String, (), Directed>,
326 node_map: &HashMap<String, NodeIndex>,
327 ) -> ScalingResult<HashMap<String, CentralityScores>> {
328 let mut centrality_scores = HashMap::new();
329
330 if graph.node_count() == 0 {
331 return Ok(centrality_scores);
332 }
333
334 let pagerank_scores = self.calculate_simple_pagerank(graph, node_map)?;
337
338 let degree_scores: Vec<(NodeIndex, f64)> = node_map
340 .par_iter()
341 .map(|(_, &node_idx)| {
342 let in_degree = graph.edges_directed(node_idx, petgraph::Incoming).count();
343 let out_degree = graph.edges_directed(node_idx, petgraph::Outgoing).count();
344 let total_degree = in_degree + out_degree;
345 let max_possible = graph.node_count().saturating_sub(1);
346
347 let normalized_degree = if max_possible == 0 {
348 0.0
349 } else {
350 total_degree as f64 / max_possible as f64
351 };
352
353 (node_idx, normalized_degree)
354 })
355 .collect();
356
357 let betweenness_scores = self.calculate_betweenness_from_scc(graph, node_map)?;
359
360 for (file_key, &node_idx) in node_map {
362 let pagerank = pagerank_scores
363 .get(node_idx.index())
364 .copied()
365 .unwrap_or(0.0);
366 let degree = degree_scores
367 .iter()
368 .find(|(idx, _)| *idx == node_idx)
369 .map(|(_, score)| *score)
370 .unwrap_or(0.0);
371 let betweenness = betweenness_scores.get(&node_idx).copied().unwrap_or(0.0);
372
373 let combined = (degree * 0.3) + (pagerank * 0.5) + (betweenness * 0.2);
375
376 centrality_scores.insert(
377 file_key.clone(),
378 CentralityScores {
379 degree,
380 pagerank,
381 betweenness,
382 combined,
383 },
384 );
385 }
386
387 debug!(
388 "Calculated centrality scores for {} files",
389 centrality_scores.len()
390 );
391 Ok(centrality_scores)
392 }
393
394 fn calculate_betweenness_from_scc(
396 &self,
397 graph: &Graph<String, (), Directed>,
398 node_map: &HashMap<String, NodeIndex>,
399 ) -> ScalingResult<HashMap<NodeIndex, f64>> {
400 let mut betweenness_scores = HashMap::new();
401
402 let sccs = kosaraju_scc(graph);
404
405 for &node_idx in node_map.values() {
407 let mut betweenness = 0.0;
408
409 let node_scc = sccs.iter().position(|scc| scc.contains(&node_idx));
411
412 if let Some(scc_idx) = node_scc {
413 let out_edges: HashSet<usize> = graph
415 .edges_directed(node_idx, petgraph::Outgoing)
416 .filter_map(|edge| {
417 let target = edge.target();
418 sccs.iter().position(|scc| scc.contains(&target))
419 })
420 .filter(|&target_scc| target_scc != scc_idx)
421 .collect();
422
423 let in_edges: HashSet<usize> = graph
424 .edges_directed(node_idx, petgraph::Incoming)
425 .filter_map(|edge| {
426 let source = edge.source();
427 sccs.iter().position(|scc| scc.contains(&source))
428 })
429 .filter(|&source_scc| source_scc != scc_idx)
430 .collect();
431
432 betweenness = (out_edges.len() + in_edges.len()) as f64;
434
435 let max_components = sccs.len().saturating_sub(1);
437 if max_components > 0 {
438 betweenness /= max_components as f64;
439 }
440 }
441
442 betweenness_scores.insert(node_idx, betweenness);
443 }
444
445 Ok(betweenness_scores)
446 }
447
448 fn calculate_simple_pagerank(
450 &self,
451 graph: &Graph<String, (), Directed>,
452 node_map: &HashMap<String, NodeIndex>,
453 ) -> ScalingResult<Vec<f64>> {
454 let node_count = graph.node_count();
455 if node_count == 0 {
456 return Ok(Vec::new());
457 }
458
459 let mut scores = vec![1.0 / node_count as f64; node_count];
460 let damping = 0.85;
461 let iterations = 10; for _ in 0..iterations {
464 let mut new_scores = vec![(1.0 - damping) / node_count as f64; node_count];
465
466 for &node_idx in node_map.values() {
467 let out_degree = graph.edges_directed(node_idx, petgraph::Outgoing).count();
468 if out_degree > 0 {
469 let contribution = scores[node_idx.index()] * damping / out_degree as f64;
470
471 for edge in graph.edges_directed(node_idx, petgraph::Outgoing) {
472 let target_idx = edge.target().index();
473 new_scores[target_idx] += contribution;
474 }
475 }
476 }
477
478 scores = new_scores;
479 }
480
481 Ok(scores)
482 }
483
/// Populate `query_relevance` on every file from the optional query hint.
/// With no hint, scores are left at 0.0 (their default).
async fn calculate_query_relevance(
    &self,
    mut files: Vec<FileWithCentrality>,
    query_hint: Option<&str>,
) -> ScalingResult<Vec<FileWithCentrality>> {
    if let Some(query) = query_hint {
        debug!("Calculating query relevance for: {}", query);

        // Case-insensitive, word-by-word matching.
        let query_lower = query.to_lowercase();
        let query_words: Vec<&str> = query_lower.split_whitespace().collect();

        for file in &mut files {
            file.query_relevance =
                self.calculate_file_query_relevance(&file.metadata, &query_words);
        }
    }

    Ok(files)
}
504
505 fn calculate_file_query_relevance(&self, file: &FileMetadata, query_words: &[&str]) -> f64 {
507 let path_str = file.path.to_string_lossy().to_lowercase();
508 let filename = file
509 .path
510 .file_name()
511 .and_then(|n| n.to_str())
512 .unwrap_or("")
513 .to_lowercase();
514
515 let mut relevance = 0.0;
516
517 for word in query_words {
518 if filename.contains(word) {
520 relevance += 1.0;
521 }
522 else if path_str.contains(word) {
524 relevance += 0.5;
525 }
526 else if file.language.to_lowercase().contains(word) {
528 relevance += 0.2;
529 }
530 }
531
532 if filename.contains("main")
534 || filename.contains("index")
535 || filename == "lib.rs"
536 || filename == "__init__.py"
537 {
538 relevance += 0.3;
539 }
540
541 relevance
542 }
543
544 async fn group_by_relatedness(
546 &self,
547 mut files: Vec<FileWithCentrality>,
548 ) -> ScalingResult<Vec<FileWithCentrality>> {
549 debug!("Grouping {} files by relatedness", files.len());
550
551 for file in &mut files {
552 file.relatedness_group = self.determine_relatedness_group(&file.metadata);
553 }
554
555 Ok(files)
556 }
557
558 fn determine_relatedness_group(&self, file: &FileMetadata) -> String {
560 let path_str = file.path.to_string_lossy();
561
562 let path_components: Vec<&str> = path_str.split('/').collect();
564 let group = if path_components.len() >= 2 {
565 format!("{}/{}", path_components[0], path_components[1])
566 } else if path_components.len() == 1 {
567 path_components[0].to_string()
568 } else {
569 "root".to_string()
570 };
571
572 format!("{}::{}", group, file.language)
574 }
575
576 async fn apply_positioning_strategy(
578 &self,
579 files: Vec<FileWithCentrality>,
580 ) -> ScalingResult<ContextPositioning> {
581 if files.is_empty() {
582 return Ok(ContextPositioning {
583 head_files: Vec::new(),
584 middle_files: Vec::new(),
585 tail_files: Vec::new(),
586 });
587 }
588
589 let total_files = files.len();
590 let head_count = ((total_files as f64 * self.config.head_percentage) as usize).max(1);
591 let tail_count = ((total_files as f64 * self.config.tail_percentage) as usize).max(1);
592
593 debug!(
594 "Positioning strategy: HEAD={}, TAIL={}, MIDDLE={}",
595 head_count,
596 tail_count,
597 total_files - head_count - tail_count
598 );
599
600 let mut head_candidates = files.clone();
602 head_candidates.sort_by(|a, b| {
603 let score_a = (a.query_relevance * self.config.query_relevance_weight)
604 + (a.centrality.combined * self.config.centrality_weight);
605 let score_b = (b.query_relevance * self.config.query_relevance_weight)
606 + (b.centrality.combined * self.config.centrality_weight);
607 score_b
608 .partial_cmp(&score_a)
609 .unwrap_or(std::cmp::Ordering::Equal)
610 });
611
612 let mut tail_candidates = files.clone();
614 tail_candidates.sort_by(|a, b| {
615 b.centrality
616 .combined
617 .partial_cmp(&a.centrality.combined)
618 .unwrap_or(std::cmp::Ordering::Equal)
619 });
620
621 let mut selected_files = HashSet::new();
623 let mut head_files = Vec::new();
624
625 for file in head_candidates.into_iter().take(head_count) {
626 let file_key = self.file_to_key(&file.metadata.path);
627 selected_files.insert(file_key);
628 head_files.push(file);
629 }
630
631 let mut tail_files = Vec::new();
633 for file in tail_candidates {
634 if tail_files.len() >= tail_count {
635 break;
636 }
637 let file_key = self.file_to_key(&file.metadata.path);
638 if !selected_files.contains(&file_key) {
639 selected_files.insert(file_key);
640 tail_files.push(file);
641 }
642 }
643
644 let mut middle_files = Vec::new();
646 for file in files {
647 let file_key = self.file_to_key(&file.metadata.path);
648 if !selected_files.contains(&file_key) {
649 middle_files.push(file);
650 }
651 }
652
653 self.group_within_tier(&mut head_files);
655 self.group_within_tier(&mut middle_files);
656 self.group_within_tier(&mut tail_files);
657
658 Ok(ContextPositioning {
659 head_files,
660 middle_files,
661 tail_files,
662 })
663 }
664
665 fn group_within_tier(&self, files: &mut Vec<FileWithCentrality>) {
667 files.sort_by(|a, b| {
668 let group_cmp = a.relatedness_group.cmp(&b.relatedness_group);
670 if group_cmp != std::cmp::Ordering::Equal {
671 return group_cmp;
672 }
673
674 b.centrality
676 .combined
677 .partial_cmp(&a.centrality.combined)
678 .unwrap_or(std::cmp::Ordering::Equal)
679 });
680 }
681
682 fn calculate_total_tokens(&self, positioning: &ContextPositioning) -> usize {
684 let head_tokens = positioning
685 .head_files
686 .iter()
687 .map(|f| self.estimate_tokens(&f.metadata))
688 .sum::<usize>();
689
690 let middle_tokens = positioning
691 .middle_files
692 .iter()
693 .map(|f| self.estimate_tokens(&f.metadata))
694 .sum::<usize>();
695
696 let tail_tokens = positioning
697 .tail_files
698 .iter()
699 .map(|f| self.estimate_tokens(&f.metadata))
700 .sum::<usize>();
701
702 head_tokens + middle_tokens + tail_tokens
703 }
704
/// Build a human-readable, multi-line explanation of the tier assignment:
/// per-tier counts, the top three files of HEAD and TAIL with their scores,
/// and the query hint when one was supplied.
fn generate_positioning_reasoning(
    &self,
    positioning: &ContextPositioning,
    query_hint: Option<&str>,
) -> String {
    let mut reasoning = Vec::new();

    reasoning.push("🎯 Context Positioning Strategy Applied".to_string());
    reasoning.push("".to_string());

    // HEAD section: count, optional query hint, and a top-3 preview.
    reasoning.push(format!(
        "📍 HEAD ({} files): Query-specific high centrality files",
        positioning.head_files.len()
    ));
    if let Some(query) = query_hint {
        reasoning.push(format!("   Query hint: '{}'", query));
    }
    for (i, file) in positioning.head_files.iter().take(3).enumerate() {
        reasoning.push(format!(
            "   {}. {} (centrality: {:.3}, relevance: {:.3})",
            i + 1,
            file.metadata
                .path
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("?"),
            file.centrality.combined,
            file.query_relevance
        ));
    }
    if positioning.head_files.len() > 3 {
        reasoning.push(format!(
            "   ... and {} more files",
            positioning.head_files.len() - 3
        ));
    }
    reasoning.push("".to_string());

    // MIDDLE section: count only — these files are intentionally low-detail.
    reasoning.push(format!(
        "🔄 MIDDLE ({} files): Supporting utilities and low-centrality files",
        positioning.middle_files.len()
    ));
    reasoning.push("".to_string());

    // TAIL section: count and a top-3 preview by centrality.
    reasoning.push(format!(
        "🏛️ TAIL ({} files): Core functionality, high centrality",
        positioning.tail_files.len()
    ));
    for (i, file) in positioning.tail_files.iter().take(3).enumerate() {
        reasoning.push(format!(
            "   {}. {} (centrality: {:.3})",
            i + 1,
            file.metadata
                .path
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("?"),
            file.centrality.combined
        ));
    }
    if positioning.tail_files.len() > 3 {
        reasoning.push(format!(
            "   ... and {} more files",
            positioning.tail_files.len() - 3
        ));
    }

    reasoning.join("\n")
}
778
/// Fallback used when positioning is disabled or there are no files:
/// wrap every file with zeroed scores and place everything in MIDDLE,
/// preserving the caller's order.
fn create_simple_positioning(&self, files: Vec<FileMetadata>) -> PositionedSelection {
    let files_with_centrality: Vec<FileWithCentrality> = files
        .into_iter()
        .map(|metadata| FileWithCentrality {
            metadata,
            centrality: CentralityScores::default(),
            query_relevance: 0.0,
            relatedness_group: "default".to_string(),
        })
        .collect();

    let positioning = ContextPositioning {
        head_files: Vec::new(),
        middle_files: files_with_centrality,
        tail_files: Vec::new(),
    };

    let total_tokens = self.calculate_total_tokens(&positioning);

    PositionedSelection {
        positioning,
        total_tokens,
        positioning_reasoning: "Context positioning disabled - using default order".to_string(),
    }
}
805
/// Canonical string key for a path, used for node-map and dedup lookups.
fn file_to_key(&self, path: &Path) -> String {
    path.to_string_lossy().to_string()
}
810
811 fn estimate_tokens(&self, file: &FileMetadata) -> usize {
813 let base_tokens = ((file.size as f64) / 3.5) as usize;
815
816 let multiplier = match file.language.as_str() {
818 "Rust" => 1.3,
819 "JavaScript" | "TypeScript" => 1.2,
820 "Python" => 1.1,
821 "C" | "Go" => 1.0,
822 "JSON" | "YAML" | "TOML" => 0.7,
823 _ => 1.0,
824 };
825
826 (base_tokens as f64 * multiplier) as usize
827 }
828
/// Heuristic: does `path` look like a test file?
///
/// Checks, in order: well-known test directory segments (both `/` and `\`
/// separators), common cross-language test/spec filename prefixes and
/// suffixes, and finally per-extension conventions. All comparisons are
/// case-insensitive (the path and filename are lowercased up front).
fn is_test_file(&self, path: &Path) -> bool {
    let path_str = path.to_string_lossy().to_lowercase();
    let file_name = path
        .file_name()
        .map(|s| s.to_string_lossy().to_lowercase())
        .unwrap_or_default();

    // Anything under a dedicated test directory is a test file.
    if path_str.contains("/test/")
        || path_str.contains("/tests/")
        || path_str.contains("\\test\\")
        || path_str.contains("\\tests\\")
        || path_str.contains("/__tests__/")
        || path_str.contains("\\__tests__\\")
    {
        return true;
    }

    // Language-agnostic filename conventions.
    if file_name.starts_with("test_")
        || file_name.ends_with("_test.rs")
        || file_name.ends_with("_test.py")
        || file_name.ends_with("_test.js")
        || file_name.ends_with("_test.ts")
        || file_name.ends_with(".test.js")
        || file_name.ends_with(".test.ts")
        || file_name.ends_with(".test.jsx")
        || file_name.ends_with(".test.tsx")
        || file_name.ends_with(".spec.js")
        || file_name.ends_with(".spec.ts")
        || file_name.ends_with(".spec.jsx")
        || file_name.ends_with(".spec.tsx")
        || file_name.ends_with("_spec.py")
        || file_name.ends_with("_spec.rb")
    {
        return true;
    }

    // Per-extension conventions for anything not caught above.
    match path.extension().and_then(|s| s.to_str()) {
        Some("rs") => {
            // NOTE(review): every condition in this arm is already covered
            // by the prefix/suffix/directory checks above, so this arm is
            // effectively dead code — confirm before relying on it.
            file_name.contains("test")
                && (file_name.starts_with("test_")
                    || file_name.ends_with("_test.rs")
                    || path_str.contains("/tests/"))
        }
        Some("py") => {
            file_name.starts_with("test_")
                || file_name.ends_with("_test.py")
                || file_name.contains("test_")
        }
        Some("go") => {
            // Go's toolchain convention: *_test.go.
            file_name.ends_with("_test.go")
        }
        Some("java") | Some("kt") => {
            // Lowercased names, so this also matches e.g. "UserTest.java".
            file_name.ends_with("test.java")
                || file_name.ends_with("tests.java")
                || file_name.ends_with("test.kt")
                || file_name.ends_with("tests.kt")
                || path_str.contains("/test/")
                || path_str.contains("/tests/")
        }
        Some("js") | Some("ts") | Some("jsx") | Some("tsx") => {
            file_name.contains(".test.")
                || file_name.contains(".spec.")
                || file_name.ends_with(".test.js")
                || file_name.ends_with(".test.ts")
                || file_name.ends_with(".spec.js")
                || file_name.ends_with(".spec.ts")
                || path_str.contains("/__tests__/")
                || path_str.contains("/test/")
                || path_str.contains("/tests/")
        }
        Some("rb") => {
            file_name.ends_with("_test.rb")
                || file_name.ends_with("_spec.rb")
                || path_str.contains("/spec/")
                || path_str.contains("/test/")
        }
        Some("php") => {
            file_name.ends_with("test.php")
                || file_name.ends_with("_test.php")
                || file_name.contains("test") && path_str.contains("/test")
        }
        _ => false,
    }
}
924}
925
926#[cfg(test)]
927mod tests {
928 use super::*;
929 use std::path::PathBuf;
930 use std::time::SystemTime;
931
932 fn create_test_file(path: &str, size: u64, language: &str) -> FileMetadata {
933 FileMetadata {
934 path: PathBuf::from(path),
935 size,
936 modified: SystemTime::now(),
937 language: language.to_string(),
938 file_type: if language == "Rust" {
939 "Source"
940 } else {
941 "Other"
942 }
943 .to_string(),
944 }
945 }
946
// Default-constructed positioner should carry the documented defaults.
#[tokio::test]
async fn test_context_positioner_creation() {
    let positioner = ContextPositioner::with_defaults();
    assert!(positioner.config.enable_positioning);
    assert_eq!(positioner.config.head_percentage, 0.20);
    assert_eq!(positioner.config.tail_percentage, 0.20);
}
954
// Centrality scoring should return one entry per input file with sane
// (non-negative) component scores.
#[tokio::test]
async fn test_centrality_calculation() {
    let positioner = ContextPositioner::with_defaults();

    let files = vec![
        create_test_file("src/main.rs", 1000, "Rust"),
        create_test_file("src/lib.rs", 2000, "Rust"),
        create_test_file("src/utils.rs", 500, "Rust"),
    ];

    let files_with_centrality = positioner.calculate_centrality_scores(files).await.unwrap();
    assert_eq!(files_with_centrality.len(), 3);

    // Every component score should be non-negative.
    for file in &files_with_centrality {
        assert!(file.centrality.combined >= 0.0);
        assert!(file.centrality.degree >= 0.0);
        assert!(file.centrality.pagerank >= 0.0);
        assert!(file.centrality.betweenness >= 0.0);
    }

    // Sanity: the maximum combined score dominates the minimum.
    let max_centrality = files_with_centrality
        .iter()
        .map(|f| f.centrality.combined)
        .fold(0.0, f64::max);
    let min_centrality = files_with_centrality
        .iter()
        .map(|f| f.centrality.combined)
        .fold(1.0, f64::min);

    assert!(max_centrality >= min_centrality);
}
989
// End-to-end positioning: all tiers populated, no file lost, and the
// reasoning text mentions the HEAD and TAIL sections.
#[tokio::test]
async fn test_positioning_strategy() {
    let positioner = ContextPositioner::with_defaults();

    let files = vec![
        create_test_file("src/main.rs", 1000, "Rust"),
        create_test_file("src/lib.rs", 2000, "Rust"),
        create_test_file("src/utils.rs", 500, "Rust"),
        create_test_file("tests/integration.rs", 800, "Rust"),
        create_test_file("README.md", 300, "Markdown"),
    ];

    let result = positioner
        .position_files(files, Some("main"))
        .await
        .unwrap();

    // Each tier should receive at least one file for this input.
    assert!(!result.positioning.head_files.is_empty());
    assert!(!result.positioning.middle_files.is_empty());
    assert!(!result.positioning.tail_files.is_empty());

    // No file may be dropped or duplicated across tiers.
    let total = result.positioning.head_files.len()
        + result.positioning.middle_files.len()
        + result.positioning.tail_files.len();
    assert_eq!(total, 5);

    // The generated explanation should describe both edge tiers.
    assert!(!result.positioning_reasoning.is_empty());
    assert!(result.positioning_reasoning.contains("HEAD"));
    assert!(result.positioning_reasoning.contains("TAIL"));
}
1023
// A query hint of "main" should rank main.rs above an unrelated file.
#[tokio::test]
async fn test_query_relevance() {
    let positioner = ContextPositioner::with_defaults();

    let files = vec![
        FileWithCentrality {
            metadata: create_test_file("src/main.rs", 1000, "Rust"),
            centrality: CentralityScores::default(),
            query_relevance: 0.0,
            relatedness_group: String::new(),
        },
        FileWithCentrality {
            metadata: create_test_file("src/utils.rs", 500, "Rust"),
            centrality: CentralityScores::default(),
            query_relevance: 0.0,
            relatedness_group: String::new(),
        },
    ];

    let result = positioner
        .calculate_query_relevance(files, Some("main"))
        .await
        .unwrap();

    // Filename matches score higher than path/language-only matches.
    let main_relevance = result
        .iter()
        .find(|f| f.metadata.path.to_string_lossy().contains("main.rs"))
        .unwrap();
    let utils_relevance = result
        .iter()
        .find(|f| f.metadata.path.to_string_lossy().contains("utils.rs"))
        .unwrap();

    assert!(main_relevance.query_relevance > utils_relevance.query_relevance);
}
1060
// The group key should combine the leading path components with the language.
#[test]
fn test_relatedness_grouping() {
    let positioner = ContextPositioner::with_defaults();

    let file = create_test_file("src/api/handlers.rs", 1000, "Rust");
    let group = positioner.determine_relatedness_group(&file);

    assert!(group.contains("src/api"));
    assert!(group.contains("Rust"));
}
1071
// Rust's verbosity multiplier (1.3) should beat JSON's (0.7) at equal size.
#[test]
fn test_token_estimation() {
    let positioner = ContextPositioner::with_defaults();

    let rust_file = create_test_file("src/main.rs", 1000, "Rust");
    let json_file = create_test_file("package.json", 1000, "JSON");

    let rust_tokens = positioner.estimate_tokens(&rust_file);
    let json_tokens = positioner.estimate_tokens(&json_file);

    assert!(rust_tokens > json_tokens);
}
1085
// Exercise the test-file heuristic: directory-based, filename-based, and
// per-language conventions must match; regular sources must not.
#[test]
fn test_is_test_file_detection() {
    let positioner = ContextPositioner::with_defaults();

    // Dedicated test directories.
    assert!(positioner.is_test_file(&std::path::Path::new("src/test/utils.rs")));
    assert!(positioner.is_test_file(&std::path::Path::new("src/tests/integration.py")));
    assert!(positioner.is_test_file(&std::path::Path::new("__tests__/component.test.js")));

    // Prefix/suffix conventions.
    assert!(positioner.is_test_file(&std::path::Path::new("test_utils.py")));
    assert!(positioner.is_test_file(&std::path::Path::new("utils_test.rs")));
    assert!(positioner.is_test_file(&std::path::Path::new("component.test.tsx")));
    assert!(positioner.is_test_file(&std::path::Path::new("service.spec.ts")));
    assert!(positioner.is_test_file(&std::path::Path::new("model_test.go")));

    // Case-insensitive per-language conventions.
    assert!(positioner.is_test_file(&std::path::Path::new("UserTest.java")));
    assert!(positioner.is_test_file(&std::path::Path::new("user_spec.rb")));
    assert!(positioner.is_test_file(&std::path::Path::new("UserTest.php")));

    // Regular sources and docs must not be flagged.
    assert!(!positioner.is_test_file(&std::path::Path::new("src/main.rs")));
    assert!(!positioner.is_test_file(&std::path::Path::new("lib/utils.py")));
    assert!(!positioner.is_test_file(&std::path::Path::new("components/Button.tsx")));
    assert!(!positioner.is_test_file(&std::path::Path::new("README.md")));
    assert!(!positioner.is_test_file(&std::path::Path::new("package.json")));
}
1114
// With auto_exclude_tests on, the three test-like files should be removed
// and only the three real sources positioned.
#[tokio::test]
async fn test_auto_exclude_tests() {
    let mut config = ContextPositioningConfig::default();
    config.auto_exclude_tests = true;
    let positioner = ContextPositioner::new(config);

    // Three real sources interleaved with three test files.
    let files = vec![
        create_test_file("src/main.rs", 1000, "Rust"),
        create_test_file("src/lib.rs", 800, "Rust"),
        create_test_file("src/tests/integration_test.rs", 1200, "Rust"),
        create_test_file("test/unit_test.py", 600, "Python"),
        create_test_file("components/Button.tsx", 900, "TypeScript"),
        create_test_file("__tests__/Button.test.tsx", 700, "TypeScript"),
    ];

    let result = positioner.position_files(files, None).await.unwrap();

    let all_files: Vec<&FileWithCentrality> = result
        .positioning
        .head_files
        .iter()
        .chain(result.positioning.middle_files.iter())
        .chain(result.positioning.tail_files.iter())
        .collect();

    // Only the three non-test files survive.
    assert_eq!(all_files.len(), 3);

    // And none of the survivors look like test files.
    for file in all_files {
        let path_str = file.metadata.path.to_string_lossy();
        assert!(!path_str.contains("test"));
        assert!(!path_str.contains("__tests__"));
    }

    // The specific expected survivors are all present.
    let file_names: Vec<String> = result
        .positioning
        .head_files
        .iter()
        .chain(result.positioning.middle_files.iter())
        .chain(result.positioning.tail_files.iter())
        .map(|f| {
            f.metadata
                .path
                .file_name()
                .unwrap()
                .to_string_lossy()
                .to_string()
        })
        .collect();

    assert!(file_names.contains(&"main.rs".to_string()));
    assert!(file_names.contains(&"lib.rs".to_string()));
    assert!(file_names.contains(&"Button.tsx".to_string()));
}
1173
1174 #[tokio::test]
1175 async fn test_auto_exclude_disabled() {
1176 let mut config = ContextPositioningConfig::default();
1177 config.auto_exclude_tests = false; let positioner = ContextPositioner::new(config);
1179
1180 let files = vec![
1182 create_test_file("src/main.rs", 1000, "Rust"),
1183 create_test_file("src/tests/integration_test.rs", 1200, "Rust"),
1184 create_test_file("test_utils.py", 600, "Python"),
1185 ];
1186
1187 let result = positioner.position_files(files, None).await.unwrap();
1188
1189 let all_files: Vec<&FileWithCentrality> = result
1191 .positioning
1192 .head_files
1193 .iter()
1194 .chain(result.positioning.middle_files.iter())
1195 .chain(result.positioning.tail_files.iter())
1196 .collect();
1197
1198 assert_eq!(all_files.len(), 3);
1200 }
1201}