1use petgraph::algo::kosaraju_scc;
10use petgraph::visit::EdgeRef;
11use petgraph::{graph::NodeIndex, Directed, Graph};
12use rayon::prelude::*;
13use serde::{Deserialize, Serialize};
14use std::collections::{HashMap, HashSet};
15use std::path::Path;
16use tracing::{debug, info, warn};
17
18use crate::error::{ScalingError, ScalingResult};
19use crate::streaming::FileMetadata;
20use scribe_core::file;
21
/// Configuration for the HEAD/MIDDLE/TAIL context-positioning strategy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextPositioningConfig {
    /// Master switch; when false, files pass through in their original order.
    pub enable_positioning: bool,

    /// Fraction of files placed in the HEAD tier (query-relevant, high centrality).
    pub head_percentage: f64,

    /// Fraction of files placed in the TAIL tier (core, high-centrality files).
    pub tail_percentage: f64,

    /// Weight of the combined centrality score when ranking HEAD candidates.
    pub centrality_weight: f64,

    /// Weight for relatedness grouping.
    /// NOTE(review): not read anywhere in this module's scoring — confirm intended use.
    pub relatedness_weight: f64,

    /// Weight of the query-relevance score when ranking HEAD candidates.
    pub query_relevance_weight: f64,

    /// When true, test files are filtered out before positioning.
    pub auto_exclude_tests: bool,
}
46
47impl Default for ContextPositioningConfig {
48 fn default() -> Self {
49 Self {
50 enable_positioning: true,
51 head_percentage: 0.20,
52 tail_percentage: 0.20,
53 centrality_weight: 0.4,
54 relatedness_weight: 0.3,
55 query_relevance_weight: 0.3,
56 auto_exclude_tests: false,
57 }
58 }
59}
60
/// Graph-centrality scores for a single file; each component is normalized
/// to roughly the [0, 1] range by its respective calculation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CentralityScores {
    /// Approximate betweenness derived from SCC boundary connections.
    pub betweenness: f64,

    /// Simplified PageRank score over the dependency graph.
    pub pagerank: f64,

    /// Degree centrality: (in-degree + out-degree) / (n - 1).
    pub degree: f64,

    /// Weighted blend: 0.3 * degree + 0.5 * pagerank + 0.2 * betweenness.
    pub combined: f64,
}
76
/// A file together with the derived signals used to position it in context.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileWithCentrality {
    // Underlying file information (path, size, language, ...).
    pub metadata: FileMetadata,
    // Graph-centrality signals for this file.
    pub centrality: CentralityScores,
    // Relevance to the optional query hint (0.0 when no hint was given).
    pub query_relevance: f64,
    // Grouping key of the form "<top-dirs>::<language>".
    pub relatedness_group: String,
}
85
/// Files partitioned into the three context tiers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContextPositioning {
    /// HEAD: query-relevant, high-centrality files presented first.
    pub head_files: Vec<FileWithCentrality>,

    /// MIDDLE: everything not selected for HEAD or TAIL.
    pub middle_files: Vec<FileWithCentrality>,

    /// TAIL: high-centrality "core" files presented last.
    pub tail_files: Vec<FileWithCentrality>,
}
98
/// Final positioning result plus token totals and human-readable reasoning.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PositionedSelection {
    pub positioning: ContextPositioning,
    /// Estimated token count across all three tiers.
    pub total_tokens: usize,
    /// Multi-line explanation of how files were placed.
    pub positioning_reasoning: String,
}
106
/// Applies the HEAD/MIDDLE/TAIL context-positioning strategy to a file set.
pub struct ContextPositioner {
    // Strategy configuration (tier sizes, weights, test exclusion).
    config: ContextPositioningConfig,
}
111
112impl ContextPositioner {
    /// Creates a positioner with the given configuration.
    pub fn new(config: ContextPositioningConfig) -> Self {
        Self { config }
    }
117
    /// Creates a positioner using `ContextPositioningConfig::default()`.
    pub fn with_defaults() -> Self {
        Self::new(ContextPositioningConfig::default())
    }
122
123 pub async fn position_files(
125 &self,
126 files: Vec<FileMetadata>,
127 query_hint: Option<&str>,
128 ) -> ScalingResult<PositionedSelection> {
129 if !self.config.enable_positioning || files.is_empty() {
130 return Ok(self.create_simple_positioning(files));
131 }
132
133 let filtered_files = if self.config.auto_exclude_tests {
135 let original_count = files.len();
136 let non_test_files: Vec<FileMetadata> = files
137 .into_iter()
138 .filter(|file| !self.is_test_file(&file.path))
139 .collect();
140 let filtered_count = non_test_files.len();
141
142 if original_count != filtered_count {
143 info!(
144 "Auto-excluded {} test files, {} files remaining",
145 original_count - filtered_count,
146 filtered_count
147 );
148 }
149
150 non_test_files
151 } else {
152 files
153 };
154
155 info!(
156 "Starting context positioning for {} files",
157 filtered_files.len()
158 );
159
160 let files_with_centrality = self.calculate_centrality_scores(filtered_files).await?;
162
163 let files_with_relevance = self
165 .calculate_query_relevance(files_with_centrality, query_hint)
166 .await?;
167
168 let files_with_groups = self.group_by_relatedness(files_with_relevance).await?;
170
171 let positioning = self.apply_positioning_strategy(files_with_groups).await?;
173
174 let total_tokens = self.calculate_total_tokens(&positioning);
176 let reasoning = self.generate_positioning_reasoning(&positioning, query_hint);
177
178 info!(
179 "Context positioning complete: HEAD={}, MIDDLE={}, TAIL={}",
180 positioning.head_files.len(),
181 positioning.middle_files.len(),
182 positioning.tail_files.len()
183 );
184
185 Ok(PositionedSelection {
186 positioning,
187 total_tokens,
188 positioning_reasoning: reasoning,
189 })
190 }
191
192 async fn calculate_centrality_scores(
194 &self,
195 files: Vec<FileMetadata>,
196 ) -> ScalingResult<Vec<FileWithCentrality>> {
197 debug!("Calculating centrality scores for {} files", files.len());
198
199 if files.is_empty() {
200 return Ok(Vec::new());
201 }
202
203 let (graph, node_map) = self.build_dependency_graph(&files).await?;
205
206 let centrality_scores = self.calculate_all_centralities(&graph, &node_map).await?;
208
209 let files_with_centrality: Vec<FileWithCentrality> = files
211 .into_par_iter()
212 .map(|file| {
213 let file_key = self.file_to_key(&file.path);
214 let centrality = centrality_scores
215 .get(&file_key)
216 .cloned()
217 .unwrap_or_default();
218
219 FileWithCentrality {
220 metadata: file,
221 centrality,
222 query_relevance: 0.0, relatedness_group: String::new(), }
225 })
226 .collect();
227
228 debug!(
229 "Calculated centrality for {} files",
230 files_with_centrality.len()
231 );
232 Ok(files_with_centrality)
233 }
234
235 async fn build_dependency_graph(
237 &self,
238 files: &[FileMetadata],
239 ) -> ScalingResult<(Graph<String, (), Directed>, HashMap<String, NodeIndex>)> {
240 let mut graph = Graph::new();
241 let mut node_map = HashMap::new();
242
243 for file in files {
245 let file_key = self.file_to_key(&file.path);
246 let node_idx = graph.add_node(file_key.clone());
247 node_map.insert(file_key, node_idx);
248 }
249
250 for file in files {
252 let file_key = self.file_to_key(&file.path);
253 let dependencies = self.extract_dependencies(file).await?;
254
255 if let Some(&from_idx) = node_map.get(&file_key) {
256 for dep in dependencies {
257 if let Some(&to_idx) = node_map.get(&dep) {
258 graph.add_edge(from_idx, to_idx, ());
259 }
260 }
261 }
262 }
263
264 debug!(
265 "Built dependency graph: {} nodes, {} edges",
266 graph.node_count(),
267 graph.edge_count()
268 );
269
270 Ok((graph, node_map))
271 }
272
273 async fn extract_dependencies(&self, file: &FileMetadata) -> ScalingResult<Vec<String>> {
275 let mut dependencies = Vec::new();
277
278 let path_str = file.path.to_string_lossy();
279 let dir_path = file
280 .path
281 .parent()
282 .map(|p| p.to_string_lossy().to_string())
283 .unwrap_or_default();
284
285 if file.language == "Rust" {
287 let filename = file.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
288
289 if filename == "mod.rs" || filename == "lib.rs" {
290 } else {
292 dependencies.push(format!("{}/lib.rs", dir_path));
294 dependencies.push(format!("{}/mod.rs", dir_path));
295 }
296 }
297
298 if file.language == "Python" {
300 let filename = file.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
301
302 if filename != "__init__.py" {
303 dependencies.push(format!("{}/__init__.py", dir_path));
304 }
305 }
306
307 if file.language == "JavaScript" || file.language == "TypeScript" {
309 dependencies.push(format!("{}/index.js", dir_path));
310 dependencies.push(format!("{}/index.ts", dir_path));
311 }
312
313 if file.file_type == "Configuration" {
315 dependencies.push("package.json".to_string());
316 dependencies.push("Cargo.toml".to_string());
317 dependencies.push("pyproject.toml".to_string());
318 }
319
320 Ok(dependencies)
321 }
322
323 async fn calculate_all_centralities(
325 &self,
326 graph: &Graph<String, (), Directed>,
327 node_map: &HashMap<String, NodeIndex>,
328 ) -> ScalingResult<HashMap<String, CentralityScores>> {
329 let mut centrality_scores = HashMap::new();
330
331 if graph.node_count() == 0 {
332 return Ok(centrality_scores);
333 }
334
335 let pagerank_scores = self.calculate_simple_pagerank(graph, node_map)?;
338
339 let degree_scores: Vec<(NodeIndex, f64)> = node_map
341 .par_iter()
342 .map(|(_, &node_idx)| {
343 let in_degree = graph.edges_directed(node_idx, petgraph::Incoming).count();
344 let out_degree = graph.edges_directed(node_idx, petgraph::Outgoing).count();
345 let total_degree = in_degree + out_degree;
346 let max_possible = graph.node_count().saturating_sub(1);
347
348 let normalized_degree = if max_possible == 0 {
349 0.0
350 } else {
351 total_degree as f64 / max_possible as f64
352 };
353
354 (node_idx, normalized_degree)
355 })
356 .collect();
357
358 let betweenness_scores = self.calculate_betweenness_from_scc(graph, node_map)?;
360
361 for (file_key, &node_idx) in node_map {
363 let pagerank = pagerank_scores
364 .get(node_idx.index())
365 .copied()
366 .unwrap_or(0.0);
367 let degree = degree_scores
368 .iter()
369 .find(|(idx, _)| *idx == node_idx)
370 .map(|(_, score)| *score)
371 .unwrap_or(0.0);
372 let betweenness = betweenness_scores.get(&node_idx).copied().unwrap_or(0.0);
373
374 let combined = (degree * 0.3) + (pagerank * 0.5) + (betweenness * 0.2);
376
377 centrality_scores.insert(
378 file_key.clone(),
379 CentralityScores {
380 degree,
381 pagerank,
382 betweenness,
383 combined,
384 },
385 );
386 }
387
388 debug!(
389 "Calculated centrality scores for {} files",
390 centrality_scores.len()
391 );
392 Ok(centrality_scores)
393 }
394
395 fn calculate_betweenness_from_scc(
397 &self,
398 graph: &Graph<String, (), Directed>,
399 node_map: &HashMap<String, NodeIndex>,
400 ) -> ScalingResult<HashMap<NodeIndex, f64>> {
401 let mut betweenness_scores = HashMap::new();
402
403 let sccs = kosaraju_scc(graph);
405
406 for &node_idx in node_map.values() {
408 let mut betweenness = 0.0;
409
410 let node_scc = sccs.iter().position(|scc| scc.contains(&node_idx));
412
413 if let Some(scc_idx) = node_scc {
414 let out_edges: HashSet<usize> = graph
416 .edges_directed(node_idx, petgraph::Outgoing)
417 .filter_map(|edge| {
418 let target = edge.target();
419 sccs.iter().position(|scc| scc.contains(&target))
420 })
421 .filter(|&target_scc| target_scc != scc_idx)
422 .collect();
423
424 let in_edges: HashSet<usize> = graph
425 .edges_directed(node_idx, petgraph::Incoming)
426 .filter_map(|edge| {
427 let source = edge.source();
428 sccs.iter().position(|scc| scc.contains(&source))
429 })
430 .filter(|&source_scc| source_scc != scc_idx)
431 .collect();
432
433 betweenness = (out_edges.len() + in_edges.len()) as f64;
435
436 let max_components = sccs.len().saturating_sub(1);
438 if max_components > 0 {
439 betweenness /= max_components as f64;
440 }
441 }
442
443 betweenness_scores.insert(node_idx, betweenness);
444 }
445
446 Ok(betweenness_scores)
447 }
448
449 fn calculate_simple_pagerank(
451 &self,
452 graph: &Graph<String, (), Directed>,
453 node_map: &HashMap<String, NodeIndex>,
454 ) -> ScalingResult<Vec<f64>> {
455 let node_count = graph.node_count();
456 if node_count == 0 {
457 return Ok(Vec::new());
458 }
459
460 let mut scores = vec![1.0 / node_count as f64; node_count];
461 let damping = 0.85;
462 let iterations = 10; for _ in 0..iterations {
465 let mut new_scores = vec![(1.0 - damping) / node_count as f64; node_count];
466
467 for &node_idx in node_map.values() {
468 let out_degree = graph.edges_directed(node_idx, petgraph::Outgoing).count();
469 if out_degree > 0 {
470 let contribution = scores[node_idx.index()] * damping / out_degree as f64;
471
472 for edge in graph.edges_directed(node_idx, petgraph::Outgoing) {
473 let target_idx = edge.target().index();
474 new_scores[target_idx] += contribution;
475 }
476 }
477 }
478
479 scores = new_scores;
480 }
481
482 Ok(scores)
483 }
484
485 async fn calculate_query_relevance(
487 &self,
488 mut files: Vec<FileWithCentrality>,
489 query_hint: Option<&str>,
490 ) -> ScalingResult<Vec<FileWithCentrality>> {
491 if let Some(query) = query_hint {
492 debug!("Calculating query relevance for: {}", query);
493
494 let query_lower = query.to_lowercase();
495 let query_words: Vec<&str> = query_lower.split_whitespace().collect();
496
497 for file in &mut files {
498 file.query_relevance =
499 self.calculate_file_query_relevance(&file.metadata, &query_words);
500 }
501 }
502
503 Ok(files)
504 }
505
506 fn calculate_file_query_relevance(&self, file: &FileMetadata, query_words: &[&str]) -> f64 {
508 let path_str = file.path.to_string_lossy().to_lowercase();
509 let filename = file
510 .path
511 .file_name()
512 .and_then(|n| n.to_str())
513 .unwrap_or("")
514 .to_lowercase();
515
516 let mut relevance = 0.0;
517
518 for word in query_words {
519 if filename.contains(word) {
521 relevance += 1.0;
522 }
523 else if path_str.contains(word) {
525 relevance += 0.5;
526 }
527 else if file.language.to_lowercase().contains(word) {
529 relevance += 0.2;
530 }
531 }
532
533 if filename.contains("main")
535 || filename.contains("index")
536 || filename == "lib.rs"
537 || filename == "__init__.py"
538 {
539 relevance += 0.3;
540 }
541
542 relevance
543 }
544
545 async fn group_by_relatedness(
547 &self,
548 mut files: Vec<FileWithCentrality>,
549 ) -> ScalingResult<Vec<FileWithCentrality>> {
550 debug!("Grouping {} files by relatedness", files.len());
551
552 for file in &mut files {
553 file.relatedness_group = self.determine_relatedness_group(&file.metadata);
554 }
555
556 Ok(files)
557 }
558
559 fn determine_relatedness_group(&self, file: &FileMetadata) -> String {
561 let path_str = file.path.to_string_lossy();
562
563 let path_components: Vec<&str> = path_str.split('/').collect();
565 let group = if path_components.len() >= 2 {
566 format!("{}/{}", path_components[0], path_components[1])
567 } else if path_components.len() == 1 {
568 path_components[0].to_string()
569 } else {
570 "root".to_string()
571 };
572
573 format!("{}::{}", group, file.language)
575 }
576
577 async fn apply_positioning_strategy(
579 &self,
580 files: Vec<FileWithCentrality>,
581 ) -> ScalingResult<ContextPositioning> {
582 if files.is_empty() {
583 return Ok(ContextPositioning {
584 head_files: Vec::new(),
585 middle_files: Vec::new(),
586 tail_files: Vec::new(),
587 });
588 }
589
590 let total_files = files.len();
591 let head_count = ((total_files as f64 * self.config.head_percentage) as usize).max(1);
592 let tail_count = ((total_files as f64 * self.config.tail_percentage) as usize).max(1);
593
594 debug!(
595 "Positioning strategy: HEAD={}, TAIL={}, MIDDLE={}",
596 head_count,
597 tail_count,
598 total_files - head_count - tail_count
599 );
600
601 let mut head_candidates = files.clone();
603 head_candidates.sort_by(|a, b| {
604 let score_a = (a.query_relevance * self.config.query_relevance_weight)
605 + (a.centrality.combined * self.config.centrality_weight);
606 let score_b = (b.query_relevance * self.config.query_relevance_weight)
607 + (b.centrality.combined * self.config.centrality_weight);
608 score_b
609 .partial_cmp(&score_a)
610 .unwrap_or(std::cmp::Ordering::Equal)
611 });
612
613 let mut tail_candidates = files.clone();
615 tail_candidates.sort_by(|a, b| {
616 b.centrality
617 .combined
618 .partial_cmp(&a.centrality.combined)
619 .unwrap_or(std::cmp::Ordering::Equal)
620 });
621
622 let mut selected_files = HashSet::new();
624 let mut head_files = Vec::new();
625
626 for file in head_candidates.into_iter().take(head_count) {
627 let file_key = self.file_to_key(&file.metadata.path);
628 selected_files.insert(file_key);
629 head_files.push(file);
630 }
631
632 let mut tail_files = Vec::new();
634 for file in tail_candidates {
635 if tail_files.len() >= tail_count {
636 break;
637 }
638 let file_key = self.file_to_key(&file.metadata.path);
639 if !selected_files.contains(&file_key) {
640 selected_files.insert(file_key);
641 tail_files.push(file);
642 }
643 }
644
645 let mut middle_files = Vec::new();
647 for file in files {
648 let file_key = self.file_to_key(&file.metadata.path);
649 if !selected_files.contains(&file_key) {
650 middle_files.push(file);
651 }
652 }
653
654 self.group_within_tier(&mut head_files);
656 self.group_within_tier(&mut middle_files);
657 self.group_within_tier(&mut tail_files);
658
659 Ok(ContextPositioning {
660 head_files,
661 middle_files,
662 tail_files,
663 })
664 }
665
666 fn group_within_tier(&self, files: &mut Vec<FileWithCentrality>) {
668 files.sort_by(|a, b| {
669 let group_cmp = a.relatedness_group.cmp(&b.relatedness_group);
671 if group_cmp != std::cmp::Ordering::Equal {
672 return group_cmp;
673 }
674
675 b.centrality
677 .combined
678 .partial_cmp(&a.centrality.combined)
679 .unwrap_or(std::cmp::Ordering::Equal)
680 });
681 }
682
683 fn calculate_total_tokens(&self, positioning: &ContextPositioning) -> usize {
685 let head_tokens = positioning
686 .head_files
687 .iter()
688 .map(|f| self.estimate_tokens(&f.metadata))
689 .sum::<usize>();
690
691 let middle_tokens = positioning
692 .middle_files
693 .iter()
694 .map(|f| self.estimate_tokens(&f.metadata))
695 .sum::<usize>();
696
697 let tail_tokens = positioning
698 .tail_files
699 .iter()
700 .map(|f| self.estimate_tokens(&f.metadata))
701 .sum::<usize>();
702
703 head_tokens + middle_tokens + tail_tokens
704 }
705
    /// Produces the multi-line, human-readable explanation of the tier layout,
    /// listing up to three example files each for HEAD and TAIL.
    fn generate_positioning_reasoning(
        &self,
        positioning: &ContextPositioning,
        query_hint: Option<&str>,
    ) -> String {
        let mut reasoning = Vec::new();

        reasoning.push("🎯 Context Positioning Strategy Applied".to_string());
        reasoning.push("".to_string());

        // HEAD section: count, optional query hint, and top examples.
        reasoning.push(format!(
            "📍 HEAD ({} files): Query-specific high centrality files",
            positioning.head_files.len()
        ));
        if let Some(query) = query_hint {
            reasoning.push(format!(" Query hint: '{}'", query));
        }
        for (i, file) in positioning.head_files.iter().take(3).enumerate() {
            reasoning.push(format!(
                " {}. {} (centrality: {:.3}, relevance: {:.3})",
                i + 1,
                file.metadata
                    .path
                    .file_name()
                    .and_then(|n| n.to_str())
                    .unwrap_or("?"),
                file.centrality.combined,
                file.query_relevance
            ));
        }
        if positioning.head_files.len() > 3 {
            reasoning.push(format!(
                " ... and {} more files",
                positioning.head_files.len() - 3
            ));
        }
        reasoning.push("".to_string());

        // MIDDLE section: count only.
        reasoning.push(format!(
            "🔄 MIDDLE ({} files): Supporting utilities and low-centrality files",
            positioning.middle_files.len()
        ));
        reasoning.push("".to_string());

        // TAIL section: count and top examples by centrality.
        reasoning.push(format!(
            "🏛️ TAIL ({} files): Core functionality, high centrality",
            positioning.tail_files.len()
        ));
        for (i, file) in positioning.tail_files.iter().take(3).enumerate() {
            reasoning.push(format!(
                " {}. {} (centrality: {:.3})",
                i + 1,
                file.metadata
                    .path
                    .file_name()
                    .and_then(|n| n.to_str())
                    .unwrap_or("?"),
                file.centrality.combined
            ));
        }
        if positioning.tail_files.len() > 3 {
            reasoning.push(format!(
                " ... and {} more files",
                positioning.tail_files.len() - 3
            ));
        }

        reasoning.join("\n")
    }
779
780 fn create_simple_positioning(&self, files: Vec<FileMetadata>) -> PositionedSelection {
782 let files_with_centrality: Vec<FileWithCentrality> = files
783 .into_iter()
784 .map(|metadata| FileWithCentrality {
785 metadata,
786 centrality: CentralityScores::default(),
787 query_relevance: 0.0,
788 relatedness_group: "default".to_string(),
789 })
790 .collect();
791
792 let positioning = ContextPositioning {
793 head_files: Vec::new(),
794 middle_files: files_with_centrality,
795 tail_files: Vec::new(),
796 };
797
798 let total_tokens = self.calculate_total_tokens(&positioning);
799
800 PositionedSelection {
801 positioning,
802 total_tokens,
803 positioning_reasoning: "Context positioning disabled - using default order".to_string(),
804 }
805 }
806
807 fn file_to_key(&self, path: &Path) -> String {
809 path.to_string_lossy().to_string()
810 }
811
812 fn estimate_tokens(&self, file: &FileMetadata) -> usize {
814 let base_tokens = ((file.size as f64) / 3.5) as usize;
816
817 let multiplier = match file.language.as_str() {
819 "Rust" => 1.3,
820 "JavaScript" | "TypeScript" => 1.2,
821 "Python" => 1.1,
822 "C" | "Go" => 1.0,
823 "JSON" | "YAML" | "TOML" => 0.7,
824 _ => 1.0,
825 };
826
827 (base_tokens as f64 * multiplier) as usize
828 }
829
    /// Delegates test-file detection to the shared `scribe_core::file` helper.
    fn is_test_file(&self, path: &Path) -> bool {
        file::is_test_path(path)
    }
834}
835
836#[cfg(test)]
837mod tests {
838 use super::*;
839 use std::path::PathBuf;
840 use std::time::SystemTime;
841
842 fn create_test_file(path: &str, size: u64, language: &str) -> FileMetadata {
843 FileMetadata {
844 path: PathBuf::from(path),
845 size,
846 modified: SystemTime::now(),
847 language: language.to_string(),
848 file_type: if language == "Rust" {
849 "Source"
850 } else {
851 "Other"
852 }
853 .to_string(),
854 }
855 }
856
857 #[tokio::test]
858 async fn test_context_positioner_creation() {
859 let positioner = ContextPositioner::with_defaults();
860 assert!(positioner.config.enable_positioning);
861 assert_eq!(positioner.config.head_percentage, 0.20);
862 assert_eq!(positioner.config.tail_percentage, 0.20);
863 }
864
    /// Centrality scores should be produced for every input file, stay
    /// non-negative, and have a sane max >= min spread.
    #[tokio::test]
    async fn test_centrality_calculation() {
        let positioner = ContextPositioner::with_defaults();

        let files = vec![
            create_test_file("src/main.rs", 1000, "Rust"),
            create_test_file("src/lib.rs", 2000, "Rust"),
            create_test_file("src/utils.rs", 500, "Rust"),
        ];

        let files_with_centrality = positioner.calculate_centrality_scores(files).await.unwrap();
        assert_eq!(files_with_centrality.len(), 3);

        // Every score component must be non-negative.
        for file in &files_with_centrality {
            assert!(file.centrality.combined >= 0.0);
            assert!(file.centrality.degree >= 0.0);
            assert!(file.centrality.pagerank >= 0.0);
            assert!(file.centrality.betweenness >= 0.0);
        }

        // Spread sanity check: the maximum combined score must be at least
        // the minimum (folds start at 0.0 / 1.0 respectively).
        let max_centrality = files_with_centrality
            .iter()
            .map(|f| f.centrality.combined)
            .fold(0.0, f64::max);
        let min_centrality = files_with_centrality
            .iter()
            .map(|f| f.centrality.combined)
            .fold(1.0, f64::min);

        assert!(max_centrality >= min_centrality);
    }
899
    /// End-to-end positioning over five files with a query hint: every tier
    /// is populated, no file is dropped or duplicated, and the reasoning
    /// mentions the tier names.
    #[tokio::test]
    async fn test_positioning_strategy() {
        let positioner = ContextPositioner::with_defaults();

        let files = vec![
            create_test_file("src/main.rs", 1000, "Rust"),
            create_test_file("src/lib.rs", 2000, "Rust"),
            create_test_file("src/utils.rs", 500, "Rust"),
            create_test_file("tests/integration.rs", 800, "Rust"),
            create_test_file("README.md", 300, "Markdown"),
        ];

        let result = positioner
            .position_files(files, Some("main"))
            .await
            .unwrap();

        // Each tier should receive at least one file.
        assert!(!result.positioning.head_files.is_empty());
        assert!(!result.positioning.middle_files.is_empty());
        assert!(!result.positioning.tail_files.is_empty());

        // All five inputs must be accounted for exactly once.
        let total = result.positioning.head_files.len()
            + result.positioning.middle_files.len()
            + result.positioning.tail_files.len();
        assert_eq!(total, 5);

        // The generated reasoning must describe the tier layout.
        assert!(!result.positioning_reasoning.is_empty());
        assert!(result.positioning_reasoning.contains("HEAD"));
        assert!(result.positioning_reasoning.contains("TAIL"));
    }
933
    /// A query hint of "main" must rank main.rs strictly above utils.rs.
    #[tokio::test]
    async fn test_query_relevance() {
        let positioner = ContextPositioner::with_defaults();

        let files = vec![
            FileWithCentrality {
                metadata: create_test_file("src/main.rs", 1000, "Rust"),
                centrality: CentralityScores::default(),
                query_relevance: 0.0,
                relatedness_group: String::new(),
            },
            FileWithCentrality {
                metadata: create_test_file("src/utils.rs", 500, "Rust"),
                centrality: CentralityScores::default(),
                query_relevance: 0.0,
                relatedness_group: String::new(),
            },
        ];

        let result = positioner
            .calculate_query_relevance(files, Some("main"))
            .await
            .unwrap();

        let main_relevance = result
            .iter()
            .find(|f| f.metadata.path.to_string_lossy().contains("main.rs"))
            .unwrap();
        let utils_relevance = result
            .iter()
            .find(|f| f.metadata.path.to_string_lossy().contains("utils.rs"))
            .unwrap();

        // "main" hits the filename of main.rs, so it must score higher.
        assert!(main_relevance.query_relevance > utils_relevance.query_relevance);
    }
970
971 #[test]
972 fn test_relatedness_grouping() {
973 let positioner = ContextPositioner::with_defaults();
974
975 let file = create_test_file("src/api/handlers.rs", 1000, "Rust");
976 let group = positioner.determine_relatedness_group(&file);
977
978 assert!(group.contains("src/api"));
979 assert!(group.contains("Rust"));
980 }
981
982 #[test]
983 fn test_token_estimation() {
984 let positioner = ContextPositioner::with_defaults();
985
986 let rust_file = create_test_file("src/main.rs", 1000, "Rust");
987 let json_file = create_test_file("package.json", 1000, "JSON");
988
989 let rust_tokens = positioner.estimate_tokens(&rust_file);
990 let json_tokens = positioner.estimate_tokens(&json_file);
991
992 assert!(rust_tokens > json_tokens);
994 }
995
    /// Exercises the delegated test-path detection across directory names,
    /// filename prefixes/suffixes, language-specific conventions, and
    /// non-test counterexamples.
    #[test]
    fn test_is_test_file_detection() {
        let positioner = ContextPositioner::with_defaults();

        // Test directories.
        assert!(positioner.is_test_file(&std::path::Path::new("src/test/utils.rs")));
        assert!(positioner.is_test_file(&std::path::Path::new("src/tests/integration.py")));
        assert!(positioner.is_test_file(&std::path::Path::new("__tests__/component.test.js")));

        // Test filename prefixes/suffixes.
        assert!(positioner.is_test_file(&std::path::Path::new("test_utils.py")));
        assert!(positioner.is_test_file(&std::path::Path::new("utils_test.rs")));
        assert!(positioner.is_test_file(&std::path::Path::new("component.test.tsx")));
        assert!(positioner.is_test_file(&std::path::Path::new("service.spec.ts")));
        assert!(positioner.is_test_file(&std::path::Path::new("model_test.go")));

        // Language-specific naming conventions.
        assert!(positioner.is_test_file(&std::path::Path::new("UserTest.java")));
        assert!(positioner.is_test_file(&std::path::Path::new("user_spec.rb")));
        assert!(positioner.is_test_file(&std::path::Path::new("UserTest.php")));

        // Non-test files must not be flagged.
        assert!(!positioner.is_test_file(&std::path::Path::new("src/main.rs")));
        assert!(!positioner.is_test_file(&std::path::Path::new("lib/utils.py")));
        assert!(!positioner.is_test_file(&std::path::Path::new("components/Button.tsx")));
        assert!(!positioner.is_test_file(&std::path::Path::new("README.md")));
        assert!(!positioner.is_test_file(&std::path::Path::new("package.json")));
    }
1024
1025 #[tokio::test]
1026 async fn test_auto_exclude_tests() {
1027 let mut config = ContextPositioningConfig::default();
1028 config.auto_exclude_tests = true;
1029 let positioner = ContextPositioner::new(config);
1030
1031 let files = vec![
1033 create_test_file("src/main.rs", 1000, "Rust"),
1034 create_test_file("src/lib.rs", 800, "Rust"),
1035 create_test_file("src/tests/integration_test.rs", 1200, "Rust"),
1036 create_test_file("test/unit_test.py", 600, "Python"),
1037 create_test_file("components/Button.tsx", 900, "TypeScript"),
1038 create_test_file("__tests__/Button.test.tsx", 700, "TypeScript"),
1039 ];
1040
1041 let result = positioner.position_files(files, None).await.unwrap();
1042
1043 let all_files: Vec<&FileWithCentrality> = result
1045 .positioning
1046 .head_files
1047 .iter()
1048 .chain(result.positioning.middle_files.iter())
1049 .chain(result.positioning.tail_files.iter())
1050 .collect();
1051
1052 assert_eq!(all_files.len(), 3);
1054
1055 for file in all_files {
1057 let path_str = file.metadata.path.to_string_lossy();
1058 assert!(!path_str.contains("test"));
1059 assert!(!path_str.contains("__tests__"));
1060 }
1061
1062 let file_names: Vec<String> = result
1064 .positioning
1065 .head_files
1066 .iter()
1067 .chain(result.positioning.middle_files.iter())
1068 .chain(result.positioning.tail_files.iter())
1069 .map(|f| {
1070 f.metadata
1071 .path
1072 .file_name()
1073 .unwrap()
1074 .to_string_lossy()
1075 .to_string()
1076 })
1077 .collect();
1078
1079 assert!(file_names.contains(&"main.rs".to_string()));
1080 assert!(file_names.contains(&"lib.rs".to_string()));
1081 assert!(file_names.contains(&"Button.tsx".to_string()));
1082 }
1083
1084 #[tokio::test]
1085 async fn test_auto_exclude_disabled() {
1086 let mut config = ContextPositioningConfig::default();
1087 config.auto_exclude_tests = false; let positioner = ContextPositioner::new(config);
1089
1090 let files = vec![
1092 create_test_file("src/main.rs", 1000, "Rust"),
1093 create_test_file("src/tests/integration_test.rs", 1200, "Rust"),
1094 create_test_file("test_utils.py", 600, "Python"),
1095 ];
1096
1097 let result = positioner.position_files(files, None).await.unwrap();
1098
1099 let all_files: Vec<&FileWithCentrality> = result
1101 .positioning
1102 .head_files
1103 .iter()
1104 .chain(result.positioning.middle_files.iter())
1105 .chain(result.positioning.tail_files.iter())
1106 .collect();
1107
1108 assert_eq!(all_files.len(), 3);
1110 }
1111}