1use std::collections::{HashMap, HashSet};
2
3use super::graph_index::ProjectIndex;
4
5use super::neural::attention_learned::LearnedAttention;
6
/// Relevance verdict for a single indexed file with respect to a task.
#[derive(Debug, Clone)]
pub struct RelevanceScore {
    /// Project-relative file path this score applies to.
    pub path: String,
    /// Combined relevance in [0, 1] (heat diffusion + PageRank blend, plus keyword boost).
    pub score: f64,
    /// Context-inclusion mode chosen by `recommend_mode`:
    /// "full", "signatures", "map", or "reference".
    pub recommended_mode: &'static str,
}
13
14pub fn compute_relevance(
15 index: &ProjectIndex,
16 task_files: &[String],
17 task_keywords: &[String],
18) -> Vec<RelevanceScore> {
19 let adj = build_adjacency_resolved(index);
20 let all_nodes: Vec<String> = index.files.keys().cloned().collect();
21 if all_nodes.is_empty() {
22 return Vec::new();
23 }
24
25 let node_idx: HashMap<&str, usize> = all_nodes
26 .iter()
27 .enumerate()
28 .map(|(i, n)| (n.as_str(), i))
29 .collect();
30 let n = all_nodes.len();
31
32 let degrees: Vec<f64> = all_nodes
34 .iter()
35 .map(|node| {
36 adj.get(node)
37 .map_or(0.0, |neigh| neigh.len() as f64)
38 .max(1.0)
39 })
40 .collect();
41
42 let mut heat: Vec<f64> = vec![0.0; n];
44 for f in task_files {
45 if let Some(&idx) = node_idx.get(f.as_str()) {
46 heat[idx] = 1.0;
47 }
48 }
49
50 let alpha = 0.5;
53 let iterations = 4;
54 for _ in 0..iterations {
55 let mut new_heat = vec![0.0; n];
56 for (i, node) in all_nodes.iter().enumerate() {
57 let self_term = (1.0 - alpha) * heat[i];
58 let mut neighbor_sum = 0.0;
59 if let Some(neighbors) = adj.get(node) {
60 for neighbor in neighbors {
61 if let Some(&j) = node_idx.get(neighbor.as_str()) {
62 neighbor_sum += heat[j] / degrees[j];
63 }
64 }
65 }
66 new_heat[i] = self_term + alpha * neighbor_sum;
67 }
68 heat = new_heat;
69 }
70
71 let mut pagerank = vec![1.0 / n as f64; n];
73 let damping = 0.85;
74 for _ in 0..8 {
75 let mut new_pr = vec![(1.0 - damping) / n as f64; n];
76 for (i, node) in all_nodes.iter().enumerate() {
77 if let Some(neighbors) = adj.get(node) {
78 let out_deg = neighbors.len().max(1) as f64;
79 for neighbor in neighbors {
80 if let Some(&j) = node_idx.get(neighbor.as_str()) {
81 new_pr[j] += damping * pagerank[i] / out_deg;
82 }
83 }
84 }
85 }
86 pagerank = new_pr;
87 }
88
89 let mut scores: HashMap<String, f64> = HashMap::new();
91 let heat_max = heat.iter().cloned().fold(0.0_f64, f64::max).max(1e-10);
92 let pr_max = pagerank.iter().cloned().fold(0.0_f64, f64::max).max(1e-10);
93
94 for (i, node) in all_nodes.iter().enumerate() {
95 let h = heat[i] / heat_max;
96 let pr = pagerank[i] / pr_max;
97 let combined = h * 0.8 + pr * 0.2;
98 if combined > 0.01 {
99 scores.insert(node.clone(), combined);
100 }
101 }
102
103 if !task_keywords.is_empty() {
105 let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
106 for (file_path, file_entry) in &index.files {
107 let path_lower = file_path.to_lowercase();
108 let mut keyword_hits = 0;
109 for kw in &kw_lower {
110 if path_lower.contains(kw) {
111 keyword_hits += 1;
112 }
113 for export in &file_entry.exports {
114 if export.to_lowercase().contains(kw) {
115 keyword_hits += 1;
116 }
117 }
118 }
119 if keyword_hits > 0 {
120 let boost = (keyword_hits as f64 * 0.15).min(0.6);
121 let entry = scores.entry(file_path.clone()).or_insert(0.0);
122 *entry = (*entry + boost).min(1.0);
123 }
124 }
125 }
126
127 let mut result: Vec<RelevanceScore> = scores
128 .into_iter()
129 .map(|(path, score)| {
130 let mode = recommend_mode(score);
131 RelevanceScore {
132 path,
133 score,
134 recommended_mode: mode,
135 }
136 })
137 .collect();
138
139 result.sort_by(|a, b| {
140 b.score
141 .partial_cmp(&a.score)
142 .unwrap_or(std::cmp::Ordering::Equal)
143 });
144 result
145}
146
147pub fn compute_relevance_from_intent(
148 index: &ProjectIndex,
149 intent: &super::intent_engine::StructuredIntent,
150) -> Vec<RelevanceScore> {
151 use super::intent_engine::IntentScope;
152
153 let mut file_seeds: Vec<String> = Vec::new();
154 let mut extra_keywords: Vec<String> = intent.keywords.clone();
155
156 for target in &intent.targets {
157 if target.contains('.') || target.contains('/') {
158 let matched = resolve_target_to_files(index, target);
159 if matched.is_empty() {
160 extra_keywords.push(target.clone());
161 } else {
162 file_seeds.extend(matched);
163 }
164 } else {
165 let from_symbol = resolve_symbol_to_files(index, target);
166 if from_symbol.is_empty() {
167 extra_keywords.push(target.clone());
168 } else {
169 file_seeds.extend(from_symbol);
170 }
171 }
172 }
173
174 if let Some(lang) = &intent.language_hint {
175 let lang_ext = match lang.as_str() {
176 "rust" => Some("rs"),
177 "typescript" => Some("ts"),
178 "javascript" => Some("js"),
179 "python" => Some("py"),
180 "go" => Some("go"),
181 "ruby" => Some("rb"),
182 "java" => Some("java"),
183 _ => None,
184 };
185 if let Some(ext) = lang_ext {
186 if file_seeds.is_empty() {
187 for path in index.files.keys() {
188 if path.ends_with(&format!(".{ext}")) {
189 extra_keywords.push(
190 std::path::Path::new(path)
191 .file_stem()
192 .and_then(|s| s.to_str())
193 .unwrap_or("")
194 .to_string(),
195 );
196 break;
197 }
198 }
199 }
200 }
201 }
202
203 let mut result = compute_relevance(index, &file_seeds, &extra_keywords);
204
205 match intent.scope {
206 IntentScope::SingleFile => {
207 result.truncate(5);
208 }
209 IntentScope::MultiFile => {
210 result.truncate(15);
211 }
212 IntentScope::CrossModule | IntentScope::ProjectWide => {}
213 }
214
215 result
216}
217
218fn resolve_target_to_files(index: &ProjectIndex, target: &str) -> Vec<String> {
219 let mut matches = Vec::new();
220 for path in index.files.keys() {
221 if path.ends_with(target) || path.contains(target) {
222 matches.push(path.clone());
223 }
224 }
225 matches
226}
227
228fn resolve_symbol_to_files(index: &ProjectIndex, symbol: &str) -> Vec<String> {
229 let sym_lower = symbol.to_lowercase();
230 let mut matches = Vec::new();
231 for entry in index.symbols.values() {
232 let name_lower = entry.name.to_lowercase();
233 if (name_lower == sym_lower || name_lower.contains(&sym_lower))
234 && !matches.contains(&entry.file)
235 {
236 matches.push(entry.file.clone());
237 }
238 }
239 if matches.is_empty() {
240 for (path, file_entry) in &index.files {
241 if file_entry
242 .exports
243 .iter()
244 .any(|e| e.to_lowercase().contains(&sym_lower))
245 && !matches.contains(path)
246 {
247 matches.push(path.clone());
248 }
249 }
250 }
251 matches
252}
253
/// Maps a relevance score onto a context-inclusion mode.
/// Thresholds: >= 0.8 full source, >= 0.5 signatures only, >= 0.2 structural
/// map, otherwise a bare reference. (NaN falls through to "reference".)
fn recommend_mode(score: f64) -> &'static str {
    match score {
        s if s >= 0.8 => "full",
        s if s >= 0.5 => "signatures",
        s if s >= 0.2 => "map",
        _ => "reference",
    }
}
265
266fn build_adjacency_resolved(index: &ProjectIndex) -> HashMap<String, Vec<String>> {
271 let module_to_file = build_module_map(index);
272 let mut adj: HashMap<String, Vec<String>> = HashMap::new();
273
274 for edge in &index.edges {
275 let from = &edge.from;
276 let to_resolved = module_to_file
277 .get(&edge.to)
278 .cloned()
279 .unwrap_or_else(|| edge.to.clone());
280
281 if index.files.contains_key(from) && index.files.contains_key(&to_resolved) {
282 adj.entry(from.clone())
283 .or_default()
284 .push(to_resolved.clone());
285 adj.entry(to_resolved).or_default().push(from.clone());
286 }
287 }
288 adj
289}
290
291fn build_module_map(index: &ProjectIndex) -> HashMap<String, String> {
294 let file_paths: Vec<&str> = index.files.keys().map(|s| s.as_str()).collect();
295 let mut mapping: HashMap<String, String> = HashMap::new();
296
297 let edge_targets: HashSet<String> = index.edges.iter().map(|e| e.to.clone()).collect();
298
299 for target in &edge_targets {
300 if index.files.contains_key(target) {
301 mapping.insert(target.clone(), target.clone());
302 continue;
303 }
304
305 if let Some(resolved) = resolve_module_to_file(target, &file_paths) {
306 mapping.insert(target.clone(), resolved);
307 }
308 }
309
310 mapping
311}
312
/// Resolves a Rust module path (e.g. `crate::graph::index`) to an indexed file.
///
/// Strategy, in order of confidence:
/// 1. Try ever-shorter prefixes of the module path as `<prefix>.rs` or
///    `<prefix>/mod.rs`, matched against paths normalized for common source
///    roots (`rust/src/`, `src/`).
/// 2. Fall back to matching the last segment alone as a file stem. This match
///    is anchored at a path-component boundary so that module `b` no longer
///    spuriously resolves to `ab.rs` (the old bare `ends_with("b.rs")` did).
fn resolve_module_to_file(module_path: &str, file_paths: &[&str]) -> Option<String> {
    let cleaned = module_path
        .trim_start_matches("crate::")
        .trim_start_matches("super::");

    let parts: Vec<&str> = cleaned.split("::").collect();

    // Longest prefix first: prefer the most specific match.
    for end in (1..=parts.len()).rev() {
        let candidate = parts[..end].join("/");

        for fp in file_paths {
            let fp_normalized = fp
                .trim_start_matches("rust/src/")
                .trim_start_matches("src/");

            if fp_normalized == format!("{candidate}.rs")
                || fp_normalized == format!("{candidate}/mod.rs")
                || fp.ends_with(&format!("/{candidate}.rs"))
                || fp.ends_with(&format!("/{candidate}/mod.rs"))
            {
                return Some(fp.to_string());
            }
        }
    }

    // Last resort: the final segment as a whole file stem, anchored so only a
    // full path component can match.
    if let Some(last) = parts.last() {
        let stem = format!("{last}.rs");
        let anchored = format!("/{stem}");
        for fp in file_paths {
            if *fp == stem || fp.ends_with(&anchored) {
                return Some(fp.to_string());
            }
        }
    }

    None
}
353
/// Splits a free-form task description into (file hints, keyword hints).
///
/// A whitespace token counts as a file when, after stripping surrounding
/// punctuation, it contains a dot and either a path separator or a known
/// source-file extension. Every other token of length >= 3 that is not a
/// stop word becomes a keyword.
pub fn parse_task_hints(task_description: &str) -> (Vec<String>, Vec<String>) {
    const FILE_EXTS: [&str; 5] = [".rs", ".ts", ".py", ".go", ".js"];

    let mut files = Vec::new();
    let mut keywords = Vec::new();

    for raw in task_description.split_whitespace() {
        // Strip punctuation that commonly wraps tokens, keeping path-ish chars.
        let token = raw.trim_matches(|c: char| {
            !c.is_alphanumeric() && c != '.' && c != '/' && c != '_' && c != '-'
        });

        let looks_like_file = token.contains('.')
            && (token.contains('/') || FILE_EXTS.iter().any(|ext| token.ends_with(ext)));

        if looks_like_file {
            files.push(token.to_string());
        } else if token.len() >= 3 && !STOP_WORDS.contains(&token.to_lowercase().as_str()) {
            keywords.push(token.to_string());
        }
    }

    (files, keywords)
}

/// Common English (plus a few German) filler words excluded from keywords.
const STOP_WORDS: &[&str] = &[
    "the", "and", "for", "that", "this", "with", "from", "have", "has", "was", "are", "been",
    "not", "but", "all", "can", "had", "her", "one", "our", "out", "you", "its", "will", "each",
    "make", "like", "fix", "add", "use", "get", "set", "run", "new", "old", "should", "would",
    "could", "into", "also", "than", "them", "then", "when", "just", "only", "very", "some",
    "more", "other", "nach", "und", "die", "der", "das", "ist", "ein", "eine", "nicht", "auf",
    "mit",
];
388
/// Per-category multipliers used by the information-bottleneck filter to
/// weight a line's structural role when scoring it for retention.
struct StructuralWeights {
    // Lines that raise/return/propagate errors (see `is_error_handling`).
    error_handling: f64,
    // Definition headers: fn/struct/class/def/... (see `is_definition_line`).
    definition: f64,
    // Branch/loop/return statements (see `is_control_flow`).
    control_flow: f64,
    // Bare closing braces — lowest-information lines (see `is_closing_brace`).
    closing_brace: f64,
    // Everything else.
    other: f64,
}
396
397impl StructuralWeights {
398 const DEFAULT: Self = Self {
399 error_handling: 1.5,
400 definition: 1.0,
401 control_flow: 0.5,
402 closing_brace: 0.15,
403 other: 0.3,
404 };
405
406 fn for_task_type(task_type: Option<super::intent_engine::TaskType>) -> Self {
407 use super::intent_engine::TaskType;
408 match task_type {
409 Some(TaskType::FixBug) => Self {
410 error_handling: 2.0,
411 definition: 0.8,
412 control_flow: 0.8,
413 closing_brace: 0.1,
414 other: 0.2,
415 },
416 Some(TaskType::Debug) => Self {
417 error_handling: 2.0,
418 definition: 0.6,
419 control_flow: 1.0,
420 closing_brace: 0.1,
421 other: 0.2,
422 },
423 Some(TaskType::Generate) => Self {
424 error_handling: 0.8,
425 definition: 1.5,
426 control_flow: 0.3,
427 closing_brace: 0.15,
428 other: 0.4,
429 },
430 Some(TaskType::Refactor) => Self {
431 error_handling: 1.0,
432 definition: 1.5,
433 control_flow: 0.6,
434 closing_brace: 0.2,
435 other: 0.3,
436 },
437 Some(TaskType::Test) => Self {
438 error_handling: 1.2,
439 definition: 1.3,
440 control_flow: 0.4,
441 closing_brace: 0.15,
442 other: 0.3,
443 },
444 Some(TaskType::Review) => Self {
445 error_handling: 1.3,
446 definition: 1.2,
447 control_flow: 0.6,
448 closing_brace: 0.15,
449 other: 0.3,
450 },
451 Some(TaskType::Explore) | None => Self::DEFAULT,
452 Some(_) => Self::DEFAULT,
453 }
454 }
455}
456
/// Convenience wrapper around `information_bottleneck_filter_typed` that uses
/// the default (task-type-agnostic) structural weights.
///
/// `budget_ratio` is the fraction of lines to keep (0.0..=1.0); when keywords
/// are present it may be tightened adaptively by the typed variant.
pub fn information_bottleneck_filter(
    content: &str,
    task_keywords: &[String],
    budget_ratio: f64,
) -> String {
    information_bottleneck_filter_typed(content, task_keywords, budget_ratio, None)
}
474
475pub fn information_bottleneck_filter_typed(
477 content: &str,
478 task_keywords: &[String],
479 budget_ratio: f64,
480 task_type: Option<super::intent_engine::TaskType>,
481) -> String {
482 let lines: Vec<&str> = content.lines().collect();
483 if lines.is_empty() {
484 return String::new();
485 }
486
487 let n = lines.len();
488 let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
489 let attention = LearnedAttention::with_defaults();
490
491 let mut global_token_freq: HashMap<&str, usize> = HashMap::new();
492 for line in &lines {
493 for token in line.split_whitespace() {
494 *global_token_freq.entry(token).or_insert(0) += 1;
495 }
496 }
497 let total_unique = global_token_freq.len().max(1) as f64;
498 let total_lines = n.max(1) as f64;
499
500 let task_token_set: HashSet<String> = kw_lower
501 .iter()
502 .flat_map(|kw| kw.split(|c: char| !c.is_alphanumeric()).map(String::from))
503 .filter(|t| t.len() >= 2)
504 .collect();
505
506 let effective_ratio = if !task_token_set.is_empty() {
507 adaptive_ib_budget(content, budget_ratio)
508 } else {
509 budget_ratio
510 };
511
512 let weights = StructuralWeights::for_task_type(task_type);
513
514 let mut scored_lines: Vec<(usize, &str, f64)> = lines
515 .iter()
516 .enumerate()
517 .map(|(i, line)| {
518 let trimmed = line.trim();
519 if trimmed.is_empty() {
520 return (i, *line, 0.05);
521 }
522
523 let line_lower = trimmed.to_lowercase();
524 let line_tokens: Vec<&str> = trimmed.split_whitespace().collect();
525 let line_token_count = line_tokens.len().max(1) as f64;
526
527 let mi_score = if task_token_set.is_empty() {
528 0.0
529 } else {
530 let line_token_set: HashSet<String> =
531 line_tokens.iter().map(|t| t.to_lowercase()).collect();
532 let overlap: f64 = line_token_set
533 .iter()
534 .filter(|t| task_token_set.iter().any(|kw| t.contains(kw.as_str())))
535 .map(|t| {
536 let freq = *global_token_freq.get(t.as_str()).unwrap_or(&1) as f64;
537 (total_lines / freq).ln().max(0.1)
538 })
539 .sum();
540 overlap / line_token_count
541 };
542
543 let keyword_hits: f64 = kw_lower
544 .iter()
545 .filter(|kw| line_lower.contains(kw.as_str()))
546 .count() as f64;
547
548 let structural = if is_error_handling(trimmed) {
549 weights.error_handling
550 } else if is_definition_line(trimmed) {
551 weights.definition
552 } else if is_control_flow(trimmed) {
553 weights.control_flow
554 } else if is_closing_brace(trimmed) {
555 weights.closing_brace
556 } else {
557 weights.other
558 };
559 let relevance = mi_score * 0.4 + keyword_hits * 0.3 + structural;
560
561 let unique_in_line = line_tokens.iter().collect::<HashSet<_>>().len() as f64;
562 let token_diversity = unique_in_line / line_token_count;
563
564 let avg_idf: f64 = if line_tokens.is_empty() {
565 0.0
566 } else {
567 line_tokens
568 .iter()
569 .map(|t| {
570 let freq = *global_token_freq.get(t).unwrap_or(&1) as f64;
571 (total_unique / freq).ln().max(0.0)
572 })
573 .sum::<f64>()
574 / line_token_count
575 };
576 let information = (token_diversity * 0.4 + (avg_idf.min(3.0) / 3.0) * 0.6).min(1.0);
577
578 let pos = i as f64 / n.max(1) as f64;
579 let attn_weight = attention.weight(pos);
580
581 let score = (relevance * 0.6 + 0.05)
582 * (information * 0.25 + 0.05)
583 * (attn_weight * 0.15 + 0.05);
584
585 (i, *line, score)
586 })
587 .collect();
588
589 let budget = ((n as f64) * effective_ratio).ceil() as usize;
590
591 scored_lines.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
592
593 let selected = mmr_select(&scored_lines, budget, 0.3);
594
595 let mut output_lines: Vec<&str> = Vec::with_capacity(budget + 1);
596
597 if !kw_lower.is_empty() {
598 output_lines.push("");
599 }
600
601 for (_, line, _) in &selected {
602 output_lines.push(line);
603 }
604
605 if !kw_lower.is_empty() {
606 let summary = format!("[task: {}]", task_keywords.join(", "));
607 let mut result = summary;
608 result.push('\n');
609 result.push_str(&output_lines[1..].to_vec().join("\n"));
610 return result;
611 }
612
613 output_lines.join("\n")
614}
615
/// Greedy maximal-marginal-relevance selection over pre-scored lines.
///
/// Starting from the top candidate (the input is pre-sorted by score),
/// repeatedly picks the remaining line maximizing
/// `score - lambda * max_jaccard_similarity(to already chosen)`.
/// Token-less candidates skip the similarity penalty. Ties keep the earliest
/// candidate (strict `>` comparison), matching the input ordering.
fn mmr_select<'a>(
    candidates: &[(usize, &'a str, f64)],
    budget: usize,
    lambda: f64,
) -> Vec<(usize, &'a str, f64)> {
    if candidates.is_empty() || budget == 0 {
        return Vec::new();
    }

    let mut chosen: Vec<(usize, &'a str, f64)> = Vec::with_capacity(budget);
    let mut pool: Vec<(usize, &'a str, f64)> = candidates.to_vec();

    // The highest-scored candidate is always taken first.
    chosen.push(pool.remove(0));

    while chosen.len() < budget && !pool.is_empty() {
        let mut winner = 0;
        let mut winner_mmr = f64::NEG_INFINITY;

        for (idx, &(_, text, base_score)) in pool.iter().enumerate() {
            let tokens: HashSet<&str> = text.split_whitespace().collect();

            let mmr = if tokens.is_empty() {
                // Nothing to compare against: rank purely by base score.
                base_score
            } else {
                let redundancy = chosen
                    .iter()
                    .map(|&(_, picked, _)| jaccard(&tokens, picked))
                    .fold(0.0_f64, f64::max);
                base_score - lambda * redundancy
            };

            if mmr > winner_mmr {
                winner_mmr = mmr;
                winner = idx;
            }
        }

        chosen.push(pool.remove(winner));
    }

    chosen
}

/// Jaccard similarity between a prepared token set and a raw line's tokens.
fn jaccard(a: &HashSet<&str>, other_line: &str) -> f64 {
    let b: HashSet<&str> = other_line.split_whitespace().collect();
    if b.is_empty() {
        return 0.0;
    }
    let inter = a.intersection(&b).count();
    let union = a.union(&b).count();
    if union == 0 {
        0.0
    } else {
        inter as f64 / union as f64
    }
}
678
/// Heuristic: does this (trimmed) line raise, propagate, or handle an error?
/// Covers Rust (`Err`, `bail!`, `panic!`, `?;`, `map_err`, `unwrap`/`expect`),
/// Python (`raise`, `except`), and JS/Java-style (`throw`, `try`, `catch`).
fn is_error_handling(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "return Err(",
        "Err(",
        "bail!(",
        "anyhow::bail!",
        "raise ",
        "throw ",
        "catch ",
        "except ",
        "try ",
        "panic!(",
    ];
    const FRAGMENTS: &[&str] = &[".map_err(", "unwrap()", "expect(\"", "Error::", "error!"];

    PREFIXES.iter().any(|p| line.starts_with(p))
        || FRAGMENTS.iter().any(|f| line.contains(f))
        // `?;` counts only outside line comments.
        || (line.contains("?;") && !line.starts_with("//"))
}
697
/// Adapts a base keep-ratio to the content's repetitiveness.
///
/// Very short inputs (< 10 lines) are always kept whole (ratio 1.0).
/// Otherwise the base ratio shrinks by up to 30% as the share of repeated
/// tokens grows, clamped to [0.2, 1.0].
pub fn adaptive_ib_budget(content: &str, base_ratio: f64) -> f64 {
    if content.lines().count() < 10 {
        return 1.0;
    }

    let mut seen: HashMap<&str, usize> = HashMap::new();
    let mut total = 0usize;
    for token in content.lines().flat_map(str::split_whitespace) {
        *seen.entry(token).or_insert(0) += 1;
        total += 1;
    }

    if total == 0 {
        return base_ratio;
    }

    let distinct_share = seen.len() as f64 / total as f64;
    let repetition = 1.0 - distinct_share;
    (base_ratio * (1.0 - repetition * 0.3)).clamp(0.2, 1.0)
}
725
/// Heuristic: is this line a definition header (function, type, class, ...)
/// in Rust, TS/JS, Python, or Go?
///
/// The original tested both `line.starts_with(p)` and
/// `line.trim_start().starts_with(p)`; the former is subsumed by the latter
/// (trimming leading whitespace never removes a matching non-whitespace
/// prefix), so only the trimmed check remains.
fn is_definition_line(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "fn ",
        "pub fn ",
        "async fn ",
        "pub async fn ",
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "trait ",
        "pub trait ",
        "impl ",
        "type ",
        "pub type ",
        "const ",
        "pub const ",
        "static ",
        "pub static ",
        "class ",
        "export class ",
        "interface ",
        "export interface ",
        "function ",
        "export function ",
        "async function ",
        "def ",
        "async def ",
        "func ",
    ];
    let body = line.trim_start();
    PREFIXES.iter().any(|p| body.starts_with(p))
}
760
/// Heuristic: is this a control-flow statement (branch, loop, return, ...)?
/// The bare `break`/`continue`/`yield` prefixes intentionally match forms
/// with or without trailing expressions or semicolons.
fn is_control_flow(line: &str) -> bool {
    const KEYWORDS: &[&str] = &[
        "if ", "else ", "match ", "for ", "while ", "return ", "break", "continue", "yield",
        "await ",
    ];
    let stripped = line.trim();
    KEYWORDS.iter().any(|kw| stripped.starts_with(kw))
}
774
/// True for lines that are nothing but a closing delimiter (`}` plus the
/// common `;`/`)` suffixes).
fn is_closing_brace(line: &str) -> bool {
    matches!(line.trim(), "}" | "};" | "})" | "});")
}
779
#[cfg(test)]
mod tests {
    use super::*;

    // Tokens with a path/extension land in `files`; content words in `keywords`.
    #[test]
    fn parse_task_finds_files_and_keywords() {
        let (files, keywords) =
            parse_task_hints("Fix the authentication bug in src/auth.rs and update tests");
        assert!(files.iter().any(|f| f.contains("auth.rs")));
        assert!(keywords
            .iter()
            .any(|k| k.to_lowercase().contains("authentication")));
    }

    // Mode thresholds are 0.8 / 0.5 / 0.2.
    #[test]
    fn recommend_mode_by_score() {
        assert_eq!(recommend_mode(1.0), "full");
        assert_eq!(recommend_mode(0.6), "signatures");
        assert_eq!(recommend_mode(0.3), "map");
        assert_eq!(recommend_mode(0.1), "reference");
    }

    // Definition lines carry high structural weight and must survive the cut;
    // keyword input also triggers the "[task: ...]" summary prefix.
    #[test]
    fn info_bottleneck_preserves_definitions() {
        let content = "fn main() {\n let x = 42;\n // boring comment\n println!(x);\n}\n";
        let result = information_bottleneck_filter(content, &["main".to_string()], 0.6);
        assert!(result.contains("fn main"), "definitions must be preserved");
        assert!(result.contains("[task: main]"), "should have task summary");
    }

    // Error-handling lines get the highest structural weight, so they should
    // outrank filler assignments even at a tight budget.
    #[test]
    fn info_bottleneck_error_handling_priority() {
        let content = "fn validate() {\n let data = parse()?;\n return Err(\"invalid\");\n let x = 1;\n let y = 2;\n}\n";
        let result = information_bottleneck_filter(content, &["validate".to_string()], 0.5);
        assert!(
            result.contains("return Err"),
            "error handling should survive filtering"
        );
    }

    // Output is emitted in score order, so a definition should precede a bare
    // closing brace whenever both are kept.
    #[test]
    fn info_bottleneck_score_sorted() {
        let content = "fn important() {\n let x = 1;\n let y = 2;\n let z = 3;\n}\n}\n";
        let result = information_bottleneck_filter(content, &[], 0.6);
        let lines: Vec<&str> = result.lines().collect();
        let def_pos = lines.iter().position(|l| l.contains("fn important"));
        let brace_pos = lines.iter().position(|l| l.trim() == "}");
        if let (Some(d), Some(b)) = (def_pos, brace_pos) {
            assert!(
                d < b,
                "definitions should appear before closing braces in score-sorted output"
            );
        }
    }

    // The adaptive budget shrinks as token repetition grows.
    #[test]
    fn adaptive_budget_reduces_for_repetitive() {
        let repetitive = "let x = 1;\n".repeat(50);
        let diverse = (0..50)
            .map(|i| format!("let var_{i} = func_{i}(arg_{i});"))
            .collect::<Vec<_>>()
            .join("\n");
        let budget_rep = super::adaptive_ib_budget(&repetitive, 0.7);
        let budget_div = super::adaptive_ib_budget(&diverse, 0.7);
        assert!(
            budget_rep < budget_div,
            "repetitive content should get lower budget"
        );
    }
}